# HG changeset patch
# User davidvanzessen
# Date 1463473058 14400
# Node ID a778156dad3dbc4d014eaa4149f930e3e1d3973b
# Parent a4c2ddeadec0e3264b6afaf59f99fa92b3ce63bb
Uploaded
diff -r a4c2ddeadec0 -r a778156dad3d naive_output.r
--- a/naive_output.r Thu May 12 10:51:55 2016 -0400
+++ b/naive_output.r Tue May 17 04:17:38 2016 -0400
@@ -33,9 +33,6 @@
final.cm$Replicate = 1
}
-
-
-
#print(paste("nrow final:", nrow(final)))
#final2 = final
#final2$Sample = gsub("[0-9]", "", final2$Sample)
@@ -45,3 +42,10 @@
write.table(final.ca, output.file.ca, quote=F, sep="\t", row.names=F, col.names=T)
write.table(final.cg, output.file.cg, quote=F, sep="\t", row.names=F, col.names=T)
write.table(final.cm, output.file.cm, quote=F, sep="\t", row.names=F, col.names=T)
+
+
+
+
+
+
+
diff -r a4c2ddeadec0 -r a778156dad3d sequence_overview.r
--- a/sequence_overview.r Thu May 12 10:51:55 2016 -0400
+++ b/sequence_overview.r Tue May 17 04:17:38 2016 -0400
@@ -5,6 +5,8 @@
gene.matches = args[1]
sequence.file = args[2]
outputdir = args[3]
+NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
+NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
main.html = "index.html"
setwd(outputdir)
@@ -21,11 +23,11 @@
#dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
-dat = data.frame(table(dat$best_match, dat$seq_conc, dat$Functionality))
+dat = data.frame(table(dat$seq_conc))
dat = dat[dat$Freq > 1,]
-names(dat) = c("best_match", "seq_conc", "Functionality", "Freq")
+names(dat) = c("seq_conc", "Freq")
dat$seq_conc = factor(dat$seq_conc)
@@ -100,3 +102,58 @@
}
cat("", file=main.html, append=T)
+
+
+# ACGT overview: per-sequence counts of A/C/G/T over the concatenated IMGT regions, plus a "Sum" row.
+# 'genes' and 'sequences' are defined outside this hunk; both are indexed by a Sequence.ID column here.
+# NOTE(review): the first region used to be CDR2.IMGT a second time, which double-counted CDR2;
+# CDR1.IMGT is assumed to be the intended column - confirm it exists in 'sequences'.
+NToverview = genes[,c("Sequence.ID", "best_match")]
+sequences$seq = paste(sequences$CDR1.IMGT, sequences$CDR2.IMGT, sequences$FR2.IMGT, sequences$FR3.IMGT, sep="_")
+# merge() defaults to an inner join, so only Sequence.IDs present in both tables are kept.
+NToverview = merge(NToverview, sequences[,c("Sequence.ID", "seq")], by="Sequence.ID")
+# Delete every character except the target base (either case); the remaining length is its count.
+NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
+NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
+NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))
+NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq))
+# Totals row: same column names and order as NToverview so rbind() below lines up.
+NTsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T))
+
+print(names(NToverview))
+print(names(NTsum))
+
+NToverview = rbind(NToverview, NTsum)
+
+write.table(NToverview, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
+#write.table(NTsum, NTsum.file, quote=F, sep="\t", row.names=F, col.names=T)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r a4c2ddeadec0 -r a778156dad3d wrapper.sh
--- a/wrapper.sh Thu May 12 10:51:55 2016 -0400
+++ b/wrapper.sh Tue May 17 04:17:38 2016 -0400
@@ -167,6 +167,7 @@
echo "AA mutations location by id
" >> $output
echo "Absant AA locations by id
" >> $output
echo "Sequence Overview
" >> $output
+echo "Base overview
" >> $output
echo "---------------- images ----------------"
@@ -229,7 +230,7 @@
#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
echo "---------------- naive_output.r ----------------"
- Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} 2>&1
+ Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
fi
echo "---------------- sequence_overview.r ----------------"
@@ -238,7 +239,16 @@
Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/sequence_overview 2>&1
+echo "
$ID | $class | $A | $C | $G | $T |