changeset 124:4a93146f87aa draft

Uploaded
author davidvanzessen
date Mon, 22 Aug 2016 09:11:17 -0400
parents 0453ea4d9f14
children e87dcca14bd6
files pattern_plots.r wrapper.sh
diffstat 2 files changed, 139 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/pattern_plots.r	Mon Aug 22 07:00:23 2016 -0400
+++ b/pattern_plots.r	Mon Aug 22 09:11:17 2016 -0400
@@ -1,17 +1,139 @@
 library(ggplot2)
 library(reshape2)
+library(scales)
 
 args <- commandArgs(trailingOnly = TRUE)
 
 input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt"
-plot1.file = args[2] 
-plot2.file = args[3]
-plot3.file = args[4]
+
+plot1.path = args[2]
+plot1.png = paste(plot1.path, ".png", sep="")
+plot1.txt = paste(plot1.path, ".txt", sep="")
 
-dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T)
+plot2.path = args[3]
+plot2.png = paste(plot2.path, ".png", sep="")
+plot2.txt = paste(plot2.path, ".txt", sep="")
+
+plot3.path = args[4]
+plot3.png = paste(plot3.path, ".png", sep="")
+plot3.txt = paste(plot3.path, ".txt", sep="")
+
+dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1)
+
+
 
 classes = c("ca", "ca1", "ca2", "cg", "cg1", "cg2", "cg3", "cg4", "cm")
 xyz = c("x", "y", "z")
+new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep="."))
 
-names(dat) = c("info", paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep="."))
+names(dat) = new.names
+
+dat["RGYW.WRCY",] = colSums(dat[c(13,14),])
+dat["TW.WA",] = colSums(dat[c(15,16),])
+
+data1 = dat[c("RGYW.WRCY", "TW.WA"),]
+
+data1 = data1[,names(data1)[grepl(".z", names(data1))]]
+names(data1) = gsub("\\..*", "", names(data1))
+
+data1 = melt(t(data1))
+
+names(data1) = c("Class", "Type", "value")
+
+write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
+
+p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL))
+png(filename=plot1.png)
+print(p)
+dev.off()
+
+data2 = dat[5:8,]
+
+data2["sum",] = colSums(data2)
+
+data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
+names(data2) = gsub(".x", "", names(data2))
+
+data2["A/T",] = round(colSums(data2[3:4,]) / data2["sum",] * 100, 1)
+data2["A/T",is.nan(unlist(data2["A/T",]))] = 0
+
+data2["G/C transversions",] = round(data2[2,] / data2["sum",] * 100, 1)
+data2["G/C transitions",] = round(data2[1,] / data2["sum",] * 100, 1)
+
+
+data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
+data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
+data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
+data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
+
+data2 = melt(t(data2[6:8,]))
+
+names(data2) = c("Class", "Type", "value")
+
+write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
+
+p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations")
+png(filename=plot2.png)
+print(p)
+dev.off()
+
+data3 = dat[c(5, 6, 8, 17:20),]
+data3 = data3[,names(data3)[grepl("\\.x", names(data3))]]
+names(data3) = gsub(".x", "", names(data3))
 
+data3["G/C transitions",] = round(data3[1,] / (data3[5,] + data3[7,]) * 100, 1)
+
+data3["G/C transversions",] = round(data3[2,] / (data3[5,] + data3[7,]) * 100, 1)
+
+data3["A/T",] = round(data3[3,] / (data3[4,] + data3[6,]) * 100, 1)
+
+data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
+data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0
+
+data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0
+data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0
+
+data3["A/T",is.nan(unlist(data3["A/T",]))] = 0
+data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0
+
+data3 = melt(t(data3[8:10,]))
+names(data3) = c("Class", "Type", "value")
+
+write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
+
+p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL))
+png(filename=plot3.png)
+print(p)
+dev.off()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/wrapper.sh	Mon Aug 22 07:00:23 2016 -0400
+++ b/wrapper.sh	Mon Aug 22 09:11:17 2016 -0400
@@ -237,6 +237,11 @@
 	
 	cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt
 	
+	echo "---------------- pattern_plots.r ----------------"
+	echo "---------------- pattern_plots.r ----------------<br />" >> $log
+
+	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/plot1 $outdir/plot2 $outdir/plot3 2>&1
+	
 	echo "<table class='pure-table pure-table-striped'>" >> $output
 	echo "<thead><tr><th>info</th>" >> $output
 	for gene in ${genes[@]}
@@ -262,6 +267,10 @@
 	#echo "<a href='data_${func}.txt'>Download data</a>" >> $output
 done
 
+echo "<img src='plot1.png' /><br />" >> $output
+echo "<img src='plot2.png' /><br />" >> $output
+echo "<img src='plot3.png' /><br />" >> $output
+
 echo "</div>" >> $output #SHM overview tab end
 
 echo "---------------- images ----------------"
@@ -361,6 +370,9 @@
 echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output
 echo "<tr><td>The complete dataset</td><td><a href='merged.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the first SHM Overview plot</td><td><a href='plot1.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the sexond SHM Overview plot</td><td><a href='plot2.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>" >> $output