# HG changeset patch # User davidvanzessen # Date 1471871477 14400 # Node ID 4a93146f87aa84a0326b4c0166eca76da04a6ef3 # Parent 0453ea4d9f14dcff49372389406534308716eac3 Uploaded diff -r 0453ea4d9f14 -r 4a93146f87aa pattern_plots.r --- a/pattern_plots.r Mon Aug 22 07:00:23 2016 -0400 +++ b/pattern_plots.r Mon Aug 22 09:11:17 2016 -0400 @@ -1,17 +1,139 @@ library(ggplot2) library(reshape2) +library(scales) args <- commandArgs(trailingOnly = TRUE) input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt" -plot1.file = args[2] -plot2.file = args[3] -plot3.file = args[4] + +plot1.path = args[2] +plot1.png = paste(plot1.path, ".png", sep="") +plot1.txt = paste(plot1.path, ".txt", sep="") -dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T) +plot2.path = args[3] +plot2.png = paste(plot2.path, ".png", sep="") +plot2.txt = paste(plot2.path, ".txt", sep="") + +plot3.path = args[4] +plot3.png = paste(plot3.path, ".png", sep="") +plot3.txt = paste(plot3.path, ".txt", sep="") + +dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1) + + classes = c("ca", "ca1", "ca2", "cg", "cg1", "cg2", "cg3", "cg4", "cm") xyz = c("x", "y", "z") +new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) -names(dat) = c("info", paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) +names(dat) = new.names + +dat["RGYW.WRCY",] = colSums(dat[c(13,14),]) +dat["TW.WA",] = colSums(dat[c(15,16),]) + +data1 = dat[c("RGYW.WRCY", "TW.WA"),] + +data1 = data1[,names(data1)[grepl(".z", names(data1))]] +names(data1) = gsub("\\..*", "", names(data1)) + +data1 = melt(t(data1)) + +names(data1) = c("Class", "Type", "value") + +write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) +png(filename=plot1.png) +print(p) +dev.off() + +data2 = dat[5:8,] + +data2["sum",] = colSums(data2) + +data2 = data2[,names(data2)[grepl("\\.x", names(data2))]] +names(data2) = gsub(".x", "", names(data2)) + +data2["A/T",] = round(colSums(data2[3:4,]) / data2["sum",] * 100, 1) +data2["A/T",is.nan(unlist(data2["A/T",]))] = 0 + +data2["G/C transversions",] = round(data2[2,] / data2["sum",] * 100, 1) +data2["G/C transitions",] = round(data2[1,] / data2["sum",] * 100, 1) + + +data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0 +data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0 +data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0 +data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0 + +data2 = melt(t(data2[6:8,])) + +names(data2) = c("Class", "Type", "value") + +write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") +png(filename=plot2.png) +print(p) +dev.off() + +data3 = dat[c(5, 6, 8, 17:20),] +data3 = data3[,names(data3)[grepl("\\.x", names(data3))]] +names(data3) = gsub(".x", "", names(data3)) +data3["G/C transitions",] = round(data3[1,] / (data3[5,] + data3[7,]) * 100, 1) + +data3["G/C transversions",] = round(data3[2,] / (data3[5,] + data3[7,]) * 100, 1) + +data3["A/T",] = round(data3[3,] / (data3[4,] + data3[6,]) * 100, 1) + +data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0 +data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0 + +data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0 +data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0 + +data3["A/T",is.nan(unlist(data3["A/T",]))] = 0 +data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0 + +data3 = melt(t(data3[8:10,])) +names(data3) = c("Class", "Type", "value") + +write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) +png(filename=plot3.png) +print(p) +dev.off() + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 0453ea4d9f14 -r 4a93146f87aa wrapper.sh --- a/wrapper.sh Mon Aug 22 07:00:23 2016 -0400 +++ b/wrapper.sh Mon Aug 22 09:11:17 2016 -0400 @@ -237,6 +237,11 @@ cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt + echo "---------------- pattern_plots.r ----------------" + echo "---------------- pattern_plots.r ----------------
" >> $log + + Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/plot1 $outdir/plot2 $outdir/plot3 2>&1 + echo "" >> $output echo "" >> $output for gene in ${genes[@]} @@ -262,6 +267,10 @@ #echo "Download data" >> $output done +echo "
" >> $output +echo "
" >> $output +echo "
" >> $output + echo "" >> $output #SHM overview tab end echo "---------------- images ----------------" @@ -361,6 +370,9 @@ echo "" >> $output echo "" >> $output echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output echo "" >> $output echo "" >> $output echo "" >> $output
info
infolink
The complete datasetDownload
The SHM Overview table as a datasetDownload
The data used to generate the first SHM Overview plotDownload
The data used to generate the sexond SHM Overview plotDownload
The data used to generate the third SHM Overview plotDownload
The alignment info on the unmatched sequencesDownload
Motif data per sequence IDDownload
Mutation data per sequence IDDownload