# HG changeset patch # User davidvanzessen # Date 1410773836 14400 # Node ID f11df36f43bb1537d5d618570c40e3f9ec927168 # Parent f9316f7676cc273987a78f79829ddda587b040ae Uploaded diff -r f9316f7676cc -r f11df36f43bb RScript.r --- a/RScript.r Tue Aug 26 09:53:22 2014 -0400 +++ b/RScript.r Mon Sep 15 05:37:16 2014 -0400 @@ -8,10 +8,11 @@ cat("", file=logfile, append=F) -require(ggplot2) -require(reshape2) -require(data.table) -require(grid) +library(ggplot2) +library(reshape2) +library(data.table) +library(grid) +library(parallel) #require(xtable) cat("", file=logfile, append=T) dat = read.csv(inFile, sep="\t") @@ -33,7 +34,9 @@ cat("", file=logfile, append=T) dat = dat[!duplicated(dat$paste),] patients = split(dat, dat$Patient, drop=T) -intervalReads = rev(c(0,2,10,100,1000,10000)) +rm(dat) +patients = patients[1:5] +intervalReads = rev(c(0,10,25,50,100,1000,10000)) intervalFreq = rev(c(0,0.01,0.1,0.5,1,5)) V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") @@ -48,9 +51,11 @@ onShort = "freq" } splt = split(x, x$Sample, drop=T) + type="pair" if(length(splt) == 1){ print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) - splt[[2]] = data.frame("Patient" = 'NA', "Receptor" = 'NA', "Sample" = 'NA', "Cell_Count" = 100, "Clone_Molecule_Count_From_Spikes" = 10, "Log10_Frequency" = 1, "Total_Read_Count" = 100, "dsMol_per_1e6_cells" = 100, "J_Segment_Major_Gene" = 'NA', "V_Segment_Major_Gene" = 'NA', "Clone_Sequence" = 'NA', "CDR3_Sense_Sequence" = 'NA', "Related_to_leukemia_clone" = FALSE, "Frequency"= 0, "normalized_read_count" = 0, "paste" = 'a') + splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) + type="single" } patient1 = splt[[1]] patient2 = splt[[2]] @@ -76,7 +81,7 @@ switched = T } if(appendtxt){ - cat(paste(patient, oneSample, twoSample, sep="\t"), file="patients.txt", append=T, sep="", fill=3) + cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) } cat(paste("", sep=""), file=logfile, append=T) patientMerge = merge(patient1, patient2, by="Clone_Sequence") @@ -107,21 +112,21 @@ #threshhold = 0 if(threshhold != 0){ if(sum(one) > 0){ - dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence")] - colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence") + dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] + colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } if(sum(two) > 0){ - dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence")] - colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence") + dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] + colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } } if(sum(both) > 0){ - dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y")] - colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), "Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample)) + dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] + colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } @@ -179,24 +184,24 @@ interval = intervalFreq intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) -product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6)) +product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) #patientFrequencyCount(patient1) #lapply(patients[c(5,6,10)], FUN=patientFrequencyCount) #lapply(patients[c(5,6,7,8,13)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) #lapply(patients[c(6,7,8)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) #lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) -lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) +mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) cat("", file=logfile, append=T) interval = intervalReads intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) -product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6)) +product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) #patientResult = patientReadCount(patient1) #lapply(patients[c(5,6,10)], FUN=patientReadCount) #lapply(patients[c(5,6,7,8,13)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") #lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") -lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") +mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") cat("
Starting analysis
Reading input
Removing duplicates
", patient, "
Starting Cell Count analysis
", file=logfile, append=T) diff -r f9316f7676cc -r f11df36f43bb wrapper.sh --- a/wrapper.sh Tue Aug 26 09:53:22 2014 -0400 +++ b/wrapper.sh Mon Sep 15 05:37:16 2014 -0400 @@ -21,36 +21,42 @@ cd $outputDir -html="index.html" -echo "Result" > $html -echo "" >> $html -echo "" >> $html -echo "" >> $html -echo "" >> $html -echo "" >> $html -echo "" >> $html -echo "
" >> $html -while read patient sample1 sample2 +header="" +singles=() +pairs_BM_PB=() +pairs_Left_Right=() +pairs_R_Dx=() +while read patient sample1 sample2 type do echo "$patient" + html="${patient}.html" + echo "$header" > $html + if [[ "$type" == *pair* ]] ; then + if [[ "$sample1" == *_BM* ]] || [[ "$sample1" == *_PB* ]] ; then + pairs_BM_PB+=( "$patient" ) + elif [[ "$sample1" == *_Left* ]] || [[ "$sample1" == *_Right* ]] ; then + pairs_Left_Right+=( "$patient" ) + else + pairs_R_Dx+=( "$patient" ) + fi + else + singles+=( "$patient" ) + fi oldLocus="" sample1="$(echo ${sample1} | tr -d '\r' | tr -d '\n')" sample2="$(echo ${sample2} | tr -d '\r' | tr -d '\n')" tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt - echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "" >> $html - echo "" >> $html + echo "" >> $html echo "" >> $html - readsumtable="
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Normalized Read Count $sample1Number of sequences_$sample2Normalized Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
" while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2 do if [ "$locus" != "$oldLocus" ] ; then echo "" >> $html echo "" >> $html - readsumtable="${readsumtable}" else echo "" >> $html fi @@ -81,7 +87,7 @@ done < tmp.txt echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentTotal normalized read count for $sample1Total normalized read count for $sample2
$locus
$locus$v_segment$j_segment$locusreadsum1$locusreadsum2
" >> $html echo "
" >> $html - echo "${readsumtable}
" >> $html + echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "
" >> $html @@ -93,13 +99,11 @@ echo "" >> $html echo "" >> $html echo "" >> $html - readsumtable="
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
" while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2 do if [ "$locus" != "$oldLocus" ] ; then echo "" >> $html echo "" >> $html - readsumtable="${readsumtable}" else echo "" >> $html fi @@ -130,13 +134,42 @@ done < tmp.txt echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentTotal normalized read count for $sample1Total normalized read count for $sample2
$locus
$locus$v_segment$j_segment$locusreadsum1$locusreadsum2
" >> $html echo "
" >> $html - echo "${readsumtable}
" >> $html + echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "
" >> $html echo "" >> $html + echo "" >> $html done < patients.txt -echo "" >> $html rm tmp.txt + +html="index.html" +echo "" > $html +echo "" >> $html +echo "" >> $html +for patient in "${singles[@]}" +do + echo "" >> $html +done +echo "" >> $html +for patient in "${pairs_Left_Right[@]}" +do + echo "" >> $html +done +echo "" >> $html +for patient in "${pairs_BM_PB[@]}" +do + echo "" >> $html +done +echo "" >> $html +for patient in "${pairs_R_Dx[@]}" +do + echo "" >> $html +done +echo "" >> $html + +echo "
Singles:
$patient
Pairs (Left & Right):
$patient
Pairs (BM & PB):
$patient
Pairs (Dx & R):
$patient
Triplets:
" >> $html +echo "" >> $html +