# HG changeset patch
# User davidvanzessen
# Date 1410773836 14400
# Node ID f11df36f43bb1537d5d618570c40e3f9ec927168
# Parent f9316f7676cc273987a78f79829ddda587b040ae
Uploaded
diff -r f9316f7676cc -r f11df36f43bb RScript.r
--- a/RScript.r Tue Aug 26 09:53:22 2014 -0400
+++ b/RScript.r Mon Sep 15 05:37:16 2014 -0400
@@ -8,10 +8,11 @@
cat("
Starting analysis ", file=logfile, append=F)
-require(ggplot2)
-require(reshape2)
-require(data.table)
-require(grid)
+library(ggplot2)
+library(reshape2)
+library(data.table)
+library(grid)
+library(parallel)
#require(xtable)
cat("Reading input ", file=logfile, append=T)
dat = read.csv(inFile, sep="\t")
@@ -33,7 +34,9 @@
cat("Removing duplicates ", file=logfile, append=T)
dat = dat[!duplicated(dat$paste),]
patients = split(dat, dat$Patient, drop=T)
-intervalReads = rev(c(0,2,10,100,1000,10000))
+rm(dat)
+patients = patients[1:5]
+intervalReads = rev(c(0,10,25,50,100,1000,10000))
intervalFreq = rev(c(0,0.01,0.1,0.5,1,5))
V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
@@ -48,9 +51,11 @@
onShort = "freq"
}
splt = split(x, x$Sample, drop=T)
+ type="pair"
if(length(splt) == 1){
print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
- splt[[2]] = data.frame("Patient" = 'NA', "Receptor" = 'NA', "Sample" = 'NA', "Cell_Count" = 100, "Clone_Molecule_Count_From_Spikes" = 10, "Log10_Frequency" = 1, "Total_Read_Count" = 100, "dsMol_per_1e6_cells" = 100, "J_Segment_Major_Gene" = 'NA', "V_Segment_Major_Gene" = 'NA', "Clone_Sequence" = 'NA', "CDR3_Sense_Sequence" = 'NA', "Related_to_leukemia_clone" = FALSE, "Frequency"= 0, "normalized_read_count" = 0, "paste" = 'a')
+ splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
+ type="single"
}
patient1 = splt[[1]]
patient2 = splt[[2]]
@@ -76,7 +81,7 @@
switched = T
}
if(appendtxt){
- cat(paste(patient, oneSample, twoSample, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
+ cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
}
cat(paste("", patient, " ", sep=""), file=logfile, append=T)
patientMerge = merge(patient1, patient2, by="Clone_Sequence")
@@ -107,21 +112,21 @@
#threshhold = 0
if(threshhold != 0){
if(sum(one) > 0){
- dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence")]
- colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence")
+ dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
+ colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
}
if(sum(two) > 0){
- dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence")]
- colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence")
+ dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
+ colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
}
}
if(sum(both) > 0){
- dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y")]
- colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), "Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample))
+ dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
+ colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
}
@@ -179,24 +184,24 @@
interval = intervalFreq
intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
-product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6))
+product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
#patientFrequencyCount(patient1)
#lapply(patients[c(5,6,10)], FUN=patientFrequencyCount)
#lapply(patients[c(5,6,7,8,13)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
#lapply(patients[c(6,7,8)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
#lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
-lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
+mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
cat("Starting Cell Count analysis ", file=logfile, append=T)
interval = intervalReads
intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
-product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6))
+product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
#patientResult = patientReadCount(patient1)
#lapply(patients[c(5,6,10)], FUN=patientReadCount)
#lapply(patients[c(5,6,7,8,13)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
#lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
-lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
+mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
cat("
", file=logfile, append=T)
diff -r f9316f7676cc -r f11df36f43bb wrapper.sh
--- a/wrapper.sh Tue Aug 26 09:53:22 2014 -0400
+++ b/wrapper.sh Mon Sep 15 05:37:16 2014 -0400
@@ -21,36 +21,42 @@
cd $outputDir
-html="index.html"
-echo "Result " > $html
-echo "" >> $html
-echo "" >> $html
-echo "" >> $html
-echo "" >> $html
-echo " " >> $html
-echo "
" >> $html
-echo "" >> $html
-while read patient sample1 sample2
+header="
"
+singles=()
+pairs_BM_PB=()
+pairs_Left_Right=()
+pairs_R_Dx=()
+while read patient sample1 sample2 type
do
echo "$patient"
+ html="${patient}.html"
+ echo "$header" > $html
+ if [[ "$type" == *pair* ]] ; then
+ if [[ "$sample1" == *_BM* ]] || [[ "$sample1" == *_PB* ]] ; then
+ pairs_BM_PB+=( "$patient" )
+ elif [[ "$sample1" == *_Left* ]] || [[ "$sample1" == *_Right* ]] ; then
+ pairs_Left_Right+=( "$patient" )
+ else
+ pairs_R_Dx+=( "$patient" )
+ fi
+ else
+ singles+=( "$patient" )
+ fi
oldLocus=""
sample1="$(echo ${sample1} | tr -d '\r' | tr -d '\n')"
sample2="$(echo ${sample2} | tr -d '\r' | tr -d '\n')"
tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt
- echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "" >> $html
- echo "Ig/TCR gene rearrangement type Proximal gene segment Distal gene segment Cut off value Number of sequences ${patient}_Both Number of sequences_$sample1 Normalized Read Count $sample1 Number of sequences_$sample2 Normalized Read Count $sample2 Sum number of sequences $patient Percentage of sequences ${patient}_both " >> $html
+ echo "Ig/TCR gene rearrangement type Proximal gene segment Distal gene segment Cut off value Number of sequences ${patient}_Both Number of sequences_$sample1 Read Count $sample1 Number of sequences_$sample2 Read Count $sample2 Sum number of sequences $patient Percentage of sequences ${patient}_both " >> $html
echo "" >> $html
- readsumtable="Ig/TCR gene rearrangement type Proximal gene segment Distal gene segment Total normalized read count for $sample1 Total normalized read count for $sample2 "
while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2
do
if [ "$locus" != "$oldLocus" ] ; then
echo "" >> $html
echo " $locus " >> $html
- readsumtable="${readsumtable}$locus $v_segment $j_segment $locusreadsum1 $locusreadsum2 "
else
echo " " >> $html
fi
@@ -81,7 +87,7 @@
done < tmp.txt
echo "
" >> $html
echo "
" >> $html
- echo "${readsumtable}
" >> $html
+ echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
@@ -93,13 +99,11 @@
echo "
" >> $html
echo "Ig/TCR gene rearrangement type Proximal gene segment Distal gene segment Cut off value Number of sequences ${patient}_Both Number of sequences_$sample1 Read Count $sample1 Number of sequences_$sample2 Read Count $sample2 Sum number of sequences $patient Percentage of sequences ${patient}_both " >> $html
echo "" >> $html
- readsumtable="Ig/TCR gene rearrangement type Proximal gene segment Distal gene segment Total normalized read count for $sample1 Total normalized read count for $sample2 "
while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2
do
if [ "$locus" != "$oldLocus" ] ; then
echo "" >> $html
echo " $locus " >> $html
- readsumtable="${readsumtable}$locus $v_segment $j_segment $locusreadsum1 $locusreadsum2 "
else
echo " " >> $html
fi
@@ -130,13 +134,42 @@
done < tmp.txt
echo "
" >> $html
echo "
" >> $html
- echo "${readsumtable}
" >> $html
+ echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "
" >> $html
echo "" >> $html
+ echo "" >> $html
done < patients.txt
-echo "" >> $html
rm tmp.txt
+
+html="index.html"
+echo "" > $html
+echo "" >> $html
+echo "Singles: " >> $html
+for patient in "${singles[@]}"
+do
+ echo "$patient " >> $html
+done
+echo "Pairs (Left & Right): " >> $html
+for patient in "${pairs_Left_Right[@]}"
+do
+ echo "$patient " >> $html
+done
+echo "Pairs (BM & PB): " >> $html
+for patient in "${pairs_BM_PB[@]}"
+do
+ echo "$patient " >> $html
+done
+echo "Pairs (Dx & R): " >> $html
+for patient in "${pairs_R_Dx[@]}"
+do
+ echo "$patient " >> $html
+done
+echo "Triplets: " >> $html
+
+echo "
" >> $html
+echo "" >> $html
+