# HG changeset patch # User davidvanzessen # Date 1412086017 14400 # Node ID 58a28427930efae0457cb28558804553df19fe83 # Parent fa240d1c57a99deeadd522c8f9abbb5462c83728 Uploaded diff -r fa240d1c57a9 -r 58a28427930e RScript.r --- a/RScript.r Thu Sep 18 10:28:24 2014 -0400 +++ b/RScript.r Tue Sep 30 10:06:57 2014 -0400 @@ -15,13 +15,18 @@ library(parallel) #require(xtable) cat("Reading input", file=logfile, append=T) -dat = read.csv(inFile, sep="\t") -#dat = data.frame(fread(inFile)) #faster but with a dep +dat = read.table(inFile, header=T, sep="\t", dec=",", fill=T, stringsAsFactors=F) +dat = dat[!is.na(dat$Patient),] + setwd(outDir) cat("Selecting first V/J Genes", file=logfile, append=T) dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) +str(dat) +cat("Deduplication", file=logfile, append=T) +dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) + cat("Calculating Frequency", file=logfile, append=T) dat$Frequency = ((10^dat$Log10_Frequency)*100) @@ -31,10 +36,10 @@ dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2) dat = dat[dat$normalized_read_count >= min_cells,] dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence) -cat("Removing duplicates", file=logfile, append=T) -dat = dat[!duplicated(dat$paste),] +triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),] + patients = split(dat, dat$Patient, drop=T) -intervalReads = rev(c(0,10,25,50,100,1000,10000)) +intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000)) intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") @@ -82,7 +87,11 @@ cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) } cat(paste("", patient, "", sep=""), file=logfile, append=T) - patientMerge = merge(patient1, patient2, by="Clone_Sequence") + + patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) + patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) + + patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") res1 = vector() res2 = vector() resBoth = vector() @@ -90,14 +99,16 @@ read2Count = vector() locussum1 = vector() locussum2 = vector() + + print(patient) #for(iter in 1){ for(iter in 1:length(product[,1])){ threshhold = product[iter,threshholdIndex] V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) - one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) - two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) + one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence)) + two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence)) read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.x)) read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.y)) res1 = append(res1, sum(one)) @@ -108,21 +119,21 @@ #threshhold = 0 if(threshhold != 0){ if(sum(one) > 0){ - dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] - colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") + dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] + colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone") filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } if(sum(two) > 0){ - dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] - colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") + dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] + colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone") filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } } if(sum(both) > 0){ - dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] - colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) + dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] + colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3 Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } @@ -188,7 +199,7 @@ interval = intervalReads intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) -mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") +mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") cat("", file=logfile, append=T) @@ -210,9 +221,13 @@ twoSample = paste(patient2[1,sampleIndex], sep="") threeSample = paste(patient3[1,sampleIndex], sep="") - patientMerge = merge(patient1, patient2, by="Clone_Sequence") - patientMerge = merge(patientMerge, patient3, by="Clone_Sequence") - colnames(patientMerge)[32:length(colnames(patientMerge))] = paste(colnames(patientMerge)[32:length(colnames(patientMerge))], ".z", sep="") + patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) + patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) + patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) + + patientMerge = merge(patient1, patient2, by="merge") + patientMerge = merge(patientMerge, patient3, by="merge") + colnames(patientMerge)[28:length(colnames(patientMerge))] = paste(colnames(patientMerge)[28:length(colnames(patientMerge))], ".z", sep="") res1 = vector() res2 = vector() res3 = vector() @@ -222,16 +237,16 @@ read3Count = vector() if(appendTriplets){ - cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) + cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) } for(iter in 1:length(product[,1])){ threshhold = product[iter,threshholdIndex] V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) - one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence)) - two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence)) - three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence)) + one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence)) + two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence)) + three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence)) read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) @@ -243,27 +258,27 @@ #threshhold = 0 if(threshhold != 0){ if(sum(one) > 0){ - dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] + dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } if(sum(two) > 0){ - dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] + dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } if(sum(three) > 0){ - dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] + dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } } if(sum(all) > 0){ - dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] - colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) + dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] + colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } @@ -310,23 +325,41 @@ dev.off() } + +triplets$uniqueID = "ID" + +triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" +triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" +triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" + +triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" +triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" +triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" + +triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" + +triplets = data.frame(data.table(triplets)[, list(Patient=unique(.SD$uniqueID), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) + +triplets$Frequency = (10^as.numeric(triplets$Log10_Frequency))*100 +triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * 1000000 / 2) + interval = intervalReads intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) -one = dat[dat$Patient == "VanDongen_cALL_14696.1",] -two = dat[dat$Patient == "VanDongen_cALL_14696.2",] -three = dat[dat$Patient == "VanDongen_cALL_14696.3",] +one = triplets[triplets$Sample == "14696_reg_BM",] +two = triplets[triplets$Sample == "24536_reg_BM",] +three = triplets[triplets$Sample == "24062_reg_BM",] tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) -one = dat[dat$Sample == "16278_Left",] -two = dat[dat$Sample == "26402_Left",] -three = dat[dat$Sample == "26759_Left",] +one = triplets[triplets$Sample == "16278_Left",] +two = triplets[triplets$Sample == "26402_Left",] +three = triplets[triplets$Sample == "26759_Left",] tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) -one = dat[dat$Sample == "16278_Right",] -two = dat[dat$Sample == "26402_Right",] -three = dat[dat$Sample == "26759_Right",] +one = triplets[triplets$Sample == "16278_Right",] +two = triplets[triplets$Sample == "26402_Right",] +three = triplets[triplets$Sample == "26759_Right",] tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) @@ -334,20 +367,17 @@ intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) -one = dat[dat$Patient == "VanDongen_cALL_14696.1",] -two = dat[dat$Patient == "VanDongen_cALL_14696.2",] -three = dat[dat$Patient == "VanDongen_cALL_14696.3",] +one = triplets[triplets$Sample == "14696_reg_BM",] +two = triplets[triplets$Sample == "24536_reg_BM",] +three = triplets[triplets$Sample == "24062_reg_BM",] tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) -one = dat[dat$Sample == "16278_Left",] -two = dat[dat$Sample == "26402_Left",] -three = dat[dat$Sample == "26759_Left",] +one = triplets[triplets$Sample == "16278_Left",] +two = triplets[triplets$Sample == "26402_Left",] +three = triplets[triplets$Sample == "26759_Left",] tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) -one = dat[dat$Sample == "16278_Right",] -two = dat[dat$Sample == "26402_Right",] -three = dat[dat$Sample == "26759_Right",] +one = triplets[triplets$Sample == "16278_Right",] +two = triplets[triplets$Sample == "26402_Right",] +three = triplets[triplets$Sample == "26759_Right",] tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) - - - diff -r fa240d1c57a9 -r 58a28427930e wrapper.sh --- a/wrapper.sh Thu Sep 18 10:28:24 2014 -0400 +++ b/wrapper.sh Tue Sep 30 10:06:57 2014 -0400 @@ -29,7 +29,7 @@ do echo "$patient" html="${patient}.html" - echo "$header" > $html + echo "$header" > "$html" if [[ "$type" == *pair* ]] ; then if [[ "$sample1" == *_BM* ]] || [[ "$sample1" == *_PB* ]] ; then pairs_BM_PB+=( "$patient" ) @@ -45,128 +45,128 @@ sample1="$(echo ${sample1} | tr -d '\r' | tr -d '\n')" sample2="$(echo ${sample2} | tr -d '\r' | tr -d '\n')" tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "
" >> "$html" + echo "
" >> "$html" + echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2 do if [ "$locus" != "$oldLocus" ] ; then - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" if [ "$both" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi if [ "$one" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html + echo "" >> "$html" if [ "$two" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" oldLocus="$locus" done < tmp.txt - echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
$locus
$locus$v_segment$j_segment>$cut_off_value$v_segment$j_segment>$cut_off_value$both$both$both$both$one$one$one$one$read_count1$read_count1$two$two$two$two$read_count2$sum${percent}%
$read_count2$sum${percent}%
" >> $html - echo "
" >> $html - echo "" >> $html - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt - echo "
" >> $html - echo "
" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "
" >> "$html" + echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2 do if [ "$locus" != "$oldLocus" ] ; then - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" if [ "$both" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi if [ "$one" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html + echo "" >> "$html" if [ "$two" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" oldLocus="$locus" done < tmp.txt - echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_BothNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Sum number of sequences $patientPercentage of sequences ${patient}_both
$locus
$locus$v_segment$j_segment>$cut_off_value$v_segment$j_segment>$cut_off_value$both$both$both$both$one$one$one$one$read_count1$read_count1$two$two$two$two$read_count2$sum${percent}%
$read_count2$sum${percent}%
" >> $html - echo "
" >> $html - echo "" >> $html - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "" >> "$html" + echo "" >> "$html" done < patients.txt html="index.html" echo "" > $html -echo "" >> $html -echo "" >> $html +echo "
Singles:
" >> "$html" +echo "" >> "$html" for patient in "${singles[@]}" do - echo "" >> $html + echo "" >> "$html" done -echo "" >> $html +echo "" >> "$html" for patient in "${pairs_Left_Right[@]}" do - echo "" >> $html + echo "" >> "$html" done -echo "" >> $html +echo "" >> "$html" for patient in "${pairs_BM_PB[@]}" do - echo "" >> $html + echo "" >> "$html" done -echo "" >> $html +echo "" >> "$html" for patient in "${pairs_R_Dx[@]}" do - echo "" >> $html + echo "" >> "$html" done -echo "" >> $html +echo "" >> "$html" while read sample1 sample2 sample3 do @@ -180,114 +180,114 @@ echo "$header" > $html oldLocus="" tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt - echo "
" >> $html - echo "
" >> $html - echo "
Singles:
$patient
$patient
Pairs (Left & Right):
Pairs (Left & Right):
$patient
$patient
Pairs (BM & PB):
Pairs (BM & PB):
$patient
$patient
Pairs (Dx & R):
Pairs (Dx & R):
$patient
$patient
Triplets:
Triplets:
" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "
" >> "$html" + echo "
" >> "$html" + echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_AllNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Number of sequences_$sample3Read Count $sample3
" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" while read locus j_segment v_segment cut_off_value all one read_count1 two read_count2 three read_count3 do if [ "$locus" != "$oldLocus" ] ; then - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" if [ "$all" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi if [ "$one" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html + echo "" >> "$html" if [ "$two" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html + echo "" >> "$html" if [ "$three" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" oldLocus="$locus" done < tmp.txt - echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_AllNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Number of sequences_$sample3Read Count $sample3
$locus
$locus$v_segment$j_segment>$cut_off_value$v_segment$j_segment>$cut_off_value$all$all$all$all$one$one$one$one$read_count1$read_count1$two$two$two$two$read_count2$read_count2$three$three$three$three$read_count3
$read_count3
" >> $html - echo "
" >> $html - echo "" >> $html - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html + echo "
" >> "$html" + echo "
" >> "$html" + echo "" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt - echo "
" >> $html - echo "
" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "
" >> "$html" + echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_AllNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Number of sequences_$sample3Read Count $sample3
" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" while read locus j_segment v_segment cut_off_value all one read_count1 two read_count2 three read_count3 do if [ "$locus" != "$oldLocus" ] ; then - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" if [ "$all" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi if [ "$one" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html + echo "" >> "$html" if [ "$two" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html + echo "" >> "$html" if [ "$three" != "0" ] && [ "$cut_off_value" != "0" ] ; then - echo "" >> $html + echo "" >> "$html" else - echo "" >> $html + echo "" >> "$html" fi - echo "" >> $html - echo "" >> $html + echo "" >> "$html" + echo "" >> "$html" oldLocus="$locus" done < tmp.txt - echo "
Ig/TCR gene rearrangement typeProximal gene segmentDistal gene segmentCut off valueNumber of sequences ${patient}_AllNumber of sequences_$sample1Read Count $sample1Number of sequences_$sample2Read Count $sample2Number of sequences_$sample3Read Count $sample3
$locus
$locus$v_segment$j_segment>$cut_off_value$v_segment$j_segment>$cut_off_value$all$all$all$all$one$one$one$one$read_count1$read_count1$two$two$two$two$read_count2$read_count2$three$three$three$three$read_count3
$read_count3
" >> $html - echo "
" >> $html - echo "" >> $html - echo "
" >> $html - echo "
" >> $html - echo "
" >> $html - echo "" >> $html - echo "" >> $html - echo "" >> $html + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "
" >> "$html" + echo "" >> "$html" + echo "" >> "$html" + echo "" >> "$html" done < triplets.txt rm tmp.txt html="index.html" -echo "" >> $html -echo "" >> $html +echo "" >> "$html" +echo "" >> "$html"