# HG changeset patch # User davidvanzessen # Date 1409061202 14400 # Node ID f9316f7676cc273987a78f79829ddda587b040ae # Parent 8d562506f4f9c01c144bf61501e724ecd2ce572c Uploaded diff -r 8d562506f4f9 -r f9316f7676cc CLL.xml --- a/CLL.xml Mon Aug 25 03:38:07 2014 -0400 +++ b/CLL.xml Tue Aug 26 09:53:22 2014 -0400 @@ -1,10 +1,12 @@ Comparison of clonal sequences in paired samples - wrapper.sh $in_file $out_file $out_file.files_path + wrapper.sh $in_file $out_file $out_file.files_path $min_freq $min_cells - + + + diff -r 8d562506f4f9 -r f9316f7676cc RScript.r --- a/RScript.r Mon Aug 25 03:38:07 2014 -0400 +++ b/RScript.r Tue Aug 26 09:53:22 2014 -0400 @@ -2,22 +2,35 @@ inFile = args[1] outDir = args[2] +logfile = args[3] +min_freq = as.numeric(args[4]) +min_cells = as.numeric(args[5]) + +cat("", file=logfile, append=F) require(ggplot2) require(reshape2) require(data.table) require(grid) #require(xtable) +cat("", file=logfile, append=T) dat = read.csv(inFile, sep="\t") #dat = data.frame(fread(inFile)) #faster but with a dep setwd(outDir) +cat("", file=logfile, append=T) dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) +cat("", file=logfile, append=T) dat$Frequency = ((10^dat$Log10_Frequency)*100) +dat = dat[dat$Frequency >= min_freq,] + +cat("", file=logfile, append=T) dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2) +dat = dat[dat$normalized_read_count >= min_cells,] dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence) +cat("", file=logfile, append=T) dat = dat[!duplicated(dat$paste),] patients = split(dat, dat$Patient, drop=T) intervalReads = rev(c(0,2,10,100,1000,10000)) @@ -36,8 +49,8 @@ } splt = split(x, x$Sample, drop=T) if(length(splt) == 1){ - print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample, skipping")) - return() + print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) + splt[[2]] = data.frame("Patient" = 'NA', "Receptor" = 'NA', "Sample" = 'NA', "Cell_Count" = 100, "Clone_Molecule_Count_From_Spikes" = 10, "Log10_Frequency" = 1, "Total_Read_Count" = 100, "dsMol_per_1e6_cells" = 100, "J_Segment_Major_Gene" = 'NA', "V_Segment_Major_Gene" = 'NA', "Clone_Sequence" = 'NA', "CDR3_Sense_Sequence" = 'NA', "Related_to_leukemia_clone" = FALSE, "Frequency"= 0, "normalized_read_count" = 0, "paste" = 'a') } patient1 = splt[[1]] patient2 = splt[[2]] @@ -51,10 +64,7 @@ oneSample = paste(patient1[1,sampleIndex], sep="") twoSample = paste(patient2[1,sampleIndex], sep="") patient = paste(x[1,patientIndex]) - - #print(c(length(grep(".*_Right$", twoSample)) == 1, "Right")) - #print(c(length(grep(".*_Dx_BM$", twoSample)) == 1, "Dx_BM")) - #print(c(length(grep(".*_Dx$", twoSample)) == 1, "Dx")) + switched = F if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){ tmp = twoSample @@ -68,6 +78,7 @@ if(appendtxt){ cat(paste(patient, oneSample, twoSample, sep="\t"), file="patients.txt", append=T, sep="", fill=3) } + cat(paste("", sep=""), file=logfile, append=T) patientMerge = merge(patient1, patient2, by="Clone_Sequence") res1 = vector() res2 = vector() @@ -164,6 +175,8 @@ dev.off() } +cat("", file=logfile, append=T) + interval = intervalFreq intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6)) @@ -174,6 +187,8 @@ #lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) +cat("", file=logfile, append=T) + interval = intervalReads intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6)) @@ -183,3 +198,5 @@ #lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") +cat("

Starting analysis

Reading input

Selecting first V/J Genes

Calculating Frequency

Normalizing cell count to 1.000.000

Removing duplicates

", patient, "

Starting Frequency analysis

Starting Cell Count analysis

", file=logfile, append=T) + diff -r 8d562506f4f9 -r f9316f7676cc wrapper.sh --- a/wrapper.sh Mon Aug 25 03:38:07 2014 -0400 +++ b/wrapper.sh Tue Aug 26 09:53:22 2014 -0400 @@ -3,11 +3,15 @@ inputFile=$1 outputFile=$2 outputDir=$3 +min_freq=$4 +min_cells=$5 dir="$(cd "$(dirname "$0")" && pwd)" mkdir $outputDir -Rscript --verbose $dir/RScript.r $inputFile $outputDir 2>&1 +echo "testtestsetset" > $outputFile + +Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputFile $min_freq $min_cells 2>&1 cp $dir/jquery-1.11.0.min.js $outputDir cp $dir/script.js $outputDir cp $dir/style.css $outputDir