clonal_sequences_in_paired_samples: RScript.r comparison

comparison RScript.r @ 3:f9316f7676cc draft

Uploaded

author	davidvanzessen
date	Tue, 26 Aug 2014 09:53:22 -0400
parents	8d562506f4f9
children	f11df36f43bb

comparison

equal deleted inserted replaced

-:8d562506f4f9
+:f9316f7676cc
 args <- commandArgs(trailingOnly = TRUE)
 inFile = args[1]
 outDir = args[2]
+logfile = args[3]
+min_freq = as.numeric(args[4])
+min_cells = as.numeric(args[5])
+cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
 require(ggplot2)
 require(reshape2)
 require(data.table)
 require(grid)
 #require(xtable)
+cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
 dat = read.csv(inFile, sep="\t")
 #dat = data.frame(fread(inFile)) #faster but with a dep
 setwd(outDir)
+cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
+cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
 dat$Frequency = ((10^dat$Log10_Frequency)*100)
+dat = dat[dat$Frequency >= min_freq,]
+cat("<tr><td>Normalizing cell count to 1.000.000</td></tr>", file=logfile, append=T)
 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2)
+dat = dat[dat$normalized_read_count >= min_cells,]
 dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence)
+cat("<tr><td>Removing duplicates</td></tr>", file=logfile, append=T)
 dat = dat[!duplicated(dat$paste),]
 patients = split(dat, dat$Patient, drop=T)
 intervalReads = rev(c(0,2,10,100,1000,10000))
 intervalFreq = rev(c(0,0.01,0.1,0.5,1,5))
 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
 if(on == "Frequency"){
 onShort = "freq"
 }
 splt = split(x, x$Sample, drop=T)
 if(length(splt) == 1){
-print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample, skipping"))
+print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
-return()
+splt[[2]] = data.frame("Patient" = 'NA', "Receptor" = 'NA', "Sample" = 'NA', "Cell_Count" = 100, "Clone_Molecule_Count_From_Spikes" = 10, "Log10_Frequency" = 1, "Total_Read_Count" = 100, "dsMol_per_1e6_cells" = 100, "J_Segment_Major_Gene" = 'NA', "V_Segment_Major_Gene" = 'NA', "Clone_Sequence" = 'NA', "CDR3_Sense_Sequence" = 'NA', "Related_to_leukemia_clone" = FALSE, "Frequency"= 0, "normalized_read_count" = 0, "paste" = 'a')
 }
 patient1 = splt[[1]]
 patient2 = splt[[2]]
 threshholdIndex = which(colnames(product) == "interval")
 sampleIndex = which(colnames(x) == "Sample")
 patientIndex = which(colnames(x) == "Patient")
 oneSample = paste(patient1[1,sampleIndex], sep="")
 twoSample = paste(patient2[1,sampleIndex], sep="")
 patient = paste(x[1,patientIndex])
-#print(c(length(grep(".*_Right$", twoSample)) == 1, "Right"))
-#print(c(length(grep(".*_Dx_BM$", twoSample)) == 1, "Dx_BM"))
-#print(c(length(grep(".*_Dx$", twoSample)) == 1, "Dx"))
 switched = F
 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
 tmp = twoSample
 twoSample = oneSample
 oneSample = tmp
 switched = T
 }
 if(appendtxt){
 cat(paste(patient, oneSample, twoSample, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
 }
+cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
 patientMerge = merge(patient1, patient2, by="Clone_Sequence")
 res1 = vector()
 res2 = vector()
 resBoth = vector()
 read1Count = vector()
 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
 print(plt)
 dev.off()
 }
+cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
 interval = intervalFreq
 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
 product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6))
 #patientFrequencyCount(patient1)
 #lapply(patients[c(5,6,10)], FUN=patientFrequencyCount)
 #lapply(patients[c(5,6,7,8,13)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
 #lapply(patients[c(6,7,8)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
 #lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
+cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
 interval = intervalReads
 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
 product = data.frame("Titles"=rep(Titles, each=6), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=6), "J_Segments"=rep(J_Segments, each=6))
 #patientResult = patientReadCount(patient1)
 #lapply(patients[c(5,6,10)], FUN=patientReadCount)
 #lapply(patients[c(5,6,7,8,13)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
 #lapply(patients[c(6)], FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
+cat("</table></html>", file=logfile, append=T)

Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples

comparison RScript.r @ 3:f9316f7676cc draft