clonal_sequences_in_paired_samples: RScript.r comparison

comparison RScript.r @ 48:1b5b862b055b draft

Uploaded

author	davidvanzessen
date	Mon, 28 Sep 2015 08:08:33 -0400
parents	2cf89b865202
children	7658e9f3d416

comparison

equal deleted inserted replaced

-:2cf89b865202
+:1b5b862b055b
 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
-patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
+#patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
-patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
+#patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
+patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
+patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"])))
 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample
+sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
-match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & sample.filter
+match.filter = merge.filter & sequence.filter & sample.filter
 if(sum(match.filter) == 1){
 second.match = which(match.filter)[1]
 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"]
 first.sample = patient.fuzzy[1,"Sample"]
 second.sample = patient.fuzzy[second.match,"Sample"]
-if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){
+				first.match.row = patient.fuzzy[1,]
-first.match.row = patient.fuzzy[1,]
+				second.match.row = patient.fuzzy[second.match,]
-second.match.row = patient.fuzzy[second.match,]
+				print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence))
-print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence))
+				patientMerge.new.row = data.frame(merge=first.clone.sequence,
-patientMerge.new.row = data.frame(merge=first.clone.sequence,
+																					min_cell_paste.x=first.match.row[1,"min_cell_paste"],
-min_cell_paste.x=first.match.row[1,"min_cell_paste"],
+																					Patient.x=first.match.row[1,"Patient"],
-Patient.x=first.match.row[1,"Patient"],
+																					Receptor.x=first.match.row[1,"Receptor"],
-Receptor.x=first.match.row[1,"Receptor"],
+																					Sample.x=first.match.row[1,"Sample"],
-Sample.x=first.match.row[1,"Sample"],
+																					Cell_Count.x=first.match.row[1,"Cell_Count"],
-Cell_Count.x=first.match.row[1,"Cell_Count"],
+																					Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"],
-Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"],
+																					Log10_Frequency.x=first.match.row[1,"Log10_Frequency"],
-Log10_Frequency.x=first.match.row[1,"Log10_Frequency"],
+																					Total_Read_Count.x=first.match.row[1,"Total_Read_Count"],
-Total_Read_Count.x=first.match.row[1,"Total_Read_Count"],
+																					dsPerM.x=first.match.row[1,"dsPerM"],
-dsPerM.x=first.match.row[1,"dsPerM"],
+																					J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"],
-J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"],
+																					V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"],
-V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"],
+																					Clone_Sequence.x=first.match.row[1,"Clone_Sequence"],
-Clone_Sequence.x=first.match.row[1,"Clone_Sequence"],
+																					CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"],
-CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"],
+																					Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"],
-Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"],
+																					Frequency.x=first.match.row[1,"Frequency"],
-Frequency.x=first.match.row[1,"Frequency"],
+																					locus_V.x=first.match.row[1,"locus_V"],
-locus_V.x=first.match.row[1,"locus_V"],
+																					locus_J.x=first.match.row[1,"locus_J"],
-locus_J.x=first.match.row[1,"locus_J"],
+																					min_cell_count.x=first.match.row[1,"min_cell_count"],
-min_cell_count.x=first.match.row[1,"min_cell_count"],
+																					normalized_read_count.x=first.match.row[1,"normalized_read_count"],
-normalized_read_count.x=first.match.row[1,"normalized_read_count"],
+																					paste.x=first.match.row[1,"paste"],
-paste.x=first.match.row[1,"paste"],
+																					min_cell_paste.y=second.match.row[1,"min_cell_paste"],
-min_cell_paste.y=second.match.row[1,"min_cell_paste"],
+																					Patient.y=second.match.row[1,"Patient"],
-Patient.y=second.match.row[1,"Patient"],
+																					Receptor.y=second.match.row[1,"Receptor"],
-Receptor.y=second.match.row[1,"Receptor"],
+																					Sample.y=second.match.row[1,"Sample"],
-Sample.y=second.match.row[1,"Sample"],
+																					Cell_Count.y=second.match.row[1,"Cell_Count"],
-Cell_Count.y=second.match.row[1,"Cell_Count"],
+																					Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"],
-Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"],
+																					Log10_Frequency.y=second.match.row[1,"Log10_Frequency"],
-Log10_Frequency.y=second.match.row[1,"Log10_Frequency"],
+																					Total_Read_Count.y=second.match.row[1,"Total_Read_Count"],
-Total_Read_Count.y=second.match.row[1,"Total_Read_Count"],
+																					dsPerM.y=second.match.row[1,"dsPerM"],
-dsPerM.y=second.match.row[1,"dsPerM"],
+																					J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"],
-J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"],
+																					V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"],
-V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"],
+																					Clone_Sequence.y=second.match.row[1,"Clone_Sequence"],
-Clone_Sequence.y=second.match.row[1,"Clone_Sequence"],
+																					CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"],
-CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"],
+																					Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"],
-Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"],
+																					Frequency.y=second.match.row[1,"Frequency"],
-Frequency.y=second.match.row[1,"Frequency"],
+																					locus_V.y=second.match.row[1,"locus_V"],
-locus_V.y=second.match.row[1,"locus_V"],
+																					locus_J.y=second.match.row[1,"locus_J"],
-locus_J.y=second.match.row[1,"locus_J"],
+																					min_cell_count.y=second.match.row[1,"min_cell_count"],
-min_cell_count.y=second.match.row[1,"min_cell_count"],
+																					normalized_read_count.y=second.match.row[1,"normalized_read_count"],
-normalized_read_count.y=second.match.row[1,"normalized_read_count"],
+																					paste.y=first.match.row[1,"paste"])
-paste.y=first.match.row[1,"paste"])
+				patientMerge = rbind(patientMerge, patientMerge.new.row)
-patientMerge = rbind(patientMerge, patientMerge.new.row)
+				patient.fuzzy = patient.fuzzy[-match.filter,]
-patient.fuzzy = patient.fuzzy[-match.filter,]
+				patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
-patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
+				patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
-patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
+				scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,]
-scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,]
-} else {
-					patient.fuzzy = patient.fuzzy[-1,]
-}
 } else if (sum(match.filter) > 1){
 				cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T)
 patient.fuzzy = patient.fuzzy[-1,]
 } else {

Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples

comparison RScript.r @ 48:1b5b862b055b draft