# HG changeset patch # User davidvanzessen # Date 1443442113 14400 # Node ID 1b5b862b055b9042b9c4dc233def5a6b73582f71 # Parent 2cf89b8652024c2359a14eea82602034a1c262b1 Uploaded diff -r 2cf89b865202 -r 1b5b862b055b RScript.r --- a/RScript.r Thu Sep 17 11:01:20 2015 -0400 +++ b/RScript.r Mon Sep 28 08:08:33 2015 -0400 @@ -159,8 +159,11 @@ #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) - patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) - patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) + #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) + #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) + + patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) + patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] @@ -181,8 +184,10 @@ sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample + sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) + #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter - match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & sample.filter + match.filter = merge.filter & sequence.filter & sample.filter if(sum(match.filter) == 1){ second.match = which(match.filter)[1] @@ -190,64 +195,59 @@ first.sample = patient.fuzzy[1,"Sample"] second.sample = patient.fuzzy[second.match,"Sample"] - if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){ - first.match.row = patient.fuzzy[1,] - second.match.row = patient.fuzzy[second.match,] - print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) - patientMerge.new.row = data.frame(merge=first.clone.sequence, - min_cell_paste.x=first.match.row[1,"min_cell_paste"], - Patient.x=first.match.row[1,"Patient"], - Receptor.x=first.match.row[1,"Receptor"], - Sample.x=first.match.row[1,"Sample"], - Cell_Count.x=first.match.row[1,"Cell_Count"], - Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], - Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], - Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], - dsPerM.x=first.match.row[1,"dsPerM"], - J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], - V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], - Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], - CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], - Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], - Frequency.x=first.match.row[1,"Frequency"], - locus_V.x=first.match.row[1,"locus_V"], - locus_J.x=first.match.row[1,"locus_J"], - min_cell_count.x=first.match.row[1,"min_cell_count"], - normalized_read_count.x=first.match.row[1,"normalized_read_count"], - paste.x=first.match.row[1,"paste"], - min_cell_paste.y=second.match.row[1,"min_cell_paste"], - Patient.y=second.match.row[1,"Patient"], - Receptor.y=second.match.row[1,"Receptor"], - Sample.y=second.match.row[1,"Sample"], - Cell_Count.y=second.match.row[1,"Cell_Count"], - Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], - Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], - Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], - dsPerM.y=second.match.row[1,"dsPerM"], - J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], - V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], - Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], - CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], - Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], - Frequency.y=second.match.row[1,"Frequency"], - locus_V.y=second.match.row[1,"locus_V"], - locus_J.y=second.match.row[1,"locus_J"], - min_cell_count.y=second.match.row[1,"min_cell_count"], - normalized_read_count.y=second.match.row[1,"normalized_read_count"], - paste.y=first.match.row[1,"paste"]) - - - patientMerge = rbind(patientMerge, patientMerge.new.row) - patient.fuzzy = patient.fuzzy[-match.filter,] - - patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] - patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] - - scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] - - } else { - patient.fuzzy = patient.fuzzy[-1,] - } + first.match.row = patient.fuzzy[1,] + second.match.row = patient.fuzzy[second.match,] + print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) + patientMerge.new.row = data.frame(merge=first.clone.sequence, + min_cell_paste.x=first.match.row[1,"min_cell_paste"], + Patient.x=first.match.row[1,"Patient"], + Receptor.x=first.match.row[1,"Receptor"], + Sample.x=first.match.row[1,"Sample"], + Cell_Count.x=first.match.row[1,"Cell_Count"], + Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], + Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], + Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], + dsPerM.x=first.match.row[1,"dsPerM"], + J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], + V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], + Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], + CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], + Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], + Frequency.x=first.match.row[1,"Frequency"], + locus_V.x=first.match.row[1,"locus_V"], + locus_J.x=first.match.row[1,"locus_J"], + min_cell_count.x=first.match.row[1,"min_cell_count"], + normalized_read_count.x=first.match.row[1,"normalized_read_count"], + paste.x=first.match.row[1,"paste"], + min_cell_paste.y=second.match.row[1,"min_cell_paste"], + Patient.y=second.match.row[1,"Patient"], + Receptor.y=second.match.row[1,"Receptor"], + Sample.y=second.match.row[1,"Sample"], + Cell_Count.y=second.match.row[1,"Cell_Count"], + Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], + Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], + Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], + dsPerM.y=second.match.row[1,"dsPerM"], + J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], + V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], + Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], + CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], + Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], + Frequency.y=second.match.row[1,"Frequency"], + locus_V.y=second.match.row[1,"locus_V"], + locus_J.y=second.match.row[1,"locus_J"], + min_cell_count.y=second.match.row[1,"min_cell_count"], + normalized_read_count.y=second.match.row[1,"normalized_read_count"], + paste.y=first.match.row[1,"paste"]) + + + patientMerge = rbind(patientMerge, patientMerge.new.row) + patient.fuzzy = patient.fuzzy[-match.filter,] + + patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] + patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] + + scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] } else if (sum(match.filter) > 1){ cat(paste("", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "", sep=" "), file=logfile, append=T)