Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 48:1b5b862b055b draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Mon, 28 Sep 2015 08:08:33 -0400 |
| parents | 2cf89b865202 |
| children | 7658e9f3d416 |
comparison
equal
deleted
inserted
replaced
| 47:2cf89b865202 | 48:1b5b862b055b |
|---|---|
| 157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] | 157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] |
| 158 | 158 |
| 159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) | 159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) |
| 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) |
| 161 | 161 |
| 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | 162 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) |
| 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | 163 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) |
| 164 | |
| 165 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | |
| 166 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | |
| 164 | 167 |
| 165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) | 168 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) |
| 166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | 169 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] |
| 167 | 170 |
| 168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 171 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
| 179 | 182 |
| 180 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 | 183 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 |
| 181 | 184 |
| 182 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample | 185 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample |
| 183 | 186 |
| 187 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | |
| 188 | |
| 184 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter | 189 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter |
| 185 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & sample.filter | 190 match.filter = merge.filter & sequence.filter & sample.filter |
| 186 | 191 |
| 187 if(sum(match.filter) == 1){ | 192 if(sum(match.filter) == 1){ |
| 188 second.match = which(match.filter)[1] | 193 second.match = which(match.filter)[1] |
| 189 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] | 194 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] |
| 190 first.sample = patient.fuzzy[1,"Sample"] | 195 first.sample = patient.fuzzy[1,"Sample"] |
| 191 second.sample = patient.fuzzy[second.match,"Sample"] | 196 second.sample = patient.fuzzy[second.match,"Sample"] |
| 192 | 197 |
| 193 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){ | 198 first.match.row = patient.fuzzy[1,] |
| 194 first.match.row = patient.fuzzy[1,] | 199 second.match.row = patient.fuzzy[second.match,] |
| 195 second.match.row = patient.fuzzy[second.match,] | 200 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) |
| 196 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) | 201 patientMerge.new.row = data.frame(merge=first.clone.sequence, |
| 197 patientMerge.new.row = data.frame(merge=first.clone.sequence, | 202 min_cell_paste.x=first.match.row[1,"min_cell_paste"], |
| 198 min_cell_paste.x=first.match.row[1,"min_cell_paste"], | 203 Patient.x=first.match.row[1,"Patient"], |
| 199 Patient.x=first.match.row[1,"Patient"], | 204 Receptor.x=first.match.row[1,"Receptor"], |
| 200 Receptor.x=first.match.row[1,"Receptor"], | 205 Sample.x=first.match.row[1,"Sample"], |
| 201 Sample.x=first.match.row[1,"Sample"], | 206 Cell_Count.x=first.match.row[1,"Cell_Count"], |
| 202 Cell_Count.x=first.match.row[1,"Cell_Count"], | 207 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], |
| 203 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], | 208 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], |
| 204 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], | 209 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], |
| 205 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], | 210 dsPerM.x=first.match.row[1,"dsPerM"], |
| 206 dsPerM.x=first.match.row[1,"dsPerM"], | 211 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], |
| 207 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], | 212 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], |
| 208 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], | 213 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], |
| 209 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], | 214 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], |
| 210 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], | 215 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], |
| 211 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], | 216 Frequency.x=first.match.row[1,"Frequency"], |
| 212 Frequency.x=first.match.row[1,"Frequency"], | 217 locus_V.x=first.match.row[1,"locus_V"], |
| 213 locus_V.x=first.match.row[1,"locus_V"], | 218 locus_J.x=first.match.row[1,"locus_J"], |
| 214 locus_J.x=first.match.row[1,"locus_J"], | 219 min_cell_count.x=first.match.row[1,"min_cell_count"], |
| 215 min_cell_count.x=first.match.row[1,"min_cell_count"], | 220 normalized_read_count.x=first.match.row[1,"normalized_read_count"], |
| 216 normalized_read_count.x=first.match.row[1,"normalized_read_count"], | 221 paste.x=first.match.row[1,"paste"], |
| 217 paste.x=first.match.row[1,"paste"], | 222 min_cell_paste.y=second.match.row[1,"min_cell_paste"], |
| 218 min_cell_paste.y=second.match.row[1,"min_cell_paste"], | 223 Patient.y=second.match.row[1,"Patient"], |
| 219 Patient.y=second.match.row[1,"Patient"], | 224 Receptor.y=second.match.row[1,"Receptor"], |
| 220 Receptor.y=second.match.row[1,"Receptor"], | 225 Sample.y=second.match.row[1,"Sample"], |
| 221 Sample.y=second.match.row[1,"Sample"], | 226 Cell_Count.y=second.match.row[1,"Cell_Count"], |
| 222 Cell_Count.y=second.match.row[1,"Cell_Count"], | 227 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], |
| 223 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], | 228 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], |
| 224 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], | 229 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], |
| 225 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], | 230 dsPerM.y=second.match.row[1,"dsPerM"], |
| 226 dsPerM.y=second.match.row[1,"dsPerM"], | 231 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], |
| 227 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], | 232 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], |
| 228 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], | 233 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], |
| 229 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], | 234 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], |
| 230 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], | 235 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], |
| 231 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], | 236 Frequency.y=second.match.row[1,"Frequency"], |
| 232 Frequency.y=second.match.row[1,"Frequency"], | 237 locus_V.y=second.match.row[1,"locus_V"], |
| 233 locus_V.y=second.match.row[1,"locus_V"], | 238 locus_J.y=second.match.row[1,"locus_J"], |
| 234 locus_J.y=second.match.row[1,"locus_J"], | 239 min_cell_count.y=second.match.row[1,"min_cell_count"], |
| 235 min_cell_count.y=second.match.row[1,"min_cell_count"], | 240 normalized_read_count.y=second.match.row[1,"normalized_read_count"], |
| 236 normalized_read_count.y=second.match.row[1,"normalized_read_count"], | 241 paste.y=first.match.row[1,"paste"]) |
| 237 paste.y=first.match.row[1,"paste"]) | 242 |
| 238 | 243 |
| 239 | 244 patientMerge = rbind(patientMerge, patientMerge.new.row) |
| 240 patientMerge = rbind(patientMerge, patientMerge.new.row) | 245 patient.fuzzy = patient.fuzzy[-match.filter,] |
| 241 patient.fuzzy = patient.fuzzy[-match.filter,] | 246 |
| 242 | 247 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] |
| 243 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | 248 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] |
| 244 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | 249 |
| 245 | 250 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] |
| 246 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] | |
| 247 | |
| 248 } else { | |
| 249 patient.fuzzy = patient.fuzzy[-1,] | |
| 250 } | |
| 251 | 251 |
| 252 } else if (sum(match.filter) > 1){ | 252 } else if (sum(match.filter) > 1){ |
| 253 cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T) | 253 cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T) |
| 254 patient.fuzzy = patient.fuzzy[-1,] | 254 patient.fuzzy = patient.fuzzy[-1,] |
| 255 } else { | 255 } else { |
