Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 48:1b5b862b055b draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 28 Sep 2015 08:08:33 -0400 |
parents | 2cf89b865202 |
children | 7658e9f3d416 |
comparison
equal
deleted
inserted
replaced
47:2cf89b865202 | 48:1b5b862b055b |
---|---|
157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] | 157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] |
158 | 158 |
159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) | 159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) |
160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) |
161 | 161 |
162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | 162 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) |
163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | 163 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) |
164 | |
165 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | |
166 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | |
164 | 167 |
165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) | 168 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) |
166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | 169 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] |
167 | 170 |
168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 171 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
179 | 182 |
180 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 | 183 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 |
181 | 184 |
182 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample | 185 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample |
183 | 186 |
187 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | |
188 | |
184 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter | 189 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter |
185 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & sample.filter | 190 match.filter = merge.filter & sequence.filter & sample.filter |
186 | 191 |
187 if(sum(match.filter) == 1){ | 192 if(sum(match.filter) == 1){ |
188 second.match = which(match.filter)[1] | 193 second.match = which(match.filter)[1] |
189 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] | 194 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] |
190 first.sample = patient.fuzzy[1,"Sample"] | 195 first.sample = patient.fuzzy[1,"Sample"] |
191 second.sample = patient.fuzzy[second.match,"Sample"] | 196 second.sample = patient.fuzzy[second.match,"Sample"] |
192 | 197 |
193 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){ | 198 first.match.row = patient.fuzzy[1,] |
194 first.match.row = patient.fuzzy[1,] | 199 second.match.row = patient.fuzzy[second.match,] |
195 second.match.row = patient.fuzzy[second.match,] | 200 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) |
196 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) | 201 patientMerge.new.row = data.frame(merge=first.clone.sequence, |
197 patientMerge.new.row = data.frame(merge=first.clone.sequence, | 202 min_cell_paste.x=first.match.row[1,"min_cell_paste"], |
198 min_cell_paste.x=first.match.row[1,"min_cell_paste"], | 203 Patient.x=first.match.row[1,"Patient"], |
199 Patient.x=first.match.row[1,"Patient"], | 204 Receptor.x=first.match.row[1,"Receptor"], |
200 Receptor.x=first.match.row[1,"Receptor"], | 205 Sample.x=first.match.row[1,"Sample"], |
201 Sample.x=first.match.row[1,"Sample"], | 206 Cell_Count.x=first.match.row[1,"Cell_Count"], |
202 Cell_Count.x=first.match.row[1,"Cell_Count"], | 207 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], |
203 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], | 208 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], |
204 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], | 209 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], |
205 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], | 210 dsPerM.x=first.match.row[1,"dsPerM"], |
206 dsPerM.x=first.match.row[1,"dsPerM"], | 211 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], |
207 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], | 212 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], |
208 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], | 213 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], |
209 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], | 214 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], |
210 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], | 215 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], |
211 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], | 216 Frequency.x=first.match.row[1,"Frequency"], |
212 Frequency.x=first.match.row[1,"Frequency"], | 217 locus_V.x=first.match.row[1,"locus_V"], |
213 locus_V.x=first.match.row[1,"locus_V"], | 218 locus_J.x=first.match.row[1,"locus_J"], |
214 locus_J.x=first.match.row[1,"locus_J"], | 219 min_cell_count.x=first.match.row[1,"min_cell_count"], |
215 min_cell_count.x=first.match.row[1,"min_cell_count"], | 220 normalized_read_count.x=first.match.row[1,"normalized_read_count"], |
216 normalized_read_count.x=first.match.row[1,"normalized_read_count"], | 221 paste.x=first.match.row[1,"paste"], |
217 paste.x=first.match.row[1,"paste"], | 222 min_cell_paste.y=second.match.row[1,"min_cell_paste"], |
218 min_cell_paste.y=second.match.row[1,"min_cell_paste"], | 223 Patient.y=second.match.row[1,"Patient"], |
219 Patient.y=second.match.row[1,"Patient"], | 224 Receptor.y=second.match.row[1,"Receptor"], |
220 Receptor.y=second.match.row[1,"Receptor"], | 225 Sample.y=second.match.row[1,"Sample"], |
221 Sample.y=second.match.row[1,"Sample"], | 226 Cell_Count.y=second.match.row[1,"Cell_Count"], |
222 Cell_Count.y=second.match.row[1,"Cell_Count"], | 227 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], |
223 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], | 228 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], |
224 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], | 229 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], |
225 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], | 230 dsPerM.y=second.match.row[1,"dsPerM"], |
226 dsPerM.y=second.match.row[1,"dsPerM"], | 231 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], |
227 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], | 232 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], |
228 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], | 233 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], |
229 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], | 234 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], |
230 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], | 235 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], |
231 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], | 236 Frequency.y=second.match.row[1,"Frequency"], |
232 Frequency.y=second.match.row[1,"Frequency"], | 237 locus_V.y=second.match.row[1,"locus_V"], |
233 locus_V.y=second.match.row[1,"locus_V"], | 238 locus_J.y=second.match.row[1,"locus_J"], |
234 locus_J.y=second.match.row[1,"locus_J"], | 239 min_cell_count.y=second.match.row[1,"min_cell_count"], |
235 min_cell_count.y=second.match.row[1,"min_cell_count"], | 240 normalized_read_count.y=second.match.row[1,"normalized_read_count"], |
236 normalized_read_count.y=second.match.row[1,"normalized_read_count"], | 241 paste.y=first.match.row[1,"paste"]) |
237 paste.y=first.match.row[1,"paste"]) | 242 |
238 | 243 |
239 | 244 patientMerge = rbind(patientMerge, patientMerge.new.row) |
240 patientMerge = rbind(patientMerge, patientMerge.new.row) | 245 patient.fuzzy = patient.fuzzy[-match.filter,] |
241 patient.fuzzy = patient.fuzzy[-match.filter,] | 246 |
242 | 247 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] |
243 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | 248 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] |
244 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | 249 |
245 | 250 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] |
246 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] | |
247 | |
248 } else { | |
249 patient.fuzzy = patient.fuzzy[-1,] | |
250 } | |
251 | 251 |
252 } else if (sum(match.filter) > 1){ | 252 } else if (sum(match.filter) > 1){ |
253 cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T) | 253 cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T) |
254 patient.fuzzy = patient.fuzzy[-1,] | 254 patient.fuzzy = patient.fuzzy[-1,] |
255 } else { | 255 } else { |