comparison RScript.r @ 48:1b5b862b055b draft

Uploaded
author davidvanzessen
date Mon, 28 Sep 2015 08:08:33 -0400
parents 2cf89b865202
children 7658e9f3d416
comparison
equal deleted inserted replaced
47:2cf89b865202 48:1b5b862b055b
157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] 157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
158 158
159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) 159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
161 161
162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) 162 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) 163 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
164
165 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
166 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
164 167
165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) 168 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"])))
166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] 169 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
167 170
168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] 171 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
179 182
180 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 183 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
181 184
182 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample 185 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample
183 186
187 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
188
184 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter 189 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
185 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & sample.filter 190 match.filter = merge.filter & sequence.filter & sample.filter
186 191
187 if(sum(match.filter) == 1){ 192 if(sum(match.filter) == 1){
188 second.match = which(match.filter)[1] 193 second.match = which(match.filter)[1]
189 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] 194 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"]
190 first.sample = patient.fuzzy[1,"Sample"] 195 first.sample = patient.fuzzy[1,"Sample"]
191 second.sample = patient.fuzzy[second.match,"Sample"] 196 second.sample = patient.fuzzy[second.match,"Sample"]
192 197
193 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){ 198 first.match.row = patient.fuzzy[1,]
194 first.match.row = patient.fuzzy[1,] 199 second.match.row = patient.fuzzy[second.match,]
195 second.match.row = patient.fuzzy[second.match,] 200 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence))
196 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) 201 patientMerge.new.row = data.frame(merge=first.clone.sequence,
197 patientMerge.new.row = data.frame(merge=first.clone.sequence, 202 min_cell_paste.x=first.match.row[1,"min_cell_paste"],
198 min_cell_paste.x=first.match.row[1,"min_cell_paste"], 203 Patient.x=first.match.row[1,"Patient"],
199 Patient.x=first.match.row[1,"Patient"], 204 Receptor.x=first.match.row[1,"Receptor"],
200 Receptor.x=first.match.row[1,"Receptor"], 205 Sample.x=first.match.row[1,"Sample"],
201 Sample.x=first.match.row[1,"Sample"], 206 Cell_Count.x=first.match.row[1,"Cell_Count"],
202 Cell_Count.x=first.match.row[1,"Cell_Count"], 207 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"],
203 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], 208 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"],
204 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], 209 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"],
205 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], 210 dsPerM.x=first.match.row[1,"dsPerM"],
206 dsPerM.x=first.match.row[1,"dsPerM"], 211 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"],
207 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], 212 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"],
208 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], 213 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"],
209 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], 214 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"],
210 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], 215 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"],
211 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], 216 Frequency.x=first.match.row[1,"Frequency"],
212 Frequency.x=first.match.row[1,"Frequency"], 217 locus_V.x=first.match.row[1,"locus_V"],
213 locus_V.x=first.match.row[1,"locus_V"], 218 locus_J.x=first.match.row[1,"locus_J"],
214 locus_J.x=first.match.row[1,"locus_J"], 219 min_cell_count.x=first.match.row[1,"min_cell_count"],
215 min_cell_count.x=first.match.row[1,"min_cell_count"], 220 normalized_read_count.x=first.match.row[1,"normalized_read_count"],
216 normalized_read_count.x=first.match.row[1,"normalized_read_count"], 221 paste.x=first.match.row[1,"paste"],
217 paste.x=first.match.row[1,"paste"], 222 min_cell_paste.y=second.match.row[1,"min_cell_paste"],
218 min_cell_paste.y=second.match.row[1,"min_cell_paste"], 223 Patient.y=second.match.row[1,"Patient"],
219 Patient.y=second.match.row[1,"Patient"], 224 Receptor.y=second.match.row[1,"Receptor"],
220 Receptor.y=second.match.row[1,"Receptor"], 225 Sample.y=second.match.row[1,"Sample"],
221 Sample.y=second.match.row[1,"Sample"], 226 Cell_Count.y=second.match.row[1,"Cell_Count"],
222 Cell_Count.y=second.match.row[1,"Cell_Count"], 227 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"],
223 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], 228 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"],
224 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], 229 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"],
225 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], 230 dsPerM.y=second.match.row[1,"dsPerM"],
226 dsPerM.y=second.match.row[1,"dsPerM"], 231 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"],
227 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], 232 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"],
228 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], 233 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"],
229 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], 234 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"],
230 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], 235 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"],
231 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], 236 Frequency.y=second.match.row[1,"Frequency"],
232 Frequency.y=second.match.row[1,"Frequency"], 237 locus_V.y=second.match.row[1,"locus_V"],
233 locus_V.y=second.match.row[1,"locus_V"], 238 locus_J.y=second.match.row[1,"locus_J"],
234 locus_J.y=second.match.row[1,"locus_J"], 239 min_cell_count.y=second.match.row[1,"min_cell_count"],
235 min_cell_count.y=second.match.row[1,"min_cell_count"], 240 normalized_read_count.y=second.match.row[1,"normalized_read_count"],
236 normalized_read_count.y=second.match.row[1,"normalized_read_count"], 241 paste.y=first.match.row[1,"paste"])
237 paste.y=first.match.row[1,"paste"]) 242
238 243
239 244 patientMerge = rbind(patientMerge, patientMerge.new.row)
240 patientMerge = rbind(patientMerge, patientMerge.new.row) 245 patient.fuzzy = patient.fuzzy[-match.filter,]
241 patient.fuzzy = patient.fuzzy[-match.filter,] 246
242 247 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
243 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] 248 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
244 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] 249
245 250 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,]
246 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,]
247
248 } else {
249 patient.fuzzy = patient.fuzzy[-1,]
250 }
251 251
252 } else if (sum(match.filter) > 1){ 252 } else if (sum(match.filter) > 1){
253 cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T) 253 cat(paste("<tr><td>", "Multiple matches (", sum(match.filter), ") found for", first.merge, "in", patient, "</td></tr>", sep=" "), file=logfile, append=T)
254 patient.fuzzy = patient.fuzzy[-1,] 254 patient.fuzzy = patient.fuzzy[-1,]
255 } else { 255 } else {