comparison RScript.r @ 37:623bbe972363 draft

Uploaded
author davidvanzessen
date Fri, 11 Sep 2015 08:26:18 -0400
parents d592dab2fca1
children f5b242a5337f
comparison
equal deleted inserted replaced
36:d592dab2fca1 37:623bbe972363
160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
161 161
162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
164 164
165 merge.freq.table = data.frame(table(c(patient1.fuzzy$merge, patient2.fuzzy$merge))) 165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"])))
166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] 166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
167 167
168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] 168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] 169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
170 170
171 while(nrow(patient1.fuzzy) > 0 & nrow(patient2.fuzzy) > 0){ 171 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
172 current.merge.1 = patient1.fuzzy[1,"merge"] 172 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
173 current.clone.seq.1 = patient1.fuzzy[1,"Clone_Sequence"] 173
174 current.merge.in.2 = patient2.fuzzy[patient2.fuzzy$merge == current.merge.1,] 174 while(nrow(patient.fuzzy) > 1){
175 175 first.merge = patient.fuzzy[1,"merge"]
176 #agrep/adist the two samples 176 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
177 agrep.match = agrep(current.clone.seq.1, current.merge.in.2$Clone_Sequence, max.distance = 9, costs=list(insertions=1, deletions=1, substitutions=10))
178 177
179 178 merge.filter = first.merge == patient.fuzzy$merge
180 if(length(agrep.match) == 1){ 179
181 current.clone.seq.2 = patient2.fuzzy[agrep.match,"Clone_Sequence"] 180 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence)
182 patientMerge.new.row = data.frame(merge=current.clone.seq.1, 181
183 min_cell_paste.x=patient1.fuzzy[1,"min_cell_paste"], 182 if(sum(match.filter) == 2){
184 Patient.x=patient1.fuzzy[1,"Patient"], 183 second.match = which(match.filter)[2]
185 Receptor.x=patient1.fuzzy[1,"Receptor"], 184 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"]
186 Sample.x=patient1.fuzzy[1,"Sample"], 185 first.sample = patient.fuzzy[1,"Sample"]
187 Cell_Count.x=patient1.fuzzy[1,"Cell_Count"], 186 second.sample = patient.fuzzy[second.match,"Sample"]
188 Clone_Molecule_Count_From_Spikes.x=patient1.fuzzy[1,"Clone_Molecule_Count_From_Spikes"],
189 Log10_Frequency.x=patient1.fuzzy[1,"Log10_Frequency"],
190 Total_Read_Count.x=patient1.fuzzy[1,"Total_Read_Count"],
191 dsPerM.x=patient1.fuzzy[1,"dsPerM"],
192 J_Segment_Major_Gene.x=patient1.fuzzy[1,"J_Segment_Major_Gene"],
193 V_Segment_Major_Gene.x=patient1.fuzzy[1,"V_Segment_Major_Gene"],
194 Clone_Sequence.x=patient1.fuzzy[1,"Clone_Sequence"],
195 CDR3_Sense_Sequence.x=patient1.fuzzy[1,"CDR3_Sense_Sequence"],
196 Related_to_leukemia_clone.x=patient1.fuzzy[1,"Related_to_leukemia_clone"],
197 Frequency.x=patient1.fuzzy[1,"Frequency"],
198 locus_V.x=patient1.fuzzy[1,"locus_V"],
199 locus_J.x=patient1.fuzzy[1,"locus_J"],
200 min_cell_count.x=patient1.fuzzy[1,"min_cell_count"],
201 normalized_read_count.x=patient1.fuzzy[1,"normalized_read_count"],
202 paste.x=patient1.fuzzy[1,"paste"],
203 min_cell_paste.y=patient2.fuzzy[agrep.match,"min_cell_paste"],
204 Patient.y=patient2.fuzzy[agrep.match,"Patient"],
205 Receptor.y=patient2.fuzzy[agrep.match,"Receptor"],
206 Sample.y=patient2.fuzzy[agrep.match,"Sample"],
207 Cell_Count.y=patient2.fuzzy[agrep.match,"Cell_Count"],
208 Clone_Molecule_Count_From_Spikes.y=patient2.fuzzy[agrep.match,"Clone_Molecule_Count_From_Spikes"],
209 Log10_Frequency.y=patient2.fuzzy[agrep.match,"Log10_Frequency"],
210 Total_Read_Count.y=patient2.fuzzy[agrep.match,"Total_Read_Count"],
211 dsPerM.y=patient2.fuzzy[agrep.match,"dsPerM"],
212 J_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"J_Segment_Major_Gene"],
213 V_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"V_Segment_Major_Gene"],
214 Clone_Sequence.y=patient2.fuzzy[agrep.match,"Clone_Sequence"],
215 CDR3_Sense_Sequence.y=patient2.fuzzy[agrep.match,"CDR3_Sense_Sequence"],
216 Related_to_leukemia_clone.y=patient2.fuzzy[agrep.match,"Related_to_leukemia_clone"],
217 Frequency.y=patient2.fuzzy[agrep.match,"Frequency"],
218 locus_V.y=patient2.fuzzy[agrep.match,"locus_V"],
219 locus_J.y=patient2.fuzzy[agrep.match,"locus_J"],
220 min_cell_count.y=patient2.fuzzy[agrep.match,"min_cell_count"],
221 normalized_read_count.y=patient2.fuzzy[agrep.match,"normalized_read_count"],
222 paste.y=patient2.fuzzy[agrep.match,"paste"])
223 #add to patientMerge
224 patientMerge = rbind(patientMerge, patientMerge.new.row)
225 #remove from patient*.fuzzy
226 187
227 188 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){
228 #remove the fuzzy merged clone sequences from the original datasets 189 first.match.row = patient.fuzzy[1,]
229 patient1 = patient1[patient1$Clone_Sequence != patient1.fuzzy[1,"Clone_Sequence"],] 190 second.match.row = patient.fuzzy[second.match,]
230 patient2 = patient2[patient2$Clone_Sequence != patient2.fuzzy[agrep.match,"Clone_Sequence"],] 191 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence))
231 192 patientMerge.new.row = data.frame(merge=first.clone.sequence,
232 scatterplot_data = scatterplot_data[scatterplot_data$merge != current.clone.seq.2,] 193 min_cell_paste.x=first.match.row[1,"min_cell_paste"],
233 194 Patient.x=first.match.row[1,"Patient"],
234 patient2.fuzzy <<- patient2.fuzzy[-c(agrep.match),] 195 Receptor.x=first.match.row[1,"Receptor"],
235 196 Sample.x=first.match.row[1,"Sample"],
236 197 Cell_Count.x=first.match.row[1,"Cell_Count"],
237 } else if (length(agrep.match) > 1){ 198 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"],
238 #multiple matches, whatdo? 199 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"],
239 cat(paste("<tr><td>", "Multiple matches found for ", current.merge.1, ", ", current.clone.seq.1, " in ", patient, ", ", oneSample, "</td></tr>", sep=""), file=logfile, append=T) 200 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"],
240 } 201 dsPerM.x=first.match.row[1,"dsPerM"],
241 patient1.fuzzy = patient1.fuzzy[-1,] 202 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"],
203 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"],
204 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"],
205 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"],
206 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"],
207 Frequency.x=first.match.row[1,"Frequency"],
208 locus_V.x=first.match.row[1,"locus_V"],
209 locus_J.x=first.match.row[1,"locus_J"],
210 min_cell_count.x=first.match.row[1,"min_cell_count"],
211 normalized_read_count.x=first.match.row[1,"normalized_read_count"],
212 paste.x=first.match.row[1,"paste"],
213 min_cell_paste.y=second.match.row[1,"min_cell_paste"],
214 Patient.y=second.match.row[1,"Patient"],
215 Receptor.y=second.match.row[1,"Receptor"],
216 Sample.y=second.match.row[1,"Sample"],
217 Cell_Count.y=second.match.row[1,"Cell_Count"],
218 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"],
219 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"],
220 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"],
221 dsPerM.y=second.match.row[1,"dsPerM"],
222 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"],
223 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"],
224 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"],
225 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"],
226 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"],
227 Frequency.y=second.match.row[1,"Frequency"],
228 locus_V.y=second.match.row[1,"locus_V"],
229 locus_J.y=second.match.row[1,"locus_J"],
230 min_cell_count.y=second.match.row[1,"min_cell_count"],
231 normalized_read_count.y=second.match.row[1,"normalized_read_count"],
232 paste.y=first.match.row[1,"paste"])
233
234
235 patientMerge = rbind(patientMerge, patientMerge.new.row)
236 patient.fuzzy = patient.fuzzy[-match.filter,]
237
238 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
239 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
240
241 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,]
242
243 } else {
244 patient.fuzzy = patient.fuzzy[-1,]
245 }
246
247 } else if (sum(match.filter) > 2){
248 print(paste("Multiple matches found for", first.merge, "in", patient))
249 } else {
250 patient.fuzzy = patient.fuzzy[-1,]
251 }
252
253
242 } 254 }
243 255
244 #adist(patient1.fuzzy$Clone_Sequence, patient2.fuzzy$Clone_Sequence, list(insertions=0.1, deletions=0.1, substitutions=1)) 256 }
245 } 257
246 258
247 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) 259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
248 res1 = vector() 260 res1 = vector()
249 res2 = vector() 261 res2 = vector()
250 resBoth = vector() 262 resBoth = vector()