Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 37:623bbe972363 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 11 Sep 2015 08:26:18 -0400 |
parents | d592dab2fca1 |
children | f5b242a5337f |
comparison
equal
deleted
inserted
replaced
36:d592dab2fca1 | 37:623bbe972363 |
---|---|
160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) |
161 | 161 |
162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) |
163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) |
164 | 164 |
165 merge.freq.table = data.frame(table(c(patient1.fuzzy$merge, patient2.fuzzy$merge))) | 165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) |
166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | 166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] |
167 | 167 |
168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
170 | 170 |
171 while(nrow(patient1.fuzzy) > 0 & nrow(patient2.fuzzy) > 0){ | 171 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) |
172 current.merge.1 = patient1.fuzzy[1,"merge"] | 172 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
173 current.clone.seq.1 = patient1.fuzzy[1,"Clone_Sequence"] | 173 |
174 current.merge.in.2 = patient2.fuzzy[patient2.fuzzy$merge == current.merge.1,] | 174 while(nrow(patient.fuzzy) > 1){ |
175 | 175 first.merge = patient.fuzzy[1,"merge"] |
176 #agrep/adist the two samples | 176 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
177 agrep.match = agrep(current.clone.seq.1, current.merge.in.2$Clone_Sequence, max.distance = 9, costs=list(insertions=1, deletions=1, substitutions=10)) | |
178 | 177 |
179 | 178 merge.filter = first.merge == patient.fuzzy$merge |
180 if(length(agrep.match) == 1){ | 179 |
181 current.clone.seq.2 = patient2.fuzzy[agrep.match,"Clone_Sequence"] | 180 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) |
182 patientMerge.new.row = data.frame(merge=current.clone.seq.1, | 181 |
183 min_cell_paste.x=patient1.fuzzy[1,"min_cell_paste"], | 182 if(sum(match.filter) == 2){ |
184 Patient.x=patient1.fuzzy[1,"Patient"], | 183 second.match = which(match.filter)[2] |
185 Receptor.x=patient1.fuzzy[1,"Receptor"], | 184 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] |
186 Sample.x=patient1.fuzzy[1,"Sample"], | 185 first.sample = patient.fuzzy[1,"Sample"] |
187 Cell_Count.x=patient1.fuzzy[1,"Cell_Count"], | 186 second.sample = patient.fuzzy[second.match,"Sample"] |
188 Clone_Molecule_Count_From_Spikes.x=patient1.fuzzy[1,"Clone_Molecule_Count_From_Spikes"], | |
189 Log10_Frequency.x=patient1.fuzzy[1,"Log10_Frequency"], | |
190 Total_Read_Count.x=patient1.fuzzy[1,"Total_Read_Count"], | |
191 dsPerM.x=patient1.fuzzy[1,"dsPerM"], | |
192 J_Segment_Major_Gene.x=patient1.fuzzy[1,"J_Segment_Major_Gene"], | |
193 V_Segment_Major_Gene.x=patient1.fuzzy[1,"V_Segment_Major_Gene"], | |
194 Clone_Sequence.x=patient1.fuzzy[1,"Clone_Sequence"], | |
195 CDR3_Sense_Sequence.x=patient1.fuzzy[1,"CDR3_Sense_Sequence"], | |
196 Related_to_leukemia_clone.x=patient1.fuzzy[1,"Related_to_leukemia_clone"], | |
197 Frequency.x=patient1.fuzzy[1,"Frequency"], | |
198 locus_V.x=patient1.fuzzy[1,"locus_V"], | |
199 locus_J.x=patient1.fuzzy[1,"locus_J"], | |
200 min_cell_count.x=patient1.fuzzy[1,"min_cell_count"], | |
201 normalized_read_count.x=patient1.fuzzy[1,"normalized_read_count"], | |
202 paste.x=patient1.fuzzy[1,"paste"], | |
203 min_cell_paste.y=patient2.fuzzy[agrep.match,"min_cell_paste"], | |
204 Patient.y=patient2.fuzzy[agrep.match,"Patient"], | |
205 Receptor.y=patient2.fuzzy[agrep.match,"Receptor"], | |
206 Sample.y=patient2.fuzzy[agrep.match,"Sample"], | |
207 Cell_Count.y=patient2.fuzzy[agrep.match,"Cell_Count"], | |
208 Clone_Molecule_Count_From_Spikes.y=patient2.fuzzy[agrep.match,"Clone_Molecule_Count_From_Spikes"], | |
209 Log10_Frequency.y=patient2.fuzzy[agrep.match,"Log10_Frequency"], | |
210 Total_Read_Count.y=patient2.fuzzy[agrep.match,"Total_Read_Count"], | |
211 dsPerM.y=patient2.fuzzy[agrep.match,"dsPerM"], | |
212 J_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"J_Segment_Major_Gene"], | |
213 V_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"V_Segment_Major_Gene"], | |
214 Clone_Sequence.y=patient2.fuzzy[agrep.match,"Clone_Sequence"], | |
215 CDR3_Sense_Sequence.y=patient2.fuzzy[agrep.match,"CDR3_Sense_Sequence"], | |
216 Related_to_leukemia_clone.y=patient2.fuzzy[agrep.match,"Related_to_leukemia_clone"], | |
217 Frequency.y=patient2.fuzzy[agrep.match,"Frequency"], | |
218 locus_V.y=patient2.fuzzy[agrep.match,"locus_V"], | |
219 locus_J.y=patient2.fuzzy[agrep.match,"locus_J"], | |
220 min_cell_count.y=patient2.fuzzy[agrep.match,"min_cell_count"], | |
221 normalized_read_count.y=patient2.fuzzy[agrep.match,"normalized_read_count"], | |
222 paste.y=patient2.fuzzy[agrep.match,"paste"]) | |
223 #add to patientMerge | |
224 patientMerge = rbind(patientMerge, patientMerge.new.row) | |
225 #remove from patient*.fuzzy | |
226 | 187 |
227 | 188 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){ |
228 #remove the fuzzy merged clone sequences from the original datasets | 189 first.match.row = patient.fuzzy[1,] |
229 patient1 = patient1[patient1$Clone_Sequence != patient1.fuzzy[1,"Clone_Sequence"],] | 190 second.match.row = patient.fuzzy[second.match,] |
230 patient2 = patient2[patient2$Clone_Sequence != patient2.fuzzy[agrep.match,"Clone_Sequence"],] | 191 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) |
231 | 192 patientMerge.new.row = data.frame(merge=first.clone.sequence, |
232 scatterplot_data = scatterplot_data[scatterplot_data$merge != current.clone.seq.2,] | 193 min_cell_paste.x=first.match.row[1,"min_cell_paste"], |
233 | 194 Patient.x=first.match.row[1,"Patient"], |
234 patient2.fuzzy <<- patient2.fuzzy[-c(agrep.match),] | 195 Receptor.x=first.match.row[1,"Receptor"], |
235 | 196 Sample.x=first.match.row[1,"Sample"], |
236 | 197 Cell_Count.x=first.match.row[1,"Cell_Count"], |
237 } else if (length(agrep.match) > 1){ | 198 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], |
238 #multiple matches, whatdo? | 199 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], |
239 cat(paste("<tr><td>", "Multiple matches found for ", current.merge.1, ", ", current.clone.seq.1, " in ", patient, ", ", oneSample, "</td></tr>", sep=""), file=logfile, append=T) | 200 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], |
240 } | 201 dsPerM.x=first.match.row[1,"dsPerM"], |
241 patient1.fuzzy = patient1.fuzzy[-1,] | 202 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], |
203 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], | |
204 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], | |
205 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], | |
206 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], | |
207 Frequency.x=first.match.row[1,"Frequency"], | |
208 locus_V.x=first.match.row[1,"locus_V"], | |
209 locus_J.x=first.match.row[1,"locus_J"], | |
210 min_cell_count.x=first.match.row[1,"min_cell_count"], | |
211 normalized_read_count.x=first.match.row[1,"normalized_read_count"], | |
212 paste.x=first.match.row[1,"paste"], | |
213 min_cell_paste.y=second.match.row[1,"min_cell_paste"], | |
214 Patient.y=second.match.row[1,"Patient"], | |
215 Receptor.y=second.match.row[1,"Receptor"], | |
216 Sample.y=second.match.row[1,"Sample"], | |
217 Cell_Count.y=second.match.row[1,"Cell_Count"], | |
218 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], | |
219 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], | |
220 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], | |
221 dsPerM.y=second.match.row[1,"dsPerM"], | |
222 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], | |
223 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], | |
224 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], | |
225 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], | |
226 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], | |
227 Frequency.y=second.match.row[1,"Frequency"], | |
228 locus_V.y=second.match.row[1,"locus_V"], | |
229 locus_J.y=second.match.row[1,"locus_J"], | |
230 min_cell_count.y=second.match.row[1,"min_cell_count"], | |
231 normalized_read_count.y=second.match.row[1,"normalized_read_count"], | |
232 paste.y=first.match.row[1,"paste"]) | |
233 | |
234 | |
235 patientMerge = rbind(patientMerge, patientMerge.new.row) | |
236 patient.fuzzy = patient.fuzzy[-match.filter,] | |
237 | |
238 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | |
239 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | |
240 | |
241 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] | |
242 | |
243 } else { | |
244 patient.fuzzy = patient.fuzzy[-1,] | |
245 } | |
246 | |
247 } else if (sum(match.filter) > 2){ | |
248 print(paste("Multiple matches found for", first.merge, "in", patient)) | |
249 } else { | |
250 patient.fuzzy = patient.fuzzy[-1,] | |
251 } | |
252 | |
253 | |
242 } | 254 } |
243 | 255 |
244 #adist(patient1.fuzzy$Clone_Sequence, patient2.fuzzy$Clone_Sequence, list(insertions=0.1, deletions=0.1, substitutions=1)) | 256 } |
245 } | 257 |
246 | 258 |
247 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
248 res1 = vector() | 260 res1 = vector() |
249 res2 = vector() | 261 res2 = vector() |
250 resBoth = vector() | 262 resBoth = vector() |