Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 37:623bbe972363 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Fri, 11 Sep 2015 08:26:18 -0400 |
| parents | d592dab2fca1 |
| children | f5b242a5337f |
comparison
equal
deleted
inserted
replaced
| 36:d592dab2fca1 | 37:623bbe972363 |
|---|---|
| 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) |
| 161 | 161 |
| 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) |
| 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) |
| 164 | 164 |
| 165 merge.freq.table = data.frame(table(c(patient1.fuzzy$merge, patient2.fuzzy$merge))) | 165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) |
| 166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | 166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] |
| 167 | 167 |
| 168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
| 169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
| 170 | 170 |
| 171 while(nrow(patient1.fuzzy) > 0 & nrow(patient2.fuzzy) > 0){ | 171 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) |
| 172 current.merge.1 = patient1.fuzzy[1,"merge"] | 172 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
| 173 current.clone.seq.1 = patient1.fuzzy[1,"Clone_Sequence"] | 173 |
| 174 current.merge.in.2 = patient2.fuzzy[patient2.fuzzy$merge == current.merge.1,] | 174 while(nrow(patient.fuzzy) > 1){ |
| 175 | 175 first.merge = patient.fuzzy[1,"merge"] |
| 176 #agrep/adist the two samples | 176 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
| 177 agrep.match = agrep(current.clone.seq.1, current.merge.in.2$Clone_Sequence, max.distance = 9, costs=list(insertions=1, deletions=1, substitutions=10)) | |
| 178 | 177 |
| 179 | 178 merge.filter = first.merge == patient.fuzzy$merge |
| 180 if(length(agrep.match) == 1){ | 179 |
| 181 current.clone.seq.2 = patient2.fuzzy[agrep.match,"Clone_Sequence"] | 180 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) |
| 182 patientMerge.new.row = data.frame(merge=current.clone.seq.1, | 181 |
| 183 min_cell_paste.x=patient1.fuzzy[1,"min_cell_paste"], | 182 if(sum(match.filter) == 2){ |
| 184 Patient.x=patient1.fuzzy[1,"Patient"], | 183 second.match = which(match.filter)[2] |
| 185 Receptor.x=patient1.fuzzy[1,"Receptor"], | 184 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] |
| 186 Sample.x=patient1.fuzzy[1,"Sample"], | 185 first.sample = patient.fuzzy[1,"Sample"] |
| 187 Cell_Count.x=patient1.fuzzy[1,"Cell_Count"], | 186 second.sample = patient.fuzzy[second.match,"Sample"] |
| 188 Clone_Molecule_Count_From_Spikes.x=patient1.fuzzy[1,"Clone_Molecule_Count_From_Spikes"], | |
| 189 Log10_Frequency.x=patient1.fuzzy[1,"Log10_Frequency"], | |
| 190 Total_Read_Count.x=patient1.fuzzy[1,"Total_Read_Count"], | |
| 191 dsPerM.x=patient1.fuzzy[1,"dsPerM"], | |
| 192 J_Segment_Major_Gene.x=patient1.fuzzy[1,"J_Segment_Major_Gene"], | |
| 193 V_Segment_Major_Gene.x=patient1.fuzzy[1,"V_Segment_Major_Gene"], | |
| 194 Clone_Sequence.x=patient1.fuzzy[1,"Clone_Sequence"], | |
| 195 CDR3_Sense_Sequence.x=patient1.fuzzy[1,"CDR3_Sense_Sequence"], | |
| 196 Related_to_leukemia_clone.x=patient1.fuzzy[1,"Related_to_leukemia_clone"], | |
| 197 Frequency.x=patient1.fuzzy[1,"Frequency"], | |
| 198 locus_V.x=patient1.fuzzy[1,"locus_V"], | |
| 199 locus_J.x=patient1.fuzzy[1,"locus_J"], | |
| 200 min_cell_count.x=patient1.fuzzy[1,"min_cell_count"], | |
| 201 normalized_read_count.x=patient1.fuzzy[1,"normalized_read_count"], | |
| 202 paste.x=patient1.fuzzy[1,"paste"], | |
| 203 min_cell_paste.y=patient2.fuzzy[agrep.match,"min_cell_paste"], | |
| 204 Patient.y=patient2.fuzzy[agrep.match,"Patient"], | |
| 205 Receptor.y=patient2.fuzzy[agrep.match,"Receptor"], | |
| 206 Sample.y=patient2.fuzzy[agrep.match,"Sample"], | |
| 207 Cell_Count.y=patient2.fuzzy[agrep.match,"Cell_Count"], | |
| 208 Clone_Molecule_Count_From_Spikes.y=patient2.fuzzy[agrep.match,"Clone_Molecule_Count_From_Spikes"], | |
| 209 Log10_Frequency.y=patient2.fuzzy[agrep.match,"Log10_Frequency"], | |
| 210 Total_Read_Count.y=patient2.fuzzy[agrep.match,"Total_Read_Count"], | |
| 211 dsPerM.y=patient2.fuzzy[agrep.match,"dsPerM"], | |
| 212 J_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"J_Segment_Major_Gene"], | |
| 213 V_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"V_Segment_Major_Gene"], | |
| 214 Clone_Sequence.y=patient2.fuzzy[agrep.match,"Clone_Sequence"], | |
| 215 CDR3_Sense_Sequence.y=patient2.fuzzy[agrep.match,"CDR3_Sense_Sequence"], | |
| 216 Related_to_leukemia_clone.y=patient2.fuzzy[agrep.match,"Related_to_leukemia_clone"], | |
| 217 Frequency.y=patient2.fuzzy[agrep.match,"Frequency"], | |
| 218 locus_V.y=patient2.fuzzy[agrep.match,"locus_V"], | |
| 219 locus_J.y=patient2.fuzzy[agrep.match,"locus_J"], | |
| 220 min_cell_count.y=patient2.fuzzy[agrep.match,"min_cell_count"], | |
| 221 normalized_read_count.y=patient2.fuzzy[agrep.match,"normalized_read_count"], | |
| 222 paste.y=patient2.fuzzy[agrep.match,"paste"]) | |
| 223 #add to patientMerge | |
| 224 patientMerge = rbind(patientMerge, patientMerge.new.row) | |
| 225 #remove from patient*.fuzzy | |
| 226 | 187 |
| 227 | 188 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){ |
| 228 #remove the fuzzy merged clone sequences from the original datasets | 189 first.match.row = patient.fuzzy[1,] |
| 229 patient1 = patient1[patient1$Clone_Sequence != patient1.fuzzy[1,"Clone_Sequence"],] | 190 second.match.row = patient.fuzzy[second.match,] |
| 230 patient2 = patient2[patient2$Clone_Sequence != patient2.fuzzy[agrep.match,"Clone_Sequence"],] | 191 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence)) |
| 231 | 192 patientMerge.new.row = data.frame(merge=first.clone.sequence, |
| 232 scatterplot_data = scatterplot_data[scatterplot_data$merge != current.clone.seq.2,] | 193 min_cell_paste.x=first.match.row[1,"min_cell_paste"], |
| 233 | 194 Patient.x=first.match.row[1,"Patient"], |
| 234 patient2.fuzzy <<- patient2.fuzzy[-c(agrep.match),] | 195 Receptor.x=first.match.row[1,"Receptor"], |
| 235 | 196 Sample.x=first.match.row[1,"Sample"], |
| 236 | 197 Cell_Count.x=first.match.row[1,"Cell_Count"], |
| 237 } else if (length(agrep.match) > 1){ | 198 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"], |
| 238 #multiple matches, whatdo? | 199 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"], |
| 239 cat(paste("<tr><td>", "Multiple matches found for ", current.merge.1, ", ", current.clone.seq.1, " in ", patient, ", ", oneSample, "</td></tr>", sep=""), file=logfile, append=T) | 200 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"], |
| 240 } | 201 dsPerM.x=first.match.row[1,"dsPerM"], |
| 241 patient1.fuzzy = patient1.fuzzy[-1,] | 202 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"], |
| 203 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"], | |
| 204 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"], | |
| 205 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"], | |
| 206 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"], | |
| 207 Frequency.x=first.match.row[1,"Frequency"], | |
| 208 locus_V.x=first.match.row[1,"locus_V"], | |
| 209 locus_J.x=first.match.row[1,"locus_J"], | |
| 210 min_cell_count.x=first.match.row[1,"min_cell_count"], | |
| 211 normalized_read_count.x=first.match.row[1,"normalized_read_count"], | |
| 212 paste.x=first.match.row[1,"paste"], | |
| 213 min_cell_paste.y=second.match.row[1,"min_cell_paste"], | |
| 214 Patient.y=second.match.row[1,"Patient"], | |
| 215 Receptor.y=second.match.row[1,"Receptor"], | |
| 216 Sample.y=second.match.row[1,"Sample"], | |
| 217 Cell_Count.y=second.match.row[1,"Cell_Count"], | |
| 218 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"], | |
| 219 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"], | |
| 220 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"], | |
| 221 dsPerM.y=second.match.row[1,"dsPerM"], | |
| 222 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"], | |
| 223 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"], | |
| 224 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"], | |
| 225 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"], | |
| 226 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"], | |
| 227 Frequency.y=second.match.row[1,"Frequency"], | |
| 228 locus_V.y=second.match.row[1,"locus_V"], | |
| 229 locus_J.y=second.match.row[1,"locus_J"], | |
| 230 min_cell_count.y=second.match.row[1,"min_cell_count"], | |
| 231 normalized_read_count.y=second.match.row[1,"normalized_read_count"], | |
| 232 paste.y=first.match.row[1,"paste"]) | |
| 233 | |
| 234 | |
| 235 patientMerge = rbind(patientMerge, patientMerge.new.row) | |
| 236 patient.fuzzy = patient.fuzzy[-match.filter,] | |
| 237 | |
| 238 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | |
| 239 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),] | |
| 240 | |
| 241 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,] | |
| 242 | |
| 243 } else { | |
| 244 patient.fuzzy = patient.fuzzy[-1,] | |
| 245 } | |
| 246 | |
| 247 } else if (sum(match.filter) > 2){ | |
| 248 print(paste("Multiple matches found for", first.merge, "in", patient)) | |
| 249 } else { | |
| 250 patient.fuzzy = patient.fuzzy[-1,] | |
| 251 } | |
| 252 | |
| 253 | |
| 242 } | 254 } |
| 243 | 255 |
| 244 #adist(patient1.fuzzy$Clone_Sequence, patient2.fuzzy$Clone_Sequence, list(insertions=0.1, deletions=0.1, substitutions=1)) | 256 } |
| 245 } | 257 |
| 246 | 258 |
| 247 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
| 248 res1 = vector() | 260 res1 = vector() |
| 249 res2 = vector() | 261 res2 = vector() |
| 250 resBoth = vector() | 262 resBoth = vector() |
