Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 30:7e44617c9ca4 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 09 Apr 2015 07:59:06 -0400 |
| parents | 362ef99f9405 |
| children | c623690e3b81 |
comparison
equal
deleted
inserted
replaced
| 29:57d197f149c3 | 30:7e44617c9ca4 |
|---|---|
| 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
| 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
| 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
| 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
| 55 | 55 |
| 56 print mutationdic[ID + "_FR1"] | |
| 57 | |
| 58 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
| 59 | 57 |
| 60 IDlist += [ID] | 58 IDlist += [ID] |
| 61 | 59 |
| 62 | 60 |
| 63 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent | 61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent |
| 64 | 62 |
| 65 for mutation in mutationList: | 63 for mutation in mutationList: |
| 66 if mutation[4]: #if non silent mutation | 64 if mutation[4]: #if non silent mutation |
| 67 AA_mutation[int(mutation[4])] += 1 | 65 AA_mutation[int(mutation[4])] += 1 |
| 68 | |
| 69 print AA_mutation | |
| 70 | 66 |
| 71 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" | 67 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" |
| 72 with open(aa_mutations_file, 'w') as o: | 68 with open(aa_mutations_file, 'w') as o: |
| 73 o.write(",".join([str(x) for x in AA_mutation]) + "\n") | 69 o.write(",".join([str(x) for x in AA_mutation]) + "\n") |
| 74 | 70 |
| 113 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 109 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
| 114 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 | 110 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 |
| 115 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 111 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 116 if not z or z == "CDR3": | 112 if not z or z == "CDR3": |
| 117 continue | 113 continue |
| 118 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 114 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| 119 if in_mutations > 0: | 115 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW]) |
| 120 RGYWCount[ID] += 1.0 / in_mutations | 116 if mutations_in_motif > 0: |
| | 117 RGYWCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 |
| | 118 if in_other_motifs > 1: |
| | 119 print in_other_motifs, ID, "RGYW", x, y, ([(x,y,z) for (xother, yother, zother) in WRCY if ((x <= int(xother) <= y) and (x <= int(yother) <= y))] + [(x,y,z) for (xother, yother, zother) in WA if ((x <= int(xother) <= y) and (x <= int(yother) <= y))] + [(x,y,z) for (xother, yother, zother) in TW if ((x <= int(xother) <= y) and (x <= int(yother) <= y))]) |
| 121 | 120 |
| 122 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 121 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 123 if not z or z == "CDR3": | 122 if not z or z == "CDR3": |
| 124 continue | 123 continue |
| 125 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 124 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| 126 if in_mutations > 0: | 125 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW]) |
| 127 WRCYCount[ID] += 1.0 / in_mutations | 126 if mutations_in_motif > 0: |
| | 127 WRCYCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 |
| | 128 if in_other_motifs > 1: |
| | 129 print in_other_motifs, ID, "WRCY", x, y |
| 128 | 130 |
| 129 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 131 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 130 if not z or z == "CDR3": | 132 if not z or z == "CDR3": |
| 131 continue | 133 continue |
| 132 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 134 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| 133 if in_mutations > 0: | 135 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW]) |
| 134 WACount[ID] += 1.0 / in_mutations | 136 if mutations_in_motif > 0: |
| | 137 WACount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 |
| | 138 if in_other_motifs > 1: |
| | 139 print in_other_motifs, ID, "WA", x, y |
| 135 | 140 |
| 136 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 141 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 137 if not z or z == "CDR3": | 142 if not z or z == "CDR3": |
| 138 continue | 143 continue |
| 139 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 144 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| 140 if in_mutations > 0: | 145 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) |
| 141 TWCount[ID] += 1.0 / in_mutations | 146 if mutations_in_motif > 0: |
| 142 | 147 TWCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 |
| | 148 if in_other_motifs > 1: |
| | 149 print in_other_motifs, ID, "TW", x, y |
| 143 | 150 |
| 144 | 151 |
| 145 directory = outfile[:outfile.rfind("/") + 1] | 152 directory = outfile[:outfile.rfind("/") + 1] |
| 146 value = 0 | 153 value = 0 |
| 147 valuedic = dict() | 154 valuedic = dict() |
