Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 30:7e44617c9ca4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 09 Apr 2015 07:59:06 -0400 |
parents | 362ef99f9405 |
children | c623690e3b81 |
comparison
equal
deleted
inserted
replaced
29:57d197f149c3 | 30:7e44617c9ca4 |
---|---|
51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
55 | 55 |
56 print mutationdic[ID + "_FR1"] | |
57 | |
58 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
59 | 57 |
60 IDlist += [ID] | 58 IDlist += [ID] |
61 | 59 |
62 | 60 |
63 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent | 61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent |
64 | 62 |
65 for mutation in mutationList: | 63 for mutation in mutationList: |
66 if mutation[4]: #if non silent mutation | 64 if mutation[4]: #if non silent mutation |
67 AA_mutation[int(mutation[4])] += 1 | 65 AA_mutation[int(mutation[4])] += 1 |
68 | |
69 print AA_mutation | |
70 | 66 |
71 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" | 67 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" |
72 with open(aa_mutations_file, 'w') as o: | 68 with open(aa_mutations_file, 'w') as o: |
73 o.write(",".join([str(x) for x in AA_mutation]) + "\n") | 69 o.write(",".join([str(x) for x in AA_mutation]) + "\n") |
74 | 70 |
113 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 109 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
114 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 | 110 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 |
115 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 111 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
116 if not z or z == "CDR3": | 112 if not z or z == "CDR3": |
117 continue | 113 continue |
118 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 114 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
119 if in_mutations > 0: | 115 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW]) |
120 RGYWCount[ID] += 1.0 / in_mutations | 116 if mutations_in_motif > 0: |
117 RGYWCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 | |
118 if in_other_motifs > 1: | |
119 print in_other_motifs, ID, "RGYW", x, y, ([(x,y,z) for (xother, yother, zother) in WRCY if ((x <= int(xother) <= y) and (x <= int(yother) <= y))] + [(x,y,z) for (xother, yother, zother) in WA if ((x <= int(xother) <= y) and (x <= int(yother) <= y))] + [(x,y,z) for (xother, yother, zother) in TW if ((x <= int(xother) <= y) and (x <= int(yother) <= y))]) | |
121 | 120 |
122 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 121 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
123 if not z or z == "CDR3": | 122 if not z or z == "CDR3": |
124 continue | 123 continue |
125 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 124 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
126 if in_mutations > 0: | 125 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW]) |
127 WRCYCount[ID] += 1.0 / in_mutations | 126 if mutations_in_motif > 0: |
127 WRCYCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 | |
128 if in_other_motifs > 1: | |
129 print in_other_motifs, ID, "WRCY", x, y | |
128 | 130 |
129 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 131 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
130 if not z or z == "CDR3": | 132 if not z or z == "CDR3": |
131 continue | 133 continue |
132 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 134 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
133 if in_mutations > 0: | 135 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW]) |
134 WACount[ID] += 1.0 / in_mutations | 136 if mutations_in_motif > 0: |
137 WACount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 | |
138 if in_other_motifs > 1: | |
139 print in_other_motifs, ID, "WA", x, y | |
135 | 140 |
136 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 141 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
137 if not z or z == "CDR3": | 142 if not z or z == "CDR3": |
138 continue | 143 continue |
139 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) | 144 mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
140 if in_mutations > 0: | 145 in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) |
141 TWCount[ID] += 1.0 / in_mutations | 146 if mutations_in_motif > 0: |
142 | 147 TWCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1 |
148 if in_other_motifs > 1: | |
149 print in_other_motifs, ID, "TW", x, y | |
143 | 150 |
144 | 151 |
145 directory = outfile[:outfile.rfind("/") + 1] | 152 directory = outfile[:outfile.rfind("/") + 1] |
146 value = 0 | 153 value = 0 |
147 valuedic = dict() | 154 valuedic = dict() |