Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 28:362ef99f9405 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 08 Apr 2015 10:14:46 -0400 |
parents | 2433a1e110e1 |
children | 7e44617c9ca4 |
comparison
equal
deleted
inserted
replaced
27:c9c95b96b7cc | 28:362ef99f9405 |
---|---|
51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
55 | 55 |
| | 56 print mutationdic[ID + "_FR1"] |
| | 57 |
56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 58 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
57 | 59 |
58 IDlist += [ID] | 60 IDlist += [ID] |
59 | 61 |
60 | 62 |
61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) | 63 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent |
62 | 64 |
63 for mutation in mutationList: | 65 for mutation in mutationList: |
64 if mutation[4]: #if non silent mutation | 66 if mutation[4]: #if non silent mutation |
65 AA_mutation[int(mutation[4])] += 1 | 67 AA_mutation[int(mutation[4])] += 1 |
66 | 68 |
107 ID = linesplt[IDIndex] | 109 ID = linesplt[IDIndex] |
108 RGYW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] | 110 RGYW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] |
109 WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] | 111 WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] |
110 WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] | 112 WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] |
111 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 113 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
112 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 114 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 |
113 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 115 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
114 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 116 if not z or z == "CDR3": |
115 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 117 continue |
| | 118 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 119 if in_mutations > 0: |
| | 120 RGYWCount[ID] += 1.0 / in_mutations |
| | 121 |
| | 122 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| | 123 if not z or z == "CDR3": |
| | 124 continue |
| | 125 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 126 if in_mutations > 0: |
| | 127 WRCYCount[ID] += 1.0 / in_mutations |
| | 128 |
| | 129 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| | 130 if not z or z == "CDR3": |
| | 131 continue |
| | 132 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 133 if in_mutations > 0: |
| | 134 WACount[ID] += 1.0 / in_mutations |
| | 135 |
| | 136 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| | 137 if not z or z == "CDR3": |
| | 138 continue |
| | 139 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 140 if in_mutations > 0: |
| | 141 TWCount[ID] += 1.0 / in_mutations |
116 | 142 |
117 | 143 |
118 | 144 |
119 directory = outfile[:outfile.rfind("/") + 1] | 145 directory = outfile[:outfile.rfind("/") + 1] |
120 value = 0 | 146 value = 0 |
134 for gene in genes: | 160 for gene in genes: |
135 geneMatcher = re.compile(".*" + gene + ".*") | 161 geneMatcher = re.compile(".*" + gene + ".*") |
136 if valuedic[gene] is 0: | 162 if valuedic[gene] is 0: |
137 o.write(",0,0,0") | 163 o.write(",0,0,0") |
138 else: | 164 else: |
139 x = sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]) | 165 x = int(round(sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]))) |
140 y = valuedic[gene] | 166 y = valuedic[gene] |
141 z = str(round(x / float(valuedic[gene]) * 100, 1)) | 167 z = str(round(x / float(valuedic[gene]) * 100, 1)) |
142 o.write("," + str(x) + "," + str(y) + "," + z) | 168 o.write("," + str(x) + "," + str(y) + "," + z) |
143 #for total | 169 #for total |
144 x = sum([y for x,y in curr.iteritems()]) | 170 x = int(round(sum([y for x,y in curr.iteritems()]))) |
145 y = valuedic["total"] | 171 y = valuedic["total"] |
146 z = str(round(x / float(valuedic["total"]) * 100, 1)) | 172 z = str(round(x / float(valuedic["total"]) * 100, 1)) |
147 o.write("," + str(x) + "," + str(y) + "," + z + "\n") | 173 o.write("," + str(x) + "," + str(y) + "," + z + "\n") |
148 | 174 |
149 | 175 |