comparison mutation_analysis.py @ 28:362ef99f9405 draft

Uploaded
author davidvanzessen
date Wed, 08 Apr 2015 10:14:46 -0400
parents 2433a1e110e1
children 7e44617c9ca4
comparison
equal deleted inserted replaced
27:c9c95b96b7cc 28:362ef99f9405
51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
55 55
56 print mutationdic[ID + "_FR1"]
57
56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 58 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
57 59
58 IDlist += [ID] 60 IDlist += [ID]
59 61
60 62
61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) 63 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent
62 64
63 for mutation in mutationList: 65 for mutation in mutationList:
64 if mutation[4]: #if non silent mutation 66 if mutation[4]: #if non silent mutation
65 AA_mutation[int(mutation[4])] += 1 67 AA_mutation[int(mutation[4])] += 1
66 68
107 ID = linesplt[IDIndex] 109 ID = linesplt[IDIndex]
108 RGYW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] 110 RGYW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]]
109 WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] 111 WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]]
110 WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] 112 WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]]
111 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] 113 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
112 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 114 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0
113 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 115 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
114 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 116 if not z or z == "CDR3":
115 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 117 continue
118 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
119 if in_mutations > 0:
120 RGYWCount[ID] += 1.0 / in_mutations
121
122 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
123 if not z or z == "CDR3":
124 continue
125 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
126 if in_mutations > 0:
127 WRCYCount[ID] += 1.0 / in_mutations
128
129 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
130 if not z or z == "CDR3":
131 continue
132 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
133 if in_mutations > 0:
134 WACount[ID] += 1.0 / in_mutations
135
136 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
137 if not z or z == "CDR3":
138 continue
139 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
140 if in_mutations > 0:
141 TWCount[ID] += 1.0 / in_mutations
116 142
117 143
118 144
119 directory = outfile[:outfile.rfind("/") + 1] 145 directory = outfile[:outfile.rfind("/") + 1]
120 value = 0 146 value = 0
134 for gene in genes: 160 for gene in genes:
135 geneMatcher = re.compile(".*" + gene + ".*") 161 geneMatcher = re.compile(".*" + gene + ".*")
136 if valuedic[gene] is 0: 162 if valuedic[gene] is 0:
137 o.write(",0,0,0") 163 o.write(",0,0,0")
138 else: 164 else:
139 x = sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]) 165 x = int(round(sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]])))
140 y = valuedic[gene] 166 y = valuedic[gene]
141 z = str(round(x / float(valuedic[gene]) * 100, 1)) 167 z = str(round(x / float(valuedic[gene]) * 100, 1))
142 o.write("," + str(x) + "," + str(y) + "," + z) 168 o.write("," + str(x) + "," + str(y) + "," + z)
143 #for total 169 #for total
144 x = sum([y for x,y in curr.iteritems()]) 170 x = int(round(sum([y for x,y in curr.iteritems()])))
145 y = valuedic["total"] 171 y = valuedic["total"]
146 z = str(round(x / float(valuedic["total"]) * 100, 1)) 172 z = str(round(x / float(valuedic["total"]) * 100, 1))
147 o.write("," + str(x) + "," + str(y) + "," + z + "\n") 173 o.write("," + str(x) + "," + str(y) + "," + z + "\n")
148 174
149 175