Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 98:5ffbf40cdd4b draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 16 Jun 2016 05:05:47 -0400 |
parents | 07f7da724a77 |
children | 603a10976e9c |
comparison
equal
deleted
inserted
replaced
97:6e8dfbe164c6 | 98:5ffbf40cdd4b |
---|---|
159 import sys | 159 import sys |
160 | 160 |
161 sys.exit() | 161 sys.exit() |
162 | 162 |
163 hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)") | 163 hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)") |
164 RGYWCount = {g: 0 for g in genes} | 164 RGYWCount = {} |
165 WRCYCount = {g: 0 for g in genes} | 165 WRCYCount = {} |
166 WACount = {g: 0 for g in genes} | 166 WACount = {} |
167 TWCount = {g: 0 for g in genes} | 167 TWCount = {} |
168 | 168 |
169 IDIndex = 0 | 169 #IDIndex = 0 |
170 ataIndex = 0 | 170 ataIndex = 0 |
171 tatIndex = 0 | 171 tatIndex = 0 |
172 aggctatIndex = 0 | 172 aggctatIndex = 0 |
173 atagcctIndex = 0 | 173 atagcctIndex = 0 |
174 first = True | 174 first = True |
183 first = False | 183 first = False |
184 continue | 184 continue |
185 linesplt = line.split("\t") | 185 linesplt = line.split("\t") |
186 gene = linesplt[best_matchIndex] | 186 gene = linesplt[best_matchIndex] |
187 ID = linesplt[IDIndex] | 187 ID = linesplt[IDIndex] |
188 if ID == "ca2": | |
189 print linesplt | |
188 RGYW = [(int(x), int(y), z) for (x, y, z) in | 190 RGYW = [(int(x), int(y), z) for (x, y, z) in |
189 [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] | 191 [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] |
190 WRCY = [(int(x), int(y), z) for (x, y, z) in | 192 WRCY = [(int(x), int(y), z) for (x, y, z) in |
191 [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] | 193 [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] |
192 WA = [(int(x), int(y), z) for (x, y, z) in | 194 WA = [(int(x), int(y), z) for (x, y, z) in |
247 | 249 |
248 | 250 |
249 def get_xyz(lst, gene, f, fname): | 251 def get_xyz(lst, gene, f, fname): |
250 x = int(round(f(lst))) | 252 x = int(round(f(lst))) |
251 y = valuedic[gene + "_" + fname] | 253 y = valuedic[gene + "_" + fname] |
252 z = str(round(x / float(valuedic[gene + "_" + fname]) * 100, 1)) if valuedic[gene + "_" + fname] != 0 else "0" | 254 z = str(round(x / float(y) * 100, 1)) if y != 0 else "0" |
253 return (str(x), str(y), z) | 255 return (str(x), str(y), z) |
254 | 256 |
255 dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount} | 257 dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount} |
256 arr = ["RGYW", "WRCY", "WA", "TW"] | 258 arr = ["RGYW", "WRCY", "WA", "TW"] |
259 | |
260 geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes} | |
257 | 261 |
258 for fname in funcs.keys(): | 262 for fname in funcs.keys(): |
259 func = funcs[fname] | 263 func = funcs[fname] |
260 foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt" | 264 foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt" |
261 with open(foutfile, 'w') as o: | 265 with open(foutfile, 'w') as o: |
262 for typ in arr: | 266 for typ in arr: |
263 o.write(typ + " (%)") | 267 o.write(typ + " (%)") |
264 curr = dic[typ] | 268 curr = dic[typ] |
265 for gene in genes: | 269 for gene in genes: |
266 geneMatcher = re.compile("^" + gene + ".*") | 270 geneMatcher = geneMatchers[gene] #re.compile("^" + gene + ".*") #recompile every loop.... |
267 if valuedic[gene + "_" + fname] is 0: | 271 if valuedic[gene + "_" + fname] is 0: |
268 o.write(",0,0,0") | 272 o.write(",0,0,0") |
269 else: | 273 else: |
270 x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname) | 274 x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname) |
271 o.write("," + x + "," + y + "," + z) | 275 o.write("," + x + "," + y + "," + z) |
272 # for total | 276 |
273 x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname) | 277 x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname) |
274 o.write("," + x + "," + y + "," + z + "\n") | 278 o.write("," + x + "," + y + "," + z + "\n") |
275 | 279 |
276 | 280 |
277 # for testing | 281 # for testing |
278 seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt" | 282 seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt" |