comparison mutation_analysis.py @ 98:5ffbf40cdd4b draft

Uploaded
author davidvanzessen
date Thu, 16 Jun 2016 05:05:47 -0400
parents 07f7da724a77
children 603a10976e9c
comparison
equal deleted inserted replaced
97:6e8dfbe164c6 98:5ffbf40cdd4b
159 import sys 159 import sys
160 160
161 sys.exit() 161 sys.exit()
162 162
163 hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)") 163 hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)")
164 RGYWCount = {g: 0 for g in genes} 164 RGYWCount = {}
165 WRCYCount = {g: 0 for g in genes} 165 WRCYCount = {}
166 WACount = {g: 0 for g in genes} 166 WACount = {}
167 TWCount = {g: 0 for g in genes} 167 TWCount = {}
168 168
169 IDIndex = 0 169 #IDIndex = 0
170 ataIndex = 0 170 ataIndex = 0
171 tatIndex = 0 171 tatIndex = 0
172 aggctatIndex = 0 172 aggctatIndex = 0
173 atagcctIndex = 0 173 atagcctIndex = 0
174 first = True 174 first = True
183 first = False 183 first = False
184 continue 184 continue
185 linesplt = line.split("\t") 185 linesplt = line.split("\t")
186 gene = linesplt[best_matchIndex] 186 gene = linesplt[best_matchIndex]
187 ID = linesplt[IDIndex] 187 ID = linesplt[IDIndex]
188 if ID == "ca2":
189 print linesplt
188 RGYW = [(int(x), int(y), z) for (x, y, z) in 190 RGYW = [(int(x), int(y), z) for (x, y, z) in
189 [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] 191 [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]]
190 WRCY = [(int(x), int(y), z) for (x, y, z) in 192 WRCY = [(int(x), int(y), z) for (x, y, z) in
191 [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] 193 [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]]
192 WA = [(int(x), int(y), z) for (x, y, z) in 194 WA = [(int(x), int(y), z) for (x, y, z) in
247 249
248 250
249 def get_xyz(lst, gene, f, fname): 251 def get_xyz(lst, gene, f, fname):
250 x = int(round(f(lst))) 252 x = int(round(f(lst)))
251 y = valuedic[gene + "_" + fname] 253 y = valuedic[gene + "_" + fname]
252 z = str(round(x / float(valuedic[gene + "_" + fname]) * 100, 1)) if valuedic[gene + "_" + fname] != 0 else "0" 254 z = str(round(x / float(y) * 100, 1)) if y != 0 else "0"
253 return (str(x), str(y), z) 255 return (str(x), str(y), z)
254 256
255 dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount} 257 dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount}
256 arr = ["RGYW", "WRCY", "WA", "TW"] 258 arr = ["RGYW", "WRCY", "WA", "TW"]
259
260 geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes}
257 261
258 for fname in funcs.keys(): 262 for fname in funcs.keys():
259 func = funcs[fname] 263 func = funcs[fname]
260 foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt" 264 foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt"
261 with open(foutfile, 'w') as o: 265 with open(foutfile, 'w') as o:
262 for typ in arr: 266 for typ in arr:
263 o.write(typ + " (%)") 267 o.write(typ + " (%)")
264 curr = dic[typ] 268 curr = dic[typ]
265 for gene in genes: 269 for gene in genes:
266 geneMatcher = re.compile("^" + gene + ".*") 270 geneMatcher = geneMatchers[gene] #re.compile("^" + gene + ".*") #recompile every loop....
267 if valuedic[gene + "_" + fname] is 0: 271 if valuedic[gene + "_" + fname] is 0:
268 o.write(",0,0,0") 272 o.write(",0,0,0")
269 else: 273 else:
270 x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname) 274 x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname)
271 o.write("," + x + "," + y + "," + z) 275 o.write("," + x + "," + y + "," + z)
272 # for total 276
273 x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname) 277 x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname)
274 o.write("," + x + "," + y + "," + z + "\n") 278 o.write("," + x + "," + y + "," + z + "\n")
275 279
276 280
277 # for testing 281 # for testing
278 seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt" 282 seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt"