Mercurial > repos > davidvanzessen > mutation_analysis
diff mutation_analysis.py @ 98:5ffbf40cdd4b draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 16 Jun 2016 05:05:47 -0400 |
parents | 07f7da724a77 |
children | 603a10976e9c |
line wrap: on
line diff
--- a/mutation_analysis.py Wed Jun 15 04:48:41 2016 -0400 +++ b/mutation_analysis.py Thu Jun 16 05:05:47 2016 -0400 @@ -161,12 +161,12 @@ sys.exit() hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)") -RGYWCount = {g: 0 for g in genes} -WRCYCount = {g: 0 for g in genes} -WACount = {g: 0 for g in genes} -TWCount = {g: 0 for g in genes} +RGYWCount = {} +WRCYCount = {} +WACount = {} +TWCount = {} -IDIndex = 0 +#IDIndex = 0 ataIndex = 0 tatIndex = 0 aggctatIndex = 0 @@ -185,6 +185,8 @@ linesplt = line.split("\t") gene = linesplt[best_matchIndex] ID = linesplt[IDIndex] + if ID == "ca2": + print linesplt RGYW = [(int(x), int(y), z) for (x, y, z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] WRCY = [(int(x), int(y), z) for (x, y, z) in @@ -249,12 +251,14 @@ def get_xyz(lst, gene, f, fname): x = int(round(f(lst))) y = valuedic[gene + "_" + fname] - z = str(round(x / float(valuedic[gene + "_" + fname]) * 100, 1)) if valuedic[gene + "_" + fname] != 0 else "0" + z = str(round(x / float(y) * 100, 1)) if y != 0 else "0" return (str(x), str(y), z) dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount} arr = ["RGYW", "WRCY", "WA", "TW"] +geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes} + for fname in funcs.keys(): func = funcs[fname] foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt" @@ -263,14 +267,14 @@ o.write(typ + " (%)") curr = dic[typ] for gene in genes: - geneMatcher = re.compile("^" + gene + ".*") + geneMatcher = geneMatchers[gene] #re.compile("^" + gene + ".*") #recompile every loop.... if valuedic[gene + "_" + fname] is 0: o.write(",0,0,0") else: x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname) o.write("," + x + "," + y + "," + z) - # for total - x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname) + + x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname) o.write("," + x + "," + y + "," + z + "\n")