diff mutation_analysis.py @ 98:5ffbf40cdd4b draft

Uploaded
author davidvanzessen
date Thu, 16 Jun 2016 05:05:47 -0400
parents 07f7da724a77
children 603a10976e9c
line wrap: on
line diff
--- a/mutation_analysis.py	Wed Jun 15 04:48:41 2016 -0400
+++ b/mutation_analysis.py	Thu Jun 16 05:05:47 2016 -0400
@@ -161,12 +161,12 @@
 	sys.exit()
 
 hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)")
-RGYWCount = {g: 0 for g in genes}
-WRCYCount = {g: 0 for g in genes}
-WACount = {g: 0 for g in genes}
-TWCount = {g: 0 for g in genes}
+RGYWCount = {}
+WRCYCount = {}
+WACount = {}
+TWCount = {}
 
-IDIndex = 0
+#IDIndex = 0
 ataIndex = 0
 tatIndex = 0
 aggctatIndex = 0
@@ -185,6 +185,8 @@
 		linesplt = line.split("\t")
 		gene = linesplt[best_matchIndex]
 		ID = linesplt[IDIndex]
+		if ID == "ca2":
+			print linesplt
 		RGYW = [(int(x), int(y), z) for (x, y, z) in
 				[hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]]
 		WRCY = [(int(x), int(y), z) for (x, y, z) in
@@ -249,12 +251,14 @@
 def get_xyz(lst, gene, f, fname):
 	x = int(round(f(lst)))
 	y = valuedic[gene + "_" + fname]
-	z = str(round(x / float(valuedic[gene + "_" + fname]) * 100, 1)) if valuedic[gene + "_" + fname] != 0 else "0"
+	z = str(round(x / float(y) * 100, 1)) if y != 0 else "0"
 	return (str(x), str(y), z)
 
 dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount}
 arr = ["RGYW", "WRCY", "WA", "TW"]
 
+geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes}
+
 for fname in funcs.keys():
 	func = funcs[fname]
 	foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt"
@@ -263,14 +267,14 @@
 			o.write(typ + " (%)")
 			curr = dic[typ]
 			for gene in genes:
-				geneMatcher = re.compile("^" + gene + ".*")
+				geneMatcher = geneMatchers[gene] #re.compile("^" + gene + ".*") #recompile every loop....
 				if valuedic[gene + "_" + fname] is 0:
 					o.write(",0,0,0")
 				else:
 					x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname)
 					o.write("," + x + "," + y + "," + z)
-			# for total
-			x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname)
+
+			x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname)
 			o.write("," + x + "," + y + "," + z + "\n")