changeset 28:362ef99f9405 draft

Uploaded
author davidvanzessen
date Wed, 08 Apr 2015 10:14:46 -0400
parents c9c95b96b7cc
children 57d197f149c3
files mutation_analysis.py
diffstat 1 files changed, 33 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/mutation_analysis.py	Wed Apr 08 10:14:38 2015 -0400
+++ b/mutation_analysis.py	Wed Apr 08 10:14:46 2015 -0400
@@ -53,12 +53,14 @@
 		mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
 		mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
 		
+		print mutationdic[ID + "_FR1"]
+		
 		mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 		
 		IDlist += [ID]
 
 
-AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1)
+AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent
 
 for mutation in mutationList:
 	if mutation[4]: #if non silent mutation
@@ -109,10 +111,34 @@
 		WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]]
 		WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]]
 		TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
-		RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
-		WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
-		WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
-		TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+		RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0
+		for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+			if not z or z == "CDR3":
+				continue
+			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			if in_mutations > 0:
+				RGYWCount[ID] += 1.0 / in_mutations
+		
+		for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+			if not z or z == "CDR3":
+				continue
+			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			if in_mutations > 0:
+				WRCYCount[ID] += 1.0 / in_mutations
+		
+		for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+			if not z or z == "CDR3":
+				continue
+			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			if in_mutations > 0:
+				WACount[ID] += 1.0 / in_mutations
+		
+		for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
+			if not z or z == "CDR3":
+				continue
+			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			if in_mutations > 0:
+				TWCount[ID] += 1.0 / in_mutations
 		
 
 
@@ -136,12 +162,12 @@
 			if valuedic[gene] is 0:
 				o.write(",0,0,0")
 			else:
-				x = sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]])
+				x = int(round(sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]])))
 				y = valuedic[gene]
 				z = str(round(x / float(valuedic[gene]) * 100, 1))
 				o.write("," + str(x) + "," + str(y) + "," + z)
 		#for total
-		x = sum([y for x,y in curr.iteritems()])
+		x = int(round(sum([y for x,y in curr.iteritems()])))
 		y = valuedic["total"]
 		z = str(round(x / float(valuedic["total"]) * 100, 1))
 		o.write("," + str(x) + "," + str(y) + "," + z + "\n")