changeset 30:7e44617c9ca4 draft

Uploaded
author davidvanzessen
date Thu, 09 Apr 2015 07:59:06 -0400
parents 57d197f149c3
children c623690e3b81
files mutation_analysis.py
diffstat 1 files changed, 24 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/mutation_analysis.py	Thu Apr 09 04:56:43 2015 -0400
+++ b/mutation_analysis.py	Thu Apr 09 07:59:06 2015 -0400
@@ -53,8 +53,6 @@
 		mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
 		mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
 		
-		print mutationdic[ID + "_FR1"]
-		
 		mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 		
 		IDlist += [ID]
@@ -65,8 +63,6 @@
 for mutation in mutationList:
 	if mutation[4]: #if non silent mutation
 		AA_mutation[int(mutation[4])] += 1
-		
-print AA_mutation
 
 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt"
 with open(aa_mutations_file, 'w') as o:
@@ -115,31 +111,42 @@
 		for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
 			if not z or z == "CDR3":
 				continue
-			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
-			if in_mutations > 0:
-				RGYWCount[ID] += 1.0 / in_mutations
+			mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW])
+			if mutations_in_motif > 0:
+				RGYWCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1
+				if in_other_motifs > 1:
+					print in_other_motifs, ID, "RGYW", x, y, ([(xother, yother, zother) for (xother, yother, zother) in WRCY if ((x <= int(xother) <= y) and (x <= int(yother) <= y))] + [(xother, yother, zother) for (xother, yother, zother) in WA if ((x <= int(xother) <= y) and (x <= int(yother) <= y))] + [(xother, yother, zother) for (xother, yother, zother) in TW if ((x <= int(xother) <= y) and (x <= int(yother) <= y))]) # debug: list the WRCY/WA/TW motifs lying entirely inside this RGYW motif (was printing (x,y,z) repeated)
 		
 		for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
 			if not z or z == "CDR3":
 				continue
-			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
-			if in_mutations > 0:
-				WRCYCount[ID] += 1.0 / in_mutations
+			mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW])
+			if mutations_in_motif > 0:
+				WRCYCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1
+				if in_other_motifs > 1:
+					print in_other_motifs, ID, "WRCY", x, y
 		
 		for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
 			if not z or z == "CDR3":
 				continue
-			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
-			if in_mutations > 0:
-				WACount[ID] += 1.0 / in_mutations
+			mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in TW])
+			if mutations_in_motif > 0:
+				WACount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1
+				if in_other_motifs > 1:
+					print in_other_motifs, ID, "WA", x, y
 		
 		for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
 			if not z or z == "CDR3":
 				continue
-			in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
-			if in_mutations > 0:
-				TWCount[ID] += 1.0 / in_mutations
-		
+			mutations_in_motif = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])
+			in_other_motifs = sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in RGYW]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WRCY]) + sum([((x <= int(xother) <= y) and (x <= int(yother) <= y)) for (xother, yother, zother) in WA])
+			if mutations_in_motif > 0:
+				TWCount[ID] += 1.0 / in_other_motifs if in_other_motifs > 0 else 1
+				if in_other_motifs > 1:
+					print in_other_motifs, ID, "TW", x, y
 
 
 directory = outfile[:outfile.rfind("/") + 1]