comparison mutation_analysis.py @ 26:2433a1e110e1 draft

Uploaded
author davidvanzessen
date Wed, 08 Apr 2015 05:25:52 -0400
parents d84c9791d8c4
children 362ef99f9405
comparison
equal deleted inserted replaced
25:58a62d2c0377 26:2433a1e110e1
24 cdr1Index = 0 24 cdr1Index = 0
25 fr2Index = 0 25 fr2Index = 0
26 cdr2Index = 0 26 cdr2Index = 0
27 fr3Index = 0 27 fr3Index = 0
28 first=True 28 first=True
29 IDlist = []
30 mutationList = []
31
29 with open(infile, 'r') as i: 32 with open(infile, 'r') as i:
30 for line in i: 33 for line in i:
31 if first: 34 if first:
32 linesplt = line.split("\t") 35 linesplt = line.split("\t")
33 IDIndex = linesplt.index("Sequence.ID") 36 IDIndex = linesplt.index("Sequence.ID")
47 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] 50 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
48 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
49 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
50 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
51 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
55
56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
57
58 IDlist += [ID]
59
60
61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1)
62
63 for mutation in mutationList:
64 if mutation[4]: #if non silent mutation
65 AA_mutation[int(mutation[4])] += 1
66
67 print AA_mutation
68
69 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt"
70 with open(aa_mutations_file, 'w') as o:
71 o.write(",".join([str(x) for x in AA_mutation]) + "\n")
52 72
53 if linecount == 0: 73 if linecount == 0:
54 print "No data, exiting" 74 print "No data, exiting"
55 with open(outfile, 'w') as o: 75 with open(outfile, 'w') as o:
56 o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) 76 o.write("RGYW (%)," + ("0,0,0\n" * len(genes)))
70 ataIndex = 0 90 ataIndex = 0
71 tatIndex = 0 91 tatIndex = 0
72 aggctatIndex = 0 92 aggctatIndex = 0
73 atagcctIndex = 0 93 atagcctIndex = 0
74 first = True 94 first = True
75 IDlist = []
76 with open(infile, 'r') as i: 95 with open(infile, 'r') as i:
77 for line in i: 96 for line in i:
78 if first: 97 if first:
79 linesplt = line.split("\t") 98 linesplt = line.split("\t")
80 ataIndex = linesplt.index("X.a.t.a") 99 ataIndex = linesplt.index("X.a.t.a")
92 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] 111 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
93 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 112 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
94 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 113 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
95 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 114 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
96 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 115 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
97 IDlist += [ID] 116
98 117
99 118
100 directory = outfile[:outfile.rfind("/") + 1] 119 directory = outfile[:outfile.rfind("/") + 1]
101 value = 0 120 value = 0
102 valuedic = dict() 121 valuedic = dict()