Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 26:2433a1e110e1 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 08 Apr 2015 05:25:52 -0400 |
parents | d84c9791d8c4 |
children | 362ef99f9405 |
comparison
equal
deleted
inserted
replaced
25:58a62d2c0377 | 26:2433a1e110e1 |
---|---|
24 cdr1Index = 0 | 24 cdr1Index = 0 |
25 fr2Index = 0 | 25 fr2Index = 0 |
26 cdr2Index = 0 | 26 cdr2Index = 0 |
27 fr3Index = 0 | 27 fr3Index = 0 |
28 first=True | 28 first=True |
29 IDlist = [] | |
30 mutationList = [] | |
31 | |
29 with open(infile, 'r') as i: | 32 with open(infile, 'r') as i: |
30 for line in i: | 33 for line in i: |
31 if first: | 34 if first: |
32 linesplt = line.split("\t") | 35 linesplt = line.split("\t") |
33 IDIndex = linesplt.index("Sequence.ID") | 36 IDIndex = linesplt.index("Sequence.ID") |
47 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] | 50 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] |
48 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
49 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
50 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
51 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
55 | |
56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | |
57 | |
58 IDlist += [ID] | |
59 | |
60 | |
61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) | |
62 | |
63 for mutation in mutationList: | |
64 if mutation[4]: #if non silent mutation | |
65 AA_mutation[int(mutation[4])] += 1 | |
66 | |
67 print AA_mutation | |
68 | |
69 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" | |
70 with open(aa_mutations_file, 'w') as o: | |
71 o.write(",".join([str(x) for x in AA_mutation]) + "\n") | |
52 | 72 |
53 if linecount == 0: | 73 if linecount == 0: |
54 print "No data, exiting" | 74 print "No data, exiting" |
55 with open(outfile, 'w') as o: | 75 with open(outfile, 'w') as o: |
56 o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) | 76 o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) |
70 ataIndex = 0 | 90 ataIndex = 0 |
71 tatIndex = 0 | 91 tatIndex = 0 |
72 aggctatIndex = 0 | 92 aggctatIndex = 0 |
73 atagcctIndex = 0 | 93 atagcctIndex = 0 |
74 first = True | 94 first = True |
75 IDlist = [] | |
76 with open(infile, 'r') as i: | 95 with open(infile, 'r') as i: |
77 for line in i: | 96 for line in i: |
78 if first: | 97 if first: |
79 linesplt = line.split("\t") | 98 linesplt = line.split("\t") |
80 ataIndex = linesplt.index("X.a.t.a") | 99 ataIndex = linesplt.index("X.a.t.a") |
92 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 111 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
93 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 112 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
94 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 113 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
95 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 114 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
96 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 115 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
97 IDlist += [ID] | 116 |
98 | 117 |
99 | 118 |
100 directory = outfile[:outfile.rfind("/") + 1] | 119 directory = outfile[:outfile.rfind("/") + 1] |
101 value = 0 | 120 value = 0 |
102 valuedic = dict() | 121 valuedic = dict() |