Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 43:7304b91757a8 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 03 Nov 2015 09:40:00 -0500 |
parents | 7377bf7e632d |
children | 4b1bab1a9ad2 |
comparison
equal
deleted
inserted
replaced
42:9afd8430de2c | 43:7304b91757a8 |
---|---|
30 cdr2Index = 0 | 30 cdr2Index = 0 |
31 fr3Index = 0 | 31 fr3Index = 0 |
32 first = True | 32 first = True |
33 IDlist = [] | 33 IDlist = [] |
34 mutationList = [] | 34 mutationList = [] |
| | 35 mutationListByID = {} |
35 | 36 |
36 with open(infile, 'r') as i: | 37 with open(infile, 'r') as i: |
37 for line in i: | 38 for line in i: |
38 if first: | 39 if first: |
39 linesplt = line.split("\t") | 40 linesplt = line.split("\t") |
40 IDIndex = linesplt.index("Sequence.ID") | 41 IDIndex = linesplt.index("Sequence.ID") |
41 best_matchIndex = linesplt.index("best_match") | 42 best_matchIndex = linesplt.index("best_match") |
42 fr1Index = linesplt.index("FR1.IMGT") | 43 fr1Index = linesplt.index("FR1.IMGT") |
43 cdr1Index = linesplt.index("CDR1.IMGT") | 44 cdr1Index = linesplt.index("CDR1.IMGT") |
44 fr2Index = linesplt.index("FR2.IMGT") | 45 fr2Index = linesplt.index("FR2.IMGT") |
45 cdr2Index = linesplt.index("CDR2.IMGT") | 46 cdr2Index = linesplt.index("CDR2.IMGT") |
46 fr3Index = linesplt.index("FR3.IMGT") | 47 fr3Index = linesplt.index("FR3.IMGT") |
47 first = False | 48 first = False |
48 continue | 49 continue |
49 linecount += 1 | 50 linecount += 1 |
50 linesplt = line.split("\t") | 51 linesplt = line.split("\t") |
51 ID = linesplt[IDIndex] | 52 ID = linesplt[IDIndex] |
52 genedic[ID] = linesplt[best_matchIndex] | 53 genedic[ID] = linesplt[best_matchIndex] |
53 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if | 54 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if |
54 x] if include_fr1 else [] | 55 x] if include_fr1 else [] |
55 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] | 56 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] |
56 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 57 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
57 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 58 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
58 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 59 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
59 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 60 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
60 | 61 |
61 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ | 62 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
62 ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 63 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
63 | 64 |
64 IDlist += [ID] | 65 IDlist += [ID] |
65 | 66 |
66 AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[ | 67 AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent |
67 4]) + 1) # [4] is the position of the AA mutation, None if silent | 68 AA_mutation_empty = AA_mutation[:] |
68 | 69 |
69 for mutation in mutationList: | 70 aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt" |
70 if mutation[4]: # if non silent mutation | 71 with open(aa_mutations_by_id_file, 'w') as o: |
71 AA_mutation[int(mutation[4])] += 1 | 72 for ID in mutationListByID.keys(): |
| | 73 AA_mutation_for_ID = AA_mutation_empty[:] |
| | 74 for mutation in mutationListByID[ID]: |
| | 75 if mutation[4]: |
| | 76 AA_mutation[int(mutation[4])] += 1 |
| | 77 AA_mutation_for_ID[int(mutation[4])] += 1 |
| | 78 o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n") |
| | 79 |
| | 80 |
| | 81 |
| | 82 #for mutation in mutationList: |
| | 83 # if mutation[4]: # if non silent mutation |
| | 84 # AA_mutation[int(mutation[4])] += 1 |
72 | 85 |
73 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" | 86 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" |
74 with open(aa_mutations_file, 'w') as o: | 87 with open(aa_mutations_file, 'w') as o: |
75 o.write(",".join([str(x) for x in AA_mutation]) + "\n") | 88 o.write(",".join([str(x) for x in AA_mutation]) + "\n") |
76 | 89 |