comparison mutation_analysis.py @ 43:7304b91757a8 draft

Uploaded
author davidvanzessen
date Tue, 03 Nov 2015 09:40:00 -0500
parents 7377bf7e632d
children 4b1bab1a9ad2
comparison
equal deleted inserted replaced
42:9afd8430de2c 43:7304b91757a8
30 cdr2Index = 0 30 cdr2Index = 0
31 fr3Index = 0 31 fr3Index = 0
32 first = True 32 first = True
33 IDlist = [] 33 IDlist = []
34 mutationList = [] 34 mutationList = []
35 mutationListByID = {}
35 36
36 with open(infile, 'r') as i: 37 with open(infile, 'r') as i:
37 for line in i: 38 for line in i:
38 if first: 39 if first:
39 linesplt = line.split("\t") 40 linesplt = line.split("\t")
40 IDIndex = linesplt.index("Sequence.ID") 41 IDIndex = linesplt.index("Sequence.ID")
41 best_matchIndex = linesplt.index("best_match") 42 best_matchIndex = linesplt.index("best_match")
42 fr1Index = linesplt.index("FR1.IMGT") 43 fr1Index = linesplt.index("FR1.IMGT")
43 cdr1Index = linesplt.index("CDR1.IMGT") 44 cdr1Index = linesplt.index("CDR1.IMGT")
44 fr2Index = linesplt.index("FR2.IMGT") 45 fr2Index = linesplt.index("FR2.IMGT")
45 cdr2Index = linesplt.index("CDR2.IMGT") 46 cdr2Index = linesplt.index("CDR2.IMGT")
46 fr3Index = linesplt.index("FR3.IMGT") 47 fr3Index = linesplt.index("FR3.IMGT")
47 first = False 48 first = False
48 continue 49 continue
49 linecount += 1 50 linecount += 1
50 linesplt = line.split("\t") 51 linesplt = line.split("\t")
51 ID = linesplt[IDIndex] 52 ID = linesplt[IDIndex]
52 genedic[ID] = linesplt[best_matchIndex] 53 genedic[ID] = linesplt[best_matchIndex]
53 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if 54 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if
54 x] if include_fr1 else [] 55 x] if include_fr1 else []
55 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] 56 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
56 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] 57 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
57 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] 58 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
58 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] 59 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
59 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] 60 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
60 61
61 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ 62 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
62 ID + "_CDR2"] + mutationdic[ID + "_FR3"] 63 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
63 64
64 IDlist += [ID] 65 IDlist += [ID]
65 66
66 AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[ 67 AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent
67 4]) + 1) # [4] is the position of the AA mutation, None if silent 68 AA_mutation_empty = AA_mutation[:]
68 69
69 for mutation in mutationList: 70 aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt"
70 if mutation[4]: # if non silent mutation 71 with open(aa_mutations_by_id_file, 'w') as o:
71 AA_mutation[int(mutation[4])] += 1 72 for ID in mutationListByID.keys():
73 AA_mutation_for_ID = AA_mutation_empty[:]
74 for mutation in mutationListByID[ID]:
75 if mutation[4]:
76 AA_mutation[int(mutation[4])] += 1
77 AA_mutation_for_ID[int(mutation[4])] += 1
78 o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n")
79
80
81
82 #for mutation in mutationList:
83 # if mutation[4]: # if non silent mutation
84 # AA_mutation[int(mutation[4])] += 1
72 85
73 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" 86 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt"
74 with open(aa_mutations_file, 'w') as o: 87 with open(aa_mutations_file, 'w') as o:
75 o.write(",".join([str(x) for x in AA_mutation]) + "\n") 88 o.write(",".join([str(x) for x in AA_mutation]) + "\n")
76 89