comparison mutation_analysis.py @ 110:ade5cf6fd2dc draft

Uploaded
author davidvanzessen
date Tue, 02 Aug 2016 08:30:23 -0400
parents 6add3e66f4fa
children
comparison
equal deleted inserted replaced
109:0096cd454380 110:ade5cf6fd2dc
20 20
21 genedic = dict() 21 genedic = dict()
22 22
23 mutationdic = dict() 23 mutationdic = dict()
24 mutationMatcher = re.compile("^(.)(\d+).(.),?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?") 24 mutationMatcher = re.compile("^(.)(\d+).(.),?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?")
25 NAMatchResult = (None, None, None, None, None, None, '')
25 linecount = 0 26 linecount = 0
26 27
27 IDIndex = 0 28 IDIndex = 0
28 best_matchIndex = 0 29 best_matchIndex = 0
29 fr1Index = 0 30 fr1Index = 0
56 linecount += 1 57 linecount += 1
57 linesplt = line.split("\t") 58 linesplt = line.split("\t")
58 ID = linesplt[IDIndex] 59 ID = linesplt[IDIndex]
59 genedic[ID] = linesplt[best_matchIndex] 60 genedic[ID] = linesplt[best_matchIndex]
60 try: 61 try:
61 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] 62 if linesplt[fr1Index] != "NA":
62 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] 63 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
63 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] 64 else:
64 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] 65 mutationdic[ID + "_FR1"] = []
66 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] if linesplt[cdr1Index] != "NA" else []
67 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] if linesplt[fr2Index] != "NA" else []
68 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] if linesplt[cdr2Index] != "NA" else []
65 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] 69 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
66 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] 70 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] if linesplt[fr3Index] != "NA" else []
67 except: 71 except e:
68 print linesplt 72 print linesplt
69 print linecount 73 print linecount
74 print e
70 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 75 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
71 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 76 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
72 77
73 cdr1Length = linesplt[cdr1LengthIndex] 78 cdr1Length = linesplt[cdr1LengthIndex]
74 cdr2Length = linesplt[cdr2LengthIndex] 79 cdr2Length = linesplt[cdr2LengthIndex]
77 cdr2LengthDic[ID] = int(cdr2Length) if cdr2Length != "X" else 0 82 cdr2LengthDic[ID] = int(cdr2Length) if cdr2Length != "X" else 0
78 83
79 IDlist += [ID] 84 IDlist += [ID]
80 85
81 AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent 86 AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent
87 if AALength < 60:
88 AALength = 64
82 89
83 AA_mutation = [0] * AALength 90 AA_mutation = [0] * AALength
84 AA_mutation_dic = {"ca": AA_mutation[:], "cg": AA_mutation[:], "cm": AA_mutation[:], "un": AA_mutation[:]} 91 AA_mutation_dic = {"ca": AA_mutation[:], "cg": AA_mutation[:], "cm": AA_mutation[:], "un": AA_mutation[:]}
85 AA_mutation_empty = AA_mutation[:] 92 AA_mutation_empty = AA_mutation[:]
86 93