Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 110:ade5cf6fd2dc draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 02 Aug 2016 08:30:23 -0400 |
parents | 6add3e66f4fa |
children |
comparison
equal
deleted
inserted
replaced
109:0096cd454380 | 110:ade5cf6fd2dc |
---|---|
20 | 20 |
21 genedic = dict() | 21 genedic = dict() |
22 | 22 |
23 mutationdic = dict() | 23 mutationdic = dict() |
24 mutationMatcher = re.compile("^(.)(\d+).(.),?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?") | 24 mutationMatcher = re.compile("^(.)(\d+).(.),?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?") |
| | 25 NAMatchResult = (None, None, None, None, None, None, '') |
25 linecount = 0 | 26 linecount = 0 |
26 | 27 |
27 IDIndex = 0 | 28 IDIndex = 0 |
28 best_matchIndex = 0 | 29 best_matchIndex = 0 |
29 fr1Index = 0 | 30 fr1Index = 0 |
56 linecount += 1 | 57 linecount += 1 |
57 linesplt = line.split("\t") | 58 linesplt = line.split("\t") |
58 ID = linesplt[IDIndex] | 59 ID = linesplt[IDIndex] |
59 genedic[ID] = linesplt[best_matchIndex] | 60 genedic[ID] = linesplt[best_matchIndex] |
60 try: | 61 try: |
61 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] | 62 if linesplt[fr1Index] != "NA": |
62 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] | 63 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] |
63 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 64 else: |
64 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 65 mutationdic[ID + "_FR1"] = [] |
| | 66 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] if linesplt[cdr1Index] != "NA" else [] |
| | 67 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] if linesplt[fr2Index] != "NA" else [] |
| | 68 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] if linesplt[cdr2Index] != "NA" else [] |
65 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 69 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
66 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 70 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] if linesplt[fr3Index] != "NA" else [] |
67 except: | 71 except e: |
68 print linesplt | 72 print linesplt |
69 print linecount | 73 print linecount |
| | 74 print e |
70 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 75 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
71 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 76 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
72 | 77 |
73 cdr1Length = linesplt[cdr1LengthIndex] | 78 cdr1Length = linesplt[cdr1LengthIndex] |
74 cdr2Length = linesplt[cdr2LengthIndex] | 79 cdr2Length = linesplt[cdr2LengthIndex] |
77 cdr2LengthDic[ID] = int(cdr2Length) if cdr2Length != "X" else 0 | 82 cdr2LengthDic[ID] = int(cdr2Length) if cdr2Length != "X" else 0 |
78 | 83 |
79 IDlist += [ID] | 84 IDlist += [ID] |
80 | 85 |
81 AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent | 86 AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent |
| | 87 if AALength < 60: |
| | 88 AALength = 64 |
82 | 89 |
83 AA_mutation = [0] * AALength | 90 AA_mutation = [0] * AALength |
84 AA_mutation_dic = {"ca": AA_mutation[:], "cg": AA_mutation[:], "cm": AA_mutation[:], "un": AA_mutation[:]} | 91 AA_mutation_dic = {"ca": AA_mutation[:], "cg": AA_mutation[:], "cm": AA_mutation[:], "un": AA_mutation[:]} |
85 AA_mutation_empty = AA_mutation[:] | 92 AA_mutation_empty = AA_mutation[:] |
86 | 93 |