Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 43:7304b91757a8 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 03 Nov 2015 09:40:00 -0500 |
| parents | 7377bf7e632d |
| children | 4b1bab1a9ad2 |
comparison
equal
deleted
inserted
replaced
| 42:9afd8430de2c | 43:7304b91757a8 |
|---|---|
| 30 cdr2Index = 0 | 30 cdr2Index = 0 |
| 31 fr3Index = 0 | 31 fr3Index = 0 |
| 32 first = True | 32 first = True |
| 33 IDlist = [] | 33 IDlist = [] |
| 34 mutationList = [] | 34 mutationList = [] |
| | 35 mutationListByID = {} |
| 35 | 36 |
| 36 with open(infile, 'r') as i: | 37 with open(infile, 'r') as i: |
| 37 for line in i: | 38 for line in i: |
| 38 if first: | 39 if first: |
| 39 linesplt = line.split("\t") | 40 linesplt = line.split("\t") |
| 40 IDIndex = linesplt.index("Sequence.ID") | 41 IDIndex = linesplt.index("Sequence.ID") |
| 41 best_matchIndex = linesplt.index("best_match") | 42 best_matchIndex = linesplt.index("best_match") |
| 42 fr1Index = linesplt.index("FR1.IMGT") | 43 fr1Index = linesplt.index("FR1.IMGT") |
| 43 cdr1Index = linesplt.index("CDR1.IMGT") | 44 cdr1Index = linesplt.index("CDR1.IMGT") |
| 44 fr2Index = linesplt.index("FR2.IMGT") | 45 fr2Index = linesplt.index("FR2.IMGT") |
| 45 cdr2Index = linesplt.index("CDR2.IMGT") | 46 cdr2Index = linesplt.index("CDR2.IMGT") |
| 46 fr3Index = linesplt.index("FR3.IMGT") | 47 fr3Index = linesplt.index("FR3.IMGT") |
| 47 first = False | 48 first = False |
| 48 continue | 49 continue |
| 49 linecount += 1 | 50 linecount += 1 |
| 50 linesplt = line.split("\t") | 51 linesplt = line.split("\t") |
| 51 ID = linesplt[IDIndex] | 52 ID = linesplt[IDIndex] |
| 52 genedic[ID] = linesplt[best_matchIndex] | 53 genedic[ID] = linesplt[best_matchIndex] |
| 53 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if | 54 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if |
| 54 x] if include_fr1 else [] | 55 x] if include_fr1 else [] |
| 55 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] | 56 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] |
| 56 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 57 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
| 57 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 58 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
| 58 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 59 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
| 59 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 60 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
| 60 | 61 |
| 61 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ | 62 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
| 62 ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 63 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
| 63 | 64 |
| 64 IDlist += [ID] | 65 IDlist += [ID] |
| 65 | 66 |
| 66 AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[ | 67 AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent |
| 67 4]) + 1) # [4] is the position of the AA mutation, None if silent | 68 AA_mutation_empty = AA_mutation[:] |
| 68 | 69 |
| 69 for mutation in mutationList: | 70 aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt" |
| 70 if mutation[4]: # if non silent mutation | 71 with open(aa_mutations_by_id_file, 'w') as o: |
| 71 AA_mutation[int(mutation[4])] += 1 | 72 for ID in mutationListByID.keys(): |
| | 73 AA_mutation_for_ID = AA_mutation_empty[:] |
| | 74 for mutation in mutationListByID[ID]: |
| | 75 if mutation[4]: |
| | 76 AA_mutation[int(mutation[4])] += 1 |
| | 77 AA_mutation_for_ID[int(mutation[4])] += 1 |
| | 78 o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n") |
| | 79 |
| | 80 |
| | 81 |
| | 82 #for mutation in mutationList: |
| | 83 # if mutation[4]: # if non silent mutation |
| | 84 # AA_mutation[int(mutation[4])] += 1 |
| 72 | 85 |
| 73 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" | 86 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" |
| 74 with open(aa_mutations_file, 'w') as o: | 87 with open(aa_mutations_file, 'w') as o: |
| 75 o.write(",".join([str(x) for x in AA_mutation]) + "\n") | 88 o.write(",".join([str(x) for x in AA_mutation]) + "\n") |
| 76 | 89 |
