Mercurial > repos > davidvanzessen > mutation_analysis
changeset 43:7304b91757a8 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 03 Nov 2015 09:40:00 -0500 |
parents | 9afd8430de2c |
children | 4b1bab1a9ad2 |
files | mutation_analysis.py wrapper.sh |
diffstat | 2 files changed, 45 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/mutation_analysis.py Mon Nov 02 09:48:00 2015 -0500 +++ b/mutation_analysis.py Tue Nov 03 09:40:00 2015 -0500 @@ -32,43 +32,56 @@ first = True IDlist = [] mutationList = [] +mutationListByID = {} with open(infile, 'r') as i: - for line in i: - if first: - linesplt = line.split("\t") - IDIndex = linesplt.index("Sequence.ID") - best_matchIndex = linesplt.index("best_match") - fr1Index = linesplt.index("FR1.IMGT") - cdr1Index = linesplt.index("CDR1.IMGT") - fr2Index = linesplt.index("FR2.IMGT") - cdr2Index = linesplt.index("CDR2.IMGT") - fr3Index = linesplt.index("FR3.IMGT") - first = False - continue - linecount += 1 - linesplt = line.split("\t") - ID = linesplt[IDIndex] - genedic[ID] = linesplt[best_matchIndex] - mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if - x] if include_fr1 else [] - mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] - mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] - mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] - mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] - mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] + for line in i: + if first: + linesplt = line.split("\t") + IDIndex = linesplt.index("Sequence.ID") + best_matchIndex = linesplt.index("best_match") + fr1Index = linesplt.index("FR1.IMGT") + cdr1Index = linesplt.index("CDR1.IMGT") + fr2Index = linesplt.index("FR2.IMGT") + cdr2Index = linesplt.index("CDR2.IMGT") + fr3Index = linesplt.index("FR3.IMGT") + first = False + continue + linecount += 1 + linesplt = line.split("\t") + ID = linesplt[IDIndex] + genedic[ID] = linesplt[best_matchIndex] + mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if + x] if 
include_fr1 else [] + mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] + mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] + mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] + mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] - mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ - ID + "_CDR2"] + mutationdic[ID + "_FR3"] + mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] + mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] - IDlist += [ID] + IDlist += [ID] + +AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent +AA_mutation_empty = AA_mutation[:] -AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[ - 4]) + 1) # [4] is the position of the AA mutation, None if silent +aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt" +with open(aa_mutations_by_id_file, 'w') as o: + for ID in mutationListByID.keys(): + AA_mutation_for_ID = AA_mutation_empty[:] + for mutation in mutationListByID[ID]: + if mutation[4]: + AA_mutation[int(mutation[4])] += 1 + AA_mutation_for_ID[int(mutation[4])] += 1 + o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n") -for mutation in mutationList: - if mutation[4]: # if non silent mutation - AA_mutation[int(mutation[4])] += 1 + + +#for mutation in mutationList: +# if mutation[4]: # if non silent 
mutation +# AA_mutation[int(mutation[4])] += 1 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" with open(aa_mutations_file, 'w') as o:
--- a/wrapper.sh Mon Nov 02 09:48:00 2015 -0500
+++ b/wrapper.sh Tue Nov 03 09:40:00 2015 -0500
@@ -89,7 +89,7 @@
 fi
 done < $outdir/result.txt
 echo "</table>" >> $output
-echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
+echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br /><a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
 echo "<img src='all.png'/><br />" >> $output