# HG changeset patch # User davidvanzessen # Date 1446561600 18000 # Node ID 7304b91757a841536ef619946019dbb82fc2274d # Parent 9afd8430de2cadef1b3b6252ffd201a053f59ce8 Uploaded diff -r 9afd8430de2c -r 7304b91757a8 mutation_analysis.py --- a/mutation_analysis.py Mon Nov 02 09:48:00 2015 -0500 +++ b/mutation_analysis.py Tue Nov 03 09:40:00 2015 -0500 @@ -32,43 +32,56 @@ first = True IDlist = [] mutationList = [] +mutationListByID = {} with open(infile, 'r') as i: - for line in i: - if first: - linesplt = line.split("\t") - IDIndex = linesplt.index("Sequence.ID") - best_matchIndex = linesplt.index("best_match") - fr1Index = linesplt.index("FR1.IMGT") - cdr1Index = linesplt.index("CDR1.IMGT") - fr2Index = linesplt.index("FR2.IMGT") - cdr2Index = linesplt.index("CDR2.IMGT") - fr3Index = linesplt.index("FR3.IMGT") - first = False - continue - linecount += 1 - linesplt = line.split("\t") - ID = linesplt[IDIndex] - genedic[ID] = linesplt[best_matchIndex] - mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if - x] if include_fr1 else [] - mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] - mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] - mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] - mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] - mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] + for line in i: + if first: + linesplt = line.split("\t") + IDIndex = linesplt.index("Sequence.ID") + best_matchIndex = linesplt.index("best_match") + fr1Index = linesplt.index("FR1.IMGT") + cdr1Index = linesplt.index("CDR1.IMGT") + fr2Index = linesplt.index("FR2.IMGT") + cdr2Index = linesplt.index("CDR2.IMGT") + fr3Index = linesplt.index("FR3.IMGT") + first = False + continue + linecount += 1 + linesplt = line.split("\t") + ID = linesplt[IDIndex] + genedic[ID] = linesplt[best_matchIndex] + mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if + x] if include_fr1 else [] + mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] + mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] + mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] + mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] - mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ - ID + "_CDR2"] + mutationdic[ID + "_FR3"] + mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] + mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] - IDlist += [ID] + IDlist += [ID] + +AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent +AA_mutation_empty = AA_mutation[:] -AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[ - 4]) + 1) # [4] is the position of the AA mutation, None if silent +aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt" +with open(aa_mutations_by_id_file, 'w') as o: + for ID in mutationListByID.keys(): + AA_mutation_for_ID = AA_mutation_empty[:] + for mutation in mutationListByID[ID]: + if mutation[4]: + AA_mutation[int(mutation[4])] += 1 + AA_mutation_for_ID[int(mutation[4])] += 1 + o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n") -for mutation in mutationList: - if mutation[4]: # if non silent mutation - AA_mutation[int(mutation[4])] += 1 + + +#for mutation in mutationList: +# if mutation[4]: # if non silent mutation +# AA_mutation[int(mutation[4])] += 1 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" with open(aa_mutations_file, 'w') as o: diff -r 9afd8430de2c -r 7304b91757a8 wrapper.sh --- a/wrapper.sh Mon Nov 02 09:48:00 2015 -0500 +++ b/wrapper.sh Tue Nov 03 09:40:00 2015 -0500 @@ -89,7 +89,7 @@ fi done < $outdir/result.txt echo "" >> $output -echo "unmatched
motif per sequence
all data
mutations by id
" >> $output +echo "unmatched
motif per sequence
all data
mutations by id
AA mutations location by id
" >> $output echo "
" >> $output