# HG changeset patch
# User davidvanzessen
# Date 1446561600 18000
# Node ID 7304b91757a841536ef619946019dbb82fc2274d
# Parent 9afd8430de2cadef1b3b6252ffd201a053f59ce8
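Uploaded: write per-sequence-ID amino-acid mutation position counts to aa_id_mutations.txt and add "AA mutations location by id" to the wrapper.sh output list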
diff -r 9afd8430de2c -r 7304b91757a8 mutation_analysis.py
--- a/mutation_analysis.py Mon Nov 02 09:48:00 2015 -0500
+++ b/mutation_analysis.py Tue Nov 03 09:40:00 2015 -0500
@@ -32,43 +32,56 @@
first = True
IDlist = []
mutationList = []
+mutationListByID = {}
with open(infile, 'r') as i:
- for line in i:
- if first:
- linesplt = line.split("\t")
- IDIndex = linesplt.index("Sequence.ID")
- best_matchIndex = linesplt.index("best_match")
- fr1Index = linesplt.index("FR1.IMGT")
- cdr1Index = linesplt.index("CDR1.IMGT")
- fr2Index = linesplt.index("FR2.IMGT")
- cdr2Index = linesplt.index("CDR2.IMGT")
- fr3Index = linesplt.index("FR3.IMGT")
- first = False
- continue
- linecount += 1
- linesplt = line.split("\t")
- ID = linesplt[IDIndex]
- genedic[ID] = linesplt[best_matchIndex]
- mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if
- x] if include_fr1 else []
- mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
- mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
- mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
- mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
- mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+ for line in i:
+ if first:
+ linesplt = line.split("\t")
+ IDIndex = linesplt.index("Sequence.ID")
+ best_matchIndex = linesplt.index("best_match")
+ fr1Index = linesplt.index("FR1.IMGT")
+ cdr1Index = linesplt.index("CDR1.IMGT")
+ fr2Index = linesplt.index("FR2.IMGT")
+ cdr2Index = linesplt.index("CDR2.IMGT")
+ fr3Index = linesplt.index("FR3.IMGT")
+ first = False
+ continue
+ linecount += 1
+ linesplt = line.split("\t")
+ ID = linesplt[IDIndex]
+ genedic[ID] = linesplt[best_matchIndex]
+ mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if
+ x] if include_fr1 else []
+ mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
+ mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
+ mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+ mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
+ mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
- mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[
- ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+ mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+ mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
- IDlist += [ID]
+ IDlist += [ID]
+
+AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent
+AA_mutation_empty = AA_mutation[:]
-AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[
- 4]) + 1) # [4] is the position of the AA mutation, None if silent
+aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt"
+with open(aa_mutations_by_id_file, 'w') as o:
+ for ID in mutationListByID.keys():
+ AA_mutation_for_ID = AA_mutation_empty[:]
+ for mutation in mutationListByID[ID]:
+ if mutation[4]:
+ AA_mutation[int(mutation[4])] += 1
+ AA_mutation_for_ID[int(mutation[4])] += 1
+ o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n")
-for mutation in mutationList:
- if mutation[4]: # if non silent mutation
- AA_mutation[int(mutation[4])] += 1
+
+
+#for mutation in mutationList:
+# if mutation[4]: # if non silent mutation
+# AA_mutation[int(mutation[4])] += 1
aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt"
with open(aa_mutations_file, 'w') as o:
diff -r 9afd8430de2c -r 7304b91757a8 wrapper.sh
--- a/wrapper.sh Mon Nov 02 09:48:00 2015 -0500
+++ b/wrapper.sh Tue Nov 03 09:40:00 2015 -0500
@@ -89,7 +89,7 @@
fi
done < $outdir/result.txt
echo "" >> $output
-echo "unmatched
motif per sequence
all data
mutations by id
" >> $output
+echo "unmatched
motif per sequence
all data
mutations by id
AA mutations location by id
" >> $output
echo "
" >> $output