changeset 43:7304b91757a8 draft

Uploaded
author davidvanzessen
date Tue, 03 Nov 2015 09:40:00 -0500
parents 9afd8430de2c
children 4b1bab1a9ad2
files mutation_analysis.py wrapper.sh
diffstat 2 files changed, 45 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/mutation_analysis.py	Mon Nov 02 09:48:00 2015 -0500
+++ b/mutation_analysis.py	Tue Nov 03 09:40:00 2015 -0500
@@ -32,43 +32,56 @@
 first = True
 IDlist = []
 mutationList = []
+mutationListByID = {}
 
 with open(infile, 'r') as i:
-    for line in i:
-        if first:
-            linesplt = line.split("\t")
-            IDIndex = linesplt.index("Sequence.ID")
-            best_matchIndex = linesplt.index("best_match")
-            fr1Index = linesplt.index("FR1.IMGT")
-            cdr1Index = linesplt.index("CDR1.IMGT")
-            fr2Index = linesplt.index("FR2.IMGT")
-            cdr2Index = linesplt.index("CDR2.IMGT")
-            fr3Index = linesplt.index("FR3.IMGT")
-            first = False
-            continue
-        linecount += 1
-        linesplt = line.split("\t")
-        ID = linesplt[IDIndex]
-        genedic[ID] = linesplt[best_matchIndex]
-        mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if
-                                    x] if include_fr1 else []
-        mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
-        mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
-        mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
-        mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
-        mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+	for line in i:
+		if first:
+			linesplt = line.split("\t")
+			IDIndex = linesplt.index("Sequence.ID")
+			best_matchIndex = linesplt.index("best_match")
+			fr1Index = linesplt.index("FR1.IMGT")
+			cdr1Index = linesplt.index("CDR1.IMGT")
+			fr2Index = linesplt.index("FR2.IMGT")
+			cdr2Index = linesplt.index("CDR2.IMGT")
+			fr3Index = linesplt.index("FR3.IMGT")
+			first = False
+			continue
+		linecount += 1
+		linesplt = line.split("\t")
+		ID = linesplt[IDIndex]
+		genedic[ID] = linesplt[best_matchIndex]
+		mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if
+									x] if include_fr1 else []
+		mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
+		mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
+		mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+		mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
+		mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
 
-        mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[
-            ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+		mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+		mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 
-        IDlist += [ID]
+		IDlist += [ID]
+
+AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1)  # [4] is the position of the AA mutation, None if silent
+AA_mutation_empty = AA_mutation[:]
 
-AA_mutation = [0] * (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[
-                             4]) + 1)  # [4] is the position of the AA mutation, None if silent
+aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt"
+with open(aa_mutations_by_id_file, 'w') as o:
+	for ID in mutationListByID.keys():
+		AA_mutation_for_ID = AA_mutation_empty[:]
+		for mutation in mutationListByID[ID]:
+			if mutation[4]:
+				AA_mutation[int(mutation[4])] += 1
+				AA_mutation_for_ID[int(mutation[4])] += 1
+		o.write(ID + "," + ",".join([str(x) for x in AA_mutation_for_ID]) + "\n")
 
-for mutation in mutationList:
-    if mutation[4]:  # if non silent mutation
-        AA_mutation[int(mutation[4])] += 1
+
+
+#for mutation in mutationList:
+#    if mutation[4]:  # if non silent mutation
+#        AA_mutation[int(mutation[4])] += 1
 
 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt"
 with open(aa_mutations_file, 'w') as o:
--- a/wrapper.sh	Mon Nov 02 09:48:00 2015 -0500
+++ b/wrapper.sh	Tue Nov 03 09:40:00 2015 -0500
@@ -89,7 +89,7 @@
 	fi
 done < $outdir/result.txt
 echo "</table>" >> $output
-echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
+echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br /><a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
 
 
 echo "<img src='all.png'/><br />" >> $output