# HG changeset patch
# User davidvanzessen
# Date 1404740710 14400
# Node ID f2c4c7151016c248062893fa58776ecbdfff5721
# Parent 07a23652bc2a9ebc7c490b4e48741779176e1d5e
Uploaded
diff -r 07a23652bc2a -r f2c4c7151016 experimental_design.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/experimental_design.py Mon Jul 07 09:45:10 2014 -0400
@@ -0,0 +1,52 @@
+import sys
+import pandas as pd
+
+def main():
+    """Merge parsed report files into one tab-separated file.
+
+    Arguments are read from sys.argv: the first is a sample id, the last is
+    the output path, and the one before the last is skipped (the tool wrapper
+    passes "--output" there). Of the arguments in between, one containing "/"
+    is read as a tab-separated report belonging to the most recently seen
+    sample id; any other argument starts a new sample id.
+    """
+    patients = {}
+    files = []
+    sample_id = sys.argv[1]
+    blast_files = 0
+    # The last two arguments are the output flag and the output path.
+    for arg in sys.argv[2:-2]:
+        if "/" not in arg:
+            # Not a path: store the finished sample and start the next one.
+            patients[sample_id] = files
+            files = []
+            sample_id = arg
+            continue
+        df = pd.read_csv(arg, sep="\t")
+        if "Functionality" in df.columns:
+            # Single .loc assignment; chained indexing may write to a temporary copy.
+            df.loc[df["Functionality"] != "productive", "VDJ Frame"] = "In-frame with stop codon"
+        else:
+            blast_files += 1
+        files.append(df)
+    patients[sample_id] = files
+    columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Functionality', 'V-REGION identity %', 'V-REGION identity nt', 'D-REGION reading frame', 'AA JUNCTION', 'Functionality comment', 'Sequence', 'FR1-IMGT', 'FR2-IMGT', 'FR3-IMGT', 'CDR3-IMGT', 'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', 'N1-REGION-nt nb', 'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', 'P5J-nt nb', '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb', '3D-REGION trimmed-nt nb', '5J-REGION trimmed-nt nb', u'Sample', u'Replicate']
+    if blast_files != 0:
+        # At least one report had no 'Functionality' column: use the shorter column list.
+        print("Has a parsed blastn file, using limited columns.")
+        columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Sample', u'Replicate']
+
+    frames = []
+    for patient_id, samples in patients.items():
+        # Reports of one sample are numbered from 1 in the order they were given.
+        for count, sample in enumerate(samples, 1):
+            sample['Sample'] = patient_id
+            sample['Replicate'] = str(count)
+            frames.append(sample[columns])
+    if not frames:
+        sys.exit("No report files were given, nothing to merge.")
+    # One concat instead of growing a frame with repeated DataFrame.append calls.
+    result = pd.concat(frames)
+    result.to_csv(sys.argv[-1], sep="\t", index=False, index_label="index")
+
+if __name__ == "__main__":
+    main()
diff -r 07a23652bc2a -r f2c4c7151016 experimental_design.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/experimental_design.xml Mon Jul 07 09:45:10 2014 -0400
@@ -0,0 +1,29 @@
+
+
+
+ experimental_design.py
+ #for $i, $f in enumerate($patients)
+ "$f.id"
+ #for $j, $g in enumerate($f.samples)
+ ${g.sample}
+ #end for
+
+ #end for
+ --output $out_file
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Step 3 of the Immune Repertoire tools, merges the parsed reports generated in step 2 into one file with a Sample ID.
+
+
+
diff -r 07a23652bc2a -r f2c4c7151016 igblastmerge.py
--- a/igblastmerge.py Tue Mar 25 06:59:26 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-import sys
-import pandas as pd
-
-def main():
- patients = {}
- files = []
- sample_id = sys.argv[1]
- imgt_files = 0
- blast_files = 0
- #organize files
- for arg in sys.argv[2:-2]:
- if arg.find("/") is -1:
- patients[sample_id] = files
- files = []
- sample_id = arg
- else:
- df = pd.read_csv(arg, sep="\t")
- if "Functionality" in list(df.columns.values):
- df["VDJ Frame"][df["Functionality"] != "productive"] = "In-frame with stop codon"
- imgt_files += 1
- else:
- blast_files += 1
- files.append(df)
- patients[sample_id] = files
- columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Functionality', 'V-REGION identity %', 'V-REGION identity nt', 'D-REGION reading frame', 'AA JUNCTION', 'Functionality comment', 'Sequence', 'FR1-IMGT', 'FR2-IMGT', 'FR3-IMGT', 'CDR3-IMGT', 'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', 'N1-REGION-nt nb', 'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', 'P5J-nt nb', '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb', '3D-REGION trimmed-nt nb', '5J-REGION trimmed-nt nb', u'Sample', u'Replicate']
- if blast_files is not 0:
- print "Has a parsed blastn file, using limited columns."
- columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Sample', u'Replicate']
-
- result = None
- for patient_id, samples in patients.iteritems():
- count = 1
- for sample in samples:
- sample['Sample'] = patient_id
- sample['Replicate'] = str(count)
- count += 1
- if result is None:
- result = sample[columns]
- else:
- result = result.append(sample[columns])
- result.to_csv(sys.argv[-1], sep="\t", index=False, index_label="index")
-
-if __name__ == "__main__":
- main()
diff -r 07a23652bc2a -r f2c4c7151016 igblastmerge.xml
--- a/igblastmerge.xml Tue Mar 25 06:59:26 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-
-
-
- igblastmerge.py
- #for $i, $f in enumerate($patients)
- "$f.id"
- #for $j, $g in enumerate($f.samples)
- ${g.sample}
- #end for
-
- #end for
- --output $out_file
-
-
-
-
-
-
-
-
-
-
-
-
-
- Step 3 of the Immune Repertoire tools, merges the parsed reports generated in step 2 into one file with an Sample ID.
-
-
-