diff merge_deletions.py @ 0:ff6683f8e9a1 draft

planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
author mvdbeek
date Mon, 23 Jan 2017 10:05:02 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_deletions.py	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,59 @@
+#! /usr/bin/env python
+
+import os
+from argparse import ArgumentParser
+
+def create_master_dict(sample, fname):
+    with open(fname, 'r') as masterfile:
+        x = 0
+        master_dict = {}
+        for line in masterfile:
+            field = line.rsplit()
+            if not line[0] == 'ins_chr':
+                coords = '\t'.join(field[:5])
+                master_dict[x] = {'coords': coords, 'accessions': [sample]}
+                x += 1
+        return master_dict
+
+
+def merge_deletions(master, fname, sample):
+    with open(fname, 'r') as infile:
+        for line in infile:
+            field = line.rsplit()
+            coords = '\t'.join(field[:5])
+            i = len(master)-1
+            x = 0
+            while x <= i:
+                if master[x]['coords'] == coords:
+                    master[x]['accessions'].append(sample)
+                    break
+                elif x == i:
+                    master[x+1] = {'coords': coords, 'accessions': [sample]}
+                    break
+                else:
+                    x += 1
+
+
+def save_deletions(master, outf):
+    with open(outf, 'w+') as outfile:
+        for key, value in master.iteritems():
+            accessions = set(value['accessions'])
+            outfile.write('{c}\t{a}\n'.format(c=value['coords'], a=','.join(accessions)))
+
+def get_name_from_filename(filename):
+    return os.path.basename(filename).rsplit('.', 1)[0]
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description='Merge TE deletions calls')
+    parser.add_argument('-o', '--output', help="File to write merged deletions to.", required=True)
+    parser.add_argument('-i', '--input', help='all files that should be merged', nargs="+", required=True)
+    options = parser.parse_args()
+
+    first_file = options.input[0]
+    first_samplename = get_name_from_filename(first_file)
+    master_dictionary = create_master_dict(first_samplename, first_file)
+    for filename in options.input[1:]:
+        samplename = get_name_from_filename(filename)
+        merge_deletions(master_dictionary, filename, samplename)
+    save_deletions(master_dictionary, options.output)