changeset 2:97fb2d36c482 draft

Uploaded
author greg
date Thu, 14 Apr 2016 11:18:57 -0400
parents a64feed0d508
children ce656b846c8e
files fimo_wrapper.py
diffstat 1 files changed, 6 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/fimo_wrapper.py	Thu Apr 14 09:47:22 2016 -0400
+++ b/fimo_wrapper.py	Thu Apr 14 11:18:57 2016 -0400
@@ -119,18 +119,17 @@
 gff_file = os.path.join(args.output_path, 'fimo.gff')
 if args.remove_duplicate_coords == 'yes':
     tmp_stderr = tempfile.NamedTemporaryFile()
-    # Sort GFF output by a combination of: score, start and coordinate.
-    # The output file is specified by -o FILE, and this operation is
-    # guaranteed safe (the file is read before being overwritten for output).
-    cmd = 'sort -k6,6n -k4,4n -o %s %s' % (gff_file, gff_file)
+    # Identify and eliminate identical motif occurrences.  These
+    # are identical if the combination of chrom, start, end and
+    # motif id are identical.
+    cmd = 'sort -k1,1 -k4,4n -k5,5n -k9.1,9.6 -u -o %s %s' % (gff_file, gff_file)
     proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True)
     returncode = proc.wait()
     if returncode != 0:
         stderr = get_stderr(tmp_stderr)
         stop_err(stderr)
-    # Sort by chromosome id, identifying and eliminating identical
-    # motif occurrences.
-    cmd = 'sort -k1,1 -o %s %s' % (gff_file, gff_file)
+    # Sort GFF output by score, then start, then chrom.
+    cmd = 'sort -k6,6n -k4,4n -k1,1 -o %s %s' % (gff_file, gff_file)
     proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True)
     returncode = proc.wait()
     if returncode != 0: