# HG changeset patch # User greg # Date 1460647137 14400 # Node ID 97fb2d36c482c8d3ef4a70c935cdd02383a07975 # Parent a64feed0d508d4711e1dc0257ee334a01fda876e Uploaded diff -r a64feed0d508 -r 97fb2d36c482 fimo_wrapper.py --- a/fimo_wrapper.py Thu Apr 14 09:47:22 2016 -0400 +++ b/fimo_wrapper.py Thu Apr 14 11:18:57 2016 -0400 @@ -119,18 +119,17 @@ gff_file = os.path.join(args.output_path, 'fimo.gff') if args.remove_duplicate_coords == 'yes': tmp_stderr = tempfile.NamedTemporaryFile() - # Sort GFF output by a combination of: score, start and coordinate. - # The output file is specified by -o FILE, and this operation is - # guaranteed safe (the file is read before being overwritten for output). - cmd = 'sort -k6,6n -k4,4n -o %s %s' % (gff_file, gff_file) + # Identify and eliminate identical motif occurrences. These + # are identical if the combination of chrom, start, end and + # motif id are identical. + cmd = 'sort -k1,1 -k4,4n -k5,5n -k9.1,9.6 -u -o %s %s' % (gff_file, gff_file) proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True) returncode = proc.wait() if returncode != 0: stderr = get_stderr(tmp_stderr) stop_err(stderr) - # Sort by chromosome id, identifying and eliminating identical - # motif occurrences. - cmd = 'sort -k1,1 -o %s %s' % (gff_file, gff_file) + # Sort GFF output by a combination of score, start and chrom. + cmd = 'sort -k6,6n -k4,4n -k1,1 -o %s %s' % (gff_file, gff_file) proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True) returncode = proc.wait() if returncode != 0: