bctools: rm_spurious_events.py comparison

comparison rm_spurious_events.py @ 50:0b9aab6aaebf draft

Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40

author	rnateam
date	Tue, 26 Jan 2016 04:38:27 -0500
parents	de4ea3aa1090
children

comparison

equal deleted inserted replaced

-:303f6402a035
+:0b9aab6aaebf
 #!/usr/bin/env python
+import argparse
+import logging
+from subprocess import check_call
+import os
 tool_description = """
 Remove spurious events originating from errors in random sequence tags.
 This script compares all events sharing the same coordinates. Among each group
 of events the maximum number of PCR duplicates is determined. All events that
 are supported by less than 10 percent of this maximum count are removed.
-By default output is written to stdout.
 Input:
 * bed6 file containing crosslinking events with score field set to number of PCR
 duplicates
 * bed6 file with spurious crosslinking events removed, sorted by fields chrom,
 start, stop, strand
 Example usage:
 - remove spurious events from spurious.bed and write results to file cleaned.bed
-rm_spurious_events.py spurious.bed --out cleaned.bed
+rm_spurious_events.py spurious.bed --outfile cleaned.bed
 """
 epilog = """
 Author: Daniel Maticzka
 Copyright: 2015
 License: Apache
 Email: maticzkd@informatik.uni-freiburg.de
 Status: Testing
 """
-import argparse
-import logging
-from sys import stdout
-import pandas as pd
 class DefaultsRawDescriptionHelpFormatter(argparse.ArgumentDefaultsHelpFormatter,
 argparse.RawDescriptionHelpFormatter):
 # To join the behaviour of RawDescriptionHelpFormatter with that of ArgumentDefaultsHelpFormatter
 pass
-# avoid ugly python IOError when stdout output is piped into another program
-# and then truncated (such as piping to head)
-from signal import signal, SIGPIPE, SIG_DFL
-signal(SIGPIPE, SIG_DFL)
-# parse command line arguments
+def main():
-parser = argparse.ArgumentParser(description=tool_description,
+# parse command line arguments
-epilog=epilog,
+parser = argparse.ArgumentParser(description=tool_description,
-formatter_class=DefaultsRawDescriptionHelpFormatter)
+epilog=epilog,
-# positional arguments
+formatter_class=DefaultsRawDescriptionHelpFormatter)
-parser.add_argument(
+# positional arguments
-"events",
+parser.add_argument(
-help="Path to bed6 file containing alignments.")
+"events",
-# optional arguments
+help="Path to bed6 file containing alignments.")
-parser.add_argument(
+# optional arguments
-"-o", "--outfile",
+parser.add_argument(
-help="Write results to this file.")
+"-o", "--outfile",
-parser.add_argument(
+required=True,
-"-t", "--threshold",
+help="Write results to this file.")
-type=float,
+parser.add_argument(
-default=0.1,
+"-t", "--threshold",
-help="Threshold for spurious event removal."
+type=float,
-)
+default=0.1,
-# misc arguments
+help="Threshold for spurious event removal."
-parser.add_argument(
+)
-"-v", "--verbose",
+# misc arguments
-help="Be verbose.",
+parser.add_argument(
-action="store_true")
+"-v", "--verbose",
-parser.add_argument(
+help="Be verbose.",
-"-d", "--debug",
+action="store_true")
-help="Print lots of debugging information",
+parser.add_argument(
-action="store_true")
+"-d", "--debug",
-parser.add_argument(
+help="Print lots of debugging information",
-'--version',
+action="store_true")
-action='version',
+parser.add_argument(
-version='0.1.0')
+'--version',
+action='version',
+version='0.1.0')
 args = parser.parse_args()
 if args.debug:
 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
 elif args.verbose:
 logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
 else:
 logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")
 logging.info("Parsed arguments:")
 logging.info("  alignments: '{}'".format(args.events))
 logging.info("  threshold: '{}'".format(args.threshold))
 if args.outfile:
 logging.info("  outfile: enabled writing to file")
 logging.info("  outfile: '{}'".format(args.outfile))
 logging.info("")
 # check threshold parameter value
 if args.threshold < 0 or args.threshold > 1:
 raise ValueError("Threshold must be in [0,1].")
-# load alignments
+if not os.path.isfile(args.events):
-alns = pd.read_csv(
+raise Exception("ERROR: file '{}' not found.")
-args.events,
-sep="\t",
-names=["chrom", "start", "stop", "read_id", "score", "strand"])
-# remove all alignments that not enough PCR duplicates with respect to
+# remove spurious events: sort the input, filter it with rm_spurious_events.pl, re-sort and write to outfile
-# the group maximum
+syscall = "cat " + args.events + " | sort -k1,1V -k6,6 -k2,2n -k3,3 -k5,5nr | perl " + os.path.dirname(os.path.realpath(__file__)) + "/rm_spurious_events.pl --frac_max " + str(args.threshold) + "| sort -k1,1V -k2,2n -k3,3n -k6,6 -k4,4 -k5,5nr > " + args.outfile
-grouped = alns.groupby(['chrom', 'start', 'stop', 'strand'], group_keys=False)
+check_call(syscall, shell=True)
-alns_cleaned = grouped.apply(lambda g: g[g["score"] >= args.threshold * g["score"].max()])
-# write coordinates of crosslinking event alignments
-alns_cleaned_out = (open(args.outfile, "w") if args.outfile is not None else stdout)
+if __name__ == "__main__":
-alns_cleaned.to_csv(
+main()
-alns_cleaned_out,
-columns=['chrom', 'start', 'stop', 'read_id', 'score', 'strand'],
-sep="\t", index=False, header=False)
-alns_cleaned_out.close()

Mercurial > repos > rnateam > bctools

comparison rm_spurious_events.py @ 50:0b9aab6aaebf draft