Mercurial > repos > rnateam > bctools
comparison remove_tail.py @ 2:de4ea3aa1090 draft
Uploaded
| author | rnateam |
|---|---|
| date | Thu, 22 Oct 2015 10:26:45 -0400 |
| parents | |
| children | 0b9aab6aaebf |
comparison
equal
deleted
inserted
replaced
| 1:ae0f58d3318f | 2:de4ea3aa1090 |
|---|---|
#!/usr/bin/env python

tool_description = """
Remove a certain number of nucleotides from the 3'-tails of sequences in fastq
format.

Example usage:
- remove the last 7 nucleotides from file input.fastq, write result to file
output.fastq:
remove_tail.py input.fastq 7 --out output.fastq
"""

epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""

import argparse
import logging
from sys import stdout
from signal import signal, SIGPIPE, SIG_DFL


def _build_parser():
    """Return the argparse parser describing the command line interface."""
    parser = argparse.ArgumentParser(
        description=tool_description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # positional arguments
    parser.add_argument(
        "infile",
        help="Path to fastq file.")
    parser.add_argument(
        "length",
        type=int,
        help="Remove this many nts.")
    # optional arguments
    parser.add_argument(
        "-o", "--outfile",
        help="Write results to this file.")
    parser.add_argument(
        "-v", "--verbose",
        help="Be verbose.",
        action="store_true")
    parser.add_argument(
        "-d", "--debug",
        help="Print lots of debugging information",
        action="store_true")
    parser.add_argument(
        '--version',
        action='version',
        version='0.1.0')
    return parser


def _configure_logging(debug, verbose):
    """Set up the root logger; --debug takes precedence over --verbose."""
    if debug:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
    elif verbose:
        logging.basicConfig(
            level=logging.INFO,
            format="%(filename)s - %(levelname)s - %(message)s")
    else:
        logging.basicConfig(
            format="%(filename)s - %(levelname)s - %(message)s")


def trim_tail(seq, qual, length):
    """Remove `length` characters from the 3' end of a read.

    Args:
        seq: nucleotide sequence of the read.
        qual: quality string of the read.
        length: number of trailing characters to remove (>= 0).

    Returns:
        A `(seq, qual)` tuple. If trimming would leave nothing (the read is
        not longer than `length`), the placeholder `("N", "B")` is returned so
        that the record is kept and paired fastq files stay synchronized.
    """
    if len(seq) <= length:
        return "N", "B"
    # Compute the end index explicitly: a slice like seq[0:-length] would
    # return an empty string for length == 0 because -0 == 0.
    return seq[:len(seq) - length], qual[:len(qual) - length]


def write_trimmed(records, length, out):
    """Write tail-trimmed fastq records to `out`.

    Args:
        records: iterable of `(header, seq, qual)` tuples.
        length: number of trailing nucleotides to remove from each read.
        out: object with a `write` method that receives the fastq text.
    """
    for header, seq, qual in records:
        if len(seq) <= length:
            # Log the offending read before it is replaced by the placeholder.
            logging.debug("read '%s' was too short to remove full tail", header)
            logging.debug("seq: %s", seq)
            logging.debug("len(seq): %s", len(seq))
        seq, qual = trim_tail(seq, qual, length)
        out.write("@%s\n%s\n+\n%s\n" % (header, seq, qual))


def main():
    """Parse the command line and trim the 3'-tails of all reads.

    Raises:
        ValueError: if the requested length is negative.
    """
    # Imported here rather than at module level so that the helpers above can
    # be imported without Biopython being installed.
    from Bio.SeqIO.QualityIO import FastqGeneralIterator

    # avoid ugly python IOError when stdout output is piped into another
    # program and then truncated (such as piping to head)
    signal(SIGPIPE, SIG_DFL)

    args = _build_parser().parse_args()
    _configure_logging(args.debug, args.verbose)
    logging.info("Parsed arguments:")
    logging.info(" infile: '%s'", args.infile)
    logging.info(" length: '%s'", args.length)
    if args.outfile:
        logging.info(" outfile: enabled writing to file")
        logging.info(" outfile: '%s'", args.outfile)
    logging.info("")

    # check length parameter; zero is allowed and leaves reads untouched
    if args.length < 0:
        raise ValueError(
            "Length must be a non-negative integer, is '{}'.".format(
                args.length))

    # remove tail
    out = open(args.outfile, "w") if args.outfile is not None else stdout
    try:
        with open(args.infile) as fastq_in:
            write_trimmed(FastqGeneralIterator(fastq_in), args.length, out)
    finally:
        # Only close what was opened here; stdout belongs to the interpreter.
        if out is not stdout:
            out.close()


if __name__ == "__main__":
    main()
