Mercurial > repos > rnateam > bctools
comparison remove_tail.py @ 2:de4ea3aa1090 draft
Uploaded
author | rnateam |
---|---|
date | Thu, 22 Oct 2015 10:26:45 -0400 |
parents | |
children | 0b9aab6aaebf |
comparison
equal
deleted
inserted
replaced
1:ae0f58d3318f | 2:de4ea3aa1090 |
---|---|
1 #!/usr/bin/env python | |
2 | |
# Help text shown at the top of the --help output; handed to argparse as the
# parser description. The usage example spells out the full option name so it
# does not depend on argparse's prefix abbreviation of long options.
tool_description = """
Remove a certain number of nucleotides from the 3'-tails of sequences in fastq
format.

Example usage:
- remove the last 7 nucleotides from file input.fastq, write result to file
output.fastq:
remove_tail.py input.fastq 7 --outfile output.fastq
"""

# Author, license and status details; handed to argparse as the help epilog.
epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""
20 | |
21 import argparse | |
22 import logging | |
23 from sys import stdout | |
24 from Bio.SeqIO.QualityIO import FastqGeneralIterator | |
25 | |
26 # avoid ugly python IOError when stdout output is piped into another program | |
27 # and then truncated (such as piping to head) | |
28 from signal import signal, SIGPIPE, SIG_DFL | |
29 signal(SIGPIPE, SIG_DFL) | |
30 | |
# Build the command line interface. RawDescriptionHelpFormatter keeps the
# line breaks of the description and epilog texts exactly as written.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=tool_description,
    epilog=epilog)

# required, positional
parser.add_argument("infile", help="Path to fastq file.")
parser.add_argument("length", type=int, help="Remove this many nts.")

# optional switches
parser.add_argument("-o", "--outfile", help="Write results to this file.")
parser.add_argument("-v", "--verbose", action="store_true", help="Be verbose.")
parser.add_argument(
    "-d", "--debug",
    action="store_true",
    help="Print lots of debugging information")
parser.add_argument("--version", action="version", version="0.1.0")

args = parser.parse_args()

# Configure logging once. --debug takes precedence over --verbose; when
# neither flag is given no level is passed, so the logging default applies.
log_options = {"format": "%(filename)s - %(levelname)s - %(message)s"}
if args.debug:
    log_options["level"] = logging.DEBUG
    log_options["format"] = "%(asctime)s - %(filename)s - %(levelname)s - %(message)s"
elif args.verbose:
    log_options["level"] = logging.INFO
logging.basicConfig(**log_options)

# Echo the parsed arguments for the record (emitted at INFO level).
logging.info("Parsed arguments:")
logging.info(" infile: '{}'".format(args.infile))
logging.info(" length: '{}'".format(args.length))
if args.outfile:
    logging.info(" outfile: enabled writing to file")
    logging.info(" outfile: '{}'".format(args.outfile))
logging.info("")
74 | |
# check length parameter; zero is accepted and leaves every read unchanged
if args.length < 0:
    raise ValueError("Length must be a non-negative integer, is '{}'.".format(args.length))

# remove tail
# Output goes to --outfile when given, otherwise to stdout. The input file is
# opened in the same with statement so its handle is closed once all reads
# have been processed.
with (open(args.outfile, "w") if args.outfile is not None else stdout) as samout, \
        open(args.infile) as fastq_in:
    for header, seq, qual in FastqGeneralIterator(fastq_in):

        # if removing tail would lead to an empty sequence,
        # set sequence to a single N to keep fastq synchronized
        if len(seq) <= args.length:
            # log the original read before it is replaced by the placeholder
            logging.debug("read '{}' was too short to remove full tail".format(header))
            logging.debug("seq: {}".format(seq))
            logging.debug("len(seq): {}".format(len(seq)))
            seq = "N"
            qual = "B"
        else:
            # Slice with an explicit end index: a negative-offset slice
            # ([0:-length]) would yield an empty string for length == 0.
            keep = len(seq) - args.length
            seq = seq[:keep]
            qual = qual[:keep]

        samout.write("@%s\n%s\n+\n%s\n" % (header, seq, qual))