Mercurial > repos > rnateam > bctools
comparison extract_bcs.py @ 7:bb59215dfd8f draft
Uploaded
author | rnateam |
---|---|
date | Tue, 10 Nov 2015 08:12:25 -0500 |
parents | e841de88235c |
children | 0b9aab6aaebf |
comparison
equal
deleted
inserted
replaced
6:1bfc5a5de843 | 7:bb59215dfd8f |
---|---|
6 By default output is written to stdout. | 6 By default output is written to stdout. |
7 | 7 |
8 Example usage: | 8 Example usage: |
9 - remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified | 9 - remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified |
10 FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa: | 10 FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa: |
11 fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fa | 11 fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fastq |
12 """ | 12 """ |
13 | 13 |
14 epilog = """ | 14 epilog = """ |
15 Author: Daniel Maticzka | 15 Author: Daniel Maticzka |
16 Copyright: 2015 | 16 Copyright: 2015 |
46 "-o", "--outfile", | 46 "-o", "--outfile", |
47 help="Write results to this file.") | 47 help="Write results to this file.") |
48 parser.add_argument( | 48 parser.add_argument( |
49 "-b", "--bcs", | 49 "-b", "--bcs", |
50 dest="out_bc_fasta", | 50 dest="out_bc_fasta", |
51 help="If set, barcodes are written to this file in FASTA format.") | 51 help="Write barcodes to this file in FASTQ format.") |
52 parser.add_argument( | |
53 "--fasta-barcodes", | |
54 dest="save_bcs_as_fa", | |
55 action="store_true", | |
56 help="Save extracted barcodes in FASTA format.") | |
52 parser.add_argument( | 57 parser.add_argument( |
53 "-a", "--add-bc-to-fastq", | 58 "-a", "--add-bc-to-fastq", |
54 dest="add_to_head", | 59 dest="add_to_head", |
55 help="If set, append extracted barcodes to the FASTQ headers.", | 60 help="Append extracted barcodes to the FASTQ headers.", |
56 action="store_true" | 61 action="store_true") |
57 ) | |
58 parser.add_argument( | 62 parser.add_argument( |
59 "-v", "--verbose", | 63 "-v", "--verbose", |
60 help="Be verbose.", | 64 help="Be verbose.", |
61 action="store_true") | 65 action="store_true") |
62 parser.add_argument( | 66 parser.add_argument( |
64 help="Print lots of debugging information", | 68 help="Print lots of debugging information", |
65 action="store_true") | 69 action="store_true") |
66 parser.add_argument( | 70 parser.add_argument( |
67 '--version', | 71 '--version', |
68 action='version', | 72 action='version', |
69 version='1.0.0') | 73 version='0.1.0') |
70 | 74 |
71 args = parser.parse_args() | 75 args = parser.parse_args() |
72 if args.debug: | 76 if args.debug: |
73 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") | 77 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") |
74 elif args.verbose: | 78 elif args.verbose: |
80 logging.info(" pattern: '{}'".format(args.pattern)) | 84 logging.info(" pattern: '{}'".format(args.pattern)) |
81 if args.outfile: | 85 if args.outfile: |
82 logging.info(" outfile: enabled writing to file") | 86 logging.info(" outfile: enabled writing to file") |
83 logging.info(" outfile: '{}'".format(args.outfile)) | 87 logging.info(" outfile: '{}'".format(args.outfile)) |
84 if args.out_bc_fasta: | 88 if args.out_bc_fasta: |
85 logging.info(" bcs: enabled writing barcodes to fasta file") | 89 logging.info(" bcs: enabled writing barcodes to fastq file") |
86 logging.info(" bcs: {}".format(args.out_bc_fasta)) | 90 logging.info(" bcs: {}".format(args.out_bc_fasta)) |
91 if args.save_bcs_as_fa: | |
92 logging.info(" fasta-barcodes: write barcodes in fasta format instead of fastq") | |
87 logging.info("") | 93 logging.info("") |
88 | 94 |
89 # check if supplied pattern is valid | 95 # check if supplied pattern is valid |
90 valid_pattern = re.compile("^[XN]+$") | 96 valid_pattern = re.compile("^[XN]+$") |
91 pattern_match = valid_pattern.match(args.pattern) | 97 pattern_match = valid_pattern.match(args.pattern) |
134 logging.debug("len(seq): {}".format(len(seq))) | 140 logging.debug("len(seq): {}".format(len(seq))) |
135 continue | 141 continue |
136 | 142 |
137 # extract barcode nucleotides | 143 # extract barcode nucleotides |
138 barcode_list = [] | 144 barcode_list = [] |
145 barcode_qual_list = [] | |
139 for bcstart, bcstop in barcode_positions: | 146 for bcstart, bcstop in barcode_positions: |
140 barcode_list.append(seq[bcstart:bcstop]) | 147 barcode_list.append(seq[bcstart:bcstop]) |
148 barcode_qual_list.append(qual[bcstart:bcstop]) | |
141 barcode = "".join(barcode_list) | 149 barcode = "".join(barcode_list) |
150 barcode_quals = "".join(barcode_qual_list) | |
142 logging.debug("extracted barcode: {}".format(barcode)) | 151 logging.debug("extracted barcode: {}".format(barcode)) |
143 | 152 |
144 # create new sequence and quality string without barcode nucleotides | 153 # create new sequence and quality string without barcode nucleotides |
145 new_seq_list = [] | 154 new_seq_list = [] |
146 new_qual_list = [] | 155 new_qual_list = [] |
165 annotated_header = header | 174 annotated_header = header |
166 samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual)) | 175 samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual)) |
167 | 176 |
168 # write barcode to fasta if requested | 177 # write barcode to fasta if requested |
169 if args.out_bc_fasta is not None: | 178 if args.out_bc_fasta is not None: |
170 faout.write(">{}\n{}\n".format(header, barcode)) | 179 if args.save_bcs_as_fa: |
180 faout.write(">{}\n{}\n".format(header, barcode)) | |
181 else: | |
182 faout.write("@{}\n{}\n+\n{}\n".format(header, barcode, barcode_quals)) | |
171 | 183 |
172 # close files | 184 # close files |
173 samout.close() | 185 samout.close() |
174 if args.out_bc_fasta is not None: | 186 if args.out_bc_fasta is not None: |
175 faout.close() | 187 faout.close() |