comparison extract_bcs.py @ 7:bb59215dfd8f draft

Uploaded
author rnateam
date Tue, 10 Nov 2015 08:12:25 -0500
parents e841de88235c
children 0b9aab6aaebf
comparison
equal deleted inserted replaced
6:1bfc5a5de843 7:bb59215dfd8f
6 By default output is written to stdout. 6 By default output is written to stdout.
7 7
8 Example usage: 8 Example usage:
9 - remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified 9 - remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified
10 FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa: 10 FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa:
11 fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fa 11 fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fastq
12 """ 12 """
13 13
14 epilog = """ 14 epilog = """
15 Author: Daniel Maticzka 15 Author: Daniel Maticzka
16 Copyright: 2015 16 Copyright: 2015
46 "-o", "--outfile", 46 "-o", "--outfile",
47 help="Write results to this file.") 47 help="Write results to this file.")
48 parser.add_argument( 48 parser.add_argument(
49 "-b", "--bcs", 49 "-b", "--bcs",
50 dest="out_bc_fasta", 50 dest="out_bc_fasta",
51 help="If set, barcodes are written to this file in FASTA format.") 51 help="Write barcodes to this file in FASTQ format.")
52 parser.add_argument(
53 "--fasta-barcodes",
54 dest="save_bcs_as_fa",
55 action="store_true",
56 help="Save extracted barcodes in FASTA format.")
52 parser.add_argument( 57 parser.add_argument(
53 "-a", "--add-bc-to-fastq", 58 "-a", "--add-bc-to-fastq",
54 dest="add_to_head", 59 dest="add_to_head",
55 help="If set, append extracted barcodes to the FASTQ headers.", 60 help="Append extracted barcodes to the FASTQ headers.",
56 action="store_true" 61 action="store_true")
57 )
58 parser.add_argument( 62 parser.add_argument(
59 "-v", "--verbose", 63 "-v", "--verbose",
60 help="Be verbose.", 64 help="Be verbose.",
61 action="store_true") 65 action="store_true")
62 parser.add_argument( 66 parser.add_argument(
64 help="Print lots of debugging information", 68 help="Print lots of debugging information",
65 action="store_true") 69 action="store_true")
66 parser.add_argument( 70 parser.add_argument(
67 '--version', 71 '--version',
68 action='version', 72 action='version',
69 version='1.0.0') 73 version='0.1.0')
70 74
71 args = parser.parse_args() 75 args = parser.parse_args()
72 if args.debug: 76 if args.debug:
73 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") 77 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
74 elif args.verbose: 78 elif args.verbose:
80 logging.info(" pattern: '{}'".format(args.pattern)) 84 logging.info(" pattern: '{}'".format(args.pattern))
81 if args.outfile: 85 if args.outfile:
82 logging.info(" outfile: enabled writing to file") 86 logging.info(" outfile: enabled writing to file")
83 logging.info(" outfile: '{}'".format(args.outfile)) 87 logging.info(" outfile: '{}'".format(args.outfile))
84 if args.out_bc_fasta: 88 if args.out_bc_fasta:
85 logging.info(" bcs: enabled writing barcodes to fasta file") 89 logging.info(" bcs: enabled writing barcodes to fastq file")
86 logging.info(" bcs: {}".format(args.out_bc_fasta)) 90 logging.info(" bcs: {}".format(args.out_bc_fasta))
91 if args.save_bcs_as_fa:
92 logging.info(" fasta-barcodes: write barcodes in fasta format instead of fastq")
87 logging.info("") 93 logging.info("")
88 94
89 # check if supplied pattern is valid 95 # check if supplied pattern is valid
90 valid_pattern = re.compile("^[XN]+$") 96 valid_pattern = re.compile("^[XN]+$")
91 pattern_match = valid_pattern.match(args.pattern) 97 pattern_match = valid_pattern.match(args.pattern)
134 logging.debug("len(seq): {}".format(len(seq))) 140 logging.debug("len(seq): {}".format(len(seq)))
135 continue 141 continue
136 142
137 # extract barcode nucleotides 143 # extract barcode nucleotides
138 barcode_list = [] 144 barcode_list = []
145 barcode_qual_list = []
139 for bcstart, bcstop in barcode_positions: 146 for bcstart, bcstop in barcode_positions:
140 barcode_list.append(seq[bcstart:bcstop]) 147 barcode_list.append(seq[bcstart:bcstop])
148 barcode_qual_list.append(qual[bcstart:bcstop])
141 barcode = "".join(barcode_list) 149 barcode = "".join(barcode_list)
150 barcode_quals = "".join(barcode_qual_list)
142 logging.debug("extracted barcode: {}".format(barcode)) 151 logging.debug("extracted barcode: {}".format(barcode))
143 152
144 # create new sequence and quality string without barcode nucleotides 153 # create new sequence and quality string without barcode nucleotides
145 new_seq_list = [] 154 new_seq_list = []
146 new_qual_list = [] 155 new_qual_list = []
165 annotated_header = header 174 annotated_header = header
166 samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual)) 175 samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual))
167 176
168 # write barcode to fasta if requested 177 # write barcode to fasta if requested
169 if args.out_bc_fasta is not None: 178 if args.out_bc_fasta is not None:
170 faout.write(">{}\n{}\n".format(header, barcode)) 179 if args.save_bcs_as_fa:
180 faout.write(">{}\n{}\n".format(header, barcode))
181 else:
182 faout.write("@{}\n{}\n+\n{}\n".format(header, barcode, barcode_quals))
171 183
172 # close files 184 # close files
173 samout.close() 185 samout.close()
174 if args.out_bc_fasta is not None: 186 if args.out_bc_fasta is not None:
175 faout.close() 187 faout.close()