Mercurial > repos > greg > extract_genomic_dna
comparison extract_genomic_dna.py @ 17:24c055f48065 draft
Uploaded
author | greg |
---|---|
date | Thu, 03 Mar 2016 14:52:35 -0500 |
parents | e70f5ca82b63 |
children |
comparison
equal
deleted
inserted
replaced
16:030691e5cc86 | 17:24c055f48065 |
---|---|
15 parser.add_argument('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff") | 15 parser.add_argument('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff") |
16 parser.add_argument('--columns', dest='columns', help="Columns to use in input file") | 16 parser.add_argument('--columns', dest='columns', help="Columns to use in input file") |
17 parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file") | 17 parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file") |
18 parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file") | 18 parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file") |
19 parser.add_argument('--output_format', dest='output_format', help="Output format") | 19 parser.add_argument('--output_format', dest='output_format', help="Output format") |
20 parser.add_argument('--description_field_delimiter', dest='description_field_delimiter', default=None, help="Fasta description field delimiter") | 20 parser.add_argument('--fasta_header_type', dest='fasta_header_type', default=None, help="Fasta header format") |
21 parser.add_argument('--fasta_header_delimiter', dest='fasta_header_delimiter', default=None, help="Fasta header field delimiter") | |
21 parser.add_argument('--output', dest='output', help="Output dataset") | 22 parser.add_argument('--output', dest='output', help="Output dataset") |
22 args = parser.parse_args() | 23 args = parser.parse_args() |
23 | 24 |
24 input_is_gff = args.input_format == 'gff' | 25 input_is_gff = args.input_format == 'gff' |
25 interpret_features = input_is_gff and args.interpret_features == "yes" | 26 interpret_features = input_is_gff and args.interpret_features == "yes" |
157 if args.output_format == "fasta": | 158 if args.output_format == "fasta": |
158 l = len(sequence) | 159 l = len(sequence) |
159 c = 0 | 160 c = 0 |
160 if input_is_gff: | 161 if input_is_gff: |
161 start, end = egdu.convert_bed_coords_to_gff([start, end]) | 162 start, end = egdu.convert_bed_coords_to_gff([start, end]) |
162 fields = [args.genome, str(chrom), str(start), str(end), strand] | 163 if args.fasta_header_type == "bedtools_getfasta_default": |
163 field_delimiter = egdu.get_description_field_delimiter(args.description_field_delimiter) | 164 out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom), |
164 meta_data = field_delimiter.join(fields) | 165 str(start), |
165 if name.strip(): | 166 str(end), |
166 out.write(">%s %s\n" % (meta_data, name)) | 167 strand, |
167 else: | 168 includes_strand_col)) |
168 out.write(">%s\n" % meta_data) | 169 else: |
170 # args.fasta_header_type == "char_delimited": | |
171 fields = [args.genome, str(chrom), str(start), str(end), strand] | |
172 field_delimiter = egdu.get_fasta_header_delimiter(args.fasta_header_delimiter) | |
173 meta_data = field_delimiter.join(fields) | |
174 if name.strip(): | |
175 out.write(">%s %s\n" % (meta_data, name)) | |
176 else: | |
177 out.write(">%s\n" % meta_data) | |
169 while c < l: | 178 while c < l: |
170 b = min(c + 50, l) | 179 b = min(c + 50, l) |
171 out.write("%s\n" % str(sequence[c:b])) | 180 out.write("%s\n" % str(sequence[c:b])) |
172 c = b | 181 c = b |
173 else: | 182 else: |
181 feature.score, | 190 feature.score, |
182 feature.strand, | 191 feature.strand, |
183 ".", | 192 ".", |
184 egdu.gff_attributes_to_str(feature.attributes, "GTF")]) | 193 egdu.gff_attributes_to_str(feature.attributes, "GTF")]) |
185 else: | 194 else: |
186 # Where is fields being set here? | 195 # Here fields was set up around line 73. |
187 meta_data = "\t".join(fields) | 196 meta_data = "\t".join(fields) |
188 if input_is_gff: | 197 if input_is_gff: |
189 format_str = "%s seq \"%s\";\n" | 198 format_str = "%s seq \"%s\";\n" |
190 else: | 199 else: |
191 format_str = "%s\t%s\n" | 200 format_str = "%s\t%s\n" |