comparison extract_genomic_dna.py @ 17:24c055f48065 draft

Uploaded
author greg
date Thu, 03 Mar 2016 14:52:35 -0500
parents e70f5ca82b63
children
comparison
Legend: equal | deleted | inserted | replaced
16:030691e5cc86 17:24c055f48065
15 parser.add_argument('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff") 15 parser.add_argument('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff")
16 parser.add_argument('--columns', dest='columns', help="Columns to use in input file") 16 parser.add_argument('--columns', dest='columns', help="Columns to use in input file")
17 parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file") 17 parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file")
18 parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file") 18 parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file")
19 parser.add_argument('--output_format', dest='output_format', help="Output format") 19 parser.add_argument('--output_format', dest='output_format', help="Output format")
20 parser.add_argument('--description_field_delimiter', dest='description_field_delimiter', default=None, help="Fasta description field delimiter") 20 parser.add_argument('--fasta_header_type', dest='fasta_header_type', default=None, help="Fasta header format")
21 parser.add_argument('--fasta_header_delimiter', dest='fasta_header_delimiter', default=None, help="Fasta header field delimiter")
21 parser.add_argument('--output', dest='output', help="Output dataset") 22 parser.add_argument('--output', dest='output', help="Output dataset")
22 args = parser.parse_args() 23 args = parser.parse_args()
23 24
24 input_is_gff = args.input_format == 'gff' 25 input_is_gff = args.input_format == 'gff'
25 interpret_features = input_is_gff and args.interpret_features == "yes" 26 interpret_features = input_is_gff and args.interpret_features == "yes"
157 if args.output_format == "fasta": 158 if args.output_format == "fasta":
158 l = len(sequence) 159 l = len(sequence)
159 c = 0 160 c = 0
160 if input_is_gff: 161 if input_is_gff:
161 start, end = egdu.convert_bed_coords_to_gff([start, end]) 162 start, end = egdu.convert_bed_coords_to_gff([start, end])
162 fields = [args.genome, str(chrom), str(start), str(end), strand] 163 if args.fasta_header_type == "bedtools_getfasta_default":
163 field_delimiter = egdu.get_description_field_delimiter(args.description_field_delimiter) 164 out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom),
164 meta_data = field_delimiter.join(fields) 165 str(start),
165 if name.strip(): 166 str(end),
166 out.write(">%s %s\n" % (meta_data, name)) 167 strand,
167 else: 168 includes_strand_col))
168 out.write(">%s\n" % meta_data) 169 else:
170 # args.fasta_header_type == "char_delimited":
171 fields = [args.genome, str(chrom), str(start), str(end), strand]
172 field_delimiter = egdu.get_fasta_header_delimiter(args.fasta_header_delimiter)
173 meta_data = field_delimiter.join(fields)
174 if name.strip():
175 out.write(">%s %s\n" % (meta_data, name))
176 else:
177 out.write(">%s\n" % meta_data)
169 while c < l: 178 while c < l:
170 b = min(c + 50, l) 179 b = min(c + 50, l)
171 out.write("%s\n" % str(sequence[c:b])) 180 out.write("%s\n" % str(sequence[c:b]))
172 c = b 181 c = b
173 else: 182 else:
181 feature.score, 190 feature.score,
182 feature.strand, 191 feature.strand,
183 ".", 192 ".",
184 egdu.gff_attributes_to_str(feature.attributes, "GTF")]) 193 egdu.gff_attributes_to_str(feature.attributes, "GTF")])
185 else: 194 else:
186 # Where is fields being set here? 195 # Here fields was set up around line 73.
187 meta_data = "\t".join(fields) 196 meta_data = "\t".join(fields)
188 if input_is_gff: 197 if input_is_gff:
189 format_str = "%s seq \"%s\";\n" 198 format_str = "%s seq \"%s\";\n"
190 else: 199 else:
191 format_str = "%s\t%s\n" 200 format_str = "%s\t%s\n"