changeset 17:24c055f48065 draft

Uploaded
author greg
date Thu, 03 Mar 2016 14:52:35 -0500
parents 030691e5cc86
children ec35e8d25958
files extract_genomic_dna.py
diffstat 1 files changed, 17 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/extract_genomic_dna.py	Thu Mar 03 14:52:26 2016 -0500
+++ b/extract_genomic_dna.py	Thu Mar 03 14:52:35 2016 -0500
@@ -17,7 +17,8 @@
 parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file")
 parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file")
 parser.add_argument('--output_format', dest='output_format', help="Output format")
-parser.add_argument('--description_field_delimiter', dest='description_field_delimiter', default=None, help="Fasta description field delimiter")
+parser.add_argument('--fasta_header_type', dest='fasta_header_type', default=None, help="Fasta header format")
+parser.add_argument('--fasta_header_delimiter', dest='fasta_header_delimiter', default=None, help="Fasta header field delimiter")
 parser.add_argument('--output', dest='output', help="Output dataset")
 args = parser.parse_args()
 
@@ -159,13 +160,21 @@
         c = 0
         if input_is_gff:
             start, end = egdu.convert_bed_coords_to_gff([start, end])
-        fields = [args.genome, str(chrom), str(start), str(end), strand]
-        field_delimiter = egdu.get_description_field_delimiter(args.description_field_delimiter)
-        meta_data = field_delimiter.join(fields)
-        if name.strip():
-            out.write(">%s %s\n" % (meta_data, name))
+        if args.fasta_header_type == "bedtools_getfasta_default":
+            out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom),
+                                                                          str(start),
+                                                                          str(end),
+                                                                          strand,
+                                                                          includes_strand_col))
         else:
-            out.write(">%s\n" % meta_data)
+            # Otherwise args.fasta_header_type is expected to be "char_delimited".
+            fields = [args.genome, str(chrom), str(start), str(end), strand]
+            field_delimiter = egdu.get_fasta_header_delimiter(args.fasta_header_delimiter)
+            meta_data = field_delimiter.join(fields)
+            if name.strip():
+                out.write(">%s %s\n" % (meta_data, name))
+            else:
+                out.write(">%s\n" % meta_data)
         while c < l:
             b = min(c + 50, l)
             out.write("%s\n" % str(sequence[c:b]))
@@ -183,7 +192,7 @@
                                    ".",
                                    egdu.gff_attributes_to_str(feature.attributes, "GTF")])
         else:
-            # Where is fields being set here?
+            # Here fields was set up around line 73.
             meta_data = "\t".join(fields)
         if input_is_gff:
             format_str = "%s seq \"%s\";\n"