changeset 14:1a10864abc1f draft

Uploaded
author greg
date Wed, 02 Mar 2016 09:13:24 -0500
parents fe88f4eeaddc
children e70f5ca82b63
files extract_genomic_dna.py extract_genomic_dna.xml extract_genomic_dna_utils.py test-data/1.bed test-data/cufflinks_out1.gtf test-data/droPer1.bed test-data/extract_genomic_dna_out1.fasta test-data/extract_genomic_dna_out2.fasta test-data/extract_genomic_dna_out3.interval test-data/extract_genomic_dna_out4.gff test-data/extract_genomic_dna_out5.fasta test-data/extract_genomic_dna_out6.fasta test-data/extract_genomic_dna_out7.fasta test-data/gff_filter_by_attribute_out1.gff test-data/tophat_in1.fasta
diffstat 15 files changed, 344 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/extract_genomic_dna.py	Wed Jan 20 07:37:31 2016 -0500
+++ b/extract_genomic_dna.py	Wed Mar 02 09:13:24 2016 -0500
@@ -17,6 +17,7 @@
 parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file")
 parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file")
 parser.add_argument('--output_format', dest='output_format', help="Output format")
+parser.add_argument('--description_field_delimiter', dest='description_field_delimiter', default=None, help="Fasta description field delimiter")
 parser.add_argument('--output', dest='output', help="Output dataset")
 args = parser.parse_args()
 
@@ -159,7 +160,8 @@
         if input_is_gff:
             start, end = egdu.convert_bed_coords_to_gff([start, end])
         fields = [args.genome, str(chrom), str(start), str(end), strand]
-        meta_data = "_".join(fields)
+        field_delimiter = egdu.get_description_field_delimiter(args.description_field_delimiter)
+        meta_data = field_delimiter.join(fields)
         if name.strip():
             out.write(">%s %s\n" % (meta_data, name))
         else:
--- a/extract_genomic_dna.xml	Wed Jan 20 07:37:31 2016 -0500
+++ b/extract_genomic_dna.xml	Wed Mar 02 09:13:24 2016 -0500
@@ -1,4 +1,4 @@
-<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
+<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2">
     <description>using coordinates from assembled/unassembled genomes</description>
     <requirements>
         <requirement type="package" version="0.7.1">bx-python</requirement>
@@ -26,7 +26,10 @@
             #else:
                 --reference_genome $reference_genome_cond.reference_genome
             #end if
-            --output_format $output_format
+            --output_format $output_format_cond.output_format
+            #if str($output_format_cond.output_format) == "fasta":
+                --description_field_delimiter $output_format_cond.description_field_delimiter
+            #end if
             --output $output
         ]]>
     </command>
@@ -60,15 +63,27 @@
                 </param>
             </when>
         </conditional>
-        <param name="output_format" type="select" label="Select output format">
-            <option value="fasta" selected="True">fasta</option>
-            <option value="interval">interval</option>
-        </param>
+        <conditional name="output_format_cond">
+            <param name="output_format" type="select" label="Select output format">
+                <option value="fasta" selected="True">fasta</option>
+                <option value="interval">interval</option>
+            </param>
+            <when value="fasta">
+                <param name="description_field_delimiter" type="select" label="Select description field delimiter" help="Character delimiter for words in description line">
+                    <option value="underscore" selected="True">underscore (_)</option>
+                    <option value="semicolon">semicolon (;)</option>
+                    <option value="comma">comma (,)</option>
+                    <option value="tilda">tilde (~)</option>
+                    <option value="vetical_bar">vertical bar (|)</option>
+                </param>
+            </when>
+            <when value="interval"/>
+        </conditional>
     </inputs>
     <outputs>
         <data format_source="input" name="output" metadata_source="input">
             <change_format>
-                <when input="output_format" value="fasta" format="fasta" />
+                <when input="output_format_cond.output_format" value="fasta" format="fasta" />
             </change_format>
         </data>
     </outputs>
@@ -78,6 +93,7 @@
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
+            <param name="description_field_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
         </test>
         <test>
@@ -85,6 +101,7 @@
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
+            <param name="description_field_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
         </test>
         <test>
@@ -105,8 +122,9 @@
         <test>
             <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="no"/>
+            <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
-            <param name="index_source" value="cached"/>
+            <param name="description_field_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
         </test>
         <!-- Test custom sequences support and GFF feature interpretation. -->
@@ -116,6 +134,7 @@
             <param name="index_source" value="history"/>
             <param name="ref_file" value="tophat_in1.fasta"/>
             <param name="out_format" value="fasta"/>
+            <param name="description_field_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
         </test>
         <test>
@@ -124,6 +143,7 @@
             <param name="index_source" value="history"/>
             <param name="ref_file" value="tophat_in1.fasta"/>
             <param name="out_format" value="fasta"/>
+            <param name="description_field_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
         </test>
     </tests>
@@ -145,15 +165,19 @@
  - Chromosome, start or end coordinates that are invalid for the specified build.
  - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
 
-.. class:: infomark
-
- **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools. 
-
 -----
 
 **What it does**
 
 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
+If the output format is FASTA, the character delimiter can be selected for the fields in the description.
+For example, selecting an underscore will produce a description like this::
+
+    >gi_31563518_ref_NP_852610.1
+
+while selecting a vertical bar will produce a description like this::
+
+    >gi|31563518|ref|NP_852610.1
 
 If strand is not defined, the default value is "+".
 
@@ -167,7 +191,7 @@
     chr7  127485994  127486166  NM_000230  0  +
     chr7  127486011  127486166  D49487     0  +
 
-Extracting sequences with **FASTA** output data type returns::
+Extracting sequences with **FASTA** output data type and **Description Field Delimiter** set to the underscore character returns::
 
     &gt;hg17_chr7_127475281_127475310_+ NM_000230
     GTAGGAATCGCAGCGCCAGCGGTTGCAAG
@@ -192,7 +216,7 @@
     <citations>
         <citation type="bibtex">
             @unpublished{None,
-            author = {Guru Ananda,Greg Von Kuster},
+            author = {Guru Ananda},
             title = {None},
             year = {None},
             eprint = {None},
--- a/extract_genomic_dna_utils.py	Wed Jan 20 07:37:31 2016 -0500
+++ b/extract_genomic_dna_utils.py	Wed Mar 02 09:13:24 2016 -0500
@@ -292,6 +292,22 @@
         stop_err('Error running faToTwoBit. ' + str(e))
 
 
+def get_description_field_delimiter(description_field_delimiter):
+    # Map the delimiter's name (as selected in the tool form) to its character.
+    delimiters_by_name = {
+        'underscore': '_',
+        'semicolon': ';',
+        'comma': ',',
+        'tilda': '~',
+        'vetical_bar': '|',
+    }
+    # Anything unrecognized, including None, falls back to underscore.
+    try:
+        return delimiters_by_name[description_field_delimiter]
+    except (KeyError, TypeError):
+        return '_'
+
+
 def get_lines(feature):
     # Get feature's line(s).
     if isinstance(feature, GFFFeature):
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.bed	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,65 @@
+chr1	147962192	147962580	CCDS989.1_cds_0_0_chr1_147962193_r	0	-
+chr1	147984545	147984630	CCDS990.1_cds_0_0_chr1_147984546_f	0	+
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+chr1	148185136	148185276	CCDS996.1_cds_0_0_chr1_148185137_f	0	+
+chr10	55251623	55253124	CCDS7248.1_cds_0_0_chr10_55251624_r	0	-
+chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-
+chr11	116206508	116206563	CCDS8377.1_cds_0_0_chr11_116206509_f	0	+
+chr11	116211733	116212337	CCDS8378.1_cds_0_0_chr11_116211734_r	0	-
+chr11	1812377	1812407	CCDS7726.1_cds_0_0_chr11_1812378_f	0	+
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr13	112381694	112381953	CCDS9526.1_cds_0_0_chr13_112381695_f	0	+
+chr14	98710240	98712285	CCDS9949.1_cds_0_0_chr14_98710241_r	0	-
+chr15	41486872	41487060	CCDS10096.1_cds_0_0_chr15_41486873_r	0	-
+chr15	41673708	41673857	CCDS10097.1_cds_0_0_chr15_41673709_f	0	+
+chr15	41679161	41679250	CCDS10098.1_cds_0_0_chr15_41679162_r	0	-
+chr15	41826029	41826196	CCDS10101.1_cds_0_0_chr15_41826030_f	0	+
+chr16	142908	143003	CCDS10397.1_cds_0_0_chr16_142909_f	0	+
+chr16	179963	180135	CCDS10401.1_cds_0_0_chr16_179964_r	0	-
+chr16	244413	244681	CCDS10402.1_cds_0_0_chr16_244414_f	0	+
+chr16	259268	259383	CCDS10403.1_cds_0_0_chr16_259269_r	0	-
+chr18	23786114	23786321	CCDS11891.1_cds_0_0_chr18_23786115_r	0	-
+chr18	59406881	59407046	CCDS11985.1_cds_0_0_chr18_59406882_f	0	+
+chr18	59455932	59456337	CCDS11986.1_cds_0_0_chr18_59455933_r	0	-
+chr18	59600586	59600754	CCDS11988.1_cds_0_0_chr18_59600587_f	0	+
+chr19	59068595	59069564	CCDS12866.1_cds_0_0_chr19_59068596_f	0	+
+chr19	59236026	59236146	CCDS12872.1_cds_0_0_chr19_59236027_r	0	-
+chr19	59297998	59298008	CCDS12877.1_cds_0_0_chr19_59297999_f	0	+
+chr19	59302168	59302288	CCDS12878.1_cds_0_0_chr19_59302169_r	0	-
+chr2	118288583	118288668	CCDS2120.1_cds_0_0_chr2_118288584_f	0	+
+chr2	118394148	118394202	CCDS2121.1_cds_0_0_chr2_118394149_r	0	-
+chr2	220190202	220190242	CCDS2441.1_cds_0_0_chr2_220190203_f	0	+
+chr2	220229609	220230869	CCDS2443.1_cds_0_0_chr2_220229610_r	0	-
+chr20	33330413	33330423	CCDS13249.1_cds_0_0_chr20_33330414_r	0	-
+chr20	33513606	33513792	CCDS13255.1_cds_0_0_chr20_33513607_f	0	+
+chr20	33579500	33579527	CCDS13256.1_cds_0_0_chr20_33579501_r	0	-
+chr20	33593260	33593348	CCDS13257.1_cds_0_0_chr20_33593261_f	0	+
+chr21	32707032	32707192	CCDS13614.1_cds_0_0_chr21_32707033_f	0	+
+chr21	32869641	32870022	CCDS13615.1_cds_0_0_chr21_32869642_r	0	-
+chr21	33321040	33322012	CCDS13620.1_cds_0_0_chr21_33321041_f	0	+
+chr21	33744994	33745040	CCDS13625.1_cds_0_0_chr21_33744995_r	0	-
+chr22	30120223	30120265	CCDS13897.1_cds_0_0_chr22_30120224_f	0	+
+chr22	30160419	30160661	CCDS13898.1_cds_0_0_chr22_30160420_r	0	-
+chr22	30665273	30665360	CCDS13901.1_cds_0_0_chr22_30665274_f	0	+
+chr22	30939054	30939266	CCDS13903.1_cds_0_0_chr22_30939055_r	0	-
+chr5	131424298	131424460	CCDS4149.1_cds_0_0_chr5_131424299_f	0	+
+chr5	131556601	131556672	CCDS4151.1_cds_0_0_chr5_131556602_r	0	-
+chr5	131621326	131621419	CCDS4152.1_cds_0_0_chr5_131621327_f	0	+
+chr5	131847541	131847666	CCDS4155.1_cds_0_0_chr5_131847542_r	0	-
+chr6	108299600	108299744	CCDS5061.1_cds_0_0_chr6_108299601_r	0	-
+chr6	108594662	108594687	CCDS5063.1_cds_0_0_chr6_108594663_f	0	+
+chr6	108640045	108640151	CCDS5064.1_cds_0_0_chr6_108640046_r	0	-
+chr6	108722976	108723115	CCDS5067.1_cds_0_0_chr6_108722977_f	0	+
+chr7	113660517	113660685	CCDS5760.1_cds_0_0_chr7_113660518_f	0	+
+chr7	116512159	116512389	CCDS5771.1_cds_0_0_chr7_116512160_r	0	-
+chr7	116714099	116714152	CCDS5773.1_cds_0_0_chr7_116714100_f	0	+
+chr7	116945541	116945787	CCDS5774.1_cds_0_0_chr7_116945542_r	0	-
+chr8	118881131	118881317	CCDS6324.1_cds_0_0_chr8_118881132_r	0	-
+chr9	128764156	128764189	CCDS6914.1_cds_0_0_chr9_128764157_f	0	+
+chr9	128787519	128789136	CCDS6915.1_cds_0_0_chr9_128787520_r	0	-
+chr9	128882427	128882523	CCDS6917.1_cds_0_0_chr9_128882428_f	0	+
+chr9	128937229	128937445	CCDS6919.1_cds_0_0_chr9_128937230_r	0	-
+chrX	122745047	122745924	CCDS14606.1_cds_0_0_chrX_122745048_f	0	+
+chrX	152648964	152649196	CCDS14733.1_cds_0_0_chrX_152648965_r	0	-
+chrX	152691446	152691471	CCDS14735.1_cds_0_0_chrX_152691447_f	0	+
+chrX	152694029	152694263	CCDS14736.1_cds_0_0_chrX_152694030_r	0	-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cufflinks_out1.gtf	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,4 @@
+test_chromosome	Cufflinks	transcript	53	550	1000	+	.	gene_id "CUFF.1"; transcript_id "CUFF.1.1"; FPKM "10679134.4063403048"; frac "1.000000"; conf_lo "8543307.525072"; conf_hi "12814961.287608"; cov "145.770185";
+test_chromosome	Cufflinks	exon	53	250	1000	+	.	gene_id "CUFF.1"; transcript_id "CUFF.1.1"; exon_number "1"; FPKM "10679134.4063403048"; frac "1.000000"; conf_lo "8543307.525072"; conf_hi "12814961.287608"; cov "145.770185";
+test_chromosome	Cufflinks	exon	351	400	1000	+	.	gene_id "CUFF.1"; transcript_id "CUFF.1.1"; exon_number "2"; FPKM "10679134.4063403048"; frac "1.000000"; conf_lo "8543307.525072"; conf_hi "12814961.287608"; cov "145.770185";
+test_chromosome	Cufflinks	exon	501	550	1000	+	.	gene_id "CUFF.1"; transcript_id "CUFF.1.1"; exon_number "3"; FPKM "10679134.4063403048"; frac "1.000000"; conf_lo "8543307.525072"; conf_hi "12814961.287608"; cov "145.770185";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/droPer1.bed	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,2 @@
+super_1	139823	139913	AK028861	0	-	139823	139913	0	1	90,	0,
+super_1	156750	156844	BC126698	0	-	156750	156844	0	1	94,	0,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out1.fasta	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,88 @@
+>hg17_chr1_147962192_147962580_-
+ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG
+GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT
+GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT
+CCCTGTTCGTGAGGTCTGTCCAGTGACCCATCGTCCAGCCCTATACCGGG
+ACCCTGTTACAGACATACCCTATGCCACTGCTCGAGCCTTCAAGATCATT
+CGTGAGGCTTACAAGAAGTACATTACTGCCCATGGACTGCCGCCCACTGC
+CTCAGCCCTGGGCCCCGGCCCGCCACCTCCTGAGCCCCTCCCTGGCTCTG
+GGCCCCGAGCCTTGCGCCAGAAAATTGTCATTAAATGA
+>hg17_chr1_147984545_147984630_+
+ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTT
+TGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG
+>hg17_chr1_148078400_148078582_-
+GTTCTCAGCTTCCTTGCTTCCATGGCTCCAGCACCATTCGAAACCTCAAA
+GAGAGGTTCCACATGAGCATGACTGAGGAGCAGCTGCAGCTGCTGGTGGA
+GCAGATGGTGGATGGCAGTATGCGGTCTATCACCACCAAACTCTATGACG
+GCTTCCAGTACCTCACCAACGGCATCATGTGA
+>hg17_chr1_148185136_148185276_+
+ATGGAAGCGTTTTTGGGGTCGCGGTCCGGACTTTGGGCGGGGGGTCCGGC
+CCCAGGACAGTTTTACCGCATTCCGTCCACTCCCGATTCCTTCATGGATC
+CGGCGTCTGCACTTTACAGAGGTCCAATCACGCGGACCCA
+>hg17_chr10_55251623_55253124_-
+TCTTTTCCTTCTCTACCATTTTCAACAAAGCAGGGGAAATAACTCAGTCT
+CAGAAGACAGGAAACATCAACAAGTTGTGATGCCCTTTTCTTCCAATACT
+ATTGAGGCTCACAAGTCAGCTCATGTAGACGGATCACTTAAGAGCAACAA
+ACTGAAGTCTGCAAGAAAATTCACATTTCTATCTGATGAGGATGACTTAA
+GTGCCCATAATCCCCTTTATAAGGAAAACATAAGTCAAGTATCAACAAAT
+TCAGACATTTCACAGAGAACAGATTTTGTAGACCCATTTTCACCCAAAAT
+ACAAGCCAAGAGTAAGTCTCTGAGGGGCCCAAGAGAAAAGATTCAGAGGC
+TGTGGAGTCAGTCAGTCAGCTTACCCAGGAGGCTGATGAGGAAAGTTCCA
+AATAGACCAGAGATCATAGATCTGCAGCAGTGGCAAGGCACCAGGCAGAA
+AGCTGAAAATGAAAACACTGGAATCTGTACAAACAAAAGAGGTAGCAGCA
+ATCCATTGCTTACAACTGAAGAGGCAAATTTGACAGAGAAAGAGGAAATA
+AGGCAAGGTGAAACACTGATGATAGAAGGAACAGAACAGTTGAAATCTCT
+CTCTTCAGACTCTTCATTTTGCTTTCCCAGGCCTCACTTCTCATTCTCCA
+CTTTGCCAACTGTTTCAAGAACTGTGGAACTCAAATCAGAACCTAATGTC
+ATCAGTTCTCCTGCTGAGTGTTCCTTGGAACTTTCTCCTTCAAGGCCTTG
+TGTTTTACATTCTTCACTCTCTAGGAGAGAGACACCTATTTGTATGTTAC
+CTATTGAAACCGAAAGAAATATTTTTGAAAATTTTGCCCATCCACCAAAC
+ATCTCTCCTTCTGCCTGtccccttccccctcctcctcctatttctcctcc
+ttctcctcctcctgctcctgctcctcttgctcctcctcctgacatttctc
+ctttttctcttttttgtcctcctccctctcctccttctatccctcttcct
+cttcctcctcctACATTTTTTCCACTTTCCGTTTCAACGTCTGGTCCCCC
+AACAccacctcttctacctccatttccaactcctcttcctccaccacctc
+cttctattccttgccctccacctccttcAGCTTCATTTCTGTCCACAGAG
+TGTGTCTGTATAACAGGTGTTAAATGCACGACCAACTTGATGCCTGCCGA
+GAAAATTAAGTCCTCTATGACACAGCTATCAACAACGACAGTGTGTAAAA
+CAGACCCTCAGAGAGAACCAAAAGGCATCCTCAGACACGTTAAAAACTTA
+GCAGAACTTGAAAAATCAGTAGCTAACATGTACAGTCAAATAGAAAAAAA
+CTATCTACGCACAAATGTTTCAGAACTTCAAACTATGTGCCCTTCAGAAG
+TAACAAATATGGAAATCACATCTGAACAAAACAAGGGGAGTTTGAACAAT
+ATTGTCGAGGGAACTGAAAAACAATCTCACAGTCAATCTACTTCACTGTA
+A
+>hg17_chr11_116124407_116124501_-
+ATCCAATGGATTTGAACAGAAGCGCTTTGCCAGGCTTGCCAGCAAGAAGG
+CAGTGGAGGAACTTGCCTACAAATGGAGTGTTGAGGATATGTAA
+>hg17_chr11_116206508_116206563_+
+ATGCAGCCCCGGGTACTCCTTGTTGTTGCCCTCCTGGCGCTCCTGGCCTC
+TGCCC
+>hg17_chr11_116211733_116212337_-
+CCTAAAGCTCCTTGACAACTGGGACAGCGTGACCTCCACCTTCAGCAAGC
+TGCGCGAACAGCTCGGCCCTGTGACCCAGGAGTTCTGGGATAACCTGGAA
+AAGGAGACAGAGGGCCTGAGGCAGGAGATGAGCAAGGATCTGGAGGAGGT
+GAAGGCCAAGGTGCAGCCCTACCTGGACGACTTCCAGAAGAAGTGGCAGG
+AGGAGATGGAGCTCTACCGCCAGAAGGTGGAGCCGCTGCGCGCAGAGCTC
+CAAGAGGGCGCGCGCCAGAAGCTGCACGAGCTGCAAGAGAAGCTGAGCCC
+ACTGGGCGAGGAGATGCGCGACCGCGCGCGCGCCCATGTGGACGCGCTGC
+GCACGCATCTGGCCCCCTACAGCGACGAGCTGCGCCAGCGCTTGGCCGCG
+CGCCTTGAGGCTCTCAAGGAGAACGGCGGCGCCAGACTGGCCGAGTACCA
+CGCCAAGGCCACCGAGCATCTGAGCACGCTCAGCGAGAAGGCCAAGCCCG
+CGCTCGAGGACCTCCGCCAAGGCCTGCTGCCCGTGCTGGAGAGCTTCAAG
+GTCAGCTTCCTGAGCGCTCTCGAGGAGTACACTAAGAAGCTCAACACCCA
+GTGA
+>hg17_chr11_1812377_1812407_+
+ATGCTCCACCTGCATGGCTGGCAAACCATG
+>hg17_chr12_38440094_38440321_-
+GAGCTTTCTTCCTCTATGCTGGATTTGCTGCTGTGGGACTCCTTTTCATC
+TATGGCTGTCTTCCTGAGACCAAAGGCAAAAAATTAGAGGAAATTGAATC
+ACTCTTTGACAACAGGCTATGTACATGTGGCACTTCAGATTCTGATGAAG
+GGAGATATATTGAATATATTCGGGTAAAGGGAAGTAACTATCATCTTTCT
+GACAATGATGCTTCTGATGTGGAATAA
+>hg17_chr13_112381694_112381953_+
+ATGAACTCACCAGAGGCGAGGCTCTGCGTTGCTCAATGCAGAGACTCTTA
+CCCAGGGTGTCAGCCTCTGAAAGATACACGTGCCTGGGCCTCTTCCCTGA
+AGATGGACCCGGCAGGTCTGGAGGGAGGCCCCCGTGATGAATCCCGTGAT
+GAGCCGCCGATCCGAGCTCAGGCTGCGTCATGGGACCAGCCACAAGGTTG
+CCTGACCTATAAAGGTCGCAGGAGTGCCTCAGGGACACAGAAGCAGTTAC
+AGCTGCCAG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out2.fasta	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,6 @@
+>droPer1_super_1_139823_139913_- AK028861
+CGTCGGCTTCTGCTTCTGCTGATGATGGTCGTTCTTCTTCCTTTACTTCT
+TCCTATTTTTCTTCCTTCCCTTACACTATATCTTCCTTTA
+>droPer1_super_1_156750_156844_- BC126698
+CCGGGCTGCGGCAAGGGATTCACCTGCTCCAAACAGCTCAAGGTGCACTC
+CCGCACGCACACGGGCGAGAAGCCCTATCACTGCGACATCTGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out3.interval	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,10 @@
+chr1	147962192	147962580	CCDS989.1_cds_0_0_chr1_147962193_r	0	-	ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTGGACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGTGATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGTCCCTGTTCGTGAGGTCTGTCCAGTGACCCATCGTCCAGCCCTATACCGGGACCCTGTTACAGACATACCCTATGCCACTGCTCGAGCCTTCAAGATCATTCGTGAGGCTTACAAGAAGTACATTACTGCCCATGGACTGCCGCCCACTGCCTCAGCCCTGGGCCCCGGCCCGCCACCTCCTGAGCCCCTCCCTGGCTCTGGGCCCCGAGCCTTGCGCCAGAAAATTGTCATTAAATGA
+chr1	147984545	147984630	CCDS990.1_cds_0_0_chr1_147984546_f	0	+	ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-	GTTCTCAGCTTCCTTGCTTCCATGGCTCCAGCACCATTCGAAACCTCAAAGAGAGGTTCCACATGAGCATGACTGAGGAGCAGCTGCAGCTGCTGGTGGAGCAGATGGTGGATGGCAGTATGCGGTCTATCACCACCAAACTCTATGACGGCTTCCAGTACCTCACCAACGGCATCATGTGA
+chr1	148185136	148185276	CCDS996.1_cds_0_0_chr1_148185137_f	0	+	ATGGAAGCGTTTTTGGGGTCGCGGTCCGGACTTTGGGCGGGGGGTCCGGCCCCAGGACAGTTTTACCGCATTCCGTCCACTCCCGATTCCTTCATGGATCCGGCGTCTGCACTTTACAGAGGTCCAATCACGCGGACCCA
+chr10	55251623	55253124	CCDS7248.1_cds_0_0_chr10_55251624_r	0	-	TCTTTTCCTTCTCTACCATTTTCAACAAAGCAGGGGAAATAACTCAGTCTCAGAAGACAGGAAACATCAACAAGTTGTGATGCCCTTTTCTTCCAATACTATTGAGGCTCACAAGTCAGCTCATGTAGACGGATCACTTAAGAGCAACAAACTGAAGTCTGCAAGAAAATTCACATTTCTATCTGATGAGGATGACTTAAGTGCCCATAATCCCCTTTATAAGGAAAACATAAGTCAAGTATCAACAAATTCAGACATTTCACAGAGAACAGATTTTGTAGACCCATTTTCACCCAAAATACAAGCCAAGAGTAAGTCTCTGAGGGGCCCAAGAGAAAAGATTCAGAGGCTGTGGAGTCAGTCAGTCAGCTTACCCAGGAGGCTGATGAGGAAAGTTCCAAATAGACCAGAGATCATAGATCTGCAGCAGTGGCAAGGCACCAGGCAGAAAGCTGAAAATGAAAACACTGGAATCTGTACAAACAAAAGAGGTAGCAGCAATCCATTGCTTACAACTGAAGAGGCAAATTTGACAGAGAAAGAGGAAATAAGGCAAGGTGAAACACTGATGATAGAAGGAACAGAACAGTTGAAATCTCTCTCTTCAGACTCTTCATTTTGCTTTCCCAGGCCTCACTTCTCATTCTCCACTTTGCCAACTGTTTCAAGAACTGTGGAACTCAAATCAGAACCTAATGTCATCAGTTCTCCTGCTGAGTGTTCCTTGGAACTTTCTCCTTCAAGGCCTTGTGTTTTACATTCTTCACTCTCTAGGAGAGAGACACCTATTTGTATGTTACCTATTGAAACCGAAAGAAATATTTTTGAAAATTTTGCCCATCCACCAAACATCTCTCCTTCTGCCTGtccccttccccctcctcctcctatttctcctccttctcctcctcctgctcctgctcctcttgctcctcctcctgacatttctcctttttctcttttttgtcctcctccctctcctccttctatccctcttcctcttcctcctcctACATTTTTTCCACTTTCCGTTTCAACGTCTGGTCCCCCAACAccacctcttctacctccatttccaactcctcttcctccaccacctccttctattccttgccctccacctccttcAGCTTCATTTCTGTCCACAGAGTGTGTCTGTATAACAGGTGTTAAATGCACGACCAACTTGATGCCTGCCGAGAAAATTAAGTCCTCTATGACACAGCTATCAACAACGACAGTGTGTAAAACAGACCCTCAGAGAGAACCAAAAGGCATCCTCAGACACGTTAAAAACTTAGCAGAACTTGAAAAATCAGTAGCTAACATGTACAGTCAAATAGAAAAAAACTATCTACGCACAAATGTTTCAGAACTTCAAACTATGTGCCCTTCAGAAGTAACAAATATGGAAATCACATCTGAACAAAACAAGGGGAGTTTGAACAATATTGTCGAGGGAACTGAAAAACAATCTCACAGTCAATCTACTTCACTGTAA
+chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-	ATCCAATGGATTTGAACAGAAGCGCTTTGCCAGGCTTGCCAGCAAGAAGGCAGTGGAGGAACTTGCCTACAAATGGAGTGTTGAGGATATGTAA
+chr11	116206508	116206563	CCDS8377.1_cds_0_0_chr11_116206509_f	0	+	ATGCAGCCCCGGGTACTCCTTGTTGTTGCCCTCCTGGCGCTCCTGGCCTCTGCCC
+chr11	116211733	116212337	CCDS8378.1_cds_0_0_chr11_116211734_r	0	-	CCTAAAGCTCCTTGACAACTGGGACAGCGTGACCTCCACCTTCAGCAAGCTGCGCGAACAGCTCGGCCCTGTGACCCAGGAGTTCTGGGATAACCTGGAAAAGGAGACAGAGGGCCTGAGGCAGGAGATGAGCAAGGATCTGGAGGAGGTGAAGGCCAAGGTGCAGCCCTACCTGGACGACTTCCAGAAGAAGTGGCAGGAGGAGATGGAGCTCTACCGCCAGAAGGTGGAGCCGCTGCGCGCAGAGCTCCAAGAGGGCGCGCGCCAGAAGCTGCACGAGCTGCAAGAGAAGCTGAGCCCACTGGGCGAGGAGATGCGCGACCGCGCGCGCGCCCATGTGGACGCGCTGCGCACGCATCTGGCCCCCTACAGCGACGAGCTGCGCCAGCGCTTGGCCGCGCGCCTTGAGGCTCTCAAGGAGAACGGCGGCGCCAGACTGGCCGAGTACCACGCCAAGGCCACCGAGCATCTGAGCACGCTCAGCGAGAAGGCCAAGCCCGCGCTCGAGGACCTCCGCCAAGGCCTGCTGCCCGTGCTGGAGAGCTTCAAGGTCAGCTTCCTGAGCGCTCTCGAGGAGTACACTAAGAAGCTCAACACCCAGTGA
+chr11	1812377	1812407	CCDS7726.1_cds_0_0_chr11_1812378_f	0	+	ATGCTCCACCTGCATGGCTGGCAAACCATG
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-	GAGCTTTCTTCCTCTATGCTGGATTTGCTGCTGTGGGACTCCTTTTCATCTATGGCTGTCTTCCTGAGACCAAAGGCAAAAAATTAGAGGAAATTGAATCACTCTTTGACAACAGGCTATGTACATGTGGCACTTCAGATTCTGATGAAGGGAGATATATTGAATATATTCGGGTAAAGGGAAGTAACTATCATCTTTCTGACAATGATGCTTCTGATGTGGAATAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out4.gff	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,10 @@
+chr10	Cufflinks	transcript	62044837	62045189	1000	.	.	gene_id "CUFF.23531"; transcript_id "CUFF.23531.1"; FPKM "19.5178121606"; frac "1.000000"; conf_lo "9.264456"; conf_hi "29.771168"; cov "1.108611"; seq "AATTACAAGATCGACACACCAAGATAGGCAGATCCATGGTTGGTTTTACTTTGTAAATCTAAAAGTATGTTGGAAAACGATGCAATGAATTCTTATCCTTTTTCAAAATGAAGAATTTGTGATGGTTAGTGGACAGTTCAGAAGCCTCTCTGCAAGAAAGGGGGCGCTGAGAAGTGGTAAAAAAAGGAAGGAAGCACTCGGGCTTTGTCAGCAGGGTGGACCCTGGGGTCCACAGTGGGAACAGTCCCTTCTGGCCTCTACTCACTGACCAAACGCTTTACTAAAACTCCGCTTCTGGCCTCTGTTGCCACCTCCTGGTCGCTGTCCTCGGAAGTTTCTACTTCCTCCTCGCT";
+chr10	Cufflinks	transcript	75372919	75373002	1000	.	.	gene_id "CUFF.24985"; transcript_id "CUFF.24985.1"; FPKM "124.4970510798"; frac "1.000000"; conf_lo "71.411330"; conf_hi "177.582772"; cov "7.071429"; seq "GCGTCTCGCAGCTTCTGCCCGTCGATCTCCATGTCGAGCCGGATGGGCACCAGCACCTCAGGCTGTGACGCATTCTCATGGATC";
+chr10	Cufflinks	transcript	80362428	80363292	1000	-	.	gene_id "CUFF.26065"; transcript_id "CUFF.26065.1"; FPKM "43.6170921216"; frac "1.000000"; conf_lo "32.260169"; conf_hi "54.974016"; cov "2.477449"; seq "ATGACGGACAAGTGTTTCCGGAAGTGCATCGGGAAGCCCGGGGGCTCCTTGGATAACTCGGAGCAGGTGAGACATCTCGGGAACCCGGGGTGGTGAGGGGCGCGGGGTCAGGAGCGTCTAGGAGGTTGAGAGATGTGCGCGTGCGCGGCCTCTAGCCTTAGCTACTGAGGAAGTTGTGCGCGTGCGCGGGGTGAGGACCCGGCTTCTGTGCCTAGATCGGTGCAGCCTTCATGGGTGATCCTCGGGTCGTGTGACCGTCAGTCAGGGATCCCCCTCCACGCTTTGCAGAAATGCATCGCCATGTGCATGGACCGCTACATGGACGCCTGGAATACCGTGTCCCGCGCCTACAACTCTCGACTGCAGCGGGAACGAGCCAACATGTGACCGGGACCTGTGCCTCGGGACACCGTGCTTATGGTCTGAACTGTTTTCCCTGCCAGTTAGGGTGTCTCCTCCTAGCCGCCCTGAAGTCTGGCAGCATGGAGGGCTTGGGGATCGAGGCCTCTCCCCTGGGTTGCTGCGTCCAGCTCAATCTCAGAAGAGAGTGAGGACCCGACAGAGCACAGGGATCTGGCTGGCCCCACTGACCTGTGACCTCAGGAGAGCAGGCCAATAAATCGCTGCTGGGGCAGTAAAGCAGGCGTGTCACCTCACTGCTTCAGGTCCCTTCCCCTGAGTAGGCCCAGACCTCCCAGGGTATCTTTCCCCTTGGGGTCAGTGGGCTGCTGGCTCTCAGGGAATTCGGAGCATGATCTCAGGTGTTTGGTCATCCCGGGGAGACCAGCCGAGGTTAAGAAGCAAGGCTTCATGTagccttcacctatcatgcatgaggcccagggtgctgaccttaactctgaat";
+chr11	Cufflinks	transcript	7904565	7904642	1000	.	.	gene_id "CUFF.33508"; transcript_id "CUFF.33508.1"; FPKM "61.6484988869"; frac "1.000000"; conf_lo "22.882428"; conf_hi "100.414569"; cov "3.501633"; seq "CATCTTCTATTTGAGCCTCCATCCAGGCACCTCTGAAACAAAGGTGCACTCACTGCATGTCCACTTGTCACAGGAGCC";
+chr11	Cufflinks	exon	78140156	78140259	1000	.	.	gene_id "CUFF.43148"; transcript_id "CUFF.43148.1"; exon_number "1"; FPKM "54.8483511750"; frac "1.000000"; conf_lo "23.181641"; conf_hi "86.515061"; cov "3.115385"; seq "CTGCTTGCTAATTTTCTCTCTTGGGATCAGGGGGACGTGAACTCCAGCCCTGACTCGTGCTCCTTATGCTCTGAGTACATAGCAAATAAATGAGAGCAAAACAC";
+chr11	Cufflinks	exon	105616462	105616737	1000	.	.	gene_id "CUFF.48385"; transcript_id "CUFF.48385.1"; exon_number "1"; FPKM "18.9452034252"; frac "1.000000"; conf_lo "7.520816"; conf_hi "30.369591"; cov "1.076087"; seq "TAGGTGTAATAGTGGAAAACAATAGTTTTTAAACTTCAGAGTCCAGGGCTGTAACTCAGTAGTAACAGTGTTCTCTAAGTATGTTATTCTTCCTCTACATGCTGAAATTTTTCATATTTGGAGCATTCACTGTTCCATGTATCAGTAAATTATATTGTGAGCTGTCATCATATCTAAGCACCATATTGAATATTTTTCATGATTAAAATTTGTTGAAACAACAATTCTATGACCGAAAAAAGCAAGGCTTTGTAAATAACATGTTTGTTACTAGTA";
+chr12	Cufflinks	exon	30701762	30702509	1000	.	.	gene_id "CUFF.53897"; transcript_id "CUFF.53897.1"; exon_number "1"; FPKM "48.9333329111"; frac "1.000000"; conf_lo "37.780391"; conf_hi "60.086275"; cov "2.779412"; seq "TGTGGAGTGTACTTATATGATCCCTATGCTGATAGGATTACCTTCCTAGACATAGCTAGACGCAAAGCCACATGTGTAAGGCTGCTGAGCAAAGACAGCATCCCAGCATGGGTGTGTTCACGGTGGATTCACCACGTTGCATATGTAAAGTGGTCCCCTTGGCTTACCCTTCACTTTGCTCATGAGATTCAGAAGCTGGTGGTCCAGCAGGGGTGAGCATTTGTGAAATAGTAAGCTGAACTTAGTGGTGAGATTTCAGAACAGACTTCTGTGAAGTAAGAGATGTAACCATGCATCTAAAATCAGATGGCCGTGTAACTGCTCGGGCATAGAAATGGTGGGAGAACCTGTCCTGGGTACCTGGCATTTCACATGAGCCCAGGGATATGTCTTGTGCCAAGGCACACAAGTGTCCATGGACTTGGACAGGTGCCAAGGGTTTTTGTCTCTGTTCCTATGTGGGAGGCTGGCTGTGATTTACATTAATTTCTGTATTTCAAACGAAGATGTCTGCAGATCTCCATTTTGATGTTACAGCCTCATTGCCCAGGCAGTGGGCAGTGCCCAGACACCCTTTCTGACTAGCCACTGCATTGGGCTTCTGTGATTCAAAGTAGTGTATATATTTATTTACTTCTCTGACTGTGGCCAACAGCCAAATGCCATTTTATGTTCCTTGTATTCAGTCCATTACCAAAGAGGTGTTTGCACTTTGTAATGATACCTTTCAGTTCAAATAAAAGGACCA";
+chr13	Cufflinks	exon	49159496	49159569	1000	.	.	gene_id "CUFF.67788"; transcript_id "CUFF.67788.1"; exon_number "1"; FPKM "44.9657653777"; frac "1.000000"; conf_lo "10.974842"; conf_hi "78.956689"; cov "2.554054"; seq "ttttcttttggattacttgatttttttttatttgatcttatttatgatgattttgagtacatttttgaacagtt";
+chr13	Cufflinks	transcript	100200304	100200330	1000	.	.	gene_id "CUFF.73108"; transcript_id "CUFF.73108.1"; FPKM "123.2395051093"; frac "1.000000"; conf_lo "30.079196"; conf_hi "216.399814"; cov "7.000000"; seq "TCTCATATGAATAGCCACCCTCTTCTG";
+chr14	Cufflinks	transcript	31949103	31949152	1000	.	.	gene_id "CUFF.77316"; transcript_id "CUFF.77316.1"; FPKM "85.5634278330"; frac "1.000000"; conf_lo "28.521143"; conf_hi "142.605713"; cov "4.860000"; seq "GGATGCTATCCGCGATGTGCATGTAAAGGGCCTCATGTACCAGTGGATCG";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out5.fasta	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,31 @@
+>mm9_chr10_62044837_62045189_+
+AATTACAAGATCGACACACCAAGATAGGCAGATCCATGGTTGGTTTTACT
+TTGTAAATCTAAAAGTATGTTGGAAAACGATGCAATGAATTCTTATCCTT
+TTTCAAAATGAAGAATTTGTGATGGTTAGTGGACAGTTCAGAAGCCTCTC
+TGCAAGAAAGGGGGCGCTGAGAAGTGGTAAAAAAAGGAAGGAAGCACTCG
+GGCTTTGTCAGCAGGGTGGACCCTGGGGTCCACAGTGGGAACAGTCCCTT
+CTGGCCTCTACTCACTGACCAAACGCTTTACTAAAACTCCGCTTCTGGCC
+TCTGTTGCCACCTCCTGGTCGCTGTCCTCGGAAGTTTCTACTTCCTCCTC
+GCT
+>mm9_chr10_75372919_75373002_+
+GCGTCTCGCAGCTTCTGCCCGTCGATCTCCATGTCGAGCCGGATGGGCAC
+CAGCACCTCAGGCTGTGACGCATTCTCATGGATC
+>mm9_chr10_80362428_80363292_-
+ATGACGGACAAGTGTTTCCGGAAGTGCATCGGGAAGCCCGGGGGCTCCTT
+GGATAACTCGGAGCAGGTGAGACATCTCGGGAACCCGGGGTGGTGAGGGG
+CGCGGGGTCAGGAGCGTCTAGGAGGTTGAGAGATGTGCGCGTGCGCGGCC
+TCTAGCCTTAGCTACTGAGGAAGTTGTGCGCGTGCGCGGGGTGAGGACCC
+GGCTTCTGTGCCTAGATCGGTGCAGCCTTCATGGGTGATCCTCGGGTCGT
+GTGACCGTCAGTCAGGGATCCCCCTCCACGCTTTGCAGAAATGCATCGCC
+ATGTGCATGGACCGCTACATGGACGCCTGGAATACCGTGTCCCGCGCCTA
+CAACTCTCGACTGCAGCGGGAACGAGCCAACATGTGACCGGGACCTGTGC
+CTCGGGACACCGTGCTTATGGTCTGAACTGTTTTCCCTGCCAGTTAGGGT
+GTCTCCTCCTAGCCGCCCTGAAGTCTGGCAGCATGGAGGGCTTGGGGATC
+GAGGCCTCTCCCCTGGGTTGCTGCGTCCAGCTCAATCTCAGAAGAGAGTG
+AGGACCCGACAGAGCACAGGGATCTGGCTGGCCCCACTGACCTGTGACCT
+CAGGAGAGCAGGCCAATAAATCGCTGCTGGGGCAGTAAAGCAGGCGTGTC
+ACCTCACTGCTTCAGGTCCCTTCCCCTGAGTAGGCCCAGACCTCCCAGGG
+TATCTTTCCCCTTGGGGTCAGTGGGCTGCTGGCTCTCAGGGAATTCGGAG
+CATGATCTCAGGTGTTTGGTCATCCCGGGGAGACCAGCCGAGGTTAAGAA
+GCAAGGCTTCATGTagccttcacctatcatgcatgaggcccagggtgctg
+accttaactctgaat
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out6.fasta	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,20 @@
+>mm9_test_chromosome_53_550_+
+TACTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCAC
+TACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAG
+CGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTAC
+GTATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCCGT
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAC
+TATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTGT
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGTT
+TTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC
+>mm9_test_chromosome_53_250_+
+TACTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCAC
+TACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAG
+CGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTAC
+GTATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCC
+>mm9_test_chromosome_351_400_+
+ACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACT
+>mm9_test_chromosome_501_550_+
+TTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_genomic_dna_out7.fasta	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,17 @@
+>mm9_test_chromosome_53_550_+
+TACTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCAC
+TACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAG
+CGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTAC
+GTATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCCGT
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAC
+TATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTGT
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGTT
+TTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGCTA
+CTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCACTA
+CGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCG
+ACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGT
+ATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTA
+TTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTT
+CTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gff_filter_by_attribute_out1.gff	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,20 @@
+chr10	Cufflinks	transcript	62044837	62045189	1000	.	.	gene_id "CUFF.23531"; transcript_id "CUFF.23531.1"; FPKM "19.5178121606"; frac "1.000000"; conf_lo "9.264456"; conf_hi "29.771168"; cov "1.108611";
+chr10	Cufflinks	transcript	75372919	75373002	1000	.	.	gene_id "CUFF.24985"; transcript_id "CUFF.24985.1"; FPKM "124.4970510798"; frac "1.000000"; conf_lo "71.411330"; conf_hi "177.582772"; cov "7.071429";
+chr10	Cufflinks	transcript	80362428	80363292	1000	-	.	gene_id "CUFF.26065"; transcript_id "CUFF.26065.1"; FPKM "43.6170921216"; frac "1.000000"; conf_lo "32.260169"; conf_hi "54.974016"; cov "2.477449";
+chr11	Cufflinks	transcript	7904565	7904642	1000	.	.	gene_id "CUFF.33508"; transcript_id "CUFF.33508.1"; FPKM "61.6484988869"; frac "1.000000"; conf_lo "22.882428"; conf_hi "100.414569"; cov "3.501633";
+chr11	Cufflinks	exon	78140156	78140259	1000	.	.	gene_id "CUFF.43148"; transcript_id "CUFF.43148.1"; exon_number "1"; FPKM "54.8483511750"; frac "1.000000"; conf_lo "23.181641"; conf_hi "86.515061"; cov "3.115385";
+chr11	Cufflinks	exon	105616462	105616737	1000	.	.	gene_id "CUFF.48385"; transcript_id "CUFF.48385.1"; exon_number "1"; FPKM "18.9452034252"; frac "1.000000"; conf_lo "7.520816"; conf_hi "30.369591"; cov "1.076087";
+chr12	Cufflinks	exon	30701762	30702509	1000	.	.	gene_id "CUFF.53897"; transcript_id "CUFF.53897.1"; exon_number "1"; FPKM "48.9333329111"; frac "1.000000"; conf_lo "37.780391"; conf_hi "60.086275"; cov "2.779412";
+chr13	Cufflinks	exon	49159496	49159569	1000	.	.	gene_id "CUFF.67788"; transcript_id "CUFF.67788.1"; exon_number "1"; FPKM "44.9657653777"; frac "1.000000"; conf_lo "10.974842"; conf_hi "78.956689"; cov "2.554054";
+chr13	Cufflinks	transcript	100200304	100200330	1000	.	.	gene_id "CUFF.73108"; transcript_id "CUFF.73108.1"; FPKM "123.2395051093"; frac "1.000000"; conf_lo "30.079196"; conf_hi "216.399814"; cov "7.000000";
+chr14	Cufflinks	transcript	31949103	31949152	1000	.	.	gene_id "CUFF.77316"; transcript_id "CUFF.77316.1"; FPKM "85.5634278330"; frac "1.000000"; conf_lo "28.521143"; conf_hi "142.605713"; cov "4.860000";
+chr14	Cufflinks	exon	67604227	67604668	1000	.	.	gene_id "CUFF.81446"; transcript_id "CUFF.81446.1"; exon_number "1"; FPKM "123.6776546104"; frac "1.000000"; conf_lo "100.611653"; conf_hi "146.743656"; cov "7.024887";
+chr14	Cufflinks	exon	75165582	75165744	1000	.	.	gene_id "CUFF.82088"; transcript_id "CUFF.82088.1"; exon_number "1"; FPKM "20.4139057543"; frac "1.000000"; conf_lo "4.982443"; conf_hi "35.845368"; cov "1.159509";
+chr16	Cufflinks	transcript	57154027	57154067	1000	.	.	gene_id "CUFF.103364"; transcript_id "CUFF.103364.1"; FPKM "162.3154457537"; frac "1.000000"; conf_lo "75.554191"; conf_hi "249.076701"; cov "9.219512";
+chr16	Cufflinks	exon	74862302	74862560	1000	.	.	gene_id "CUFF.105450"; transcript_id "CUFF.105450.1"; exon_number "1"; FPKM "11.0120241741"; frac "1.000000"; conf_lo "2.020744"; conf_hi "20.003304"; cov "0.625483";
+chr16	Cufflinks	transcript	98168779	98168914	1000	.	.	gene_id "CUFF.107834"; transcript_id "CUFF.107834.1"; FPKM "24.4666664555"; frac "1.000000"; conf_lo "5.971605"; conf_hi "42.961728"; cov "1.389706";
+chr17	Cufflinks	exon	8483212	8483268	1000	.	.	gene_id "CUFF.108498"; transcript_id "CUFF.108498.1"; exon_number "1"; FPKM "50.0370923000"; frac "1.000000"; conf_lo "9.181978"; conf_hi "90.892207"; cov "2.842105";
+chr17	Cufflinks	exon	30355791	30355913	1000	.	.	gene_id "CUFF.111759"; transcript_id "CUFF.111759.1"; exon_number "1"; FPKM "19.3232673516"; frac "1.000000"; conf_lo "2.040012"; conf_hi "36.606523"; cov "1.097561";
+chr18	Cufflinks	transcript	39571718	39571880	1000	.	.	gene_id "CUFF.123569"; transcript_id "CUFF.123569.1"; FPKM "20.4139057543"; frac "1.000000"; conf_lo "4.982443"; conf_hi "35.845368"; cov "1.159509";
+chr19	Cufflinks	exon	17633088	17633203	1000	.	.	gene_id "CUFF.131333"; transcript_id "CUFF.131333.1"; exon_number "1"; FPKM "20.4893265884"; frac "1.000000"; conf_lo "2.163116"; conf_hi "38.815537"; cov "1.163793";
+chr19	Cufflinks	transcript	41997624	41997859	1000	.	.	gene_id "CUFF.133569"; transcript_id "CUFF.133569.1"; FPKM "28.1988698132"; frac "1.000000"; conf_lo "13.125940"; conf_hi "43.271800"; cov "1.601695";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tophat_in1.fasta	Wed Mar 02 09:13:24 2016 -0500
@@ -0,0 +1,14 @@
+>test_chromosome
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ACTACTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCC
+ACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGC
+AGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCT
+ACGTATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCC
+GTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG
+ACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACT
+GTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG
+TTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA