changeset 18:ec35e8d25958 draft

Uploaded
author greg
date Thu, 03 Mar 2016 14:52:47 -0500
parents 24c055f48065
children b212210c1808
files extract_genomic_dna.xml
diffstat 1 files changed, 46 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/extract_genomic_dna.xml	Thu Mar 03 14:52:35 2016 -0500
+++ b/extract_genomic_dna.xml	Thu Mar 03 14:52:47 2016 -0500
@@ -28,7 +28,10 @@
             #end if
             --output_format $output_format_cond.output_format
             #if str($output_format_cond.output_format) == "fasta":
-                --description_field_delimiter $output_format_cond.description_field_delimiter
+                --fasta_header_type $output_format_cond.fasta_header_type_cond.fasta_header_type
+                #if str($output_format_cond.fasta_header_type_cond.fasta_header_type) == "char_delimited":
+                    --fasta_header_delimiter $output_format_cond.fasta_header_type_cond.fasta_header_delimiter
+                #end if
             #end if
             --output $output
         ]]>
@@ -69,13 +72,22 @@
                 <option value="interval">interval</option>
             </param>
             <when value="fasta">
-                <param name="description_field_delimiter" type="select" label="Select description field delimiter" help="Character delimiter for words in description line">
-                    <option value="underscore" selected="True">underscore (_)</option>
-                    <option value="semicolon">semicolon (;)</option>
-                    <option value="comma">comma (,)</option>
-                    <option value="tilda">tilda (~)</option>
-                    <option value="vetical_bar">vertical bar (|)</option>
-                </param>
+                <conditional name="fasta_header_type_cond">
+                    <param name="fasta_header_type" type="select" label="Select fasta header format">
+                        <option value="bedtools_getfasta_default" selected="True">bedtools getfasta default</option>
+                        <option value="char_delimited">character delimited field values</option>
+                    </param>
+                    <when value="bedtools_getfasta_default"/>
+                    <when value="char_delimited">
+                        <param name="fasta_header_delimiter" type="select" label="Select fasta header field delimiter">
+                            <option value="underscore" selected="True">underscore (_)</option>
+                            <option value="semicolon">semicolon (;)</option>
+                            <option value="comma">comma (,)</option>
+                            <option value="tilda">tilda (~)</option>
+                            <option value="vetical_bar">vertical bar (|)</option>
+                        </param>
+                    </when>
+                </conditional>
             </when>
             <when value="interval"/>
         </conditional>
@@ -93,7 +105,8 @@
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
-            <param name="description_field_delimiter" value="underscore"/>
+            <param name="fasta_header_type" value="char_delimited"/>
+            <param name="fasta_header_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
         </test>
         <test>
@@ -101,7 +114,8 @@
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
-            <param name="description_field_delimiter" value="underscore"/>
+            <param name="fasta_header_type" value="char_delimited"/>
+            <param name="fasta_header_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
         </test>
         <test>
@@ -124,7 +138,8 @@
             <param name="interpret_features" value="no"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
-            <param name="description_field_delimiter" value="underscore"/>
+            <param name="fasta_header_type" value="char_delimited"/>
+            <param name="fasta_header_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
         </test>
         <!-- Test custom sequences support and GFF feature interpretation. -->
@@ -134,7 +149,8 @@
             <param name="index_source" value="history"/>
             <param name="ref_file" value="tophat_in1.fasta"/>
             <param name="out_format" value="fasta"/>
-            <param name="description_field_delimiter" value="underscore"/>
+            <param name="fasta_header_type" value="char_delimited"/>
+            <param name="fasta_header_delimiter" value="underscore"/>
             <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
         </test>
         <test>
@@ -143,7 +159,7 @@
             <param name="index_source" value="history"/>
             <param name="ref_file" value="tophat_in1.fasta"/>
             <param name="out_format" value="fasta"/>
-            <param name="description_field_delimiter" value="underscore"/>
+            <param name="fasta_header_type" value="bedtools_getfasta_default"/>
             <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
         </test>
     </tests>
@@ -170,14 +186,27 @@
 **What it does**
 
 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
-If the output format is FASTA, the character delimiter can be selected for the fields in the description.
-For example, selecting an underscore will produce a description like this:
+If the output format is FASTA, the header format can be specified.
+
+Selecting the **bedtools getfasta default** option produces a FASTA header formatted like the default header
+produced by the bedtools getfasta tool, and the "force strandedness" option is assumed.  If the input data
+includes a strand column and the strand is '+' or '-', it is included in the header.  If the input data includes
+a strand column and the value of strand is anything but '+' or '-', a '.' is included in the header.  If the
+input data does not include a strand column, a '.' is included in the header.
 
-    >gi_31563518_ref_NP_852610.1
+An example FASTA header produced by selecting this option is:
+
+    &gt;chr7:127475281-127475310(+)
+
+Selecting the **character delimited field values** option allows selection of a character delimiter that is used
+when generating the FASTA header with fields genome, chrom, start, end, strand (name) delimited by the
+selected character.  For example, selecting an underscore will produce a FASTA header like this:
+
+    &gt;mm9_53_550_+ test_chromosome
 
 while selecting a vertical bar will produce a description like this:
 
-    >gi|31563518|ref|NP_852610.1
+    &gt;mm9|53|550|+ test_chromosome
 
 If strand is not defined, the default value is "+".