comparison extract_genomic_dna.xml @ 14:1a10864abc1f draft

Uploaded
author greg
date Wed, 02 Mar 2016 09:13:24 -0500
parents fe88f4eeaddc
children ec35e8d25958
comparison
equal deleted inserted replaced
13:fe88f4eeaddc 14:1a10864abc1f
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0"> 1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2">
2 <description>using coordinates from assembled/unassembled genomes</description> 2 <description>using coordinates from assembled/unassembled genomes</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.7.1">bx-python</requirement> 4 <requirement type="package" version="0.7.1">bx-python</requirement>
5 <requirement type="package" version="35x1">faToTwoBit</requirement> 5 <requirement type="package" version="35x1">faToTwoBit</requirement>
6 </requirements> 6 </requirements>
24 #if str($reference_genome_cond.reference_genome_source) == "cached" 24 #if str($reference_genome_cond.reference_genome_source) == "cached"
25 --reference_genome $reference_genome_cond.reference_genome.fields.path 25 --reference_genome $reference_genome_cond.reference_genome.fields.path
26 #else: 26 #else:
27 --reference_genome $reference_genome_cond.reference_genome 27 --reference_genome $reference_genome_cond.reference_genome
28 #end if 28 #end if
29 --output_format $output_format 29 --output_format $output_format_cond.output_format
30 #if str($output_format_cond.output_format) == "fasta":
31 --description_field_delimiter $output_format_cond.description_field_delimiter
32 #end if
30 --output $output 33 --output $output
31 ]]> 34 ]]>
32 </command> 35 </command>
33 <inputs> 36 <inputs>
34 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in"> 37 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in">
58 </options> 61 </options>
59 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/> 62 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
60 </param> 63 </param>
61 </when> 64 </when>
62 </conditional> 65 </conditional>
63 <param name="output_format" type="select" label="Select output format"> 66 <conditional name="output_format_cond">
64 <option value="fasta" selected="True">fasta</option> 67 <param name="output_format" type="select" label="Select output format">
65 <option value="interval">interval</option> 68 <option value="fasta" selected="True">fasta</option>
66 </param> 69 <option value="interval">interval</option>
70 </param>
71 <when value="fasta">
72 <param name="description_field_delimiter" type="select" label="Select description field delimiter" help="Character delimiter for words in description line">
73 <option value="underscore" selected="True">underscore (_)</option>
74 <option value="semicolon">semicolon (;)</option>
75 <option value="comma">comma (,)</option>
76 <option value="tilda">tilde (~)</option>
77 <option value="vetical_bar">vertical bar (|)</option>
78 </param>
79 </when>
80 <when value="interval"/>
81 </conditional>
67 </inputs> 82 </inputs>
68 <outputs> 83 <outputs>
69 <data format_source="input" name="output" metadata_source="input"> 84 <data format_source="input" name="output" metadata_source="input">
70 <change_format> 85 <change_format>
71 <when input="output_format" value="fasta" format="fasta" /> 86 <when input="output_format_cond.output_format" value="fasta" format="fasta" />
72 </change_format> 87 </change_format>
73 </data> 88 </data>
74 </outputs> 89 </outputs>
75 <tests> 90 <tests>
76 <test> 91 <test>
77 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> 92 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
78 <param name="interpret_features" value="yes"/> 93 <param name="interpret_features" value="yes"/>
79 <param name="index_source" value="cached"/> 94 <param name="index_source" value="cached"/>
80 <param name="out_format" value="fasta"/> 95 <param name="out_format" value="fasta"/>
96 <param name="description_field_delimiter" value="underscore"/>
81 <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" /> 97 <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
82 </test> 98 </test>
83 <test> 99 <test>
84 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> 100 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
85 <param name="interpret_features" value="yes"/> 101 <param name="interpret_features" value="yes"/>
86 <param name="index_source" value="cached"/> 102 <param name="index_source" value="cached"/>
87 <param name="out_format" value="fasta"/> 103 <param name="out_format" value="fasta"/>
104 <param name="description_field_delimiter" value="underscore"/>
88 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" /> 105 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
89 </test> 106 </test>
90 <test> 107 <test>
91 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> 108 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
92 <param name="interpret_features" value="yes"/> 109 <param name="interpret_features" value="yes"/>
103 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" /> 120 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
104 </test> 121 </test>
105 <test> 122 <test>
106 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> 123 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
107 <param name="interpret_features" value="no"/> 124 <param name="interpret_features" value="no"/>
108 <param name="out_format" value="fasta"/> 125 <param name="index_source" value="cached"/>
109 <param name="index_source" value="cached"/> 126 <param name="out_format" value="fasta"/>
127 <param name="description_field_delimiter" value="underscore"/>
110 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" /> 128 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
111 </test> 129 </test>
112 <!-- Test custom sequences support and GFF feature interpretation. --> 130 <!-- Test custom sequences support and GFF feature interpretation. -->
113 <test> 131 <test>
114 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> 132 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
115 <param name="interpret_features" value="no"/> 133 <param name="interpret_features" value="no"/>
116 <param name="index_source" value="history"/> 134 <param name="index_source" value="history"/>
117 <param name="ref_file" value="tophat_in1.fasta"/> 135 <param name="ref_file" value="tophat_in1.fasta"/>
118 <param name="out_format" value="fasta"/> 136 <param name="out_format" value="fasta"/>
137 <param name="description_field_delimiter" value="underscore"/>
119 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" /> 138 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
120 </test> 139 </test>
121 <test> 140 <test>
122 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> 141 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
123 <param name="interpret_features" value="yes"/> 142 <param name="interpret_features" value="yes"/>
124 <param name="index_source" value="history"/> 143 <param name="index_source" value="history"/>
125 <param name="ref_file" value="tophat_in1.fasta"/> 144 <param name="ref_file" value="tophat_in1.fasta"/>
126 <param name="out_format" value="fasta"/> 145 <param name="out_format" value="fasta"/>
146 <param name="description_field_delimiter" value="underscore"/>
127 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" /> 147 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
128 </test> 148 </test>
129 </tests> 149 </tests>
130 <help> 150 <help>
131 151
143 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates. 163 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates.
144 - Sequences that fall outside of the range of a line's start and end coordinates. 164 - Sequences that fall outside of the range of a line's start and end coordinates.
145 - Chromosome, start or end coordinates that are invalid for the specified build. 165 - Chromosome, start or end coordinates that are invalid for the specified build.
146 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ). 166 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
147 167
148 .. class:: infomark
149
150 **Extracting genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** was previously achieved by two separate tools.
151
152 ----- 168 -----
153 169
154 **What it does** 170 **What it does**
155 171
156 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format. 172 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
173 If the output format is FASTA, the character delimiter can be selected for the fields in the description.
174 For example, selecting an underscore will produce a description like this:
175
176 >gi_31563518_ref_NP_852610.1
177
178 while selecting a vertical bar will produce a description like this:
179
180 >gi|31563518|ref|NP_852610.1
157 181
158 If strand is not defined, the default value is "+". 182 If strand is not defined, the default value is "+".
159 183
160 ----- 184 -----
161 185
165 189
166 chr7 127475281 127475310 NM_000230 0 + 190 chr7 127475281 127475310 NM_000230 0 +
167 chr7 127485994 127486166 NM_000230 0 + 191 chr7 127485994 127486166 NM_000230 0 +
168 chr7 127486011 127486166 D49487 0 + 192 chr7 127486011 127486166 D49487 0 +
169 193
170 Extracting sequences with **FASTA** output data type returns:: 194 Extracting sequences with **FASTA** output data type and **Description Field Delimiter** set to the underscore character returns::
171 195
172 &gt;hg17_chr7_127475281_127475310_+ NM_000230 196 &gt;hg17_chr7_127475281_127475310_+ NM_000230
173 GTAGGAATCGCAGCGCCAGCGGTTGCAAG 197 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
174 &gt;hg17_chr7_127485994_127486166_+ NM_000230 198 &gt;hg17_chr7_127485994_127486166_+ NM_000230
175 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG 199 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
190 214
191 </help> 215 </help>
192 <citations> 216 <citations>
193 <citation type="bibtex"> 217 <citation type="bibtex">
194 @unpublished{None, 218 @unpublished{None,
195 author = {Guru Ananda,Greg Von Kuster}, 219 author = {Guru Ananda},
196 title = {None}, 220 title = {None},
197 year = {None}, 221 year = {None},
198 eprint = {None}, 222 eprint = {None},
199 url = {http://www.bx.psu.edu/~anton/labSite/} 223 url = {http://www.bx.psu.edu/~anton/labSite/}
200 }</citation> 224 }</citation>