annotate extract_genomic_dna.xml @ 12:9886ad53474a draft

Uploaded
author greg
date Tue, 19 Jan 2016 21:48:00 -0500
parents 59bb87024183
children fe88f4eeaddc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
2 <description>using coordinates from assembled/unassembled genomes</description>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
3 <requirements>
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
4 <requirement type="package" version="0.7.1">bx-python</requirement>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
5 <requirement type="package" version="35x1">faToTwoBit</requirement>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
6 </requirements>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
7 <command>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
8 <![CDATA[
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
9 #set genome = $input.metadata.dbkey
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
10 #set datatype = $input.datatype
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
11 mkdir -p output_dir &&
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
12 python $__tool_directory__/extract_genomic_dna.py
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
13 --input "$input"
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
14 --genome "$genome"
12
9886ad53474a Uploaded
greg
parents: 10
diff changeset
15 #if $input.is_of_type("gff"):
6
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
16 --input_format "gff"
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
17 --columns "1,4,5,7"
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
18 --interpret_features $interpret_features
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
19 #else:
6
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
20 --input_format "interval"
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
21 --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
22 #end if
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
23 --reference_genome_source $reference_genome_cond.reference_genome_source
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
24 #if str($reference_genome_cond.reference_genome_source) == "cached"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
25 --reference_genome $reference_genome_cond.reference_genome.fields.path
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
26 #else:
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
27 --reference_genome $reference_genome_cond.reference_genome
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
28 #end if
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
29 --output_format $output_format
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
30 --output $output
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
31 ]]>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
32 </command>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
33 <inputs>
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
34 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in">
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
35 <validator type="unspecified_build" />
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
36 </param>
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
37 <param name="interpret_features" type="select" label="Interpret features when possible" help="Applicable only when input dataset format is in the gff family">
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
38 <option value="yes">Yes</option>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
39 <option value="no">No</option>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
40 </param>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
41 <conditional name="reference_genome_cond">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
42 <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
43 <option value="cached">locally cached</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
44 <option value="history">from history</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
45 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
46 <when value="cached">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
47 <param name="reference_genome" type="select" label="Using reference genome">
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
48 <options from_data_table="twobit">
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
49 <filter type="data_meta" key="dbkey" ref="input" column="0"/>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
50 </options>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
51 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
52 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
53 </when>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
54 <when value="history">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
55 <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
56 <options>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
57 <filter type="data_meta" key="dbkey" ref="input"/>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
58 </options>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
59 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
60 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
61 </when>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
62 </conditional>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
63 <param name="output_format" type="select" label="Select output format">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
64 <option value="fasta" selected="True">fasta</option>
12
9886ad53474a Uploaded
greg
parents: 10
diff changeset
65 <option value="gff">gff</option>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
66 <option value="interval">interval</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
67 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
68 </inputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
69 <outputs>
12
9886ad53474a Uploaded
greg
parents: 10
diff changeset
70 <data name="output" format="fasta">
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
71 <change_format>
12
9886ad53474a Uploaded
greg
parents: 10
diff changeset
72 <when input="output_format" value="gff" format="gff" />
9886ad53474a Uploaded
greg
parents: 10
diff changeset
73 <when input="output_format" value="interval" format="interval" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
74 </change_format>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
75 </data>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
76 </outputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
77 <tests>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
78 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
79 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
80 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
81 <param name="index_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
82 <param name="out_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
83 <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
84 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
85 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
86 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
87 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
88 <param name="index_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
89 <param name="out_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
90 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
91 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
92 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
93 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
94 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
95 <param name="index_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
96 <param name="out_format" value="interval"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
97 <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
98 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
99 <!-- Test GFF file support. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
100 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
101 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
102 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
103 <param name="index_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
104 <param name="out_format" value="interval"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
105 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
106 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
107 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
108 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
109 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
110 <param name="out_format" value="fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
111 <param name="index_source" value="cached"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
112 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
113 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
114 <!-- Test custom sequences support and GFF feature interpretation. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
115 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
116 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
117 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
118 <param name="index_source" value="history"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
119 <param name="ref_file" value="tophat_in1.fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
120 <param name="out_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
121 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
122 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
123 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
124 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
125 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
126 <param name="index_source" value="history"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
127 <param name="ref_file" value="tophat_in1.fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
128 <param name="out_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
129 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
130 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
131 </tests>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
132 <help>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
133
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
134 .. class:: warningmark
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
135
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
136 This tool requires input in interval or gff format (both are special tab-delimited tabular formats). If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
137
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
138 .. class:: warningmark
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
139
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
140 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
141
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
142 .. class:: warningmark
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
143
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
144 All of the following will cause a line from the input dataset to be skipped and a warning to be generated. The numbers of warnings and skipped lines are reported in the resulting history item.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
145 - Any lines that do not contain at least 3 columns: a chromosome plus numerical start and end coordinates.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
146 - Sequences that fall outside of the range of a line's start and end coordinates.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
147 - Chromosome, start or end coordinates that are invalid for the specified build.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
148 - Any lines whose data columns are not separated by a **TAB** character (other white-space characters are invalid).
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
149
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
150 .. class:: infomark
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
151
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
152 **Extracting genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** was previously done by two separate tools; this tool handles both.
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
153
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
154 -----
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
155
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
156 **What it does**
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
157
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
158 This tool uses coordinate, strand, and build information to fetch genomic DNA sequences in FASTA, GFF, or interval format.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
159
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
160 If strand is not defined, the default value is "+".
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
161
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
162 -----
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
163
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
164 **Example**
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
165
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
166 If the input dataset is::
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
167
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
168 chr7 127475281 127475310 NM_000230 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
169 chr7 127485994 127486166 NM_000230 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
170 chr7 127486011 127486166 D49487 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
171
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
172 Extracting sequences with **FASTA** output data type returns::
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
173
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
174 &gt;hg17_chr7_127475281_127475310_+ NM_000230
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
175 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
176 &gt;hg17_chr7_127485994_127486166_+ NM_000230
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
177 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
178 GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
179 CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
180 GATCAATGACATTTCACACACG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
181 &gt;hg17_chr7_127486011_127486166_+ D49487
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
182 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
183 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
184 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
185 ACACG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
186
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
187 Extracting sequences with **Interval** output data type returns::
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
188
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
189 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
190 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
191 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
192
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
193 </help>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
194 <citations>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
195 <citation type="bibtex">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
196 @unpublished{None,
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
197 author = {Guru Ananda and Greg Von Kuster},
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
198 title = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
199 year = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
200 eprint = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
201 url = {http://www.bx.psu.edu/~anton/labSite/}
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
202 }</citation>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
203 </citations>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
204 </tool>