annotate extract_genomic_dna.xml @ 13:fe88f4eeaddc draft

Uploaded
author greg
date Wed, 20 Jan 2016 07:37:31 -0500
parents 9886ad53474a
children 1a10864abc1f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
2 <description>using coordinates from assembled/unassembled genomes</description>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
3 <requirements> <!-- Package dependencies declared for this tool: bx-python 0.7.1 and faToTwoBit 35x1. -->
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
4 <requirement type="package" version="0.7.1">bx-python</requirement>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
5 <requirement type="package" version="35x1">faToTwoBit</requirement>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
6 </requirements>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
7 <command>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
8 <![CDATA[
## Builds the extract_genomic_dna.py invocation from the tool inputs.
## File-path arguments are quoted so that paths survive shell word-splitting.
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
9 #set genome = $input.metadata.dbkey
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
10 #set datatype = $input.datatype
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
11 mkdir -p output_dir &&
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
12 python $__tool_directory__/extract_genomic_dna.py
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
13 --input "$input"
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
14 --genome "$genome"
12
9886ad53474a Uploaded
greg
parents: 10
diff changeset
15 #if $input.is_of_type("gff"):
6
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
16 --input_format "gff"
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
17 --columns "1,4,5,7"
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
18 --interpret_features $interpret_features
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
19 #else:
6
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
20 --input_format "interval"
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
21 --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
22 #end if
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
23 --reference_genome_source $reference_genome_cond.reference_genome_source
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
24 #if str($reference_genome_cond.reference_genome_source) == "cached":
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
25 --reference_genome "$reference_genome_cond.reference_genome.fields.path"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
26 #else:
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
27 --reference_genome "$reference_genome_cond.reference_genome"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
28 #end if
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
29 --output_format $output_format
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
30 --output "$output"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
31 ]]>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
32 </command>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
33 <inputs>
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
34 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in"> <!-- Dataset of regions to extract (gff or interval); carries an unspecified_build validator. -->
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
35 <validator type="unspecified_build" />
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
36 </param>
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
37 <param name="interpret_features" type="select" label="Interpret features when possible" help="Applicable only when input dataset format is in the gff family"> <!-- Forwarded to the script's interpret_features option, only in the gff branch of the command. -->
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
38 <option value="yes">Yes</option>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
39 <option value="no">No</option>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
40 </param>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
41 <conditional name="reference_genome_cond">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
42 <param name="reference_genome_source" type="select" label="Choose the source for the reference genome"> <!-- Selects which "when" branch supplies reference_genome: a cached twobit entry or a fasta dataset from the history. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
43 <option value="cached">locally cached</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
44 <option value="history">from history</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
45 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
46 <when value="cached">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
47 <param name="reference_genome" type="select" label="Using reference genome"> <!-- Options come from the twobit data table, filtered on column 0 by the dbkey of the "input" dataset. -->
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
48 <options from_data_table="twobit">
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
49 <filter type="data_meta" key="dbkey" ref="input" column="0"/>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
50 </options>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
51 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
52 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
53 </when>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
54 <when value="history">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
55 <param name="reference_genome" type="data" format="fasta" label="Using reference genome"> <!-- Fasta dataset from the history; candidates are filtered by the dbkey of the "input" dataset. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
56 <options>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
57 <filter type="data_meta" key="dbkey" ref="input"/>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
58 </options>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
59 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
60 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
61 </when>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
62 </conditional>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
63 <param name="output_format" type="select" label="Select output format"> <!-- Passed to the script as the output_format option; fasta is preselected. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
64 <option value="fasta" selected="True">fasta</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
65 <option value="interval">interval</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
66 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
67 </inputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
68 <outputs>
13
fe88f4eeaddc Uploaded
greg
parents: 12
diff changeset
69 <data format_source="input" name="output" metadata_source="input"> <!-- Takes format and metadata from "input"; the format switches to fasta when output_format is fasta. -->
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
70 <change_format>
13
fe88f4eeaddc Uploaded
greg
parents: 12
diff changeset
71 <when input="output_format" value="fasta" format="fasta" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
72 </change_format>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
73 </data>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
74 </outputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
75 <tests>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
76 <test> <!-- BED intervals (hg17), cached reference genome, FASTA output. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
77 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
78 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
79 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
80 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
81 <output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
82 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
83 <test> <!-- BED intervals (droPer1), cached reference genome, FASTA output. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
84 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
85 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
86 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
87 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
88 <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
89 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
90 <test> <!-- BED intervals (hg17), cached reference genome, interval output. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
91 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
92 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
93 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
94 <param name="output_format" value="interval"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
95 <output name="output" file="extract_genomic_dna_out3.interval" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
96 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
97 <!-- Test GFF file support. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
98 <test> <!-- GFF input (mm9), no feature interpretation, cached reference genome, interval output option. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
99 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
100 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
101 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
102 <param name="output_format" value="interval"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
103 <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
104 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
105 <test> <!-- GFF input (mm9), no feature interpretation, cached reference genome, FASTA output. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
106 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
107 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
108 <param name="output_format" value="fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
109 <param name="reference_genome_source" value="cached"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
110 <output name="output" file="extract_genomic_dna_out5.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
111 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
112 <!-- Test custom sequences support and GFF feature interpretation. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
113 <test> <!-- GTF input (mm9), no feature interpretation, reference genome taken from a history fasta, FASTA output. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
114 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
115 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
116 <param name="reference_genome_source" value="history"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
117 <param name="reference_genome" value="tophat_in1.fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
118 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
119 <output name="output" file="extract_genomic_dna_out6.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
120 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
121 <test> <!-- GTF input (mm9), feature interpretation enabled, reference genome taken from a history fasta, FASTA output. Names match the declared inputs and output. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
122 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
123 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
124 <param name="reference_genome_source" value="history"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
125 <param name="reference_genome" value="tophat_in1.fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
126 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
127 <output name="output" file="extract_genomic_dna_out7.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
128 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
129 </tests>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
130 <help>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
131
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
132 .. class:: warningmark
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
133
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
134 This tool requires input in interval or gff format (specially formatted tabular data). If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
135
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
136 .. class:: warningmark
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
137
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
138 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
139
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
140 .. class:: warningmark
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
141
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
142 All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
143 - Any lines that do not contain at least 3 columns: a chromosome and numerical start and end coordinates.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
144 - Sequences that fall outside of the range of a line's start and end coordinates.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
145 - Chromosome, start or end coordinates that are invalid for the specified build.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
146 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
147
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
148 .. class:: infomark
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
149
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
150 Extracting genomic DNA using coordinates from **ASSEMBLED** genomes and from **UNassembled** genomes was previously done by two separate tools; this tool handles both.
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
151
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
152 -----
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
153
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
154 **What it does**
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
155
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
156 This tool uses coordinate, strand, and build information to fetch genomic DNA sequences in FASTA or interval format.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
157
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
158 If strand is not defined, the default value is "+".
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
159
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
160 -----
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
161
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
162 **Example**
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
163
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
164 If the input dataset is::
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
165
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
166 chr7 127475281 127475310 NM_000230 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
167 chr7 127485994 127486166 NM_000230 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
168 chr7 127486011 127486166 D49487 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
169
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
170 Extracting sequences with **FASTA** output data type returns::
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
171
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
172 &gt;hg17_chr7_127475281_127475310_+ NM_000230
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
173 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
174 &gt;hg17_chr7_127485994_127486166_+ NM_000230
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
175 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
176 GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
177 CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
178 GATCAATGACATTTCACACACG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
179 &gt;hg17_chr7_127486011_127486166_+ D49487
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
180 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
181 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
182 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
183 ACACG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
184
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
185 Extracting sequences with **Interval** output data type returns::
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
186
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
187 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
188 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
189 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
190
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
191 </help>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
192 <citations> <!-- NOTE(review): the BibTeX key, title, year and eprint below are literal "None" placeholders; confirm the intended reference. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
193 <citation type="bibtex">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
194 @unpublished{None,
10
59bb87024183 Uploaded
greg
parents: 7
diff changeset
195 author = {Guru Ananda,Greg Von Kuster},
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
196 title = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
197 year = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
198 eprint = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
199 url = {http://www.bx.psu.edu/~anton/labSite/}
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
200 }</citation>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
201 </citations>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
202 </tool>