annotate extract_genomic_dna.xml @ 7:338e991cdd1f draft

Uploaded
author greg
date Thu, 14 Jan 2016 11:01:00 -0500
parents 3d40351fc9ac
children 59bb87024183
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
2 <description>using coordinates from assembled/unassembled genomes</description>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
3 <requirements>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
4 <requirement type="package" version="35x1">faToTwoBit</requirement>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
5 </requirements>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
6 <command>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
7 <![CDATA[
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
8 #set genome = $input.metadata.dbkey
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
9 #set datatype = $input.datatype
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
10 mkdir -p output_dir &&
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
11 python $__tool_directory__/extract_genomic_dna.py
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
12 --input "$input"
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
13 --genome "$genome"
6
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
14 #if isinstance($datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
15 --input_format "gff"
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
16 --columns "1,4,5,7"
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
17 --interpret_features $interpret_features
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
18 #else:
6
3d40351fc9ac Uploaded
greg
parents: 2
diff changeset
19 --input_format "interval"
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
20 --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
21 #end if
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
22 --reference_genome_source $reference_genome_cond.reference_genome_source
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
23 #if str($reference_genome_cond.reference_genome_source) == "cached"
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
24 --reference_genome $reference_genome_cond.reference_genome.fields.path
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
25 #else:
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
26 --reference_genome $reference_genome_cond.reference_genome
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
27 #end if
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
28 --output_format $output_format
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
29 --output $output
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
30 ]]>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
31 </command>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
32 <inputs>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
33 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in" help="Supported formats are gff, interval">
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
34 <validator type="unspecified_build" />
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
35 </param>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
36 <param name="interpret_features" type="select" label="Interpret features when possible" help="Applicable only when input dataset format is gff">
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
37 <option value="yes">Yes</option>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
38 <option value="no">No</option>
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
39 </param>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
40 <conditional name="reference_genome_cond">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
41 <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
42 <option value="cached">locally cached</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
43 <option value="history">from history</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
44 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
45 <when value="cached">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
46 <param name="reference_genome" type="select" label="Using reference genome">
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
47 <options from_data_table="twobit">
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
48 <filter type="data_meta" key="dbkey" ref="input" column="0"/>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
49 </options>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
50 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
51 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
52 </when>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
53 <when value="history">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
54 <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
55 <options>
1
311febbd33d6 Uploaded
greg
parents: 0
diff changeset
56 <filter type="data_meta" key="dbkey" ref="input"/>
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
57 </options>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
58 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
59 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
60 </when>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
61 </conditional>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
62 <param name="output_format" type="select" label="Select output format">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
63 <option value="fasta" selected="True">fasta</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
64 <option value="interval">interval</option>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
65 </param>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
66 </inputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
67 <outputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
68 <data name="output" format="fasta">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
69 <change_format>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
70 <when input="output_format" value="interval" format="interval" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
71 </change_format>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
72 </data>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
73 </outputs>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
74 <tests>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
75 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
76 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
77 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
78 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
79 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
80 <output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
81 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
82 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
83 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
84 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
85 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
86 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
87 <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
88 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
89 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
90 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
91 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
92 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
93 <param name="output_format" value="interval"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
94 <output name="output" file="extract_genomic_dna_out3.interval" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
95 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
96 <!-- Test GFF file support. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
97 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
98 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
99 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
100 <param name="reference_genome_source" value="cached"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
101 <param name="output_format" value="interval"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
102 <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
103 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
104 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
105 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
106 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
107 <param name="output_format" value="fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
108 <param name="reference_genome_source" value="cached"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
109 <output name="output" file="extract_genomic_dna_out5.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
110 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
111 <!-- Test custom sequences support and GFF feature interpretation. -->
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
112 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
113 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
114 <param name="interpret_features" value="no"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
115 <param name="reference_genome_source" value="history"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
116 <param name="reference_genome" value="tophat_in1.fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
117 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
118 <output name="output" file="extract_genomic_dna_out6.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
119 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
120 <test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
121 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
122 <param name="interpret_features" value="yes"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
123 <param name="reference_genome_source" value="history"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
124 <param name="reference_genome" value="tophat_in1.fasta"/>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
125 <param name="output_format" value="fasta"/>
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
126 <output name="output" file="extract_genomic_dna_out7.fasta" compare="contains" />
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
127 </test>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
128 </tests>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
129 <help>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
130
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
131 .. class:: warningmark
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
132
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
133 This tool requires interval or gff (special tabular formatted data). If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
134
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
135 .. class:: warningmark
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
136
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
137 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
138
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
139 .. class:: warningmark
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
140
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
141 All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
142 - Any lines that do not contain at least 3 columns: a chromosome, and numerical start and end coordinates.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
143 - Sequences that fall outside of the range of a line's start and end coordinates.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
144 - Chromosome, start or end coordinates that are invalid for the specified build.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
145 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
146
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
147 .. class:: infomark
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
148
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
149 **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** were previously provided by two separate tools.
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
150
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
151 -----
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
152
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
153 **What it does**
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
154
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
155 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
156
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
157 If strand is not defined, the default value is "+".
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
158
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
159 -----
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
160
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
161 **Example**
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
162
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
163 If the input dataset is::
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
164
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
165 chr7 127475281 127475310 NM_000230 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
166 chr7 127485994 127486166 NM_000230 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
167 chr7 127486011 127486166 D49487 0 +
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
168
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
169 Extracting sequences with **FASTA** output data type returns::
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
170
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
171 &gt;hg17_chr7_127475281_127475310_+ NM_000230
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
172 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
173 &gt;hg17_chr7_127485994_127486166_+ NM_000230
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
174 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
175 GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
176 CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
177 GATCAATGACATTTCACACACG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
178 &gt;hg17_chr7_127486011_127486166_+ D49487
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
179 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
180 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
181 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
182 ACACG
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
183
7
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
184 Extracting sequences with **Interval** output data type returns::
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
185
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
186 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
187 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
188 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
338e991cdd1f Uploaded
greg
parents: 6
diff changeset
189
0
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
190 </help>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
191 <citations>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
192 <citation type="bibtex">
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
193 @unpublished{None,
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
194 author = {},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
195 title = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
196 year = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
197 eprint = {None},
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
198 url = {http://www.bx.psu.edu/~anton/labSite/}
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
199 }</citation>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
200 </citations>
cff5b7c9be55 Uploaded
greg
parents:
diff changeset
201 </tool>