<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
    <!--
        Galaxy wrapper for extract_genomic_dna.py.
        Inputs: a GFF or interval dataset and a reference genome, either locally
        cached (alignseq_seq data table) or a fasta dataset from the history.
        Output: one dataset in the format selected by "output_format".
    -->
    <description>using coordinates from assembled/unassembled genomes</description>
    <command>
        <![CDATA[
            ## Pull the values nested inside the input-format conditional into short names.
            #set input_format = $input_format_cond.input_format
            #set input = $input_format_cond.input
            #set dbkey = $input.metadata.dbkey
            #set datatype = $input.datatype
            mkdir -p output_dir &&
            python "$__tool_directory__/extract_genomic_dna.py"
            --input_format $input_format
            --input "$input"
            --dbkey "$dbkey"
            ## interpret_features is only defined in the gff branch of the conditional.
            #if str($input_format) == "gff":
                --interpret_features $input_format_cond.interpret_features
            #end if
            ## GFF datasets use fixed columns; other inputs take them from dataset metadata.
            #if isinstance($datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
                --columns "1,4,5,7"
            #else:
                --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
            #end if
            --reference_genome_source $reference_genome_cond.reference_genome_source
            #if str($reference_genome_cond.reference_genome_source) == "cached":
                ## Cached genomes are data table rows; pass the row's path field.
                --reference_genome "$reference_genome_cond.reference_genome.fields.path"
            #else:
                --reference_genome "$reference_genome_cond.reference_genome"
            #end if
            --output_format $output_format
            --output "$output"
        ]]>
    </command>
    <inputs>
        <conditional name="input_format_cond">
            <param name="input_format" type="select" label="Input file format">
                <option value="gff" selected="True">Gff</option>
                <option value="interval">Interval</option>
            </param>
            <when value="gff">
                <param name="input" type="data" format="gff" label="Fetch sequences for intervals in">
                    <validator type="unspecified_build" />
                </param>
                <param name="interpret_features" type="select" label="Interpret features when possible">
                    <option value="yes">Yes</option>
                    <option value="no">No</option>
                </param>
            </when>
            <when value="interval">
                <param name="input" type="data" format="interval" label="Fetch sequences for intervals in">
                    <validator type="unspecified_build" />
                </param>
            </when>
        </conditional>
        <conditional name="reference_genome_cond">
            <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
                <option value="cached">locally cached</option>
                <option value="history">from history</option>
            </param>
            <when value="cached">
                <param name="reference_genome" type="select" label="Using reference genome">
                    <options from_data_table="alignseq_seq">
                        <filter type="data_meta" key="dbkey" ref="input" column="dbkey"/>
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
                    <options>
                        <!-- Must reference the "input" parameter, as the cached branch does;
                             this tool defines no parameter named "input_bam". -->
                        <filter type="data_meta" key="dbkey" ref="input" />
                    </options>
                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
                </param>
            </when>
        </conditional>
        <param name="output_format" type="select" label="Select output format">
            <option value="fasta" selected="True">fasta</option>
            <option value="interval">interval</option>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="gff">
            <!-- Each "when" names the parameter to inspect (input) and the value that
                 triggers the format switch, so the dataset type follows "output_format". -->
            <change_format>
                <when input="output_format" value="fasta" format="fasta" />
                <when input="output_format" value="interval" format="interval" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Parameter and output names below match the inputs and outputs sections.
             BED inputs select the "interval" branch, which has no interpret_features parameter. -->
        <test>
            <param name="input_format" value="interval"/>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output">
                <assert_contents>
                    <!-- First few lines... -->
                    <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" />
                    <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" />
                    <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" />
                    <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" />
                    <!-- Last few lines... -->
                    <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" />
                    <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" />
                    <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" />
                    <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" />
                    <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_format" value="interval"/>
            <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out2.fasta" />
        </test>
        <test>
            <param name="input_format" value="interval"/>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out3.interval" />
        </test>
        <!-- Test GFF file support. -->
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out4.gff" />
        </test>
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out5.fasta" />
        </test>
        <!-- Test custom sequences support and GFF feature interpretation.
             The history fasta carries the same dbkey as the input so it passes the dbkey filter. -->
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta" dbkey="mm9" ftype="fasta"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out6.fasta" />
        </test>
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta" dbkey="mm9" ftype="fasta"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out7.fasta" />
        </test>
    </tests>
    <help>

.. class:: warningmark

The following will cause a line from the input dataset to be skipped and a warning generated.

- Sequences that fall outside of the range of a line's start and end coordinates.
- Chromosome start or end coordinates that are invalid for the specified build.

-----

**What it does**

This tool uses coordinate, strand, and build information to fetch genomic DNA for the regions in a gff or interval dataset, producing fasta or interval data.

-----

**Example**

If the input dataset is::

    chr7  127475281  127475310  NM_000230  0  +
    chr7  127485994  127486166  NM_000230  0  +
    chr7  127486011  127486166  D49487     0  +

Extracting sequences returns::

    >hg17_chr7_127475281_127475310_+ NM_000230
    GTAGGAATCGCAGCGCCAGCGGTTGCAAG
    >hg17_chr7_127485994_127486166_+ NM_000230
    GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
    GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
    CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
    GATCAATGACATTTCACACACG
    >hg17_chr7_127486011_127486166_+ D49487
    TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
    CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
    CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
    ACACG

    </help>
    <citations>
        <citation type="bibtex">
            @unpublished{None,
            author = {},
            title = {None},
            year = {None},
            eprint = {None},
            url = {http://www.bx.psu.edu/~anton/labSite/}
        }</citation>
    </citations>
</tool>