<!--
    Galaxy tool wrapper for extract_genomic_dna.py.

    Takes a gff or interval dataset plus a reference genome (a locally cached
    entry from the "twobit" data table, or a fasta dataset from the history)
    and passes them to the script, which writes fasta or interval output.
-->
<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
    <description>using coordinates from assembled/unassembled genomes</description>
    <requirements>
        <requirement type="package" version="35x1">faToTwoBit</requirement>
    </requirements>
    <command>
        <![CDATA[
            ## Values derived from the selected input dataset.
            #set input_format = $input.ext
            #set genome = $input.metadata.dbkey
            #set datatype = $input.datatype
            mkdir -p output_dir &&
            python "$__tool_directory__/extract_genomic_dna.py"
            --input_format $input_format
            --input "$input"
            --genome "$genome"
            ## Feature interpretation is only passed for datasets whose extension is exactly "gff".
            #if str($input_format) == "gff":
                --interpret_features $interpret_features
            #end if
            ## GFF datatypes use fixed chrom/start/end/strand columns; every other
            ## input reports its column positions through dataset metadata.
            #if isinstance($datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
                --columns "1,4,5,7"
            #else:
                --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
            #end if
            --reference_genome_source $reference_genome_cond.reference_genome_source
            ## A cached genome is a data table row (use its path field);
            ## a history genome is the dataset itself.
            #if str($reference_genome_cond.reference_genome_source) == "cached":
                --reference_genome "$reference_genome_cond.reference_genome.fields.path"
            #else:
                --reference_genome "$reference_genome_cond.reference_genome"
            #end if
            --output_format $output_format
            --output "$output"
        ]]>
    </command>
    <inputs>
        <!-- The input must have a genome build assigned; the build is used to select the reference. -->
        <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in" help="Supported formats are gff, interval">
            <validator type="unspecified_build" />
        </param>
        <param name="interpret_features" type="select" label="Interpret features when possible" help="Applicable only when input dataset format is gff">
            <option value="yes">Yes</option>
            <option value="no">No</option>
        </param>
        <conditional name="reference_genome_cond">
            <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
                <option value="cached">locally cached</option>
                <option value="history">from history</option>
            </param>
            <when value="cached">
                <!-- Only data table entries whose first column matches the input's dbkey are offered. -->
                <param name="reference_genome" type="select" label="Using reference genome">
                    <options from_data_table="twobit">
                        <filter type="data_meta" key="dbkey" ref="input" column="0"/>
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <!-- Only fasta datasets whose dbkey matches the input's dbkey are offered. -->
                <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
                    <options>
                        <filter type="data_meta" key="dbkey" ref="input"/>
                    </options>
                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
                </param>
            </when>
        </conditional>
        <param name="output_format" type="select" label="Select output format">
            <option value="fasta" selected="True">fasta</option>
            <option value="interval">interval</option>
        </param>
    </inputs>
    <outputs>
        <!-- The datatype follows the output_format selection: fasta by default, interval when requested. -->
        <data name="output" format="fasta">
            <change_format>
                <when input="output_format" value="interval" format="interval" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output">
                <assert_contents>
                    <!-- First few lines... -->
                    <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" />
                    <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" />
                    <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" />
                    <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" />
                    <!-- Last few lines... -->
                    <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" />
                    <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" />
                    <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" />
                    <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" />
                    <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out2.fasta" />
        </test>
        <test>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out3.interval" />
        </test>
        <!-- Test GFF file support. -->
        <test>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out4.gff" />
        </test>
        <test>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="output_format" value="fasta"/>
            <param name="reference_genome_source" value="cached"/>
            <output name="output" file="extract_genomic_dna_out5.fasta" />
        </test>
        <!-- Test custom sequences support and GFF feature interpretation. -->
        <!-- The history fasta is given the input's build so that it passes the dbkey filter on reference_genome. -->
        <test>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta" ftype="fasta" dbkey="mm9"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out6.fasta" />
        </test>
        <test>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta" ftype="fasta" dbkey="mm9"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out7.fasta" />
        </test>
    </tests>
    <help><![CDATA[
.. class:: warningmark

The following will cause a line from the input dataset to be skipped and a warning generated.

- Sequences that fall outside of the range of a line's start and end coordinates.
- Chromosome start or end coordinates that are invalid for the specified build.

-----

**What it does**

This tool uses coordinate, strand, and build information to fetch genomic DNA from gff or interval data, producing fasta or interval data.

-----

**Example**

If the input dataset is::

    chr7 127475281 127475310 NM_000230 0 +
    chr7 127485994 127486166 NM_000230 0 +
    chr7 127486011 127486166 D49487 0 +

Extracting sequences returns::

    >hg17_chr7_127475281_127475310_+ NM_000230
    GTAGGAATCGCAGCGCCAGCGGTTGCAAG
    >hg17_chr7_127485994_127486166_+ NM_000230
    GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
    GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
    CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
    GATCAATGACATTTCACACACG
    >hg17_chr7_127486011_127486166_+ D49487
    TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
    CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
    CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
    ACACG

    ]]></help>
    <citations>
        <!-- NOTE(review): every field except url is an empty or "None" placeholder; replace with a real reference when one is available. -->
        <citation type="bibtex">
            @unpublished{None,
            author = {},
            title = {None},
            year = {None},
            eprint = {None},
            url = {http://www.bx.psu.edu/~anton/labSite/}
            }</citation>
    </citations>
</tool>