comparison extract_genomic_dna.xml @ 7:338e991cdd1f draft

Uploaded
author greg
date Thu, 14 Jan 2016 11:01:00 -0500
parents 3d40351fc9ac
children 59bb87024183
comparison
equal deleted inserted replaced
6:3d40351fc9ac 7:338e991cdd1f
75 <test> 75 <test>
76 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> 76 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
77 <param name="interpret_features" value="yes"/> 77 <param name="interpret_features" value="yes"/>
78 <param name="index_source" value="cached"/> 78 <param name="index_source" value="cached"/>
79 <param name="out_format" value="fasta"/> 79 <param name="out_format" value="fasta"/>
80 <output name="out_file1" file="extract_genomic_dna_out1.fasta"> 80 <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
81 <assert_contents>
82 <!-- First few lines... -->
83 <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" />
84 <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" />
85 <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" />
86 <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" />
87 <!-- Last few lines... -->
88 <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" />
89 <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" />
90 <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" />
91 <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" />
92 <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" />
93 </assert_contents>
94 </output>
95 </test> 81 </test>
96 <test> 82 <test>
97 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> 83 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
98 <param name="interpret_features" value="yes"/> 84 <param name="interpret_features" value="yes"/>
99 <param name="index_source" value="cached"/> 85 <param name="index_source" value="cached"/>
100 <param name="out_format" value="fasta"/> 86 <param name="out_format" value="fasta"/>
101 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains"/> 87 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
102 </test> 88 </test>
103 <test> 89 <test>
104 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> 90 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
105 <param name="interpret_features" value="yes"/> 91 <param name="interpret_features" value="yes"/>
106 <param name="index_source" value="cached"/> 92 <param name="index_source" value="cached"/>
107 <param name="out_format" value="interval"/> 93 <param name="out_format" value="interval"/>
108 <output name="out_file1" file="extract_genomic_dna_out3.interval" /> 94 <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" />
109 </test> 95 </test>
110 <!-- Test GFF file support. --> 96 <!-- Test GFF file support. -->
111 <test> 97 <test>
112 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> 98 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
113 <param name="interpret_features" value="no"/> 99 <param name="interpret_features" value="no"/>
114 <param name="index_source" value="cached"/> 100 <param name="index_source" value="cached"/>
115 <param name="out_format" value="interval"/> 101 <param name="out_format" value="interval"/>
116 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains"/> 102 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
117 </test> 103 </test>
118 <test> 104 <test>
119 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> 105 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
120 <param name="interpret_features" value="no"/> 106 <param name="interpret_features" value="no"/>
121 <param name="out_format" value="fasta"/> 107 <param name="out_format" value="fasta"/>
122 <param name="index_source" value="cached"/> 108 <param name="index_source" value="cached"/>
123 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains"/> 109 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
124 </test> 110 </test>
125 <!-- Test custom sequences support and GFF feature interpretation. --> 111 <!-- Test custom sequences support and GFF feature interpretation. -->
126 <test> 112 <test>
127 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> 113 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
128 <param name="interpret_features" value="no"/> 114 <param name="interpret_features" value="no"/>
129 <param name="index_source" value="history"/> 115 <param name="index_source" value="history"/>
130 <param name="ref_file" value="tophat_in1.fasta"/> 116 <param name="ref_file" value="tophat_in1.fasta"/>
131 <param name="out_format" value="fasta"/> 117 <param name="out_format" value="fasta"/>
132 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains"/> 118 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
133 </test> 119 </test>
134 <test> 120 <test>
135 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> 121 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
136 <param name="interpret_features" value="yes"/> 122 <param name="interpret_features" value="yes"/>
137 <param name="index_source" value="history"/> 123 <param name="index_source" value="history"/>
138 <param name="ref_file" value="tophat_in1.fasta"/> 124 <param name="ref_file" value="tophat_in1.fasta"/>
139 <param name="out_format" value="fasta"/> 125 <param name="out_format" value="fasta"/>
140 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains"/> 126 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
141 </test> 127 </test>
142 </tests> 128 </tests>
143 <help> 129 <help>
144 130
145 .. class:: warningmark 131 .. class:: warningmark
146 132
147 The following will cause a line from the input dataset to be skipped and a warning generated. 133 This tool requires interval or gff (special tabular formatted data). If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
148 134
149 - Sequences that fall outside of the range of a line's start and end coordinates. 135 .. class:: warningmark
150 - Chromosome start or end coordinates that are invalid for the specified build. 136
137 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
138
139 .. class:: warningmark
140
141 All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
142 - Any lines that do not contain at least 3 columns: a chromosome and numerical start and end coordinates.
143 - Sequences that fall outside of the range of a line's start and end coordinates.
144 - Chromosome, start or end coordinates that are invalid for the specified build.
145 - Any lines whose data columns are not separated by a **TAB** character (other white-space characters are invalid).
146
147 .. class:: infomark
148
149 **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** were previously provided by two separate tools.
151 150
152 ----- 151 -----
153 152
154 **What it does** 153 **What it does**
155 154
156 This tool uses coordinate, strand, and build information to fetch genomic DNA from gff data, producing fasta data. 155 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
156
157 If strand is not defined, the default value is "+".
157 158
158 ----- 159 -----
159 160
160 **Example** 161 **Example**
161 162
163 164
164 chr7 127475281 127475310 NM_000230 0 + 165 chr7 127475281 127475310 NM_000230 0 +
165 chr7 127485994 127486166 NM_000230 0 + 166 chr7 127485994 127486166 NM_000230 0 +
166 chr7 127486011 127486166 D49487 0 + 167 chr7 127486011 127486166 D49487 0 +
167 168
168 Extracting sequences returns:: 169 Extracting sequences with **FASTA** output data type returns::
169 170
170 &gt;hg17_chr7_127475281_127475310_+ NM_000230 171 &gt;hg17_chr7_127475281_127475310_+ NM_000230
171 GTAGGAATCGCAGCGCCAGCGGTTGCAAG 172 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
172 &gt;hg17_chr7_127485994_127486166_+ NM_000230 173 &gt;hg17_chr7_127485994_127486166_+ NM_000230
173 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG 174 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
178 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG 179 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
179 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA 180 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
180 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC 181 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
181 ACACG 182 ACACG
182 183
184 Extracting sequences with **Interval** output data type returns::
185
186 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
187 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
188 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
189
183 </help> 190 </help>
184 <citations> 191 <citations>
185 <citation type="bibtex"> 192 <citation type="bibtex">
186 @unpublished{None, 193 @unpublished{None,
187 author = {}, 194 author = {},