comparison tophat2_wrapper.xml @ 1:0d9d5dede10b draft

planemo upload commit a52cc16ed8d0d60e99742b55fccbdedcbb64b82c
author devteam
date Wed, 13 May 2015 09:23:55 -0400
parents 2c9b355b83f4
children 7f44f7ee7ab3
comparison
equal deleted inserted replaced
0:2c9b355b83f4 1:0d9d5dede10b
1 <tool id="tophat2" name="Tophat2" version="0.6"> 1 <tool id="tophat2" name="Tophat" version="0.9">
2 <!-- Wrapper compatible with Tophat version 2.0.0+ --> 2 <!-- Wrapper compatible with Tophat version 2.0.0+ -->
3 <description>Gapped-read mapper for RNA-seq data</description> 3 <description>Gapped-read mapper for RNA-seq data</description>
4 <version_command>tophat2 --version</version_command> 4 <version_command>tophat2 --version</version_command>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="0.1.18">samtools</requirement> 6 <requirement type="package" version="2.2.5">bowtie2</requirement>
7 <requirement type="package" version="2.1.0">bowtie2</requirement> 7 <requirement type="package" version="2.0.14">tophat</requirement>
8 <requirement type="package" version="2.0.9">tophat2</requirement>
9 </requirements> 8 </requirements>
10 9
11 <command> 10 <command>
12 ## 11 ##
13 ## Set path to index, building the reference if necessary. 12 ## Set path to index, building the reference if necessary.
109 --rg-platform "$readGroup.rgpl" 108 --rg-platform "$readGroup.rgpl"
110 --rg-sample "$readGroup.rgsm" 109 --rg-sample "$readGroup.rgsm"
111 #end if 110 #end if
112 111
113 ## Set index path, inputs and parameters specific to paired data. 112 ## Set index path, inputs and parameters specific to paired data.
114 #if $singlePaired.sPaired == "paired" 113 #if $singlePaired.sPaired != "single"
115 -r $singlePaired.mate_inner_distance 114 -r $singlePaired.mate_inner_distance
116 --mate-std-dev=$singlePaired.mate_std_dev 115 --mate-std-dev=$singlePaired.mate_std_dev
117 116
118 #if str($singlePaired.report_discordant_pairs) == "No": 117 #if str($singlePaired.report_discordant_pairs) == "No":
119 --no-discordant 118 --no-discordant
120 #end if 119 #end if
121 120
122 ${index_path} $singlePaired.input1 $singlePaired.input2 121 #if $singlePaired.sPaired == "paired"
122 ${index_path} "$singlePaired.input1" "$singlePaired.input2"
123 #else
124 ${index_path} "$singlePaired.input.forward" "$singlePaired.input.reverse"
125 #end if
123 #else 126 #else
124 ${index_path} $singlePaired.input1 127 ${index_path} "$singlePaired.input1"
125 #end if 128 #end if
126 </command> 129 </command>
127 130
128 <inputs> 131 <inputs>
129 <conditional name="singlePaired"> 132 <conditional name="singlePaired">
130 <param name="sPaired" type="select" label="Is this library mate-paired?"> 133 <param name="sPaired" type="select" label="Is this single-end or paired-end data?">
131 <option value="single">Single-end</option> 134 <option value="single">Single-end</option>
132 <option value="paired">Paired-end</option> 135 <option value="paired">Paired-end (as individual datasets)</option>
136 <option value="paired_collection">Paired-end (as collection)</option>
133 </param> 137 </param>
134 <when value="single"> 138 <when value="single">
135 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> 139 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
136 </when> 140 </when>
137 <when value="paired"> 141 <when value="paired">
138 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> 142 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Must have Sanger-scaled quality values with ASCII offset 33" />
139 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> 143 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Must have Sanger-scaled quality values with ASCII offset 33" />
140 <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" /> 144 <expand macro="paired_parameters" />
141 <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> 145 </when>
142 <!-- Discordant pairs. --> 146 <when value="paired_collection">
143 <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?"> 147 <param format="fastqsanger" name="input" type="data_collection" collection_type="paired" label="RNA-Seq FASTQ paired reads" help="Must have Sanger-scaled quality values with ASCII offset 33" />
144 <option value="No">No</option> 148 <expand macro="paired_parameters" />
145 <option selected="True" value="Yes">Yes</option>
146 </param>
147 </when> 149 </when>
148 </conditional> 150 </conditional>
149 <expand macro="refGenomeSourceConditional"> 151 <expand macro="refGenomeSourceConditional">
150 <options from_data_table="tophat2_indexes"> 152 <options from_data_table="tophat2_indexes">
151 <filter type="sort_by" column="2"/> 153 <filter type="sort_by" column="2"/>
158 <option value="full">Full parameter list</option> 160 <option value="full">Full parameter list</option>
159 </param> 161 </param>
160 <when value="preSet" /> 162 <when value="preSet" />
161 <!-- Full/advanced params. --> 163 <!-- Full/advanced params. -->
162 <when value="full"> 164 <when value="full">
163 <param name="read_realign_edit_dist" type="integer" value="1000" label="Max realign edit distance" help="Some of the reads spanning multiple exons may be mapped incorrectly as a contiguous alignment to the genome even though the correct alignment should be a spliced one - this can happen in the presence of processed pseudogenes that are rarely (if at all) transcribed or expressed. This option can direct TopHat to re-align reads for which the edit distance of an alignment obtained in a previous mapping step is above or equal to this option value. If you set this option to 0, TopHat will map every read in all the mapping steps (transcriptome if you provided gene annotations, genome, and finally splice variants detected by TopHat), reporting the best possible alignment found in any of these mapping steps. This may greatly increase the mapping accuracy at the expense of an increase in running time. The default value for this option is set such that TopHat will not try to realign reads already mapped in earlier steps." /> 165 <param name="read_realign_edit_dist" type="integer" value="1000" label="Max realign edit distance" help="--read-realign-edit-dist; Some of the reads spanning multiple exons may be mapped incorrectly as a contiguous alignment to the genome even though the correct alignment should be a spliced one - this can happen in the presence of processed pseudogenes that are rarely (if at all) transcribed or expressed. This option can direct TopHat to re-align reads for which the edit distance of an alignment obtained in a previous mapping step is above or equal to this option value. If you set this option to 0, TopHat will map every read in all the mapping steps (transcriptome if you provided gene annotations, genome, and finally splice variants detected by TopHat), reporting the best possible alignment found in any of these mapping steps. This may greatly increase the mapping accuracy at the expense of an increase in running time. 
The default value for this option is set such that TopHat will not try to realign reads already mapped in earlier steps." />
164 166
165 <param name="read_edit_dist" type="integer" value="2" label="Max edit distance" help="Final read alignments having more than these many edit distance are discarded." /> 167 <param name="read_edit_dist" type="integer" value="2" label="Max edit distance" help="--read-edit-dist; Final read alignments having more than these many edit distance are discarded." />
166 168
167 <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> 169 <param name="library_type" type="select" label="Library Type" help="--library-type; TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
168 <option value="fr-unstranded">FR Unstranded</option> 170 <option value="fr-unstranded">FR Unstranded</option>
169 <option value="fr-firststrand">FR First Strand</option> 171 <option value="fr-firststrand">FR First Strand</option>
170 <option value="fr-secondstrand">FR Second Strand</option> 172 <option value="fr-secondstrand">FR Second Strand</option>
171 </param> 173 </param>
172 <param name="read_mismatches" type="integer" value="2" label="Final read mismatches" help="Final read alignments having more than these many mismatches are discarded." /> 174 <param name="read_mismatches" type="integer" value="2" label="Final read mismatches" help="--read-mismatches; Final read alignments having more than these many mismatches are discarded." />
173 <param name="bowtie_n" type="select" label="Use bowtie -n mode"> 175 <param name="bowtie_n" type="select" label="Use bowtie -n mode" help="--bowtie-n; TopHat uses &quot;-v&quot; in Bowtie for initial read mapping (the default), but with this option, &quot;-n&quot; is used instead. Read segments are always mapped using &quot;-v&quot; option.">
174 <option selected="true" value="No">No</option> 176 <option selected="true" value="No">No</option>
175 <option value="Yes">Yes</option> 177 <option value="Yes">Yes</option>
176 </param> 178 </param>
177 <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> 179 <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="-a/--min-anchor-length; TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one read with this many bases on each side. This must be at least 3 and the default is 8." />
178 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> 180 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" help="-m/--splice-mismatches; The default is 0."/>
179 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> 181 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="-i/--min-intron-length; TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70." />
180 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." /> 182 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="-I/--max-intron-length; When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000." />
183
181 <expand macro="indel_searchConditional" /> 184 <expand macro="indel_searchConditional" />
182 alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> 185
183 <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> 186 <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" help="-g/--max-multihits; Instructs TopHat to allow up to this many alignments to the reference for a given read, and choose the alignments based on their alignment scores if there are more than this number. The default is 20 for read mapping. Unless you use --report-secondary-alignments, TopHat will report the alignments with the best alignment score. If there are more alignments with the same score than this number, TopHat will randomly report only this many alignments. In case of using --report-secondary-alignments, TopHat will try to report alignments up to this option value, and TopHat may randomly output some of the alignments with the same score to meet this number."/>
184 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> 187 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" help="--min-segment-intron; The minimum intron length that may be found during split-segment search. The default is 50."/>
185 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> 188 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" help="--max-segment-intron; The maximum intron length that may be found during split-segment search. The default is 500000."/>
186 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> 189 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" help="--segment-mismatches; Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2."/>
187 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> 190 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" help="--segment-length; Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25."/>
188 191 <param name="output_unmapped" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output unmapped reads" help="If checked, a BAM with the unmapped reads will be added to the history" />
189 <!-- Options for supplying own junctions. --> 192 <!-- Options for supplying own junctions. -->
190 <expand macro="own_junctionsConditional" /> 193 <expand macro="own_junctionsConditional" />
191 <!-- Coverage search. --> 194 <!-- Coverage search. -->
192 <conditional name="coverage_search"> 195 <conditional name="coverage_search">
193 <param name="use_search" type="select" label="Use Coverage Search" help="Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity."> 196 <param name="use_search" type="select" label="Use Coverage Search" help="Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.">
194 <option selected="true" value="No">No</option> 197 <option selected="true" value="No">No</option>
195 <option value="Yes">Yes</option> 198 <option value="Yes">Yes</option>
196 </param> 199 </param>
197 <when value="Yes"> 200 <when value="Yes">
198 <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> 201 <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" help="--min-coverage-intron; The minimum intron length that may be found during coverage search. The default is 50."/>
199 <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> 202 <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" help="--max-coverage-intron; The maximum intron length that may be found during coverage search. The default is 20000."/>
200 </when> 203 </when>
201 <when value="No" /> 204 <when value="No" />
202 </conditional> 205 </conditional>
203 206
204 <!-- Microexon search params --> 207 <!-- Microexon search params -->
205 <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer."> 208 <param name="microexon_search" type="select" label="Use Microexon Search" help="--microexon-search; With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
206 <option value="No">No</option> 209 <option value="No">No</option>
207 <option value="Yes">Yes</option> 210 <option value="Yes">Yes</option>
208 </param> 211 </param>
209 212
210 <!-- Fusion mapping. --> 213 <!-- Fusion mapping. -->
211 <conditional name="fusion_search"> 214 <conditional name="fusion_search">
212 <param name="do_search" type="select" label="Do Fusion Search"> 215 <param name="do_search" type="select" label="Do Fusion Search" help="Reads can be aligned to potential fusion transcripts if the --fusion-search option is specified. The fusion alignments are reported in SAM format using custom fields XF and XP (see the output format) and some additional information about fusions will be reported (see fusions.out). Once mapping is done, you can run tophat-fusion-post to filter out fusion transcripts (see the TopHat-Fusion website for more details).">
213 <option selected="true" value="No">No</option> 216 <option selected="true" value="No">No</option>
214 <option value="Yes">Yes</option> 217 <option value="Yes">Yes</option>
215 </param> 218 </param>
216 <when value="No" /> 219 <when value="No" />
217 <when value="Yes"> 220 <when value="Yes">
218 <param name="anchor_len" type="integer" value="20" label="Anchor Length" help="A 'supporting' read must map to both sides of a fusion by at least this many bases."/> 221 <param name="anchor_len" type="integer" value="20" label="Anchor Length" help="--fusion-anchor-length; A 'supporting' read must map to both sides of a fusion by at least this many bases. The default is 20."/>
219 <param name="min_dist" type="integer" value="10000000" label="Minimum Distance" help="For intra-chromosomal fusions, TopHat-Fusion tries to find fusions separated by at least this distance."/> 222 <param name="min_dist" type="integer" value="10000000" label="Minimum Distance" help="--fusion-min-dist; For intra-chromosomal fusions, TopHat-Fusion tries to find fusions separated by at least this distance. The default is 10000000."/>
220 <param name="read_mismatches" type="integer" value="2" label="Read Mismatches" help="Reads support fusions if they map across fusion with at most this many mismatches."/> 223 <param name="read_mismatches" type="integer" value="2" label="Read Mismatches" help="--fusion-read-mismatches; Reads support fusions if they map across fusion with at most this many mismatches. The default is 2."/>
221 <param name="multireads" type="integer" value="2" label="Multireads" help="Reads that map to more than this many places will be ignored. It may be possible that a fusion is supported by reads (or pairs) that map to multiple places."/> 224 <param name="multireads" type="integer" value="2" label="Multireads" help="--fusion-multireads; Reads that map to more than this many places will be ignored. It may be possible that a fusion is supported by reads (or pairs) that map to multiple places. The default is 2."/>
222 <param name="multipairs" type="integer" value="2" label="Multipairs" help="Pairs that map to more than this many places will be ignored."/> 225 <param name="multipairs" type="integer" value="2" label="Multipairs" help="--fusion-multipairs; Pairs that map to more than this many places will be ignored. The default is 2."/>
223 <param name="ignore_chromosomes" type="text" value='' label="Ignore some chromosomes such as chrM when detecting fusion break points"/> 226 <param name="ignore_chromosomes" type="text" value='' label="--fusion-ignore-chromosomes; Ignore some chromosomes such as chrM when detecting fusion break points"/>
224 </when> 227 </when>
225 </conditional> 228 </conditional>
226 229
227 <!-- Bowtie2 settings. --> 230 <!-- Bowtie2 settings. -->
228 <conditional name="bowtie2_settings"> 231 <conditional name="bowtie2_settings">
287 <expand macro="dbKeyActions" /> 290 <expand macro="dbKeyActions" />
288 </data> 291 </data>
289 <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam"> 292 <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam">
290 <expand macro="dbKeyActions" /> 293 <expand macro="dbKeyActions" />
291 </data> 294 </data>
295 <data format="bam" name="unmapped" label="${tool.name} on ${on_string}: unmapped" from_work_dir="tophat_out/unmapped.bam">
296 <filter>(params['settingsType'] == 'full' and params['output_unmapped'])</filter>
297 <expand macro="dbKeyActions" />
298 </data>
299
292 </outputs> 300 </outputs>
293 301
294 <macros> 302 <macros>
295 <import>tophat_macros.xml</import> 303 <import>tophat_macros.xml</import>
304 <xml name="paired_parameters">
305 <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" help="-r/--mate-inner-dist; This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments selected at 300bp, where each end is 50bp, you should set -r to be 200. The default is 50bp."/>
306 <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="--mate-std-dev; The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp."/>
307 <!-- Discordant pairs. -->
308 <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?" help="--no-discordant">
309 <option value="No">No</option>
310 <option selected="True" value="Yes">Yes</option>
311 </param>
312 </xml>
296 <macro name="dbKeyActions"> 313 <macro name="dbKeyActions">
297 <actions> 314 <actions>
298 <conditional name="refGenomeSource.genomeSource"> 315 <conditional name="refGenomeSource.genomeSource">
299 <when value="indexed"> 316 <when value="indexed">
300 <action type="metadata" name="dbkey"> 317 <action type="metadata" name="dbkey">
324 <param name="sPaired" value="single" /> 341 <param name="sPaired" value="single" />
325 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /> 342 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
326 <param name="genomeSource" value="indexed" /> 343 <param name="genomeSource" value="indexed" />
327 <param name="index" value="tophat_test" /> 344 <param name="index" value="tophat_test" />
328 <param name="settingsType" value="preSet" /> 345 <param name="settingsType" value="preSet" />
329 <param name="specReadGroup" value="No" /> 346 <param name="specReadGroup" value="no" />
330 <output name="junctions" file="tophat2_out1j.bed" /> 347 <output name="junctions" file="tophat2_out1j.bed" />
331 <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" /> 348 <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" />
332 </test> 349 </test>
333 <!-- Test using base-space test data: paired-end reads, index from history. --> 350 <!-- Test using base-space test data: paired-end reads, index from history. -->
334 <test> 351 <test>
342 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" /> 359 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" />
343 <param name="genomeSource" value="history" /> 360 <param name="genomeSource" value="history" />
344 <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" /> 361 <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
345 <param name="mate_inner_distance" value="20" /> 362 <param name="mate_inner_distance" value="20" />
346 <param name="settingsType" value="preSet" /> 363 <param name="settingsType" value="preSet" />
347 <param name="specReadGroup" value="No" /> 364 <param name="specReadGroup" value="no" />
365 <output name="junctions" file="tophat2_out2j.bed" />
366 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
367 </test>
368 <test>
369 <!-- Same test as above but with a collection. -->
370 <param name="sPaired" value="paired_collection" />
371 <param name="input">
372 <collection type="paired">
373 <element name="forward" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
374 <element name="reverse" value="tophat_in3.fastqsanger" ftype="fastqsanger" />
375 </collection>
376 </param>
377 <param name="genomeSource" value="history" />
378 <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
379 <param name="mate_inner_distance" value="20" />
380 <param name="settingsType" value="preSet" />
381 <param name="specReadGroup" value="no" />
348 <output name="junctions" file="tophat2_out2j.bed" /> 382 <output name="junctions" file="tophat2_out2j.bed" />
349 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> 383 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
350 </test> 384 </test>
351 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters --> 385 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
352 <test> 386 <test>
354 bowtie2-build -f test-data/tophat_in1.fasta tophat_in1 388 bowtie2-build -f test-data/tophat_in1.fasta tophat_in1
355 tophat2 -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger 389 tophat2 -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
356 Replace the + with double-dash 390 Replace the + with double-dash
357 Rename the files in tmp_dir appropriately 391 Rename the files in tmp_dir appropriately
358 --> 392 -->
359 <param name="sPaired" value="single"/> 393 <conditional name="singlePaired">
360 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> 394 <param name="sPaired" value="single"/>
395 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
396 </conditional>
361 <param name="genomeSource" value="history"/> 397 <param name="genomeSource" value="history"/>
362 <param name="ownFile" value="tophat_in1.fasta"/> 398 <param name="ownFile" value="tophat_in1.fasta"/>
363 <param name="settingsType" value="full"/> 399 <conditional name="params">
364 <param name="library_type" value="FR Unstranded"/> 400 <param name="settingsType" value="full"/>
365 <param name="read_mismatches" value="2"/> 401 <param name="library_type" value="FR Unstranded"/>
366 <param name="bowtie_n" value="No"/> 402 <param name="read_mismatches" value="2"/>
367 <param name="anchor_length" value="8"/> 403 <param name="bowtie_n" value="No"/>
368 <param name="splice_mismatches" value="0"/> 404 <param name="anchor_length" value="8"/>
369 <param name="min_intron_length" value="70"/> 405 <param name="splice_mismatches" value="0"/>
370 <param name="max_intron_length" value="500000"/> 406 <param name="min_intron_length" value="70"/>
371 <param name="max_multihits" value="40"/> 407 <param name="max_intron_length" value="500000"/>
372 <param name="min_segment_intron" value="50" /> 408 <param name="max_multihits" value="40"/>
373 <param name="max_segment_intron" value="500000" /> 409 <param name="min_segment_intron" value="50" />
374 <param name="seg_mismatches" value="2"/> 410 <param name="max_segment_intron" value="500000" />
375 <param name="seg_length" value="25"/> 411 <param name="seg_mismatches" value="2"/>
376 <param name="allow_indel_search" value="Yes"/> 412 <param name="seg_length" value="25"/>
377 <param name="max_insertion_length" value="3"/> 413 <conditional name="indel_search">
378 <param name="max_deletion_length" value="3"/> 414 <param name="allow_indel_search" value="Yes"/>
379 <param name="use_junctions" value="Yes" /> 415 <param name="max_insertion_length" value="3"/>
380 <param name="use_annotations" value="No" /> 416 <param name="max_deletion_length" value="3"/>
381 <param name="use_juncs" value="No" /> 417 </conditional>
382 <param name="no_novel_juncs" value="No" /> 418 <conditional name="own_junctions">
383 <param name="use_search" value="Yes" /> 419 <param name="use_junctions" value="Yes" />
384 <param name="min_coverage_intron" value="50" /> 420 <conditional name="gene_model_ann">
385 <param name="max_coverage_intron" value="20000" /> 421 <param name="use_annotations" value="No" />
386 <param name="microexon_search" value="Yes" /> 422 </conditional>
387 <param name="b2_settings" value="No" /> 423 <conditional name="raw_juncs">
388 <!-- Fusion search params --> 424 <param name="use_juncs" value="No" />
389 <param name="do_search" value="Yes" /> 425 </conditional>
390 <param name="anchor_len" value="21" /> 426 <conditional name="no_novel_juncs">
391 <param name="min_dist" value="10000021" /> 427 <param name="no_novel_juncs" value="No" />
392 <param name="read_mismatches" value="3" /> 428 </conditional>
393 <param name="multireads" value="4" /> 429 </conditional>
394 <param name="multipairs" value="5" /> 430 <conditional name="coverage_search">
395 <param name="ignore_chromosomes" value="chrM"/> 431 <param name="use_search" value="Yes" />
396 <param name="specReadGroup" value="No" /> 432 <param name="min_coverage_intron" value="50" />
433 <param name="max_coverage_intron" value="20000" />
434 </conditional>
435 <param name="microexon_search" value="Yes" />
436 <conditional name="bowtie2_settings">
437 <param name="b2_settings" value="No" />
438 </conditional>
439 <!-- Fusion search params -->
440 <conditional name="fusion_search">
441 <param name="do_search" value="Yes" />
442 <param name="anchor_len" value="21" />
443 <param name="min_dist" value="10000021" />
444 <param name="read_mismatches" value="3" />
445 <param name="multireads" value="4" />
446 <param name="multipairs" value="5" />
447 <param name="ignore_chromosomes" value="chrM"/>
448 </conditional>
449 </conditional>
450 <conditional name="readGroup">
451 <param name="specReadGroup" value="no" />
452 </conditional>
397 <output name="insertions" file="tophat_out3i.bed" /> 453 <output name="insertions" file="tophat_out3i.bed" />
398 <output name="deletions" file="tophat_out3d.bed" /> 454 <output name="deletions" file="tophat_out3d.bed" />
399 <output name="junctions" file="tophat2_out3j.bed" /> 455 <output name="junctions" file="tophat2_out3j.bed" />
400 <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" /> 456 <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" />
401 </test> 457 </test>
404 <!-- TopHat commands: 460 <!-- TopHat commands:
405 tophat2 -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search +report_discordant_pairs tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger 461 tophat2 -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search +report_discordant_pairs tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
406 Replace the + with double-dash 462 Replace the + with double-dash
407 Rename the files in tmp_dir appropriately 463 Rename the files in tmp_dir appropriately
408 --> 464 -->
409 <param name="sPaired" value="paired"/> 465 <conditional name="singlePaired">
410 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> 466 <param name="sPaired" value="paired"/>
411 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/> 467 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
468 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
469 <param name="mate_inner_distance" value="20"/>
470 <param name="report_discordant_pairs" value="Yes" />
471 </conditional>
412 <param name="genomeSource" value="indexed"/> 472 <param name="genomeSource" value="indexed"/>
413 <param name="index" value="tophat_test"/> 473 <param name="index" value="tophat_test"/>
414 <param name="mate_inner_distance" value="20"/> 474 <conditional name="params">
415 <param name="settingsType" value="full"/> 475 <param name="settingsType" value="full"/>
416 <param name="library_type" value="FR Unstranded"/> 476 <param name="library_type" value="FR Unstranded"/>
417 <param name="read_mismatches" value="5"/> 477 <param name="read_mismatches" value="5"/>
418 <param name="bowtie_n" value="Yes"/> 478 <!-- Error: the read mismatches (5) and the read gap length (2) should be less than or equal to the read edit dist (2) -->
419 <param name="mate_std_dev" value="20"/> 479 <param name="read_edit_dist" value="5" />
420 <param name="anchor_length" value="8"/> 480 <param name="bowtie_n" value="Yes"/>
421 <param name="splice_mismatches" value="0"/> 481 <param name="mate_std_dev" value="20"/>
422 <param name="min_intron_length" value="70"/> 482 <param name="anchor_length" value="8"/>
423 <param name="max_intron_length" value="500000"/> 483 <param name="splice_mismatches" value="0"/>
424 <param name="max_multihits" value="40"/> 484 <param name="min_intron_length" value="70"/>
425 <param name="min_segment_intron" value="50" /> 485 <param name="max_intron_length" value="500000"/>
426 <param name="max_segment_intron" value="500000" /> 486 <param name="max_multihits" value="40"/>
427 <param name="seg_mismatches" value="2"/> 487 <param name="min_segment_intron" value="50" />
428 <param name="seg_length" value="25"/> 488 <param name="max_segment_intron" value="500000" />
429 <param name="allow_indel_search" value="No"/> 489 <param name="seg_mismatches" value="2"/>
430 <param name="use_junctions" value="Yes" /> 490 <param name="seg_length" value="25"/>
431 <param name="use_annotations" value="No" /> 491 <conditional name="indel_search">
432 <param name="use_juncs" value="No" /> 492 <param name="allow_indel_search" value="No"/>
433 <param name="no_novel_juncs" value="No" /> 493 </conditional>
434 <param name="report_discordant_pairs" value="Yes" /> 494 <conditional name="own_junctions">
435 <param name="use_search" value="No" /> 495 <param name="use_junctions" value="Yes" />
436 <param name="microexon_search" value="Yes" /> 496 <conditional name="gene_model_ann">
437 <param name="b2_settings" value="No" /> 497 <param name="use_annotations" value="No" />
438 <!-- Fusion search params --> 498 </conditional>
439 <param name="do_search" value="Yes" /> 499 <conditional name="raw_juncs">
440 <param name="anchor_len" value="21" /> 500 <param name="use_juncs" value="No" />
441 <param name="min_dist" value="10000021" /> 501 </conditional>
442 <param name="read_mismatches" value="3" /> 502 <conditional name="no_novel_juncs">
443 <param name="multireads" value="4" /> 503 <param name="no_novel_juncs" value="No" />
444 <param name="multipairs" value="5" /> 504 </conditional>
445 <param name="ignore_chromosomes" value="chrM"/> 505 </conditional>
446 <param name="specReadGroup" value="No" /> 506 <conditional name="coverage_search">
507 <param name="use_search" value="No" />
508 </conditional>
509 <param name="microexon_search" value="Yes" />
510 <conditional name="bowtie2_settings">
511 <param name="b2_settings" value="No" />
512 </conditional>
513 <!-- Fusion search params -->
514 <conditional name="fusion_search">
515 <param name="do_search" value="Yes" />
516 <param name="anchor_len" value="21" />
517 <param name="min_dist" value="10000021" />
518 <param name="read_mismatches" value="3" />
519 <param name="multireads" value="4" />
520 <param name="multipairs" value="5" />
521 <param name="ignore_chromosomes" value="chrM"/>
522 </conditional>
523 </conditional>
524 <conditional name="readGroup">
525 <param name="specReadGroup" value="no" />
526 </conditional>
447 <output name="junctions" file="tophat2_out4j.bed" /> 527 <output name="junctions" file="tophat2_out4j.bed" />
448 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /> 528 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
449 </test> 529 </test>
450 </tests> 530 </tests>
451
452 <help> 531 <help>
453 **Tophat Overview** 532 **Tophat Overview**
454 533
455 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie(2), and then analyzes the mapping results to identify splice junctions between exons. Please cite: Kim D, Pertea G, Trapnell C, Pimentel H, Kelley R, and Salzberg SL. TopHat2: accurate alignment 534 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie(2), and then analyzes the mapping results to identify splice junctions between exons. Please cite: Kim D, Pertea G, Trapnell C, Pimentel H, Kelley R, and Salzberg SL. TopHat2: accurate alignment
456 of transcriptomes in the presence of insertions, deletions and gene fusions. Genome Biol 14:R36, 2013. 535 of transcriptomes in the presence of insertions, deletions and gene fusions. Genome Biol 14:R36, 2013.
457 536
458 .. _Tophat: http://tophat.cbcb.umd.edu/ 537 .. _Tophat: http://ccb.jhu.edu/software/tophat/
459 538
460 ------ 539 ------
461 540
462 **Know what you are doing** 541 **Know what you are doing**
463 542
464 .. class:: warningmark 543 .. class:: warningmark
465 544
466 There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. 545 There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
467 546
468 .. __: http://tophat.cbcb.umd.edu/manual.html 547 .. __: http://ccb.jhu.edu/software/tophat/manual.shtml
469 548
470 ------ 549 ------
471 550
472 **Input formats** 551 **Input formats**
473 552
522 --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50. 601 --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50.
523 --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000. 602 --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000.
524 --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. 603 --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50.
525 --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000. 604 --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000.
526 </help> 605 </help>
606 <citations>
607 <citation type="doi">10.1186/gb-2013-14-4-r36</citation>
608 </citations>
527 </tool> 609 </tool>