comparison pilon.xml @ 0:b362b0f74c78 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pilon commit e01d50f8aa0f9a06dcf162ff6ac7d447c69a2324
author iuc
date Sat, 13 Aug 2016 04:33:19 -0400
parents
children 57cfa19f44ba
comparison
equal deleted inserted replaced
-1:000000000000 0:b362b0f74c78
1 <tool id="pilon" name="pilon" version="0.1">
2 <description>An automated genome assembly improvement and variant detection tool</description>
3 <requirements>
<!-- The pilon package, pinned to version 1.18, is the only declared dependency. -->
4 <requirement version="1.18" type="package">pilon</requirement>
5 </requirements>
6 <command detect_errors="exit_code"><![CDATA[
## Step 1: symlink every selected BAM and its index into the working directory,
## named after the basename of the dataset path, so that each BAM has a
## matching <name>.bai file beside it.
7 #if $auto_selection.auto_enabled == "yes"
8 #for $bamfile in $auto_selection.bam
9 ln -f -s "$bamfile" "\$(basename $bamfile)" &&
10 ln -f -s "$bamfile.metadata.bam_index" "\$(basename $bamfile).bai" &&
11 #end for
12 #end if
13 #if $options.selection_mode == "advanced"
14 #if $options.frags_selection.frags_enabled == "yes"
15 #for $bamfile in $options.frags_selection.frags
16 ln -f -s "$bamfile" "\$(basename $bamfile)" &&
17 ln -f -s "$bamfile.metadata.bam_index" "\$(basename $bamfile).bai" &&
18 #end for
19 #end if
20 #if $options.jumps_selection.jumps_enabled == "yes"
21 #for $bamfile in $options.jumps_selection.jumps
22 ln -f -s "$bamfile" "\$(basename $bamfile)" &&
23 ln -f -s "$bamfile.metadata.bam_index" "\$(basename $bamfile).bai" &&
24 #end for
25 #end if
26 #if $options.unpaired_selection.unpaired_enabled == "yes"
27 #for $bamfile in $options.unpaired_selection.unpaired
28 ln -f -s "$bamfile" "\$(basename $bamfile)" &&
29 ln -f -s "$bamfile.metadata.bam_index" "\$(basename $bamfile).bai" &&
30 #end for
31 #end if
32 #end if
## Step 2: expose the chosen reference (history dataset or built-in path) as reference.fasta.
33 ln -s -f
34 #if $reference_genome.reference_genome_source == "history"
35 "$reference_genome.history_item"
36 #else
37 "$reference_genome.builtin.fields.path"
38 #end if
39 reference.fasta &&
## Step 3: run pilon on the symlinked inputs; all outputs are written with the prefix "pilon".
40 pilon
41 --genome reference.fasta
42 $variant
43 $changes
44 #if $auto_selection.auto_enabled == "yes"
45 #for $bamfile in $auto_selection.bam
46 --bam "\$(basename $bamfile)"
47 #end for
48 #end if
49 #if $options.selection_mode == "advanced"
50 #if $options.frags_selection.frags_enabled == "yes"
51 #for $bamfile in $options.frags_selection.frags
52 --frags "\$(basename $bamfile)"
53 #end for
54 #end if
55 #if $options.jumps_selection.jumps_enabled == "yes"
56 #for $bamfile in $options.jumps_selection.jumps
57 --jumps "\$(basename $bamfile)"
58 #end for
59 #end if
60 #if $options.unpaired_selection.unpaired_enabled == "yes"
61 #for $bamfile in $options.unpaired_selection.unpaired
62 --unpaired "\$(basename $bamfile)"
63 #end for
64 #end if
65 $options.vcfqe
66 $options.vcf
67 $options.tracks
68 --chunksize $options.chunk_size
69 $options.diploid
70 $options.duplicates
71 $options.iupac
72 $options.nonpf
## Pilon names this option --targets (there is no --targetlist flag). The value is
## free text from the user, so it is single-quoted to reach pilon as one argument.
73 #if len($options.targetlist.strip()) > 0
74 --targets '$options.targetlist'
75 #end if
76 --fix $options.fixes
77 $options.verbose
78 --defaultqual $options.defaultqual
79 --flank $options.flank
80 --gapmargin $options.gapmargin
81 --K $options.kmersize
82 --mindepth $options.mindepth
83 --mingap $options.mingap
84 --minmq $options.minmq
85 --minqual $options.minqual
86 $options.nostrays
87 #end if
## Use the number of slots allocated by Galaxy, falling back to a single thread.
88 --threads \${GALAXY_SLOTS:-1}
89 --output pilon
90 ]]></command>
91 <inputs>
<!-- Reference genome: either a FASTA dataset from the history or an entry of the all_fasta data table. -->
92 <conditional name="reference_genome">
93 <param label="Source for reference genome used for BAM alignments" name="reference_genome_source" type="select">
94 <option selected="True" value="history">Use a genome from history</option>
95 <option value="builtin">Use a built-in genome</option>
96 </param>
97 <when value="history">
98 <param format="fasta" type="data" metadata_name="dbkey" name="history_item" />
99 </when>
100 <when value="builtin">
101 <param label="Select a reference genome" name="builtin" type="select">
102 <options from_data_table="all_fasta">
103 <filter column="2" type="sort_by" />
104 <validator message="No genomes are available for the selected input dataset" type="no_options" />
105 </options>
106 </param>
107 </when>
108 </conditional>
<!-- When enabled, the BAM files are passed with the generic bam option of pilon. -->
109 <conditional name="auto_selection">
110 <param name="auto_enabled" type="boolean" truevalue="yes" falsevalue="no" checked="true"
111 label="Type automatically determined by pilon" />
112 <when value="yes">
113 <param argument="bam" type="data" format="bam" multiple="true" label="Input BAM file" />
114 </when>
115 <when value="no" />
116 </conditional>
117 <param argument="variant" type="boolean" truevalue="--variant" falsevalue="" checked="true" label="Variant calling mode"
118 help="Sets up heuristics for variant calling, as opposed to assembly improvement; equivalent to '--vcf --fix all,breaks'." />
119 <param argument="changes" type="boolean" truevalue="--changes" falsevalue="" label="Create changes file"
120 help="If specified, a file listing changes in the &lt;output&gt;.fasta will be generated." />
<!-- Advanced options: typed BAM inputs (frags / jumps / unpaired) plus the tuning flags that the command section forwards to pilon. -->
121 <conditional name="options">
122 <param label="Use advanced options" name="selection_mode" type="select">
123 <option selected="True" value="default">Use default options</option>
124 <option value="advanced">Use advanced options</option>
125 </param>
126 <when value="default"> </when>
127 <when value="advanced">
128 <conditional name="frags_selection">
129 <param name="frags_enabled" label="Paired end fragments" type="boolean" truevalue="yes" falsevalue="no" />
130 <when value="yes">
131 <param argument="frags" label="Input BAM file (paired end fragments)" multiple="true" type="data" format="bam"
132 help="BAM file consisting of fragment paired-end alignments." />
133 </when>
134 <when value="no"></when>
135 </conditional>
136 <conditional name="jumps_selection">
137 <param name="jumps_enabled" label="Mate pairs" type="boolean" truevalue="yes" falsevalue="no" />
138 <when value="yes">
139 <param argument="jumps" label="Input BAM file (mate pairs)" multiple="true" type="data" format="bam"
140 help="BAM file consisting of jump (mate pair) paired-end alignments." />
141 </when>
142 <when value="no"></when>
143 </conditional>
144 <conditional name="unpaired_selection">
145 <param name="unpaired_enabled" label="Unpaired reads" type="boolean" truevalue="yes" falsevalue="no" />
146 <when value="yes">
147 <param argument="unpaired" label="Input BAM file (unpaired)" multiple="true" type="data" format="bam"
148 help="BAM file consisting of unpaired alignments." />
149 </when>
150 <when value="no"></when>
151 </conditional>
152 <param argument="vcf" type="boolean" checked="false" label="VCF output"
153 truevalue="--vcf" falsevalue="" help="If specified, a vcf file will be generated (even if 'Variant calling mode' is off)"/>
154 <param argument="tracks" type="boolean" checked="false" label="Output annotation tracks"
155 help="Write many track files (*.bed, *.wig) suitable for viewing in a genome browser."
156 truevalue="--tracks" falsevalue="" />
157 <param argument="chunk_size" type="integer" min="1" value="10000000" label="Chunk size"
158 help="Input FASTA elements larger than this will be processed in smaller pieces not to
159 exceed this size." />
160 <param argument="vcfqe" type="boolean" checked="false" label="QE (not QP) in VCF" help="If specified the VCF will contain a QE (quality-weighted evidence) field rather
161 than the default QP (quality-weighted percentage of evidence) field." truevalue="--vcfqe" falsevalue="" />
162 <param argument="fixes" label="Issues that pilon should try and fix" type="select" multiple="true">
163 <option value="all" selected="true">All non-experimental fixes</option>
164 <option value="bases">Individual bases and small indels</option>
165 <option value="gaps">Fill gaps</option>
166 <option value="local">Detect and fix local misassemblies</option>
167 <option value="none">Do none of these fixes (no FASTA will be written)</option>
168 <option value="amb">Fix ambiguous bases in FASTA output (experimental)</option>
169 <option value="breaks">Allow local reassembly to open new gaps (experimental, requires local assembly fixing to be selected)</option>
170 <option value="novel">Assemble novel sequence from unaligned non-jump reads (experimental)</option>
171 </param>
172 <param argument="diploid" label="Organism is diploid" type="boolean" checked="false"
173 help="Sample is from diploid organism; will eventually affect calling of heterozygous SNPs"
174 truevalue="--diploid" falsevalue="" />
175 <param argument="duplicates" label="Use duplicates" type="boolean" checked="false"
176 help="Use reads marked as duplicates in the input BAMs"
177 truevalue="--duplicates" falsevalue="" />
178 <param argument="iupac" label="Use IUPAC codes in FASTA output" type="boolean" checked="false"
179 help="Output IUPAC ambiguous base codes in the output FASTA file when appropriate"
180 truevalue="--iupac" falsevalue="" />
181 <param argument="nonpf" label="Use low quality reads" type="boolean" checked="false"
182 help="Use reads which failed sequencer quality filtering"
183 truevalue="--nonpf" falsevalue="" />
<!-- The command section calls .strip() on this value, so the initial value is set explicitly to the empty string. -->
184 <param argument="targetlist" label="List of targets to process (leave blank for all)" value="" type="text" length="40"
185 help="Only process the specified target(s). Targets are comma-separated, and each target is a fasta element name optionally followed by a base range." />
186 <param argument="verbose" label="Verbose output (in tool log)" type="boolean" checked="false"
187 truevalue="--verbose" falsevalue="" />
188 <param argument="defaultqual" label="Default base quality" type="integer" min="1" value="15"
189 help="Assumes bases are of this quality if quals are not present in input BAMs" />
190 <param argument="flank" label="Flanking bases to ignore" type="integer" min="1" value="10"
191 help="This many bases at each end of the good reads will be ignored." />
192 <param argument="gapmargin" label="Allowable gap margin" type="integer" min="1" value="100000"
193 help="Closed gaps must be within this number of bases of true size to be closed" />
194 <param argument="kmersize" label="Kmer size" type="integer" min="1" value="47"
195 help="Kmer size used by internal assembler" />
196 <param argument="mindepth" label="Minimum depth" type="float" value="0.1"
197 help="Minimum depth of coverage required for variants to be called. See complete documentation below." />
198 <param argument="mingap" label="Minimum gap size" type="integer" value="10"
199 help="Minimum size for unclosed gaps" />
200 <param argument="minmq" label="Minimum mapping quality" type="integer" value="0"
201 help="Minimum alignment mapping quality for a read to count in pileups" />
202 <param argument="minqual" label="Minimum base quality" type="integer" value="0"
203 help="Minimum base quality to consider for pileups" />
204 <param argument="nostrays" label="Disable 'stray read filtering'" type="boolean" checked="false"
205 help="See documentation below"
206 truevalue="--nostrays" falsevalue="" />
207 </when>
208 </conditional>
209 </inputs>
<!-- Every output is collected from the job working directory via from_work_dir; each filter decides whether the dataset is created. -->
210 <outputs>
211 <data name="output_vcf" format="vcf" from_work_dir="pilon.vcf" label="VCF from ${tool.name} on ${on_string}">
212 <filter>variant or (options['selection_mode'] == 'advanced' and options['vcf'])</filter>
213 </data>
214 <data name="output_changes" format="txt" from_work_dir="pilon.changes" label="Changes in FASTA from ${tool.name} on ${on_string}">
215 <filter>changes</filter>
216 </data>
217 <data name="output_fasta" format="fasta" from_work_dir="pilon.fasta" label="FASTA from ${tool.name} on ${on_string}">
218 <filter>options['selection_mode'] == 'default' or (options['selection_mode'] == 'advanced' and 'none' not in options['fixes'])</filter>
219 </data>
<!-- Genome-browser tracks: one BED file and thirteen WIG files, only created when tracks are requested in advanced mode. -->
220 <collection name="tracks" type="list" label="Annotation tracks from ${tool.name} on ${on_string}">
221 <filter>options['selection_mode'] == 'advanced' and options['tracks']</filter>
222 <data name="output_pilon_bed" format="bed" from_work_dir="pilonPilon.bed" label="Features from ${tool.name} on ${on_string} (BED format)" />
223 <data name="output_changes_wig" format="wig" from_work_dir="pilonChanges.wig" label="${tool.name} changes track on ${on_string} (WIG format)" />
224 <data name="output_unconfirmed_wig" format="wig" from_work_dir="pilonUnconfirmed.wig" label="${tool.name} unconfirmed track on ${on_string}" />
225 <data name="output_copynumber_wig" format="wig" from_work_dir="pilonCopyNumber.wig" label="${tool.name} copy number track on ${on_string}" />
226 <data name="output_coverage_wig" format="wig" from_work_dir="pilonCoverage.wig" label="${tool.name} coverage track on ${on_string}" />
227 <data name="output_badcoverage_wig" format="wig" from_work_dir="pilonBadCoverage.wig" label="${tool.name} bad coverage track on ${on_string}" />
228 <data name="output_pctbad_wig" format="wig" from_work_dir="pilonPctBad.wig" label="${tool.name} pct bad track on ${on_string}" />
229 <data name="output_deltacoverage_wig" format="wig" from_work_dir="pilonDeltaCoverage.wig" label="${tool.name} delta coverage track on ${on_string}" />
230 <data name="output_dipcoverage_wig" format="wig" from_work_dir="pilonDipCoverage.wig" label="${tool.name} dip coverage track on ${on_string}" />
231 <data name="output_physicalcoverage_wig" format="wig" from_work_dir="pilonPhysicalCoverage.wig" label="${tool.name} physical coverage track on ${on_string}" />
232 <data name="output_clippedalignments_wig" format="wig" from_work_dir="pilonClippedAlignments.wig" label="${tool.name} clipped alignments track on ${on_string}" />
233 <data name="output_weightedqual_wig" format="wig" from_work_dir="pilonWeightedQual.wig" label="${tool.name} weighted quality track on ${on_string}" />
234 <data name="output_weightedmq_wig" format="wig" from_work_dir="pilonWeightedMq.wig" label="${tool.name} weighted MQ track on ${on_string}" />
235 <data name="output_gc_wig" format="wig" from_work_dir="pilonGC.wig" label="${tool.name} GC track on ${on_string}" />
236 </collection>
237 </outputs>
238 <tests>
<!-- Test 1: default options in variant mode; checks the VCF (by content) and the FASTA (by checksum). -->
239 <test>
240 <param name="reference_genome_source" value="history" />
241 <param name="history_item" value="test1.fasta" ftype="fasta" />
242 <param name="bam" value="test1.bam" />
243 <param name="variant" value="true" />
244 <output name="output_vcf" ftype="vcf" file="test1-vcf-part" compare="contains" />
245 <output name="output_fasta" ftype="fasta" md5="352907b0d965bc926289b1b2eb9cbecb" />
246 </test>
<!-- Test 2: same inputs with advanced options and tracks enabled; additionally checks every element of the tracks collection. -->
247 <test>
248 <param name="reference_genome_source" value="history" />
249 <param name="history_item" value="test1.fasta" ftype="fasta" />
250 <param name="bam" value="test1.bam" />
251 <param name="variant" value="true" />
252 <param name="selection_mode" value="advanced" />
253 <param name="tracks" value="true" />
254 <output name="output_vcf" ftype="vcf" file="test1-vcf-part" compare="contains" />
255 <output name="output_fasta" ftype="fasta" md5="352907b0d965bc926289b1b2eb9cbecb" />
256 <output_collection name="tracks">
257 <element name="output_pilon_bed" md5="ae0518a6d641efecdcf41c808c014226" />
258 <element name="output_changes_wig" md5="26e0a9a3793e6604673e9911c4d334ab" />
259 <element name="output_unconfirmed_wig" md5="f8bb3def4547e854bd90e1a6b4f8cc66" />
260 <element name="output_copynumber_wig" md5="c1e94296fbc24d00fa7d057f30e4de8f" />
261 <element name="output_coverage_wig" md5="48f7da4bad60fcbb49aaaa992aef8a02" />
262 <element name="output_badcoverage_wig" md5="31e3ad4a5d0f8cd1e296e4b1424d6a95" />
263 <element name="output_pctbad_wig" md5="8353134113c87dbc7d9497f983d58b4b" />
264 <element name="output_deltacoverage_wig" md5="e0fb6474b851e1890f91c57c2ba1fd76" />
265 <element name="output_dipcoverage_wig" md5="8885c63df1dc7309a7bab371b9eb449b" />
266 <element name="output_physicalcoverage_wig" md5="db485f84b51499c3a3c72da70a8ef7af" />
267 <element name="output_clippedalignments_wig" md5="b6eab0827a9b6f2e925d9ece5ee4f87f" />
268 <element name="output_weightedqual_wig" md5="9987289bd0ec8cd05bd6d330bdd1d01d" />
269 <element name="output_weightedmq_wig" md5="7ec1b6420f52fc44bf3c04aa593fcdeb" />
270 <element name="output_gc_wig" md5="815af1378d016b85ed6f52667dde10c1" />
271 </output_collection>
272 </test>
273
274 </tests>
275 <help><![CDATA[
276 Pilon is a software tool which can be used to:
277
278 * Automatically improve draft assemblies
279
280 * Find variation among strains, including large event detection
281
282 Pilon requires as input a FASTA file of the genome along with one or more BAM files of reads aligned to the input FASTA file. Pilon uses read alignment analysis to identify inconsistencies between the input genome and the evidence in the reads. It then attempts to make improvements to the input genome, including:
283
284 * Single base differences
285
286 * Small indels
287
288 * Larger indel or block substitution events
289
290 * Gap filling
291
292 * Identification of local misassemblies, including optional opening of new gaps
293
294 Pilon then outputs a FASTA file containing an improved representation of the genome from the read data and an optional VCF file detailing variation seen between the read data and the input genome.
295
296 To aid manual inspection and improvement by an analyst, Pilon can optionally produce tracks that can be displayed in genome viewers such as IGV and GenomeView, and it reports other events (such as possible large collapsed repeat regions) in its standard output.
297
298 Note on **mindepth**:
299
300 Variants (snps and indels) will only be called if there is coverage of good pairs
301 at the value set for *mindepth* depth or more; if this value is >= 1, it is an absolute depth, if it is a
302 fraction < 1, then minimum depth is computed by multiplying this value by the mean
303 coverage for the region, with a minimum value of 5 (default 0.1: min depth to call
304 is 10% of mean coverage or 5, whichever is greater).
305
306 Note on **stray read filtering**:
307
308 By default a pass is made through the input BAM files to identify stray pairs, that is,
309 those pairs in which both reads are aligned but not marked valid because they have
310 inconsistent orientation or separation. Identifying stray pairs can help fill gaps
311 and assemble larger insertions, especially of repeat content. However, doing so
312 sometimes consumes considerable memory.
313 ]]></help>
314 <citations>
<!-- Publication to cite for this tool, referenced by DOI. -->
315 <citation type="doi">10.1371/journal.pone.0112963</citation>
316 </citations>
317 </tool>