comparison bbnorm.xml @ 1:aa7b97bc2f3f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bbtools commit cd59ba2c349865259b92302a1d70e103b8a5e3cb
author iuc
date Tue, 27 Aug 2024 10:12:21 +0000
parents ac326f8788a2
children 78e3ecfdf6df
comparison
equal deleted inserted replaced
0:ac326f8788a2 1:aa7b97bc2f3f
2 <description>Normalise sequencing coverage</description> 2 <description>Normalise sequencing coverage</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="edam_ontology"/> 6 <expand macro="edam_ontology"/>
7 <expand macro="bio.tools"/>
7 <expand macro="requirements"/> 8 <expand macro="requirements"/>
8 <stdio> 9 <stdio>
9 <regex match="This table is (very|crazy|totally) full, which may reduce accuracy. Ideal load is under" source="stderr" level="fatal_oom" description="Too low memory - generated results might be inaccurate."/> 10 <regex match="This table is (very|crazy|totally) full, which may reduce accuracy. Ideal load is under" source="stderr" level="fatal_oom" description="Too low memory - generated results might be inaccurate."/>
10 </stdio> 11 </stdio>
11 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
142 <when value="PE_2files"> 143 <when value="PE_2files">
143 <param name="read1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads"/> 144 <param name="read1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads"/>
144 <param name="read2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads"/> 145 <param name="read2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads"/>
145 </when> 146 </when>
146 <when value="paired"> 147 <when value="paired">
147 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of forward and reverse reads"/> 148 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" label="Collection of forward and reverse reads" collection_type="paired"/>
148 </when> 149 </when>
149 </conditional> 150 </conditional>
150 <param argument="target" type="integer" value="100" min="1" label="Target normalization depth" help="All depth parameters control kmer depth, not read depth. For kmer depth Dk, read depth Dr, read length R, and kmer size K: Dr=Dk*(R/(R-K+1))"/> 151 <param argument="target" type="integer" min="1" value="100" label="Target normalization depth" help="All depth parameters control kmer depth, not read depth. For kmer depth Dk, read depth Dr, read length R, and kmer size K: Dr=Dk*(R/(R-K+1))"/>
151 <section name="norm_params" title="Normalization parameters"> 152 <section name="norm_params" title="Normalization parameters">
152 <param argument="maxdepth" type="integer" value="-1" min="-1" label="Reads will not be downsampled when below this depth, even if they are above the target depth." help="All depth parameters control kmer depth, not read depth. For kmer depth Dk, read depth Dr, read length R, and kmer size K: Dr=Dk*(R/(R-K+1))"/> 153 <param argument="maxdepth" type="integer" min="-1" value="-1" label="Reads will not be downsampled when below this depth, even if they are above the target depth." help="All depth parameters control kmer depth, not read depth. For kmer depth Dk, read depth Dr, read length R, and kmer size K: Dr=Dk*(R/(R-K+1))"/>
153 <param argument="mindepth" type="integer" value="5" min="0" label="kmers with depth below this number will not be included when calculating the depth of a read." help="All depth parameters control kmer depth, not read depth. For kmer depth Dk, read depth Dr, read length R, and kmer size K: Dr=Dk*(R/(R-K+1))"/> 154 <param argument="mindepth" type="integer" min="0" value="5" label="kmers with depth below this number will not be included when calculating the depth of a read." help="All depth parameters control kmer depth, not read depth. For kmer depth Dk, read depth Dr, read length R, and kmer size K: Dr=Dk*(R/(R-K+1))"/>
154 <param argument="minkmers" type="integer" value="15" min="0" label="Reads must have at least this many kmers over min depth to be retained."/> 155 <param argument="minkmers" type="integer" min="0" value="15" label="Reads must have at least this many kmers over min depth to be retained."/>
155 <param argument="percentile" type="integer" value="54" min="1" max="100" label="Percentile to infer read depth" help="Read depth is by default inferred from the 54th percentile of kmer depth, but this may be changed to any number 1-100."/> 156 <param argument="percentile" type="integer" min="1" max="100" value="54" label="Percentile to infer read depth" help="Read depth is by default inferred from the 54th percentile of kmer depth, but this may be changed to any number 1-100."/>
156 <param argument="uselowerdepth" type="boolean" checked="true" label="For pairs, use the depth of the lower read as the depth proxy."/> 157 <param argument="uselowerdepth" type="boolean" checked="true" label="For pairs, use the depth of the lower read as the depth proxy."/>
157 <param argument="deterministic" type="boolean" checked="true" label="Generate random numbers deterministically" help="This would ensure identical output between multiple runs. May decrease speed with a huge number of threads."/> 158 <param argument="deterministic" type="boolean" checked="true" label="Generate random numbers deterministically" help="This would ensure identical output between multiple runs. May decrease speed with a huge number of threads."/>
158 <param argument="fixspikes" type="boolean" checked="false" label="Do a slower, high-precision bloom filter lookup of kmers that appear to have an abnormally high depth due to collisions."/> 159 <param argument="fixspikes" type="boolean" checked="false" label="Do a slower, high-precision bloom filter lookup of kmers that appear to have an abnormally high depth due to collisions."/>
159 <param argument="passes" type="integer" value="2" label="Number of passes to perform" help="1 pass is the basic mode. 2 passes allow greater accuracy, error detection, better control of output depth."/> 160 <param argument="passes" type="integer" value="2" label="Number of passes to perform" help="1 pass is the basic mode. 2 passes allow greater accuracy, error detection, better control of output depth."/>
160 </section> 161 </section>
161 <section name="hashing_params" title="Hashing parameters"> 162 <section name="hashing_params" title="Hashing parameters">
162 <param argument="k" type="integer" value="31" min="1" label="kmer length" help="Values under 32 are most efficient, but arbitrarily high values are supported."/> 163 <param argument="k" type="integer" min="1" value="31" label="kmer length" help="Values under 32 are most efficient, but arbitrarily high values are supported."/>
163 <param argument="bits" type="select" label="Bits per cell in bloom filter" help="Maximum kmer depth recorded is 2^c bits. Large values decrease accuracy for a fixed amount of memory, so use the lowest number you can that will still capture highest-depth kmers."> 164 <param argument="bits" type="select" label="Bits per cell in bloom filter" help="Maximum kmer depth recorded is 2^c bits. Large values decrease accuracy for a fixed amount of memory, so use the lowest number you can that will still capture highest-depth kmers.">
164 <option value="2">2</option> 165 <option value="2">2</option>
165 <option value="4">4</option> 166 <option value="4">4</option>
166 <option value="8">8</option> 167 <option value="8">8</option>
167 <option value="16" selected="true">16</option> 168 <option value="16" selected="true">16</option>
168 <option value="32">32</option> 169 <option value="32">32</option>
169 </param> 170 </param>
170 <param argument="hashes" type="integer" value="3" min="1" label="Number of times each kmer is hashed and stored." help="Higher is slower. Higher is more accurate if there is enough memory, but less accurate if there is not enough memory."/> 171 <param argument="hashes" type="integer" min="1" value="3" label="Number of times each kmer is hashed and stored." help="Higher is slower. Higher is more accurate if there is enough memory, but less accurate if there is not enough memory."/>
171 <conditional name="prefilter"> 172 <conditional name="prefilter">
172 <param argument="prefilter" type="select" label="Use a prefilter to eliminate low-depth kmers" help="True is slower, but generally more accurate; filters out low-depth kmers from the main hashtable. The prefilter is more memory-efficient because it uses 2-bit cells."> 173 <param argument="prefilter" type="select" label="Use a prefilter to eliminate low-depth kmers" help="True is slower, but generally more accurate; filters out low-depth kmers from the main hashtable. The prefilter is more memory-efficient because it uses 2-bit cells.">
173 <option value="true">Yes</option> 174 <option value="true">Yes</option>
174 <option value="false" selected="true">No</option> 175 <option value="false" selected="true">No</option>
175 </param> 176 </param>
176 <when value="false"/> 177 <when value="false"/>
177 <when value="true"> 178 <when value="true">
178 <param argument="prehashes" type="integer" value="2" min="1" label="Number of hashes for the prefilter"/> 179 <param argument="prehashes" type="integer" min="1" value="2" label="Number of hashes for the prefilter"/>
179 <param argument="prefilterbits" type="integer" value="2" min="1" label="Bits per cell in prefilter"/> 180 <param argument="prefilterbits" type="integer" min="1" value="2" label="Bits per cell in prefilter"/>
180 <param argument="prefiltersize" type="float" value="0.35" min="0" max="1" label="Fraction of memory to allocate for the prefilter."/> 181 <param argument="prefiltersize" type="float" min="0" max="1" value="0.35" label="Fraction of memory to allocate for the prefilter."/>
181 </when> 182 </when>
182 </conditional> 183 </conditional>
183 <param argument="buildpasses" type="integer" value="1" min="1" label="Number of passes" help="More passes can sometimes increase accuracy by iteratively removing low-depth kmers"/> 184 <param argument="buildpasses" type="integer" min="1" value="1" label="Number of passes" help="More passes can sometimes increase accuracy by iteratively removing low-depth kmers"/>
184 <param argument="minq" type="integer" value="6" min="0" label="Ignore kmers containing bases with quality below this threshold"/> 185 <param argument="minq" type="integer" min="0" value="6" label="Ignore kmers containing bases with quality below this threshold"/>
185 <param argument="minprob" type="float" value="0.5" min="0" max="1" label="Ignore kmers with overall probability of correctness below this threshold"/> 186 <param argument="minprob" type="float" min="0" max="1" value="0.5" label="Ignore kmers with overall probability of correctness below this threshold"/>
186 <param argument="rdk" type="boolean" checked="true" label="Remove duplicate kmers" help="When true, a kmer's count will only be incremented once per read pair, even if that kmer occurs more than once."/> 187 <param argument="rdk" type="boolean" checked="true" label="Remove duplicate kmers" help="When true, a kmer's count will only be incremented once per read pair, even if that kmer occurs more than once."/>
187 </section> 188 </section>
188 <section name="error_det_params" title="Error detection parameters"> 189 <section name="error_det_params" title="Error detection parameters">
189 <param argument="hdp" type="integer" value="90" min="0" max="100" label="highdepthpercentile" help="Position in sorted kmer depth array used as proxy of a read's high kmer depth."/> 190 <param argument="hdp" type="integer" min="0" max="100" value="90" label="highdepthpercentile" help="Position in sorted kmer depth array used as proxy of a read's high kmer depth."/>
190 <param argument="ldp" type="integer" value="25" min="0" max="100" label="lowdepthpercentile" help="Position in sorted kmer depth array used as proxy of a read's low kmer depth."/> 191 <param argument="ldp" type="integer" min="0" max="100" value="25" label="lowdepthpercentile" help="Position in sorted kmer depth array used as proxy of a read's low kmer depth."/>
191 <param argument="tossbadreads" type="boolean" checked="false" label="Throw away reads detected as containing errors."/> 192 <param argument="tossbadreads" type="boolean" checked="false" label="Throw away reads detected as containing errors."/>
192 <param argument="requirebothbad" type="boolean" checked="false" label="Only toss bad pairs if both reads are bad."/> 193 <param argument="requirebothbad" type="boolean" checked="false" label="Only toss bad pairs if both reads are bad."/>
193 <param argument="errordetectratio" type="integer" value="125" min="0" label="Error detection ratio" help="Reads with a ratio of at least this much between their high and low depth kmers will be classified as error reads."/> 194 <param argument="errordetectratio" type="integer" min="0" value="125" label="Error detection ratio" help="Reads with a ratio of at least this much between their high and low depth kmers will be classified as error reads."/>
194 <param argument="highthresh" type="integer" value="12" min="0" label="Threshold for high kmer" help="A high kmer at or above this depth is considered non-error."/> 195 <param argument="highthresh" type="integer" min="0" value="12" label="Threshold for high kmer" help="A high kmer at or above this depth is considered non-error."/>
195 <param argument="lowthresh" type="integer" value="3" min="0" label="Threshold for low kmer" help="Kmers at this and below are always considered errors."/> 196 <param argument="lowthresh" type="integer" min="0" value="3" label="Threshold for low kmer" help="Kmers at this and below are always considered errors."/>
196 </section> 197 </section>
197
198 <section name="error_corr_params" title="Error correction parameters"> 198 <section name="error_corr_params" title="Error correction parameters">
199 <conditional name="ecc"> 199 <conditional name="ecc">
200 <param argument="ecc" type="select" label="What should be done with detected errors?" help="Tadpole is now preferred for error correction, as it does a better job."> 200 <param argument="ecc" type="select" label="What should be done with detected errors?" help="Tadpole is now preferred for error correction, as it does a better job.">
201 <option value="true" >Correct errors when possible</option> 201 <option value="true">Correct errors when possible</option>
202 <option value="false" selected="true">Do not attempt to correct errors</option> 202 <option value="false" selected="true">Do not attempt to correct errors</option>
203 </param> 203 </param>
204 <when value="false"/> 204 <when value="false"/>
205 <when value="true"> 205 <when value="true">
206 <param argument="ecclimit" type="integer" value="3" min="1" label="Correct up to this many errors per read." help="If more are detected, the read will remain unchanged."/> 206 <param argument="ecclimit" type="integer" min="1" value="3" label="Correct up to this many errors per read." help="If more are detected, the read will remain unchanged."/>
207 <param argument="errorcorrectratio" type="integer" value="140" min="0" label="Depth ratio" help="Adjacent kmers with a depth ratio of at least this much between will be classified as an error."/> 207 <param argument="errorcorrectratio" type="integer" min="0" value="140" label="Depth ratio" help="Adjacent kmers with a depth ratio of at least this much between will be classified as an error."/>
208 <param argument="echighthresh" type="integer" value="22" min="0" label="Threshold for high kmer" help="A kmer at this or above may be considered non-error."/> 208 <param argument="echighthresh" type="integer" min="0" value="22" label="Threshold for high kmer" help="A kmer at this or above may be considered non-error."/>
209 <param argument="eclowthresh" type="integer" value="2" min="0" label="Threshold for low kmer." help="kmers at this depth or below will be considered as errors."/> 209 <param argument="eclowthresh" type="integer" min="0" value="2" label="Threshold for low kmer." help="kmers at this depth or below will be considered as errors."/>
210 <param argument="eccmaxqual" type="integer" value="127" min="0" label="Do not correct bases with quality above this value."/> 210 <param argument="eccmaxqual" type="integer" min="0" value="127" label="Do not correct bases with quality above this value."/>
211 <param argument="meo" type="boolean" checked="false" label="Marks errors by reducing quality value of suspected errors; does not correct anything."/> 211 <param argument="meo" type="boolean" checked="false" label="Marks errors by reducing quality value of suspected errors; does not correct anything."/>
212 <param argument="mue" type="boolean" checked="true" label="Mark errors only on uncorrectable reads."/> 212 <param argument="mue" type="boolean" checked="true" label="Mark errors only on uncorrectable reads."/>
213 <param argument="overlap" type="boolean" checked="false" label="Correct errors by read overlap."/> 213 <param argument="overlap" type="boolean" checked="false" label="Correct errors by read overlap."/>
214 </when> 214 </when>
215 </conditional> 215 </conditional>
216 </section> 216 </section>
217
218 <section name="output_options" title="Output options"> 217 <section name="output_options" title="Output options">
219 <param name="save_discarded_reads" type="boolean" checked="false" label="Save the reads that were eliminated from the input datasets to the history"/> 218 <param name="save_discarded_reads" type="boolean" checked="false" label="Save the reads that were eliminated from the input datasets to the history"/>
220 <param name="save_kmer_hists" type="boolean" checked="false" label="Save the kmer histograms (in tabular format) for the input and output datasets to the history"/> 219 <param name="save_kmer_hists" type="boolean" checked="false" label="Save the kmer histograms (in tabular format) for the input and output datasets to the history"/>
221 </section> 220 </section>
222 </inputs> 221 </inputs>
244 </outputs> 243 </outputs>
245 <tests> 244 <tests>
246 <!-- Single end sequencing --> 245 <!-- Single end sequencing -->
247 <test expect_num_outputs="1"> 246 <test expect_num_outputs="1">
248 <param name="input_type" value="single_end"/> 247 <param name="input_type" value="single_end"/>
249 <param name="read1" ftype="fastqsanger" value="bbnorm/input_R1.fastq"/> 248 <param name="read1" value="bbnorm/input_R1.fastq" ftype="fastqsanger"/>
250 <param name="target" value="4"/> 249 <param name="target" value="4"/>
251 <section name="norm_params"> 250 <section name="norm_params">
252 <param name="deterministic" value="true"/> 251 <param name="deterministic" value="true"/>
253 <param name="mindepth" value="0"/> 252 <param name="mindepth" value="0"/>
254 </section> 253 </section>
255 <output name="output_normalised_R1" ftype="fastqsanger" value="bbnorm/normalised_R1.fastq"/> 254 <output name="output_normalised_R1" ftype="fastqsanger" value="bbnorm/normalised_R1.fastq"/>
256 </test> 255 </test>
257 <!-- Single end sequencing, compressed --> 256 <!-- Single end sequencing, compressed -->
258 <test expect_num_outputs="1"> 257 <test expect_num_outputs="1">
259 <param name="input_type" value="single_end"/> 258 <param name="input_type" value="single_end"/>
260 <param name="read1" ftype="fastqsanger.gz" value="bbnorm/input_R1.fastq.gz"/> 259 <param name="read1" value="bbnorm/input_R1.fastq.gz" ftype="fastqsanger.gz"/>
261 <param name="target" value="4"/> 260 <param name="target" value="4"/>
262 <section name="norm_params"> 261 <section name="norm_params">
263 <param name="deterministic" value="true"/> 262 <param name="deterministic" value="true"/>
264 <param name="mindepth" value="0"/> 263 <param name="mindepth" value="0"/>
265 </section> 264 </section>
266 <output name="output_normalised_R1" ftype="fastqsanger.gz" value="bbnorm/normalised_R1.fastq"/> 265 <output name="output_normalised_R1" ftype="fastqsanger.gz" value="bbnorm/normalised_R1.fastq"/>
267 </test> 266 </test>
268 <!-- PE as an interleaved file --> 267 <!-- PE as an interleaved file -->
269 <test expect_num_outputs="4"> 268 <test expect_num_outputs="4">
270 <param name="input_type" value="PE_1file"/> 269 <param name="input_type" value="PE_1file"/>
271 <param name="read1" ftype="fastqsanger" value="bbnorm/input_interleaved.fastq"/> 270 <param name="read1" value="bbnorm/input_interleaved.fastq" ftype="fastqsanger"/>
272 <param name="target" value="4"/> 271 <param name="target" value="4"/>
273 <section name="norm_params"> 272 <section name="norm_params">
274 <param name="deterministic" value="true"/> 273 <param name="deterministic" value="true"/>
275 <param name="mindepth" value="0"/> 274 <param name="mindepth" value="0"/>
276 </section> 275 </section>
284 <output name="kmer_hist_output" ftype="tabular" file="bbnorm/kmer_hist_output.tabular"/> 283 <output name="kmer_hist_output" ftype="tabular" file="bbnorm/kmer_hist_output.tabular"/>
285 </test> 284 </test>
286 <!-- PE as 2 files --> 285 <!-- PE as 2 files -->
287 <test expect_num_outputs="2"> 286 <test expect_num_outputs="2">
288 <param name="input_type" value="PE_2files"/> 287 <param name="input_type" value="PE_2files"/>
289 <param name="read1" ftype="fastqsanger" value="bbnorm/input_R1.fastq"/> 288 <param name="read1" value="bbnorm/input_R1.fastq" ftype="fastqsanger"/>
290 <param name="read2" ftype="fastqsanger" value="bbnorm/input_R2.fastq"/> 289 <param name="read2" value="bbnorm/input_R2.fastq" ftype="fastqsanger"/>
291 <param name="target" value="4"/> 290 <param name="target" value="4"/>
292 <section name="norm_params"> 291 <section name="norm_params">
293 <param name="deterministic" value="true"/> 292 <param name="deterministic" value="true"/>
294 <param name="mindepth" value="0"/> 293 <param name="mindepth" value="0"/>
295 </section> 294 </section>
387 * The sequencing platform has a very high error rate (ex: ONT) that might mislead this algorithm. 386 * The sequencing platform has a very high error rate (ex: ONT) that might mislead this algorithm.
388 387
389 </help> 388 </help>
390 <expand macro="citations"/> 389 <expand macro="citations"/>
391 </tool> 390 </tool>
392