comparison fastp.xml @ 20:7e30f4d7077d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastp commit 3214ce465671de3c15da94f71f2c3558f332d39a
author iuc
date Sun, 19 Oct 2025 07:26:44 +0000
parents 046512eda8c0
children
comparison
equal deleted inserted replaced
19:cbed9b3abcd3 20:7e30f4d7077d
1 <tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy0" profile="23.1"> 1 <tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1">
2 <description>fast all-in-one preprocessing for FASTQ files</description> 2 <description>fast all-in-one preprocessing for FASTQ files</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="biotools" /> 6 <expand macro="biotools" />
147 #if str($filter_options.low_complexity_filter.complexity_threshold): 147 #if str($filter_options.low_complexity_filter.complexity_threshold):
148 -Y $filter_options.low_complexity_filter.complexity_threshold 148 -Y $filter_options.low_complexity_filter.complexity_threshold
149 #end if 149 #end if
150 150
151 151
152 ## Duplicate analysis / deduplication
153
154 $duplicated_reads.handling_options.eval_dups
155 #if not str($duplicated_reads.handling_options.eval_dups):
156 $duplicated_reads.handling_options.dedup
157 #end if
158
159
152 ## Read Modification Options 160 ## Read Modification Options
153 161
154 ## PolyG tail trimming, useful for NextSeq/NovaSeq data 162 ## PolyG tail trimming, useful for NextSeq/NovaSeq data
155 163
156 #if $read_mod_options.polyg_tail_trimming.trimming_select in ['', '-g']: 164 #if $read_mod_options.polyg_tail_trimming.trimming_select in ['', '-g']:
184 #end if 192 #end if
185 #end if 193 #end if
186 194
187 ## Per read cutting by quality options 195 ## Per read cutting by quality options
188 196
189 #if $read_mod_options.cutting_by_quality_options.cut_by_quality5 or $read_mod_options.cutting_by_quality_options.cut_by_quality3: 197 #if str($read_mod_options.cutting_by_quality_options.cut_front_select.cut_front) == '--cut_front'
190 198 $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front
191 $read_mod_options.cutting_by_quality_options.cut_by_quality5 199 --cut_front_window_size $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_window_size
192 200 --cut_front_mean_quality $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_mean_quality
193 $read_mod_options.cutting_by_quality_options.cut_by_quality3 201 #end if
194 202
195 #if str($read_mod_options.cutting_by_quality_options.cut_window_size): 203 #if str($read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail) == '--cut_tail'
196 -W $read_mod_options.cutting_by_quality_options.cut_window_size 204 $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail
197 #end if 205 --cut_tail_window_size $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_window_size
198 #if str($read_mod_options.cutting_by_quality_options.cut_mean_quality): 206 --cut_tail_mean_quality $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_mean_quality
199 -M $read_mod_options.cutting_by_quality_options.cut_mean_quality 207 #end if
200 #end if 208
209 #if str($read_mod_options.cutting_by_quality_options.cut_right_select.cut_right) == '--cut_right'
210 $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right
211 --cut_right_window_size $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_window_size
212 --cut_right_mean_quality $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_mean_quality
201 #end if 213 #end if
202 214
203 ## Base correction by overlap analysis options 215 ## Base correction by overlap analysis options
204 216
205 $read_mod_options.base_correction_options.correction 217 $read_mod_options.base_correction_options.correction
267 <section name="low_complexity_filter" title="Low complexity filtering options" expanded="True"> 279 <section name="low_complexity_filter" title="Low complexity filtering options" expanded="True">
268 <param name="enable_low_complexity_filter" argument="-y" type="boolean" truevalue="-y" falsevalue="" checked="false" label="Enable low complexity filter" help="The complexity is defined as the percentage of base that is different from its next base, default is No"/> 280 <param name="enable_low_complexity_filter" argument="-y" type="boolean" truevalue="-y" falsevalue="" checked="false" label="Enable low complexity filter" help="The complexity is defined as the percentage of base that is different from its next base, default is No"/>
269 <param name="complexity_threshold" argument="-Y" type="integer" optional="true" label="Complexity threshold" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/> 281 <param name="complexity_threshold" argument="-Y" type="integer" optional="true" label="Complexity threshold" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/>
270 </section> 282 </section>
271 </section> 283 </section>
272 284 <section name="duplicated_reads" title="Duplicated Reads Options"> <!-- Duplicate analysis is disabled by default; the dedup switch is only offered when analysis is enabled -->
285 <conditional name="handling_options">
286 <param name="eval_dups" type="select" label="Enable duplicated reads analysis" help="If enabled, calculate and report read duplication statistics. Enabling this is also a prerequisite for optional deduplication of reads. Duplicate detection relies exclusively on exact identity between read sequences (both for SE and PE data). It also increases tool memory requirements and running time moderately. NOTE: the default (no duplication analysis) is different from the command-line tool.">
287 <option value="">Enable</option>
288 <option value="--dont_eval_duplication" selected="true">Disable (--dont_eval_duplication)</option>
289 </param>
290 <when value="--dont_eval_duplication" />
291 <when value="">
292 <param argument="--dedup" type="boolean" truevalue="--dedup" falsevalue="" label="Drop duplicate reads/pairs"/>
293 </when>
294 </conditional>
295 </section>
273 <!-- Read Modification Options --> 296 <!-- Read Modification Options -->
274 <section name="read_mod_options" title="Read Modification Options"> 297 <section name="read_mod_options" title="Read Modification Options">
275 <conditional name="polyg_tail_trimming"> 298 <conditional name="polyg_tail_trimming">
276 <param name="trimming_select" type="select" label="PolyG tail trimming" help="This feature is enabled for NextSeq/NovaSeq data by default. NextSeq/NovaSeq data is detected by the machine ID in the FASTQ records."> 299 <param name="trimming_select" type="select" label="PolyG tail trimming" help="This feature is enabled for NextSeq/NovaSeq data by default. NextSeq/NovaSeq data is detected by the machine ID in the FASTQ records.">
277 <option value="" selected="true">Automatic trimming for Illumina NextSeq/NovaSeq data</option> 300 <option value="" selected="true">Automatic trimming for Illumina NextSeq/NovaSeq data</option>
305 <param argument="--umi_len" type="integer" optional="true" label="UMI length" help="If the UMI is in read1/read2, its length should be provided."/> 328 <param argument="--umi_len" type="integer" optional="true" label="UMI length" help="If the UMI is in read1/read2, its length should be provided."/>
306 <param argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/> 329 <param argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/>
307 </section> 330 </section>
308 331
309 <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True"> 332 <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True">
310 <param name="cut_by_quality5" argument="-5" type="boolean" truevalue="-5" falsevalue="" checked="false" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'), default is disabled (WARNING: this will interfere deduplication for both PE/SE data)."/> 333 <conditional name="cut_front_select"> <!-- 5' quality cutting; window size and mean quality are only offered once cutting is switched on -->
311 <param name="cut_by_quality3" argument="-3" type="boolean" truevalue="-3" falsevalue="" checked="false" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'), default is disabled (WARNING: this will interfere deduplication for SE data)."/> 334 <param argument="--cut_front" type="select" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'). (WARNING: this will interfere with deduplication of both PE/SE data if performed with downstream tools.)">
312 <param name="cut_window_size" argument="-W" type="integer" optional="true" label="Cutting window size" help="The size of the sliding window for sliding window trimming, default is 4."/> 335 <option value="--cut_front">Yes</option>
313 <param name="cut_mean_quality" argument="-M" type="integer" optional="true" label="Cutting mean quality" help="The bases in the sliding window with mean quality below cutting_quality will be cut, default is Q20."/> 336 <option value="" selected="true">No</option>
337 </param>
338 <when value="--cut_front"> <!-- both values are mandatory: the command template passes their flags unconditionally in this branch -->
339 <param argument="--cut_front_window_size" type="integer" optional="false" value="4" min="1" max="1000" label="Cutting window size for cut front" help="The size of the sliding window for sliding window trimming."/>
340 <param argument="--cut_front_mean_quality" type="integer" optional="false" value="20" min="1" max="30" label="Cutting mean quality for cut front" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
341 </when>
342 <when value="">
343 </when>
344 </conditional>
345 <conditional name="cut_tail_select"> <!-- 3' quality cutting; window size and mean quality are only offered once cutting is switched on -->
346 <param argument="--cut_tail" type="select" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'). (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
347 <option value="--cut_tail">Yes</option>
348 <option value="" selected="true">No</option>
349 </param>
350 <when value="--cut_tail"> <!-- both values are mandatory: the command template passes their flags unconditionally in this branch -->
351 <param argument="--cut_tail_window_size" type="integer" optional="false" value="4" min="1" max="1000" label="Cutting window size for cut tail" help="The size of the sliding window for sliding window trimming."/>
352 <param argument="--cut_tail_mean_quality" type="integer" optional="false" value="20" min="1" max="30" label="Cutting mean quality for cut tail" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
353 </when>
354 <when value="">
355 </when>
356 </conditional>
357 <conditional name="cut_right_select"> <!-- sliding window cutting from front to tail; window size and mean quality are only offered once cutting is switched on -->
358 <param argument="--cut_right" type="select" label="Cut by quality from front to tail (sliding window, cut right)" help="Move a sliding window from front to tail, if meet one window with mean quality &lt; threshold, drop the bases in the window and the right part, and then stop. (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
359 <option value="--cut_right">Yes</option>
360 <option value="" selected="true">No</option>
361 </param>
362 <when value="--cut_right"> <!-- both values are mandatory: the command template passes their flags unconditionally in this branch -->
363 <param argument="--cut_right_window_size" type="integer" optional="false" value="4" min="1" max="1000" label="Cutting window size for cut right" help="The size of the sliding window for sliding window trimming."/>
364 <param argument="--cut_right_mean_quality" type="integer" optional="false" value="20" min="1" max="30" label="Cutting mean quality for cut right" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
365 </when>
366 <when value="">
367 </when>
368 </conditional>
314 </section> 369 </section>
315 370
316 <section name="base_correction_options" title="Base correction by overlap analysis options" expanded="True"> 371 <section name="base_correction_options" title="Base correction by overlap analysis options" expanded="True">
317 <param name="correction" argument="-c" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Enable base correction" help="Enable base correction in overlapped regions (only for PE data), default is disabled."/> 372 <param name="correction" argument="-c" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Enable base correction" help="Enable base correction in overlapped regions (only for PE data), default is disabled."/>
318 </section> 373 </section>
358 </conditional> 413 </conditional>
359 <output name="out1" ftype="fastqsanger" file="out1.fq"/> 414 <output name="out1" ftype="fastqsanger" file="out1.fq"/>
360 <output name="report_html"> 415 <output name="report_html">
361 <assert_contents> 416 <assert_contents>
362 <has_text text="fastp report"/> 417 <has_text text="fastp report"/>
418 <not_has_text text="duplication rate:"/>
363 </assert_contents> 419 </assert_contents>
364 </output> 420 </output>
365 <output name="report_json"> 421 <output name="report_json">
366 <assert_contents> 422 <assert_contents>
367 <has_text text="fastp report"/> 423 <has_text text="fastp report"/>
424 <not_has_text text="&quot;duplication&quot;:"/>
368 </assert_contents> 425 </assert_contents>
369 </output> 426 </output>
370 </test> 427 </test>
371 <!-- 2. Ensure paired collection works --> 428 <!-- 2. Ensure paired collection works -->
372 <test expect_num_outputs="4"> 429 <test expect_num_outputs="4">
383 <param name="report_json" value="False" /> 440 <param name="report_json" value="False" />
384 </section> 441 </section>
385 <output name="report_html"> 442 <output name="report_html">
386 <assert_contents> 443 <assert_contents>
387 <has_text text="fastp report"/> 444 <has_text text="fastp report"/>
445 <not_has_text text="duplication rate:"/>
388 </assert_contents> 446 </assert_contents>
389 </output> 447 </output>
390 <output_collection name="output_paired_coll" type="paired"> 448 <output_collection name="output_paired_coll" type="paired">
391 <element name="forward" value="out_bwa1.fq" ftype="fastqsanger"/> 449 <element name="forward" value="out_bwa1.fq" ftype="fastqsanger"/>
392 <element name="reverse" value="out_bwa2.fq" ftype="fastqsanger"/> 450 <element name="reverse" value="out_bwa2.fq" ftype="fastqsanger"/>
494 <assert_contents> 552 <assert_contents>
495 <has_text text="fastp report"/> 553 <has_text text="fastp report"/>
496 </assert_contents> 554 </assert_contents>
497 </output> 555 </output>
498 </test> 556 </test>
499 <!-- 8. Ensure JSON report output works --> 557 <!-- 8. Ensure enabling duplicate analysis works -->
500 <test expect_num_outputs="2"> 558 <test expect_num_outputs="3">
501 <conditional name="single_paired"> 559 <conditional name="single_paired">
502 <param name="single_paired_selector" value="single"/> 560 <param name="single_paired_selector" value="single"/>
503 <param name="in1" ftype="fastqsanger" value="R1.fq"/> 561 <param name="in1" ftype="fastqsanger" value="R1.fq"/>
504 </conditional> 562 </conditional>
505 <section name="output_options"> 563 <section name="duplicated_reads">
506 <param name="report_html" value="False"/> 564 <conditional name="handling_options">
565 <param name="eval_dups" value=""/>
566 </conditional>
507 </section> 567 </section>
508 <output name="out1" ftype="fastqsanger" file="out1.fq"/> 568 <output name="out1" ftype="fastqsanger" file="out1.fq"/>
569 <output name="report_html">
570 <assert_contents>
571 <has_text text="fastp report"/>
572 <has_text text="duplication rate:"/>
573 </assert_contents>
574 </output>
509 <output name="report_json"> 575 <output name="report_json">
510 <assert_contents> 576 <assert_contents>
511 <has_text text="fastp report"/> 577 <has_text text="fastp report"/>
578 <has_text text="&quot;duplication&quot;:"/>
512 </assert_contents> 579 </assert_contents>
513 </output> 580 </output>
514 </test> 581 </test>
515 <!-- 9. Ensure polyG trimming works --> 582 <!-- 9. Ensure polyG trimming works -->
516 <test expect_num_outputs="3"> 583 <test expect_num_outputs="3">
714 <output_collection name="output_paired_coll" type="paired"> 781 <output_collection name="output_paired_coll" type="paired">
715 <element name="forward" value="bwa-mem-fastq-paired-collection/output_forward.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/> 782 <element name="forward" value="bwa-mem-fastq-paired-collection/output_forward.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
716 <element name="reverse" value="bwa-mem-fastq-paired-collection/output_reverse.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/> 783 <element name="reverse" value="bwa-mem-fastq-paired-collection/output_reverse.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
717 </output_collection> 784 </output_collection>
718 </test> 785 </test>
786 <!-- 17. Ensure quality cutting works: cut_front, cut_tail and cut_right are all enabled with explicit window sizes and mean qualities, and the JSON report must contain each option with its value -->
787 <test expect_num_outputs="3">
788 <conditional name="single_paired">
789 <param name="single_paired_selector" value="single"/>
790 <param name="in1" ftype="fastqsanger.gz" value="R1.fq.gz"/>
791 </conditional>
792 <section name="read_mod_options">
793 <section name="cutting_by_quality_options">
794 <conditional name="cut_front_select">
795 <param name="cut_front" value="--cut_front"/>
796 <param name="cut_front_window_size" value="2"/>
797 <param name="cut_front_mean_quality" value="3"/>
798 </conditional>
799 <conditional name="cut_tail_select">
800 <param name="cut_tail" value="--cut_tail"/>
801 <param name="cut_tail_window_size" value="4"/>
802 <param name="cut_tail_mean_quality" value="5"/>
803 </conditional>
804 <conditional name="cut_right_select">
805 <param name="cut_right" value="--cut_right"/>
806 <param name="cut_right_window_size" value="6"/>
807 <param name="cut_right_mean_quality" value="7"/>
808 </conditional>
809 </section>
810 </section>
811 <output name="out1" ftype="fastqsanger.gz" decompress="true" file="quality_cutting_output.fq.gz"/>
812 <output name="report_json">
813 <assert_contents>
814 <has_text text="--cut_front"/>
815 <has_text text="--cut_tail"/>
816 <has_text text="--cut_right"/>
817 <has_text text="--cut_front_window_size 2"/>
818 <has_text text="--cut_front_mean_quality 3"/>
819 <has_text text="--cut_tail_window_size 4"/>
820 <has_text text="--cut_tail_mean_quality 5"/>
821 <has_text text="--cut_right_window_size 6"/>
822 <has_text text="--cut_right_mean_quality 7"/>
823 </assert_contents>
824 </output>
825 </test>
826 <!-- 18. Ensure deduplication works: duplicate analysis is enabled together with the dedup option; the HTML report is switched off, hence only two outputs are expected -->
827 <test expect_num_outputs="2">
828 <conditional name="single_paired">
829 <param name="single_paired_selector" value="single"/>
830 <param name="in1" ftype="fastqsanger" value="R1_with_dup.fq"/>
831 </conditional>
832 <section name="duplicated_reads">
833 <conditional name="handling_options">
834 <param name="eval_dups" value=""/>
835 <param name="dedup" value="true"/>
836 </conditional>
837 </section>
838 <section name="output_options">
839 <param name="report_html" value="false"/>
840 </section>
841 <output name="out1" ftype="fastqsanger" file="out1.fq"/>
842 <output name="report_json">
843 <assert_contents>
844 <has_text text="fastp report"/>
845 <has_text text="&quot;duplication&quot;:"/>
846 </assert_contents>
847 </output>
848 </test>
719 </tests> 849 </tests>
720 <help><![CDATA[ 850 <help><![CDATA[
721 .. class:: infomark 851 .. class:: infomark
722 852
723 **What it does** 853 **What it does**
725 fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. This tool is developed in C++ with multithreading supported to 855 fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. This tool is developed in C++ with multithreading supported to
726 afford high performance. 856 afford high performance.
727 857
728 *Features* 858 *Features*
729 859
730 1. Filter out bad reads (too low quality, too short, or too many N...) 860 1. Filter out bad (too low quality, too short, or too many N...) and/or duplicate reads
731 861
732 2. Cut low quality bases for per read in its 5' and 3' by evaluating the mean quality from a sliding window (like Trimmomatic but faster) 862 2. Cut low quality bases for per read in its 5' and 3' by evaluating the mean quality from a sliding window (like Trimmomatic but faster)
733 863
734 3. Trim all reads in front and tail 864 3. Trim all reads in front and tail
735 865