changeset 20:7e30f4d7077d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastp commit 3214ce465671de3c15da94f71f2c3558f332d39a
author iuc
date Sun, 19 Oct 2025 07:26:44 +0000
parents cbed9b3abcd3
children
files fastp.xml macros.xml test-data/R1_with_dup.fq test-data/quality_cutting_output.fq.gz
diffstat 4 files changed, 167 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/fastp.xml	Mon Jun 16 08:56:29 2025 +0000
+++ b/fastp.xml	Sun Oct 19 07:26:44 2025 +0000
@@ -1,4 +1,4 @@
-<tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy0" profile="23.1">
+<tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1">
     <description>fast all-in-one preprocessing for FASTQ files</description>
     <macros>
         <import>macros.xml</import>
@@ -149,6 +149,14 @@
 #end if
 
 
+## Duplicate analysis / deduplication
+## eval_dups renders as '--dont_eval_duplication' when analysis is disabled and as an empty string when enabled; the --dedup flag is only emitted in the enabled (empty) case.
+$duplicated_reads.handling_options.eval_dups
+#if not str($duplicated_reads.handling_options.eval_dups):
+    $duplicated_reads.handling_options.dedup
+#end if
+
+
 ## Read Modification Options
 
 ## PolyG tail trimming, useful for NextSeq/NovaSeq data
@@ -186,18 +194,22 @@
 
 ## Per read cutting by quality options
 
-#if $read_mod_options.cutting_by_quality_options.cut_by_quality5 or $read_mod_options.cutting_by_quality_options.cut_by_quality3:
-
-    $read_mod_options.cutting_by_quality_options.cut_by_quality5
-
-    $read_mod_options.cutting_by_quality_options.cut_by_quality3
+#if str($read_mod_options.cutting_by_quality_options.cut_front_select.cut_front) == '--cut_front'
+        $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front
+        --cut_front_window_size $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_window_size
+        --cut_front_mean_quality $read_mod_options.cutting_by_quality_options.cut_front_select.cut_front_mean_quality
+#end if
 
-    #if str($read_mod_options.cutting_by_quality_options.cut_window_size):
-        -W $read_mod_options.cutting_by_quality_options.cut_window_size
-    #end if
-    #if str($read_mod_options.cutting_by_quality_options.cut_mean_quality):
-        -M $read_mod_options.cutting_by_quality_options.cut_mean_quality
-    #end if
+#if str($read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail) == '--cut_tail'
+        $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail
+        --cut_tail_window_size $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_window_size
+        --cut_tail_mean_quality $read_mod_options.cutting_by_quality_options.cut_tail_select.cut_tail_mean_quality
+#end if
+
+#if str($read_mod_options.cutting_by_quality_options.cut_right_select.cut_right) == '--cut_right'
+        $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right
+        --cut_right_window_size $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_window_size
+        --cut_right_mean_quality $read_mod_options.cutting_by_quality_options.cut_right_select.cut_right_mean_quality
 #end if
 
 ## Base correction by overlap analysis options
@@ -269,7 +281,18 @@
                 <param name="complexity_threshold" argument="-Y" type="integer" optional="true" label="Complexity threshold" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/>
             </section>
         </section>
-
+        <section name="duplicated_reads" title="Duplicated Reads Options"> <!-- Duplication analysis and optional deduplication -->
+            <conditional name="handling_options">
+                <param name="eval_dups" type="select" label="Enable duplicated reads analysis" help="If enabled, calculate and report read duplication statistics. Enabling this is also a prerequisite for optional deduplication of reads. Duplicate detection relies exclusively on exact identity between read sequences (both for SE and PE data). It also increases tool memory requirements and running time moderately. NOTE: the default (no duplication analysis) is different from the command-line tool.">
+                    <option value="">Enable</option> <!-- empty value: no flag is added to the command line -->
+                    <option value="--dont_eval_duplication" selected="true">Disable (--dont_eval_duplication)</option>
+                </param>
+                <when value="--dont_eval_duplication" /> <!-- nothing further to configure when analysis is disabled -->
+                <when value=""> <!-- analysis enabled: deduplication can be requested -->
+                    <param argument="--dedup" type="boolean" truevalue="--dedup" falsevalue="" label="Drop duplicate reads/pairs"/>
+                </when>
+            </conditional>
+        </section>
         <!-- Read Modification Options -->
         <section name="read_mod_options" title="Read Modification Options">
             <conditional name="polyg_tail_trimming">
@@ -307,10 +330,42 @@
             </section>
 
             <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True">
-                <param name="cut_by_quality5" argument="-5" type="boolean" truevalue="-5" falsevalue="" checked="false" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'), default is disabled (WARNING: this will interfere deduplication for both PE/SE data)."/>
-                <param name="cut_by_quality3" argument="-3" type="boolean" truevalue="-3" falsevalue="" checked="false" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'), default is disabled (WARNING: this will interfere deduplication for SE data)."/>
-                <param name="cut_window_size" argument="-W" type="integer" optional="true" label="Cutting window size" help="The size of the sliding window for sliding window trimming, default is 4."/>
-                <param name="cut_mean_quality" argument="-M" type="integer" optional="true" label="Cutting mean quality" help="The bases in the sliding window with mean quality below cutting_quality will be cut, default is Q20."/>
+                <!-- 5' sliding-window quality cutting. The window/quality inputs are non-optional because the command section always passes their flags once this is enabled. -->
+                <conditional name="cut_front_select">
+                    <param argument="--cut_front" type="select" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'). (WARNING: this will interfere with deduplication of both PE/SE data if performed with downstream tools.)">
+                        <option value="--cut_front">Yes</option>
+                        <option value="" selected="true">No</option>
+                    </param>
+                    <when value="--cut_front">
+                        <param argument="--cut_front_window_size" type="integer" value="4" min="1" max="1000" label="Cutting window size for cut front" help="The size of the sliding window for sliding window trimming."/>
+                        <param argument="--cut_front_mean_quality" type="integer" value="20" min="1" max="30" label="Cutting mean quality for cut front" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
+                    </when>
+                    <when value=""/>
+                </conditional>
+                <!-- 3' sliding-window quality cutting. The window/quality inputs are non-optional because the command section always passes their flags once this is enabled. -->
+                <conditional name="cut_tail_select">
+                    <param argument="--cut_tail" type="select" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'). (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
+                        <option value="--cut_tail">Yes</option>
+                        <option value="" selected="true">No</option>
+                    </param>
+                    <when value="--cut_tail">
+                        <param argument="--cut_tail_window_size" type="integer" value="4" min="1" max="1000" label="Cutting window size for cut tail" help="The size of the sliding window for sliding window trimming."/>
+                        <param argument="--cut_tail_mean_quality" type="integer" value="20" min="1" max="30" label="Cutting mean quality for cut tail" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
+                    </when>
+                    <when value=""/>
+                </conditional>
+                <!-- Front-to-tail sliding-window cutting (cut right). The window/quality inputs are non-optional because the command section always passes their flags once this is enabled. -->
+                <conditional name="cut_right_select">
+                    <param argument="--cut_right" type="select" label="Cut by quality with a front-to-tail sliding window (cut right)" help="Move a sliding window from front to tail, if meet one window with mean quality &lt; threshold, drop the bases in the window and the right part, and then stop. (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
+                        <option value="--cut_right">Yes</option>
+                        <option value="" selected="true">No</option>
+                    </param>
+                    <when value="--cut_right">
+                        <param argument="--cut_right_window_size" type="integer" value="4" min="1" max="1000" label="Cutting window size for cut right" help="The size of the sliding window for sliding window trimming."/>
+                        <param argument="--cut_right_mean_quality" type="integer" value="20" min="1" max="30" label="Cutting mean quality for cut right" help="The bases in the sliding window with mean quality below cutting_quality will be cut."/>
+                    </when>
+                    <when value=""/>
+                </conditional>
             </section>
 
             <section name="base_correction_options" title="Base correction by overlap analysis options" expanded="True">
@@ -360,11 +415,13 @@
             <output name="report_html">
                 <assert_contents>
                     <has_text text="fastp report"/>
+                    <not_has_text text="duplication rate:"/>
                 </assert_contents>
             </output>
             <output name="report_json">
                 <assert_contents>
                     <has_text text="fastp report"/>
+                    <not_has_text text="&quot;duplication&quot;:"/>
                 </assert_contents>
             </output>
         </test>
@@ -385,6 +442,7 @@
             <output name="report_html">
                 <assert_contents>
                     <has_text text="fastp report"/>
+                    <not_has_text text="duplication rate:"/>
                 </assert_contents>
             </output>
             <output_collection name="output_paired_coll" type="paired">
@@ -496,19 +554,28 @@
                 </assert_contents>
             </output>
         </test>
-        <!-- 8. Ensure JSON report output works -->
-        <test expect_num_outputs="2">
+        <!-- 8. Ensure enabling duplicate analysis works -->
+        <test expect_num_outputs="3">
             <conditional name="single_paired">
                 <param name="single_paired_selector" value="single"/>
                 <param name="in1" ftype="fastqsanger" value="R1.fq"/>
             </conditional>
-            <section name="output_options">
-                <param name="report_html" value="False"/>
+            <section name="duplicated_reads">
+                <conditional name="handling_options">
+                    <param name="eval_dups" value=""/>
+                </conditional>
             </section>
             <output name="out1" ftype="fastqsanger" file="out1.fq"/>
+            <output name="report_html">
+                <assert_contents>
+                    <has_text text="fastp report"/>
+                    <has_text text="duplication rate:"/>
+                </assert_contents>
+            </output>
             <output name="report_json">
                 <assert_contents>
                     <has_text text="fastp report"/>
+                    <has_text text="&quot;duplication&quot;:"/>
                 </assert_contents>
             </output>
         </test>
@@ -716,6 +783,69 @@
                 <element name="reverse" value="bwa-mem-fastq-paired-collection/output_reverse.fastqsanger.gz" decompress="true" ftype="fastqsanger.gz"/>
             </output_collection>
         </test>
+        <!-- 17. Ensure quality cutting works -->
+        <test expect_num_outputs="3">
+            <conditional name="single_paired">
+                <param name="single_paired_selector" value="single"/>
+                <param name="in1" ftype="fastqsanger.gz" value="R1.fq.gz"/>
+            </conditional>
+            <section name="read_mod_options">
+                <section name="cutting_by_quality_options">
+                    <conditional name="cut_front_select">
+                        <param name="cut_front" value="--cut_front"/>
+                        <param name="cut_front_window_size" value="2"/>
+                        <param name="cut_front_mean_quality" value="3"/>
+                    </conditional>
+                    <conditional name="cut_tail_select">
+                        <param name="cut_tail" value="--cut_tail"/>
+                        <param name="cut_tail_window_size" value="4"/>
+                        <param name="cut_tail_mean_quality" value="5"/>
+                    </conditional>
+                    <conditional name="cut_right_select">
+                        <param name="cut_right" value="--cut_right"/>
+                        <param name="cut_right_window_size" value="6"/>
+                        <param name="cut_right_mean_quality" value="7"/>
+                    </conditional>
+                </section>
+            </section>
+            <output name="out1" ftype="fastqsanger.gz" decompress="true" file="quality_cutting_output.fq.gz"/>
+            <output name="report_json">
+                <assert_contents>
+                    <has_text text="--cut_front"/>
+                    <has_text text="--cut_tail"/>
+                    <has_text text="--cut_right"/>
+                    <has_text text="--cut_front_window_size 2"/>
+                    <has_text text="--cut_front_mean_quality 3"/>
+                    <has_text text="--cut_tail_window_size 4"/>
+                    <has_text text="--cut_tail_mean_quality 5"/>
+                    <has_text text="--cut_right_window_size 6"/>
+                    <has_text text="--cut_right_mean_quality 7"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- 18. Ensure deduplication works -->
+        <test expect_num_outputs="2">
+            <conditional name="single_paired">
+                <param name="single_paired_selector" value="single"/>
+                <param name="in1" ftype="fastqsanger" value="R1_with_dup.fq"/>
+            </conditional>
+            <section name="duplicated_reads">
+                <conditional name="handling_options">
+                    <param name="eval_dups" value=""/>
+                    <param name="dedup" value="true"/>
+                </conditional>
+            </section>
+            <section name="output_options">
+                <param name="report_html" value="false"/>
+            </section>
+            <output name="out1" ftype="fastqsanger" file="out1.fq"/>
+            <output name="report_json">
+                <assert_contents>
+                    <has_text text="fastp report"/>
+                    <has_text text="&quot;duplication&quot;:"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
@@ -727,7 +857,7 @@
 
 *Features*
 
-1. Filter out bad reads (too low quality, too short, or too many N...)
+1. Filter out bad (too low quality, too short, or too many N...) and/or duplicate reads
 
 2. Cut low quality bases for per read in its 5' and 3' by evaluating the mean quality from a sliding window (like Trimmomatic but faster)
 
--- a/macros.xml	Mon Jun 16 08:56:29 2025 +0000
+++ b/macros.xml	Sun Oct 19 07:26:44 2025 +0000
@@ -1,5 +1,6 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.0.0</token>
+    <token name="@TOOL_VERSION@">1.0.1</token>
+    <token name="@VERSION_SUFFIX@">3</token>
     <xml name="biotools">
         <xrefs>
             <xref type="bio.tools">
@@ -69,4 +70,4 @@
             help="The minimum length to detect polyG in the read tail. 10 by default."/>
     </xml>
 
-</macros>
\ No newline at end of file
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/R1_with_dup.fq	Sun Oct 19 07:26:44 2025 +0000
@@ -0,0 +1,12 @@
+@NS500713:64:HFKJJBGXY:1:11101:1675:1101 1:N:0:TATAGCCT+GACCCCCA
+TAGGAGGCTTGGAGTACCAATAATAAAGTGAGCCCACCTTCCTGGTACCCAGACATTTCAGGAGGTCGGGAAATTTTTAAACCCAGGCAGCTTCCTGGCAGTGACATTTGGAGCATCAAAGTGGTAAATAAAATTTCATTTACATTAATAT
++
+6AAAAAEEEEE/E/EA/E/AEA6EE//AEE66/AAE//EEE/E//E/AA/EEE/A/AEE/EEA//EEEEEEEE6EEAAA/E/A/6E/6//6<EAAEEE/EEEA/EA/EEEEEE/<<EEEE//A/EE<AEEEEE/</AA</E<AAAE/E<E/
+@NS500713:64:HFKJJBGXY:1:11101:17113:1101 1:N:0:TATAGCCT+GTTTCTTA
+TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT
++
+AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEEEEE/EEEEEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
+@NS500713:64:HFKJJBGXY:1:11101:17114:1101 1:N:0:TATAGCCT+GTTTCTTA
+TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT
++
+AAAAAEEEEE6EEAAAEEEEE6EEEEEEEBBBBBBBBBEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEEEEE/EEEEEEE6EE<AAEEEAEEEEEEEEEEEEAEEEEEEEA<E/AAEEEAEEEEE/EEEEAAEEE
Binary file test-data/quality_cutting_output.fq.gz has changed