diff dedup.xml @ 2:74df4a44471b draft

planemo upload for repository https://github.com/open2c/pairtools commit abd18c860877b2100e02019b4b597d6d2569994e
author iuc
date Thu, 18 Sep 2025 05:56:33 +0000
parents c99b93043a7f
children e36bce668b44
line wrap: on
line diff
--- a/dedup.xml	Mon Mar 10 09:35:12 2025 +0000
+++ b/dedup.xml	Thu Sep 18 05:56:33 2025 +0000
@@ -1,59 +1,123 @@
-<tool id="pairtools_dedup" name="Pairtools dedup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="23.2" license="MIT">
+<tool id="pairtools_dedup" name="Pairtools dedup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE_VERSION@" license="MIT">
     <description>Find and remove PCR/optical duplicates</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
+        #if $pairs_path.is_of_type('4dn_pairs.gz') or $pairs_path.is_of_type('4dn_pairsam.gz'):
+            #set $input_link = "input.gz"
+            #set $output_dedup_pairs_link = "output_dedup_pairs.ga"
+            #set $output_dups_pairs_link = "output_dups_pairs.gz"
+        #else
+            #set $input_link = "input"
+            #set $output_dedup_pairs_link = "output_dedup_pairs"
+            #set $output_dups_pairs_link = "output_dups_pairs"
+        #end if
+        ln -s '$pairs_path' '$input_link' &&
+        ln -s '$output_dedup_pairs' '$output_dedup_pairs_link' &&
+        ln -s '$output_dups_pairs' '$output_dups_pairs_link' &&
         pairtools dedup
-            '$pairs_path'
-            -o '$output_dedup_pairs'
+            '$input_link'
+            -o '$output_dedup_pairs_link'
             #if $output_dups:
-                --output-dups '$output_dups_pairs'
+                --output-dups '$output_dups_pairs_link'
             #end if
             $mark_dups
             #if $output_stats:
                   --output-stats '$dedup_pairs_stats'
             #end if
+            #if $output_bytile_stats:
+                  --keep-parent-id
+                  --output-bytile-stats '$dedup_bytile_stats'
+            #end if
             --nproc-in \${GALAXY_SLOTS:-4}
             --nproc-out \${GALAXY_SLOTS:-4}
+            --max-mismatch ${max_mismatch}
     ]]></command>
     <inputs>      
-        <param name="pairs_path" type="data" format="4dn_pairs,4dn_pairsam" label="Input pairs file" help="Input triu-flipped sorted .pairs or .pairsam file"/>
+        <param name="pairs_path" type="data" format="4dn_pairs,4dn_pairsam,4dn_pairs.gz,4dn_pairsam.gz" label="Input pairs file" help="Input triu-flipped sorted .pairs or .pairsam file"/>
         <param argument="--mark-dups" type="boolean" truevalue="--mark-dups" falsevalue="" checked="True" label="Duplicate pairs are marked as DD in pair_type and as a duplicate in the SAM entries"/>
         <param argument="--output-dups" type="boolean" truevalue="--output-dups" falsevalue="" checked="False" label="Output file for duplicate pairs"/>
         <param argument="--output-stats" type="boolean" truevalue="--output-stats" falsevalue="" checked="False" label="Output file for duplicate statistics"/>
+        <param argument="--max-mismatch" type="integer" value="3" min="0" label="Maximum number of mismatches. Pairs with both sides mapped within this distance &quot;bp&quot; from each other are considered duplicates."/>
+        <param argument="--output-bytile-stats" type="boolean" truevalue="--output-bytile-stats" falsevalue="" checked="False" label="Output file for optical duplicate statistics for datasets with original Illumina-generated read IDs."/>
+        <param name="compress_output" type="boolean" truevalue=".gz" falsevalue="" checked="false" label="Compress output files" />
     </inputs>
     <outputs>
-        <data name="output_dedup_pairs" format_source="pairs_path" label="${tool.name} on ${on_string}: Deduplicated Pairs"/>
-        <data name="output_dups_pairs" format_source="pairs_path" label="${tool.name} on ${on_string}: Duplicate Pairs">
+        <data name="output_dedup_pairs" label="${tool.name} on ${on_string}: Deduplicated Pairs" format_source="pairs_path" />
+        <data name="output_dups_pairs" label="${tool.name} on ${on_string}: Duplicate Pairs" format_source="pairs_path" >
             <filter>output_dups</filter>
         </data>
         <data name="dedup_pairs_stats" format="tabular" label="${tool.name} on ${on_string}: Deduplicated stats">
             <filter>output_stats</filter>
         </data>
+        <data name="dedup_bytile_stats" format="tabular" label="${tool.name} on ${on_string}: By-tile stats">
+            <filter>output_bytile_stats</filter>
+        </data>
     </outputs>
     <tests>
         <!--Test 01 with default parameters-->
         <test expect_num_outputs="1">
-            <param name="pairs_path" value="output_sorted_pairs.pairsam"/>
-            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" lines_diff="20"/>
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
+            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam" lines_diff="20" />
+        </test>
+        <!--Test 02 with default parameters and compressed output-->
+        <test expect_num_outputs="1">
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
+            <param name="compress_output" value="true"/>
+            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam" decompress="true" lines_diff="20"/>
         </test>
-        <!--Test 02 mark_dups enabled and output_dups-->
+        <!--Test 03 with default parameters and compressed iput-->
+        <test expect_num_outputs="1">
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam.gz"/>
+            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
+        </test>
+        <!--Test 04 mark_dups enabled and output_dups-->
         <test expect_num_outputs="2">
-            <param name="pairs_path" value="output_sorted_pairs.pairsam"/>
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
             <param name="mark_dups" value="true"></param>
             <param name="output_dups" value="true"></param>
-            <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" lines_diff="20"/>
-            <output name="output_dups_pairs"  file="output_dups_pairs_markdups.pairsam" lines_diff="20"/>
+            <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
+            <output name="output_dups_pairs"  file="output_dups_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
         </test>
-        <!--Test 03 mark_dups and output_stats enabled-->
+        <!--Test 05 mark_dups and output_stats enabled-->
         <test expect_num_outputs="2">
-            <param name="pairs_path" value="output_sorted_pairs.pairsam"/>
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
+            <param name="mark_dups" value="true"></param>
+            <param name="output_stats" value="true"></param>
+            <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
+            <output name="dedup_pairs_stats"  file="output_dedup_pairs.stats" ftype="tabular" lines_diff="20"/>
+        </test>
+        <!--Test 06 mark_dups and output_stats enabled, max_mismatch set to 0-->
+        <test expect_num_outputs="2">
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
             <param name="mark_dups" value="true"></param>
             <param name="output_stats" value="true"></param>
-            <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" lines_diff="20"/>
-            <output name="dedup_pairs_stats"  file="output_dedup_pairs.stats" lines_diff="20"/>
+            <param name="max_mismatch"  value="0"></param>
+            <output name="output_dedup_pairs" file="output_dedup_max_mismatch0_sorted.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
+            <output name="dedup_pairs_stats"  file="output_dedup_max_mismatch0_sorted.stats" ftype="tabular" lines_diff="20"/>
+        </test>
+        <!--Test 07 mark_dups and output_stats + bytile_stats enabled-->
+        <test expect_num_outputs="3">
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
+            <param name="mark_dups" value="true"></param>
+            <param name="output_stats" value="true"></param>
+            <param name="output_bytile_stats" value="true"></param>
+            <output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
+            <output name="dedup_pairs_stats"  file="output_dedup_max_parent_id_bytile_sorted.stats" ftype="tabular" lines_diff="20"/>
+            <output name="dedup_bytile_stats"  file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" ftype="tabular" lines_diff="20"/>
+        </test>
+        <!--Test 08 mark_dups and output_stats + bytile_stats enabled, compress output-->
+        <test expect_num_outputs="3">
+            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
+            <param name="mark_dups" value="true"></param>
+            <param name="output_stats" value="true"></param>
+            <param name="compress_output" value="true"></param>
+            <param name="output_bytile_stats" value="true"></param>
+            <output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" ftype="4dn_pairsam" decompress="true" lines_diff="20"/>
+            <output name="dedup_pairs_stats"  file="output_dedup_max_parent_id_bytile_sorted.stats" ftype="tabular" decompress="true" lines_diff="20"/>
+            <output name="dedup_bytile_stats"  file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" ftype="tabular" decompress="true" lines_diff="20"/>
         </test>
     </tests>
     <help><![CDATA[