view dedup.xml @ 3:e36bce668b44 draft default tip

planemo upload for repository https://github.com/open2c/pairtools commit f60e7663dc3fd1a564e691ef35d35f35b0a851ad
author iuc
date Wed, 24 Sep 2025 11:48:35 +0000
parents 74df4a44471b
children
line wrap: on
line source

<tool id="pairtools_dedup" name="Pairtools dedup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE_VERSION@" license="MIT">
    <!--
        Galaxy wrapper around `pairtools dedup`.
        Reads one sorted pairs/pairsam dataset and writes the deduplicated pairs,
        plus (optionally) the duplicate pairs, dedup statistics and by-tile statistics.
    -->
    <description>Find and remove PCR/optical duplicates</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        The command symlinks the Galaxy datasets to working-directory names whose
        extension reflects whether the data is gzipped, then runs pairtools on the links.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## pairtools chooses (de)compression from the file name, so each
        ## working-directory symlink carries a ".gz" suffix when its data is gzipped.
        #set $input_is_gz = $pairs_path.is_of_type('4dn_pairs.gz') or $pairs_path.is_of_type('4dn_pairsam.gz')
        #if $input_is_gz:
            #set $input_link = "input.gz"
        #else
            #set $input_link = "input"
        #end if
        ## Pairs outputs are gzipped when the input already is, or on request.
        ## The matching datatype switch lives in the change_format rules of the outputs.
        #if $input_is_gz or $compress_output:
            #set $output_dedup_pairs_link = "output_dedup_pairs.gz"
            #set $output_dups_pairs_link = "output_dups_pairs.gz"
        #else
            #set $output_dedup_pairs_link = "output_dedup_pairs"
            #set $output_dups_pairs_link = "output_dups_pairs"
        #end if
        ln -s '$pairs_path' '$input_link' &&
        ln -s '$output_dedup_pairs' '$output_dedup_pairs_link' &&
        ## Only link the duplicates dataset when that output was requested.
        #if $output_dups:
            ln -s '$output_dups_pairs' '$output_dups_pairs_link' &&
        #end if
        pairtools dedup
            '$input_link'
            -o '$output_dedup_pairs_link'
            #if $output_dups:
                --output-dups '$output_dups_pairs_link'
            #end if
            $mark_dups
            #if $output_stats:
                --output-stats '$dedup_pairs_stats'
            #end if
            #if $output_bytile_stats:
                --keep-parent-id
                --output-bytile-stats '$dedup_bytile_stats'
            #end if
            --nproc-in \${GALAXY_SLOTS:-4}
            --nproc-out \${GALAXY_SLOTS:-4}
            --max-mismatch ${max_mismatch}
    ]]></command>
    <inputs>
        <param name="pairs_path" type="data" format="4dn_pairs,4dn_pairsam,4dn_pairs.gz,4dn_pairsam.gz" label="Input pairs file" help="Input triu-flipped sorted .pairs or .pairsam file"/>
        <param argument="--mark-dups" type="boolean" truevalue="--mark-dups" falsevalue="" checked="true" label="Duplicate pairs are marked as DD in pair_type and as a duplicate in the SAM entries"/>
        <param argument="--output-dups" type="boolean" truevalue="--output-dups" falsevalue="" checked="false" label="Output file for duplicate pairs"/>
        <param argument="--output-stats" type="boolean" truevalue="--output-stats" falsevalue="" checked="false" label="Output file for duplicate statistics"/>
        <param argument="--max-mismatch" type="integer" value="3" min="0" label="Maximum number of mismatches. Pairs with both sides mapped within this distance &quot;bp&quot; from each other are considered duplicates."/>
        <param argument="--output-bytile-stats" type="boolean" truevalue="--output-bytile-stats" falsevalue="" checked="false" label="Output file for optical duplicate statistics for datasets with original Illumina-generated read IDs."/>
        <param name="compress_output" type="boolean" truevalue=".gz" falsevalue="" checked="false" label="Compress output files" help="Gzip the deduplicated and duplicate pairs outputs. These outputs are always compressed when the input file is compressed."/>
    </inputs>
    <outputs>
        <!--
            Pairs outputs inherit the input datatype. When compression is requested for an
            uncompressed input, change_format switches to the matching .gz datatype so the
            datatype agrees with the file content written by the command.
            The "input" attribute is a template combining the compress flag (".gz" or "")
            with the input extension, because the target datatype depends on both.
        -->
        <data name="output_dedup_pairs" label="${tool.name} on ${on_string}: Deduplicated Pairs" format_source="pairs_path">
            <change_format>
                <when input="${compress_output}:${pairs_path.ext}" value=".gz:4dn_pairs" format="4dn_pairs.gz"/>
                <when input="${compress_output}:${pairs_path.ext}" value=".gz:4dn_pairsam" format="4dn_pairsam.gz"/>
            </change_format>
        </data>
        <data name="output_dups_pairs" label="${tool.name} on ${on_string}: Duplicate Pairs" format_source="pairs_path">
            <change_format>
                <when input="${compress_output}:${pairs_path.ext}" value=".gz:4dn_pairs" format="4dn_pairs.gz"/>
                <when input="${compress_output}:${pairs_path.ext}" value=".gz:4dn_pairsam" format="4dn_pairsam.gz"/>
            </change_format>
            <filter>output_dups</filter>
        </data>
        <data name="dedup_pairs_stats" format="tabular" label="${tool.name} on ${on_string}: Deduplicated stats">
            <filter>output_stats</filter>
        </data>
        <data name="dedup_bytile_stats" format="tabular" label="${tool.name} on ${on_string}: By-tile stats">
            <filter>output_bytile_stats</filter>
        </data>
    </outputs>
    <tests>
        <!--Test 01 with default parameters-->
        <test expect_num_outputs="1">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
        </test>
        <!--Test 02 with default parameters and compressed output-->
        <test expect_num_outputs="1">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <param name="compress_output" value="true"/>
            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam.gz" decompress="true" lines_diff="20"/>
        </test>
        <!--Test 03 with default parameters and compressed input (output follows the input and is compressed too)-->
        <test expect_num_outputs="1">
            <param name="pairs_path" ftype="4dn_pairsam.gz" value="output_sorted_pairs.pairsam.gz"/>
            <output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam.gz" decompress="true" lines_diff="20"/>
        </test>
        <!--Test 04 mark_dups enabled and output_dups-->
        <test expect_num_outputs="2">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <param name="mark_dups" value="true"/>
            <param name="output_dups" value="true"/>
            <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
            <output name="output_dups_pairs" file="output_dups_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
        </test>
        <!--Test 05 mark_dups and output_stats enabled-->
        <test expect_num_outputs="2">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <param name="mark_dups" value="true"/>
            <param name="output_stats" value="true"/>
            <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
            <output name="dedup_pairs_stats" file="output_dedup_pairs.stats" ftype="tabular" lines_diff="20"/>
        </test>
        <!--Test 06 mark_dups and output_stats enabled, max_mismatch set to 0-->
        <test expect_num_outputs="2">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <param name="mark_dups" value="true"/>
            <param name="output_stats" value="true"/>
            <param name="max_mismatch" value="0"/>
            <output name="output_dedup_pairs" file="output_dedup_max_mismatch0_sorted.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
            <output name="dedup_pairs_stats" file="output_dedup_max_mismatch0_sorted.stats" ftype="tabular" lines_diff="20"/>
        </test>
        <!--Test 07 mark_dups and output_stats + bytile_stats enabled-->
        <test expect_num_outputs="3">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <param name="mark_dups" value="true"/>
            <param name="output_stats" value="true"/>
            <param name="output_bytile_stats" value="true"/>
            <output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
            <output name="dedup_pairs_stats" file="output_dedup_max_parent_id_bytile_sorted.stats" ftype="tabular" lines_diff="20"/>
            <output name="dedup_bytile_stats" file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" ftype="tabular" lines_diff="20"/>
        </test>
        <!--Test 08 mark_dups and output_stats + bytile_stats enabled, compress output (only the pairs output is compressed; stats stay plain tabular)-->
        <test expect_num_outputs="3">
            <param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
            <param name="mark_dups" value="true"/>
            <param name="output_stats" value="true"/>
            <param name="compress_output" value="true"/>
            <param name="output_bytile_stats" value="true"/>
            <output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" ftype="4dn_pairsam.gz" decompress="true" lines_diff="20"/>
            <output name="dedup_pairs_stats" file="output_dedup_max_parent_id_bytile_sorted.stats" ftype="tabular" lines_diff="20"/>
            <output name="dedup_bytile_stats" file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" ftype="tabular" lines_diff="20"/>
        </test>
    </tests>
    <help><![CDATA[
        **Pairtools dedup**

        Find PCR/optical duplicates in an upper-triangular flipped sorted pairs/pairsam file. Allow for a +/-N bp mismatch at each side of duplicated molecules.

        **Outputs**

        - *Deduplicated Pairs*: always produced, with the same pairs/pairsam datatype as the input.
        - *Duplicate Pairs*: produced when "Output file for duplicate pairs" is enabled.
        - *Deduplicated stats*: produced when "Output file for duplicate statistics" is enabled.
        - *By-tile stats*: produced when the optical duplicate statistics option is enabled; parent read IDs are kept in the output in this mode.

        The pairs outputs are gzip-compressed when the input is compressed or when "Compress output files" is enabled. The statistics outputs are always plain tabular files.

    ]]></help>
    <expand macro="citations"/>
    <expand macro="creator"/>
</tool>