view fgbio_findswitchbackreads.xml @ 1:62f157ab67d1 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/fgbio commit fdd4ffa3adad8412f98305d809927691bafd2ed7
author iuc
date Tue, 04 Nov 2025 18:39:24 +0000
parents fe8744fd4198
children
line wrap: on
line source

<tool id="fgbio_findswitchbackreads" name="fgbio FindSwitchbackReads" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>identifies reads derived from a template switch during library construction</description>
    <!-- Shared definitions are imported from macros.xml. The TOOL_VERSION,
         VERSION_SUFFIX, PROFILE and PREPARE_FASTA_IDX tokens and the
         requirements / stdio / version_command / mandatory_reference / citations
         macros used in this file are presumably defined there; confirm against
         macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
        ## Reference staging token. It is expected to end with a command separator
        ## and to define the shell variable 'reffa' that is passed to the ref
        ## option below (see macros.xml).
        @PREPARE_FASTA_IDX@

        ## Link the input dataset under a fixed name in the job working directory.
        ln -s '$input' input.bam &&

        fgbio FindSwitchbackReads
        --input input.bam
        --output output.bam
        ## Quote the shell variable so that a reference path containing spaces or
        ## glob characters is passed to fgbio as a single argument.
        --ref "\$reffa"
        --sort-order Coordinate
        ## Optional numeric thresholds: each flag is emitted only when the user
        ## supplied a value; otherwise the flag is omitted entirely.
        #if str($max_offset) != '':
            --max-offset $max_offset
        #end if
        #if str($max_gap) != '':
            --max-gap $max_gap
        #end if
        #if str($min_length) != '':
            --min-length $min_length
        #end if
        #if str($max_error_rate) != '':
            --max-error-rate $max_error_rate
        #end if
        ## Boolean parameters expand to their truevalue (the complete flag text)
        ## or to an empty string.
        $dont_unmap
        $metrics
    ]]></command>
    <inputs>
        <!-- Alignments to scan and the reference genome used by the tool. -->
        <param argument="--input" type="data" format="bam,sam" label="Input BAM/SAM file"/>
        <expand macro="mandatory_reference" argument="--ref" help="Reference genome fasta file"/>
        <!-- Detection thresholds. All are optional: when a field is left empty the
             command section omits the corresponding flag. -->
        <param argument="--max-offset" type="integer" optional="true" min="0" value="35" label="Maximum offset" help="Maximum offset between the ends of the two segments of the read on the reference. Set to 0 to disable read-based checks."/>
        <param argument="--max-gap" type="integer" optional="true" min="0" value="500" label="Maximum gap" help="Maximum gap between R1 and R2 of tandem reads to call a template a switchback. Set to 0 to disable tandem-based checks."/>
        <param argument="--min-length" type="integer" optional="true" min="1" value="6" label="Minimum match length" help="Minimum match length of the switched back segment."/>
        <param argument="--max-error-rate" type="float" optional="true" min="0.0" max="1.0" value="0.1" label="Maximum error rate" help="Maximum mismatch error rate of switchback match to genome."/>
        <!-- Boolean switches: the truevalue is inserted verbatim into the command line.
             For the metrics switch, "metrics" is the file prefix that the outputs
             section matches against. -->
        <param argument="--dont-unmap" type="boolean" truevalue="--dont-unmap" falsevalue="" checked="false" label="Don't unmap reads" help="If true, do NOT unmap reads from switchback templates."/>
        <param argument="--metrics" type="boolean" truevalue="--metrics metrics" falsevalue="" checked="false" label="Output metrics" help="Generate metrics files."/>
    </inputs>
    <outputs>
        <!-- Primary output: the BAM written by the command as output.bam. -->
        <data name="output" format="bam" from_work_dir="output.bam" label="${tool.name} on ${on_string}: output"/>
        <!-- Metrics tables, only created when the "Output metrics" switch is on.
             Files named metrics.<designation>.txt in the working directory are
             collected; they are typed as tabular because fgbio metric files are
             tab-delimited text (tabular remains usable wherever txt is accepted). -->
        <collection name="metrics_collection" type="list" label="${tool.name} on ${on_string}: metrics">
            <filter>metrics</filter>
            <discover_datasets pattern="metrics\.(?P&lt;designation&gt;.+)\.txt" format="tabular"/>
        </collection>
        <!-- Plots PDF written under the same "metrics" prefix; same filter as above. -->
        <data name="metrics_plots" format="pdf" from_work_dir="metrics.plots.pdf" label="${tool.name} on ${on_string}: plots">
            <filter>metrics</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: all thresholds left at their form defaults, reference taken
             from the history; only the BAM output is expected. -->
        <test expect_num_outputs="1">
            <param name="input" value="test1_in.bam"/>
            <conditional name="addref_cond">
                <param name="addref_select" value="history"/>
                <param name="ref" value="test1.fa"/>
            </conditional>
            <output name="output" file="test1_out.bam"/>
        </test>
        <!-- Test 2: same input and reference, with the maximum gap set to 0
             (documented in the parameter help as disabling tandem-based checks). -->
        <test expect_num_outputs="1">
            <param name="input" value="test1_in.bam"/>
            <param name="max_gap" value="0"/>
            <conditional name="addref_cond">
                <param name="addref_select" value="history"/>
                <param name="ref" value="test1.fa"/>
            </conditional>
            <output name="output" file="test2_out.bam"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

FindSwitchbackReads finds reads where a template switch occurred during library construction. Some library construction methods, notably ultra-low-input shotgun methods, are prone to template switching events that create molecules (templates, inserts) that, instead of being a linear copy of a segment of genomic DNA, are chimeras formed by starting on one strand of DNA and then switching to the opposite strand.

**Algorithm**

Templates that contain strand switch events (switch-backs) are found by this tool in two different ways:

1. By looking at reads that contain soft-clipped bases at their 5' end that, when reverse complemented, match the genome proximal to the 5'-most mapped base of the read (read-based switchbacks).

2. By identifying templates with 'FF' or 'RR' (aka tandem) orientation where it is surmised that the template switch occurred in the un-sequenced region of the template between R1 and R2 (tandem-based switchbacks).

**Input and Output**

The tool takes as input a SAM or BAM file and produces a BAM file in which switchback templates are identified and, unless **Don't unmap reads** is enabled, unmapped. All reads from a switchback template are tagged with an 'sb' tag that describes the nature of the switchback.

**Parameters**

- **Maximum offset**: Controls how far away to search for the reverse-complemented sequence
- **Maximum gap**: Maximum gap between R1 and R2 of tandem reads to call a template a switchback
- **Minimum match length**: Minimum number of soft-clipped bases that must exist to trigger the search
- **Maximum error rate**: Allows for some mismatches between the soft-clipped sequence and the genome
- **Don't unmap reads**: By default, switchback reads are made unmapped. This option prevents that behavior
- **Output metrics**: Generate summary metrics, length distributions, and plots

    ]]></help>
    <expand macro="citations"/>
</tool>