diff fgbio_findswitchbackreads.xml @ 0:fe8744fd4198 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/fgbio commit 88711b338e90adc003a90930a43df3315b2ece70
author iuc
date Tue, 04 Nov 2025 12:26:38 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fgbio_findswitchbackreads.xml	Tue Nov 04 12:26:38 2025 +0000
@@ -0,0 +1,102 @@
+<tool id="fgbio_findswitchbackreads" name="fgbio FindSwitchbackReads" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>identifies reads derived from a template switch during library construction</description>
+    <!-- Shared definitions are imported from macros.xml, which is not part of this file. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Macro expansions: the bodies of "requirements", "stdio" and "version_command" live in macros.xml.
+         NOTE(review): the @...@ tokens used in the tool tag and the command template are presumably
+         defined there as well; confirm against macros.xml. -->
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command><![CDATA[
+        ## Token expanded from macros.xml (not visible in this file).
+        ## NOTE(review): presumably this is what defines the reffa shell variable used below; confirm against macros.xml.
+        @PREPARE_FASTA_IDX@
+
+        ## Link the input dataset under a fixed name in the job working directory.
+        ## NOTE(review): SAM inputs are linked as input.bam too; confirm fgbio detects the format from content, not the extension.
+        ln -s '$input' input.bam &&
+
+        fgbio FindSwitchbackReads
+        --input input.bam
+        --output output.bam
+        ## The reference path comes from a shell variable; it is double-quoted so that a path
+        ## containing whitespace or glob characters is still passed as a single argument.
+        --ref "\$reffa"
+        --sort-order Coordinate
+        ## Each optional numeric flag is emitted only when its parameter has a non-empty value.
+        #if str($max_offset) != '':
+            --max-offset $max_offset
+        #end if
+        #if str($max_gap) != '':
+            --max-gap $max_gap
+        #end if
+        #if str($min_length) != '':
+            --min-length $min_length
+        #end if
+        #if str($max_error_rate) != '':
+            --max-error-rate $max_error_rate
+        #end if
+        ## Boolean parameters expand to their truevalue text when checked and to nothing otherwise.
+        $dont_unmap
+        $metrics
+    ]]></command>
+    <inputs>
+        <!-- Alignments to scan; the command template links this dataset into the job directory. -->
+        <param argument="--input" type="data" format="bam,sam" label="Input BAM/SAM file"/>
+        <!-- Reference selector provided by the "mandatory_reference" macro in macros.xml;
+             the tests in this file address it through the addref_cond conditional. -->
+        <expand macro="mandatory_reference" argument="--ref" help="Reference genome fasta file"/>
+        <!-- Detection thresholds. All are optional: the command template omits a flag when its field is blank. -->
+        <param argument="--max-offset" type="integer" optional="true" min="0" value="35" label="Maximum offset" help="Maximum offset between the ends of the two segments of the read on the reference. Set to 0 to disable read-based checks."/>
+        <param argument="--max-gap" type="integer" optional="true" min="0" value="500" label="Maximum gap" help="Maximum gap between R1 and R2 of tandem reads to call a template a switchback. Set to 0 to disable tandem-based checks."/>
+        <param argument="--min-length" type="integer" optional="true" min="1" value="6" label="Minimum match length" help="Minimum match length of the switched back segment."/>
+        <param argument="--max-error-rate" type="float" optional="true" min="0.0" max="1.0" value="0.1" label="Maximum error rate" help="Maximum mismatch error rate of switchback match to genome."/>
+        <!-- Switches: truevalue is the literal text inserted on the command line when the box is checked.
+             For the metrics switch that text also carries the output prefix "metrics", which the
+             outputs section relies on when collecting the metrics files. -->
+        <param argument="--dont-unmap" type="boolean" truevalue="--dont-unmap" falsevalue="" checked="false" label="Don't unmap reads" help="If true, do NOT unmap reads from switchback templates."/>
+        <param argument="--metrics" type="boolean" truevalue="--metrics metrics" falsevalue="" checked="false" label="Output metrics" help="Generate metrics files."/>
+    </inputs>
+    <outputs>
+        <!-- Main result: the BAM written by the command as output.bam in the job working directory. -->
+        <data name="output" format="bam" from_work_dir="output.bam" label="${tool.name} on ${on_string}: output"/>
+        <!-- Metrics text files, present only when the "metrics" parameter is enabled.
+             Files named metrics.(designation).txt are collected; the "metrics" prefix matches the
+             prefix passed on the command line by the metrics parameter's truevalue. -->
+        <collection name="metrics_collection" type="list" label="${tool.name} on ${on_string}: metrics">
+            <filter>metrics</filter>
+            <discover_datasets pattern="metrics\.(?P&lt;designation&gt;.+)\.txt" format="txt"/>
+        </collection>
+        <!-- Plot PDF, present only when the "metrics" parameter is enabled; taken from metrics.plots.pdf.
+             NOTE(review): confirm the tool environment always produces metrics.plots.pdf when metrics are requested. -->
+        <data name="metrics_plots" format="pdf" from_work_dir="metrics.plots.pdf" label="${tool.name} on ${on_string}: plots">
+            <filter>metrics</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 1: default thresholds with a reference taken from the history.
+             Only the BAM output is expected because the metrics parameter is left unchecked. -->
+        <test expect_num_outputs="1">
+            <param name="input" value="test1_in.bam"/>
+            <conditional name="addref_cond">
+                <param name="addref_select" value="history"/>
+                <param name="ref" value="test1.fa"/>
+            </conditional>
+            <output name="output" file="test1_out.bam"/>
+        </test>
+        <!-- Test 2: same input and reference with max_gap set to 0, which the parameter help
+             describes as disabling the tandem-based checks. -->
+        <test expect_num_outputs="1">
+            <param name="input" value="test1_in.bam"/>
+            <conditional name="addref_cond">
+                <param name="addref_select" value="history"/>
+                <param name="ref" value="test1.fa"/>
+            </conditional>
+            <param name="max_gap" value="0"/>
+            <output name="output" file="test2_out.bam"/>
+        </test>
+        <!-- NOTE(review): no test here enables the metrics or dont_unmap parameters, so the metrics
+             collection and the plots output are not exercised. -->
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+FindSwitchbackReads finds reads where a template switch occurred during library construction. Some library construction methods, notably ultra-low-input shotgun methods, are prone to template switching events that create molecules (templates, inserts) that, instead of being a linear copy of a segment of genomic DNA, are chimeras formed by starting on one strand of DNA and then switching to the opposite strand.
+
+**Algorithm**
+
+Templates that contain strand switch events (switch-backs) are found by this tool in two different ways:
+
+1. By looking at reads that contain soft-clipped bases at their 5' end that, when reverse complemented, match the genome proximal to the 5'-most mapped base of the read (read-based switchbacks).
+
+2. By identifying templates with 'FF' or 'RR' (aka tandem) orientation where it is surmised that the template switch occurred in the un-sequenced region of the template between R1 and R2 (tandem-based switchbacks).
+
+**Input and Output**
+
+The tool takes as input a SAM or BAM file and produces a filtered BAM file where switchback templates are identified and optionally unmapped. All reads from a switch-back template are tagged with an 'sb' tag that describes the nature of the switchback.
+
+**Parameters**
+
+- **Maximum offset**: Controls how far away to search for the reverse-complemented sequence
+- **Maximum gap**: Maximum gap between R1 and R2 of tandem reads to call a template a switchback
+- **Minimum match length**: Minimum number of soft-clipped bases that must exist to trigger the search
+- **Maximum error rate**: Allows for some mismatches between the soft-clipped sequence and the genome
+- **Don't unmap reads**: By default, switchback reads are made unmapped. This option prevents that behavior
+- **Output metrics**: Generate summary metrics, length distributions, and plots
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>