Mercurial > repos > iuc > fgbio_findswitchbackreads
diff fgbio_findswitchbackreads.xml @ 0:fe8744fd4198 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/fgbio commit 88711b338e90adc003a90930a43df3315b2ece70
| author | iuc |
|---|---|
| date | Tue, 04 Nov 2025 12:26:38 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fgbio_findswitchbackreads.xml Tue Nov 04 12:26:38 2025 +0000
@@ -0,0 +1,109 @@
+<tool id="fgbio_findswitchbackreads" name="fgbio FindSwitchbackReads" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <!-- Galaxy wrapper for fgbio FindSwitchbackReads. Requirements, stdio handling, version command, reference selection and citations are expanded from macros made available by the macros.xml import. -->
+    <description>identifies reads derived from a template switch during library construction</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command><![CDATA[
+        ## NOTE(review): the token below is defined in macros.xml (not shown here); it presumably sets the shell variable "reffa" that is passed as the reference further down - confirm
+        @PREPARE_FASTA_IDX@
+
+        ## Symlink the input dataset (BAM or SAM) to a fixed file name for the fgbio call
+        ln -s '$input' input.bam &&
+
+        ## Each optional numeric parameter is passed only when the user left a value in the field.
+        ## The two boolean parameters expand to their flag text when checked and to an empty string otherwise.
+        fgbio FindSwitchbackReads
+            --input input.bam
+            --output output.bam
+            --ref \$reffa
+            --sort-order Coordinate
+            #if str($max_offset) != '':
+                --max-offset $max_offset
+            #end if
+            #if str($max_gap) != '':
+                --max-gap $max_gap
+            #end if
+            #if str($min_length) != '':
+                --min-length $min_length
+            #end if
+            #if str($max_error_rate) != '':
+                --max-error-rate $max_error_rate
+            #end if
+            $dont_unmap
+            $metrics
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="bam,sam" label="Input BAM/SAM file"/>
+        <expand macro="mandatory_reference" argument="--ref" help="Reference genome fasta file"/>
+        <param argument="--max-offset" type="integer" optional="true" min="0" value="35" label="Maximum offset" help="Maximum offset between the ends of the two segments of the read on the reference. Set to 0 to disable read-based checks."/>
+        <param argument="--max-gap" type="integer" optional="true" min="0" value="500" label="Maximum gap" help="Maximum gap between R1 and R2 of tandem reads to call a template a switchback. Set to 0 to disable tandem-based checks."/>
+        <param argument="--min-length" type="integer" optional="true" min="1" value="6" label="Minimum match length" help="Minimum match length of the switched back segment."/>
+        <param argument="--max-error-rate" type="float" optional="true" min="0.0" max="1.0" value="0.1" label="Maximum error rate" help="Maximum mismatch error rate of switchback match to genome."/>
+        <param argument="--dont-unmap" type="boolean" truevalue="--dont-unmap" falsevalue="" checked="false" label="Don't unmap reads" help="If true, do NOT unmap reads from switchback templates."/>
+        <param argument="--metrics" type="boolean" truevalue="--metrics metrics" falsevalue="" checked="false" label="Output metrics" help="Generate metrics files."/>
+    </inputs>
+    <outputs>
+        <data name="output" format="bam" from_work_dir="output.bam" label="${tool.name} on ${on_string}: output"/>
+        <!-- The metrics list and the plots PDF are only created when the "Output metrics" checkbox is set (see the filters); text files named metrics.NAME.txt in the working directory become list elements called NAME. -->
+        <collection name="metrics_collection" type="list" label="${tool.name} on ${on_string}: metrics">
+            <filter>metrics</filter>
+            <discover_datasets pattern="metrics\.(?P&lt;designation&gt;.+)\.txt" format="txt"/>
+        </collection>
+        <data name="metrics_plots" format="pdf" from_work_dir="metrics.plots.pdf" label="${tool.name} on ${on_string}: plots">
+            <filter>metrics</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Default parameter values with a reference taken from the history -->
+        <test expect_num_outputs="1">
+            <param name="input" value="test1_in.bam"/>
+            <conditional name="addref_cond">
+                <param name="addref_select" value="history"/>
+                <param name="ref" value="test1.fa"/>
+            </conditional>
+            <output name="output" file="test1_out.bam"/>
+        </test>
+        <!-- Same input with max_gap set to 0, which the parameter help describes as disabling tandem-based checks -->
+        <test expect_num_outputs="1">
+            <param name="input" value="test1_in.bam"/>
+            <conditional name="addref_cond">
+                <param name="addref_select" value="history"/>
+                <param name="ref" value="test1.fa"/>
+            </conditional>
+            <param name="max_gap" value="0"/>
+            <output name="output" file="test2_out.bam"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+FindSwitchbackReads finds reads where a template switch occurred during library construction. Some library construction methods, notably ultra-low-input shotgun methods, are prone to template switching events that create molecules (templates, inserts) that, instead of being a linear copy of a segment of genomic DNA, are chimeras formed by starting on one strand of DNA and then switching to the opposite strand.
+
+**Algorithm**
+
+Templates that contain strand switch events (switch-backs) are found by this tool in two different ways:
+
+1. By looking at reads that contain soft-clipped bases at their 5' end that, when reverse complemented, match the genome proximal to the 5'-most mapped base of the read (read-based switchbacks).
+
+2. By identifying templates with 'FF' or 'RR' (aka tandem) orientation where it is surmised that the template switch occurred in the un-sequenced region of the template between R1 and R2 (tandem-based switchbacks).
+
+**Input and Output**
+
+The tool takes as input a SAM or BAM file and produces a filtered BAM file where switchback templates are identified and optionally unmapped. All reads from a switch-back template are tagged with an 'sb' tag that describes the nature of the switchback.
+
+**Parameters**
+
+- **Maximum offset**: Controls how far away to search for the reverse-complemented sequence
+- **Maximum gap**: Maximum gap between R1 and R2 of tandem reads to call a template a switchback
+- **Minimum match length**: Minimum number of soft-clipped bases that must exist to trigger the search
+- **Maximum error rate**: Allows for some mismatches between the soft-clipped sequence and the genome
+- **Don't unmap reads**: By default, switchback reads are made unmapped. This option prevents that behavior
+- **Output metrics**: Generate summary metrics, length distributions, and plots
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
