Mercurial > repos > iuc > longdust

diff longdust.xml @ 0:54a33dfe0187 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/longdust/ commit 4f91febe7ba34b5ba5ae14368508cc09e5339c4b
author: iuc
date: Wed, 01 Oct 2025 12:22:23 +0000
children: 9607b6eccee4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/longdust.xml	Wed Oct 01 12:22:23 2025 +0000
@@ -0,0 +1,79 @@
+<tool id="longdust" name="longdust" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>Detect low-complexity regions in long sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!--
+        Runs longdust on the selected dataset and redirects its standard output
+        into the Galaxy output dataset. The numeric options (-k, -w, -t, -e) are
+        always passed explicitly; $f and $a expand to "-f" / "-a" when the
+        matching checkbox is ticked and to an empty string otherwise (see the
+        truevalue / falsevalue attributes of those params).
+        Job failure is decided by the exit code (detect_errors="exit_code").
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        longdust '$input'
+        -k $k
+        -w $w
+        -t $t
+        -e $e
+        $f
+        $a
+        > '$output'
+    ]]></command>
+    <inputs>
+        <!--
+            Sequence input. fasta.gz is listed so gzip-compressed FASTA datasets
+            (like the .fna.gz file used by this tool's own test) can be selected
+            directly. NOTE(review): fastq.gz is kept from the previous format list
+            for backward compatibility; confirm it is intended, since the label
+            only mentions FASTA.
+        -->
+        <param name="input" type="data" format="fasta,fasta.gz,fastq.gz" label="Input FASTA file"/>
+        <!-- Sizes and lengths get a lower bound so nonsensical values are rejected in the form instead of failing at run time. -->
+        <param argument="-k" type="integer" min="1" label="k-mer length" value="7" help="k-mer length"/>
+        <param argument="-w" type="integer" min="1" label="Window size" value="5000" help="Window size"/>
+        <param argument="-t" type="float" label="Score threshold" value="0.6" help="Score threshold"/>
+        <!-- 0 is a valid value here: it disables the extension step. -->
+        <param argument="-e" type="integer" min="0" label="Extension X-drop length" value="50" help="Extension X-drop length (0 to disable)"/>
+        <!-- Boolean flags: emitted on the command line only when checked. -->
+        <param argument="-f" type="boolean" label="Forward strand only" truevalue="-f" falsevalue="" checked="false" help="Limit analysis to forward strand only" />
+        <param argument="-a" type="boolean" label="Enable Guaranteed O(Lw) Algorithm" truevalue="-a" falsevalue="" checked="false" help="Use the guaranteed O(Lw) algorithm with increased approximation for faster runtime on large genomes. This mode evaluates only the smallest candidate start per position, reducing runtime to a strict O(Lw) but may miss ~5-10% of low-complexity regions compared to the default."/>
+    </inputs>
+    <!-- Single BED dataset; the command redirects longdust's standard output into it. -->
+    <outputs>
+        <data format="bed" name="output"/>
+    </outputs>
+    <tests>
+        <!--
+            One run with non-default settings (k=6, w=1000, t=0.55, extension
+            disabled, both flags off) on a remotely hosted .fna.gz file.
+            The result must be a 3-column BED file with exactly 7426 lines.
+        -->
+        <test expect_num_outputs="1">
+            <param name="input" location="https://zenodo.org/records/17226147/files/GCF_000146045.2_R64_genomic.fna.gz"/>
+            <param name="a" value="false"/>
+            <param name="f" value="false"/>
+            <param name="e" value="0"/>
+            <param name="t" value="0.55"/>
+            <param name="w" value="1000"/>
+            <param name="k" value="6"/>
+            <output ftype="bed" name="output">
+                <assert_contents>
+                    <has_n_lines n="7426"/>
+                    <has_n_columns n="3"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+*longdust* detects low-complexity (dusty) regions in long DNA sequences. It scans input FASTA sequences using k-mer statistics and reports regions that fall below a complexity threshold.  
+These regions are often repetitive or homopolymeric stretches that may interfere with sequence analysis, alignment, or downstream bioinformatics pipelines.  
+
+The method is tunable via parameters for k-mer size, window size, score threshold, and extension length, allowing you to control how strict or relaxed the detection should be.
+
+
+**Input**
+
+- A FASTA file containing DNA sequences (typically long reads or assembled contigs).
+- Optional parameters to configure detection:
+  
+  - **-k** : k-mer length (default 7)
+  - **-w** : window size (default 5000)
+  - **-t** : score threshold (default 0.6)
+  - **-e** : extension X-drop length, 0 disables extension (default 50)
+  - **-f** : forward strand only (optional flag)
+  - **-a** : approximate O(Lw) algorithm (optional flag)
+
+  * For performance, keep w < 4^k, especially when w is large
+  * Use "-k6 -w1000 -t.55" for more relaxed but shorter regions
+
+**Output**
+
+- A BED file listing detected low-complexity regions
+
+    ]]></help>
+    <expand macro="citations"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file
author	iuc
date	Wed, 01 Oct 2025 12:22:23 +0000
parents
children	9607b6eccee4