comparison longdust.xml @ 0:54a33dfe0187 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/longdust/ commit 4f91febe7ba34b5ba5ae14368508cc09e5339c4b
author iuc
date Wed, 01 Oct 2025 12:22:23 +0000
parents
children 9607b6eccee4
comparison
equal deleted inserted replaced
-1:000000000000 0:54a33dfe0187
<tool id="longdust" name="longdust" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
    <description>Detect low-complexity regions in long sequences</description>
    <!-- The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens, as well as the
         "requirements", "citations" and "creator" macros expanded below, are not
         defined in this file; they are expected to be provided by macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- Runs longdust on the selected dataset. The four numeric options are always
         passed explicitly; $f and $a expand to "-f" / "-a" when checked and to an
         empty string otherwise. Standard output is redirected into the output
         dataset. A non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code"><![CDATA[
        longdust '$input'
            -k $k
            -w $w
            -t $t
            -e $e
            $f
            $a
            > '$output'
    ]]></command>
    <inputs>
        <!-- Plain and gzip-compressed FASTA/FASTQ are accepted; the functional test
             below feeds a gzip-compressed FASTA, so fasta.gz must be listed here. -->
        <param name="input" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Input FASTA/FASTQ file"/>
        <!-- Lower bounds keep nonsensical values (zero or negative lengths) off the command line. -->
        <param argument="-k" type="integer" min="1" label="k-mer length" value="7" help="k-mer length"/>
        <param argument="-w" type="integer" min="1" label="Window size" value="5000" help="Window size. w &lt; 4^k is recommended for performance, especially with a large w"/>
        <param argument="-t" type="float" label="Score threshold" value="0.6" help="Score threshold"/>
        <param argument="-e" type="integer" min="0" label="Extension X-drop length" value="50" help="Extension X-drop length (0 to disable)"/>
        <param argument="-f" type="boolean" label="Forward strand only" truevalue="-f" falsevalue="" checked="false" help="Limit analysis to forward strand only"/>
        <param argument="-a" type="boolean" label="Enable Guaranteed O(Lw) Algorithm" truevalue="-a" falsevalue="" checked="false" help="Use the guaranteed O(Lw) algorithm with increased approximation for faster runtime on large genomes. This mode evaluates only the smallest candidate start per position, reducing runtime to a strict O(Lw) but may miss ~5-10% of low-complexity regions compared to the default."/>
    </inputs>
    <outputs>
        <data name="output" format="bed"/>
    </outputs>
    <tests>
        <!-- Runs with the relaxed settings suggested in the help (-k6 -w1000 -t.55),
             X-drop extension disabled, both strands, default algorithm. The input is
             fetched from Zenodo; its datatype is stated explicitly so the test does
             not rely on sniffing the remote file. -->
        <test expect_num_outputs="1">
            <param name="input" location="https://zenodo.org/records/17226147/files/GCF_000146045.2_R64_genomic.fna.gz" ftype="fasta.gz"/>
            <param name="k" value="6"/>
            <param name="w" value="1000"/>
            <param name="t" value="0.55"/>
            <param name="e" value="0"/>
            <param name="f" value="false"/>
            <param name="a" value="false"/>
            <output name="output" ftype="bed">
                <assert_contents>
                    <has_n_columns n="3"/>
                    <has_n_lines n="7426"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What it does**

*longdust* detects low-complexity (dusty) regions in long DNA sequences. It scans the input sequences using k-mer statistics and reports regions that fall below a complexity threshold.
These regions are often repetitive or homopolymeric stretches that may interfere with sequence analysis, alignment, or downstream bioinformatics pipelines.

The method is tunable via parameters for k-mer size, window size, score threshold, and extension length, allowing you to control how strict or relaxed the detection should be.


**Input**

- A FASTA or FASTQ file (optionally gzip-compressed) containing DNA sequences (typically long reads or assembled contigs).
- Optional parameters to configure detection:

  - **-k** : k-mer length (default 7)
  - **-w** : window size (default 5000)
  - **-t** : score threshold (default 0.6)
  - **-e** : extension X-drop length, 0 disables extension (default 50)
  - **-f** : forward strand only (optional flag)
  - **-a** : approximate O(Lw) algorithm (optional flag)

* Recommend w < 4^k for performance, especially given large w
* Use "-k6 -w1000 -t.55" for more relaxed but shorter regions

**Output**

- A BED file listing detected low-complexity regions

    ]]></help>
    <expand macro="citations"/>
    <expand macro="creator"/>
</tool>