comparison mash_sketch.xml @ 0:1aab9cf41bd0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit f4e45447b1a8f2d99f3feda11a7c7d2bc7f84a57
author iuc
date Wed, 24 Sep 2025 13:46:33 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1aab9cf41bd0
1 <tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@">
2 <description>Create a reduced sequence representation based on min-hashes</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xref"/>
7 <expand macro="requirements" />
8 <expand macro="version_command" />
9 <command detect_errors="exit_code"><![CDATA[
10
## Step 1: stage the inputs. Every selected dataset is symlinked into the job
## working directory under its element identifier, and that link name (not the
## Galaxy dataset path) is what gets passed to mash further down.
11 #if str ( $reads_assembly.reads_assembly_selector ) == "reads":
12 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired":
13 ln -s '$reads_assembly.reads_input.reads_1' "${reads_assembly.reads_input.reads_1.element_identifier}" &&
## The reverse mate must be linked under its own identifier. Linking it under
## the forward identifier makes this second ln hit an already existing name.
## NOTE(review): two datasets that share the same identifier would still
## collide here; confirm whether that case needs handling.
14 ln -s '$reads_assembly.reads_input.reads_2' "${reads_assembly.reads_input.reads_2.element_identifier}" &&
15 #end if
16 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
17 ln -s '$reads_assembly.reads_input.reads.forward' "${reads_assembly.reads_input.reads.forward.element_identifier}" &&
18 ln -s '$reads_assembly.reads_input.reads.reverse' "${reads_assembly.reads_input.reads.reverse.element_identifier}" &&
19 #end if
20 #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
21 ln -s '$reads_assembly.reads_input.reads' "${reads_assembly.reads_input.reads.element_identifier}" &&
22 #end if
23 #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly":
24 ln -s '${reads_assembly.assembly}' "${reads_assembly.assembly.element_identifier}" &&
25 #end if
26
## Step 2: build the mash sketch call. The options -s, -k and -w are always
## set; the remaining options depend on the selected input mode.
27 mash sketch
28 -s '${sketch_size}'
29 -k '${kmer_size}'
30 -w '${prob_threshold}'
31 #if str( $reads_assembly.reads_assembly_selector ) == "reads":
## Reads mode: -m and -r are always passed, -c and -g only when the optional
## parameters have a value.
32 -m '${reads_assembly.minimum_kmer_copies}'
33 -r
34 #if $reads_assembly.target_coverage :
35 -c '${reads_assembly.target_coverage}'
36 #end if
37 #if $reads_assembly.genome_size :
38 -g '${reads_assembly.genome_size}'
39 #end if
40 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" :
## Both mates are passed, using the same link names created in step 1.
41 "${reads_assembly.reads_input.reads_1.element_identifier}" "${reads_assembly.reads_input.reads_2.element_identifier}"
42 #end if
43 #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection":
44 "${reads_assembly.reads_input.reads.forward.element_identifier}" "${reads_assembly.reads_input.reads.reverse.element_identifier}"
45 #end if
46 #if str( $reads_assembly.reads_input.reads_input_selector ) == "single":
47 "${reads_assembly.reads_input.reads.element_identifier}"
48 #end if
49 #elif str( $reads_assembly.reads_assembly_selector ) == "assembly":
## Assembly mode: the thread count comes from the GALAXY_SLOTS shell variable
## (falling back to 1). The backslash keeps the template engine from expanding
## it, so the shell resolves it at run time.
50 -p \${GALAXY_SLOTS:-1}
51 ${reads_assembly.individual_sequences}
52 "${reads_assembly.assembly.element_identifier}"
53 #end if
## Output prefix; the outputs section collects the result from sketch.msh.
54 -o 'sketch'
55 ]]></command>
<!-- Tool form. The outer conditional switches between read and assembly
     input; the three sketch parameters at the end apply to both modes.
     Attributes are written in a uniform order: name, type, then the rest. -->
56 <inputs>
57 <conditional name="reads_assembly">
58 <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
59 <option value="reads" selected="True">Reads</option>
60 <option value="assembly">Assembly</option>
61 </param>
<!-- Reads mode: a nested conditional picks paired datasets, a single dataset
     or a paired collection, followed by the read-specific options. -->
62 <when value="reads">
63 <conditional name="reads_input">
64 <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
65 <option value="paired">Paired</option>
66 <option value="single">Single</option>
67 <option value="paired_collection">Paired Collection</option>
68 </param>
69 <when value="paired">
70 <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
71 <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
72 </when>
73 <when value="single">
74 <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
75 </when>
76 <when value="paired_collection">
77 <param name="reads" type="data_collection" collection_type="paired" format="@INTYPES@" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
78 </when>
79 </conditional>
<!-- Read-specific options. The two optional integers have an empty default
     and are only added to the command line when set. -->
80 <param name="minimum_kmer_copies" type="integer" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
81 <param name="target_coverage" type="integer" argument="-c" optional="true" value="" min="0" max="500" label="Target coverage" help="If specified, sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity)"/>
82 <param name="genome_size" type="integer" argument="-g" optional="true" value="" min="1000" max="100000000000" label="Genome size" help="If specified, will be used for p-value calculation instead of an estimated size from k-mer content"/>
83 </when>
<!-- Assembly mode: one FASTA dataset plus the per-sequence sketching switch. -->
84 <when value="assembly">
85 <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
86 <param name="individual_sequences" type="boolean" truevalue="-i" falsevalue="" label="Sketch individual Sequences" help="Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons"/>
87 </when>
88 </conditional>
<!-- Sketch parameters shared by both input modes. -->
89 <param name="sketch_size" type="integer" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes"/>
90 <param name="kmer_size" type="integer" argument="-k" value="21" min="1" max="32" label="kmer size"/>
91 <param name="prob_threshold" type="float" argument="-w" value="0.01" min="0" max="1" label="Probability threshold for warning about low k-mer size"/>
92 </inputs>
<!-- Single output dataset in msh format. The command passes 'sketch' to the
     -o option and the result is picked up from sketch.msh in the working
     directory. -->
93 <outputs>
94 <data name="sketch" format="msh" from_work_dir="sketch.msh"/>
95 </outputs>
<!-- Test cases. Every case expects exactly one output and checks it against a
     reference file with compare="sim_size".
     NOTE(review): none of the cases below selects the "paired" or
     "paired_collection" read input, so those command branches are untested. -->
96 <tests>
<!-- Test 1: single-end reads, all other parameters at their defaults. -->
97 <test expect_num_outputs="1">
98 <conditional name="reads_assembly">
99 <param name="reads_assembly_selector" value="reads"/>
100 <conditional name="reads_input">
101 <param name="reads_input_selector" value="single"/>
102 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
103 </conditional>
104 </conditional>
105 <output name="sketch" file="test_01_mash_sketch.msh" compare="sim_size" />
106 </test>
<!-- Test 2: single-end reads with minimum_kmer_copies set to 10. -->
107 <test expect_num_outputs="1">
108 <conditional name="reads_assembly">
109 <param name="reads_assembly_selector" value="reads"/>
110 <conditional name="reads_input">
111 <param name="reads_input_selector" value="single"/>
112 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
113 </conditional>
114 <param name="minimum_kmer_copies" value="10"/>
115 </conditional>
116 <output name="sketch" file="test_02_mash_sketch.msh" compare="sim_size" />
117 </test>
<!-- Test 3: single-end reads with the optional target_coverage set to 1. -->
118 <test expect_num_outputs="1">
119 <conditional name="reads_assembly">
120 <param name="reads_assembly_selector" value="reads"/>
121 <conditional name="reads_input">
122 <param name="reads_input_selector" value="single"/>
123 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
124 </conditional>
125 <param name="target_coverage" value="1"/>
126 </conditional>
127 <output name="sketch" file="test_03_mash_sketch.msh" compare="sim_size" />
128 </test>
<!-- Test 4: single-end reads with the optional genome_size set to 1000. -->
129 <test expect_num_outputs="1">
130 <conditional name="reads_assembly">
131 <param name="reads_assembly_selector" value="reads"/>
132 <conditional name="reads_input">
133 <param name="reads_input_selector" value="single"/>
134 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
135 </conditional>
136 <param name="genome_size" value="1000"/>
137 </conditional>
138 <output name="sketch" file="test_04_mash_sketch.msh" compare="sim_size" />
139 </test>
<!-- Test 5: single-end reads with sketch_size lowered to 500. -->
140 <test expect_num_outputs="1">
141 <conditional name="reads_assembly">
142 <param name="reads_assembly_selector" value="reads"/>
143 <conditional name="reads_input">
144 <param name="reads_input_selector" value="single"/>
145 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
146 </conditional>
147 </conditional>
148 <param name="sketch_size" value="500"/>
149 <output name="sketch" file="test_05_mash_sketch.msh" compare="sim_size" />
150 </test>
<!-- Test 6: single-end reads with kmer_size set to 17. -->
151 <test expect_num_outputs="1">
152 <conditional name="reads_assembly">
153 <param name="reads_assembly_selector" value="reads"/>
154 <conditional name="reads_input">
155 <param name="reads_input_selector" value="single"/>
156 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
157 </conditional>
158 </conditional>
159 <param name="kmer_size" value="17"/>
160 <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
161 </test>
<!-- Test 7: single-end reads with prob_threshold set to 0.1.
     NOTE(review): this case compares against test_06_mash_sketch.msh, the
     same reference file as the kmer_size case above; confirm this is
     intended. -->
162 <test expect_num_outputs="1">
163 <conditional name="reads_assembly">
164 <param name="reads_assembly_selector" value="reads"/>
165 <conditional name="reads_input">
166 <param name="reads_input_selector" value="single"/>
167 <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
168 </conditional>
169 </conditional>
170 <param name="prob_threshold" value="0.1"/>
171 <output name="sketch" file="test_06_mash_sketch.msh" compare="sim_size" />
172 </test>
<!-- Test 8: assembly input with default parameters. -->
173 <test expect_num_outputs="1">
174 <conditional name="reads_assembly">
175 <param name="reads_assembly_selector" value="assembly"/>
176 <param name="assembly" value="test_assembly.fasta"/>
177 </conditional>
178 <output name="sketch" file="test_07_mash_sketch.msh" compare="sim_size" />
179 </test>
180 </tests>
181 <help><![CDATA[
182
183 **What it does**
184
185 Create a sketch file, which is a reduced representation of a sequence or set
186 of sequences (based on min-hashes) that can be used for fast distance
187 estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can
188 be given to read from standard input. Input files can also be files of file
189 names (see -l). For output, one sketch file will be generated, but it can have
190 multiple sketches within it, divided by sequences or files (see -i). By
191 default, the output file name will be the first input file with a '.msh'
192 extension, or 'stdin.msh' if standard input is used (see -o).
193 ]]></help>
194 <expand macro="citations"/>
195 </tool>