comparison kmindex_query.xml @ 1:5ff85ac22974 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/kmindex commit 6e165c3acaf1cda32e0c51a72b89eca059a93e3a
author iuc
date Wed, 11 Mar 2026 11:36:47 +0000
parents 58820a4096c1
children
comparison
equal deleted inserted replaced
0:58820a4096c1 1:5ff85ac22974
1 <tool id="kmindex_query" name="kmindex query" version="@VERSION@" profile="@PROFILE@"> 1 <tool id="kmindex_query" name="kmindex query" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
2 <description>query k-mer index with sequencing data</description> 2 <description>query k-mer index with sequencing data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="edam_ontology"/> 6 <expand macro="edam_ontology"/>
7 <expand macro="xrefs"/> 7 <expand macro="xrefs"/>
8 <expand macro="requirements"/> 8 <expand macro="requirements"/>
9 <expand macro="version_command"/> 9 <expand macro="version_command"/>
10 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
11 ## Run kmindex query 11 ## Run kmindex query2
12 #import re 12 #import re
13 #set $identifier = str($fastx.element_identifier) 13 #set $identifier = str($fastx.element_identifier)
14 #set $safe_name = re.sub('[^\w\-\.]', '_', $identifier) 14 #set $safe_name = re.sub('[^\w\-\.]', '_', $identifier)
15 ## Add extension only if filename doesn't already have appropriate extension 15 ## Add extension only if filename doesn't already have appropriate extension
16 #if $fastx.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz')) 16 #if $fastx.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz'))
22 #elif $fastx.is_of_type('fastqsanger', 'fastq') and not ($safe_name.endswith('.fq') or $safe_name.endswith('.fastq')) 22 #elif $fastx.is_of_type('fastqsanger', 'fastq') and not ($safe_name.endswith('.fq') or $safe_name.endswith('.fastq'))
23 #set $safe_name = $safe_name + '.fq' 23 #set $safe_name = $safe_name + '.fq'
24 #end if 24 #end if
25 #if $db_opts.db_opts_selector == "histdb" 25 #if $db_opts.db_opts_selector == "histdb"
26 #set INDEX = $db_opts.histdb.extra_files_path 26 #set INDEX = $db_opts.histdb.extra_files_path
27 #else: 27 #else
28 #set INDEX = $db_opts.kmindex.fields.path 28 #set INDEX = $db_opts.kmindex.fields.path
29 #end if 29 #end if
30 ln -s '$fastx' '$safe_name' && 30 ln -s '$fastx' '$safe_name' &&
31 kmindex query 31 kmindex query2
32 --index '$INDEX' 32 --index '$INDEX'
33 --fastx '$safe_name' 33 --fastx '$safe_name'
34 --zvalue $zvalue 34 --zvalue $zvalue
35 --threshold $threshold 35 --threshold $threshold
36 --output query_output 36 --output query_output
37 --format $format 37 --format $format
38 #if $single_query:
39 --single-query '$single_query'
40 #end if
41
42 $fast 38 $fast
43 --aggregate
44 --threads "\${GALAXY_SLOTS:-1}" 39 --threads "\${GALAXY_SLOTS:-1}"
45 --verbose '$verbose' 40 --verbose '$verbose'
46 &&
47 ## Copy appropriate output based on format and aggregation
48 #if $format == 'matrix':
49 cp query_output/abundance_test.tsv '$output'
50 #else
51 cp query_output/all_results.json '$output' || cp query_output/*.json '$output'
52 #end if
53 ]]></command> 41 ]]></command>
54 <inputs> 42 <inputs>
55 <conditional name="db_opts"> 43 <conditional name="db_opts">
56 <param name="db_opts_selector" type="select" label="Kmindex source"> 44 <param name="db_opts_selector" type="select" label="Kmindex source">
57 <option value="histdb">From your history</option> 45 <option value="histdb">From your history</option>
58 <option value="db" selected="true">Locally installed kmindex indexes</option> 46 <option value="db" selected="true">Locally installed kmindex indexes</option>
59 </param> 47 </param>
60 <when value="histdb"> 48 <when value="histdb">
61 <param name="histdb" type="data" format="kmindex" optional="false" multiple="false" label="Kmindex" /> 49 <param name="histdb" type="data" format="kmindex" label="Kmindex" />
62 </when> 50 </when>
63 <when value="db"> 51 <when value="db">
64 <param name="kmindex" type="select" optional="false" multiple="false" label="kmindex"> 52 <param name="kmindex" type="select" label="kmindex">
65 <options from_data_table="kmindex"/> 53 <options from_data_table="kmindex"/>
66 </param> 54 </param>
67 </when> 55 </when>
68 </conditional> 56 </conditional>
69 <param argument="--fastx" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Query sequences" help="FASTA or FASTQ file to query (supports gzip/bzip2)"/> 57 <param argument="--fastx" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Query sequences" help="FASTA or FASTQ file to query (supports gzip/bzip2)"/>
70 <param argument="--zvalue" type="integer" value="0" min="0" label="Z-value" help="Index s-mers and query (s+z)-mers (0 = standard k-mer query)"/> 58 <param argument="--zvalue" type="integer" value="0" min="0" label="Z-value" help="Index s-mers and query (s+z)-mers (0 = standard k-mer query)"/>
71 <param argument="--threshold" type="float" value="0.0" min="0.0" max="1.0" label="Shared k-mers threshold" help="Minimum proportion of shared k-mers (0.0-1.0)"/> 59 <param argument="--threshold" type="float" value="0.0" min="0.0" max="1.0" label="Shared k-mers threshold" help="Minimum proportion of shared k-mers (0.0-1.0)"/>
72 <param argument="--format" type="select" label="Output format" help="Format of the output file"> 60 <param argument="--format" type="select" label="Output format" help="Format of the output file">
73 <option value="json" selected="true">JSON</option> 61 <option value="json" selected="true">JSON</option>
74 <option value="matrix">Matrix</option> 62 <option value="matrix">Matrix</option>
75 <option value="json_vec">JSON vector</option>
76 </param> 63 </param>
77 <param argument="--single-query" type="text" value="" optional="true" label="Single query identifier" help="Optional: treat all sequences as a single query with this identifier"/>
78 <param argument="--fast" type="boolean" truevalue="--fast" falsevalue="" checked="false" label="Fast mode" help="Keep more pages in cache for faster queries"/> 64 <param argument="--fast" type="boolean" truevalue="--fast" falsevalue="" checked="false" label="Fast mode" help="Keep more pages in cache for faster queries"/>
79 <expand macro="common_params"/> 65 <expand macro="common_params"/>
80 </inputs> 66 </inputs>
81 <outputs> 67 <outputs>
82 <data name="output" format="json" label="${tool.name} on ${on_string}: results"> 68 <collection name="output_matrix" type="list" label="${tool.name} on ${on_string}: results (matrix)">
83 <change_format> 69 <filter>format == 'matrix'</filter>
84 <when input="format" value="matrix" format="tsv"/> 70 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tsv$" directory="query_output" format="tabular"/>
85 </change_format> 71 </collection>
86 </data> 72 <collection name="output" type="list" label="${tool.name} on ${on_string}: results (json)">
73 <filter>format != 'matrix'</filter>
74 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.json$" directory="query_output" format="json"/>
75 </collection>
87 </outputs> 76 </outputs>
88 <tests> 77 <tests>
89 <!-- Test 1: Basic JSON query --> 78 <!-- Test 1: Basic JSON query -->
90 <test expect_num_outputs="1"> 79 <test expect_num_outputs="1">
91 <conditional name="db_opts"> 80 <conditional name="db_opts">
92 <param name="db_opts_selector" value="histdb"/> 81 <param name="db_opts_selector" value="histdb"/>
93 <param name="histdb" ftype="kmindex" class="Directory" value="index1" /> 82 <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
94 </conditional> 83 </conditional>
95 <param name="fastx" value="query1.fasta"/> 84 <param name="fastx" value="query1.fasta"/>
96 <param name="format" value="json"/> 85 <param name="format" value="json"/>
97 <output name="output" ftype="json" value="expected_query_t1.json" /> 86 <output_collection name="output" type="list" count="1">
87 <element name="abundance_test" ftype="json" value="expected_query_t1.json" />
88 </output_collection>
98 </test> 89 </test>
99 <!-- Test 2: Matrix output format --> 90 <!-- Test 2: Matrix output format -->
100 <test expect_num_outputs="1"> 91 <test expect_num_outputs="1">
101 <conditional name="db_opts"> 92 <conditional name="db_opts">
102 <param name="db_opts_selector" value="histdb"/> 93 <param name="db_opts_selector" value="histdb"/>
103 <param name="histdb" ftype="kmindex" class="Directory" value="index1" /> 94 <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
104 </conditional> 95 </conditional>
105 <param name="fastx" value="query1.fasta.gz"/> 96 <param name="fastx" value="query1.fasta.gz"/>
106 <param name="format" value="matrix"/> 97 <param name="format" value="matrix"/>
107 <output name="output" ftype="tsv"> 98 <output_collection name="output_matrix" type="list" count="1">
108 <assert_contents> 99 <element name="abundance_test" ftype="tabular" value="expected_query2_index1.tsv" />
109 <has_line_matching expression="abundance_test\tMySuperIndex"/> 100 </output_collection>
110 <has_line_matching expression="abundance_test:query_seq1\t5"/>
111 <has_line_matching expression="abundance_test:query_seq2\t6"/>
112 <has_line_matching expression="abundance_test:query_seq3\t2"/>
113 </assert_contents>
114 </output>
115 </test> 101 </test>
116 <!-- Test 3: Json vector output format --> 102 <!-- Test 3: Query with threshold and z-value -->
117 <test expect_num_outputs="1">
118 <conditional name="db_opts">
119 <param name="db_opts_selector" value="histdb"/>
120 <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
121 </conditional>
122 <param name="fastx" value="query2.fastq"/>
123 <param name="format" value="json_vec"/>
124 <output name="output" ftype="json" value="expected_query_t3.json" />
125 </test>
126 <!-- Test 4: Query with threshold and z-value -->
127 <test expect_num_outputs="1"> 103 <test expect_num_outputs="1">
128 <conditional name="db_opts"> 104 <conditional name="db_opts">
129 <param name="db_opts_selector" value="histdb"/> 105 <param name="db_opts_selector" value="histdb"/>
130 <param name="histdb" ftype="kmindex" class="Directory" value="index1" /> 106 <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
131 </conditional> 107 </conditional>
132 <param name="fastx" value="query2.fastq.gz"/> 108 <param name="fastx" value="query2.fastq.gz"/>
133 <param name="threshold" value="0.5"/> 109 <param name="threshold" value="0.5"/>
134 <param name="zvalue" value="5"/> 110 <param name="zvalue" value="5"/>
135 <param name="format" value="json"/> 111 <param name="format" value="json"/>
136 <output name="output" ftype="json" value="expected_query_t4.json" /> 112 <output_collection name="output" type="list" count="1">
113 <element name="abundance_test" ftype="json" value="expected_query_t4.json" />
114 </output_collection>
137 </test> 115 </test>
138 <!-- Test 5: query pre-built configured indices --> 116 <!-- Test 4: query pre-built configured index1 -->
139 <test expect_num_outputs="1"> 117 <test expect_num_outputs="1">
140 <conditional name="db_opts"> 118 <conditional name="db_opts">
141 <param name="db_opts_selector" value="db"/> 119 <param name="db_opts_selector" value="db"/>
142 <param name="kmindex" ftype="kmindex" value="index1" /> 120 <param name="kmindex" value="index1" />
143 </conditional> 121 </conditional>
144 <param name="fastx" value="query1.fasta"/> 122 <param name="fastx" value="query1.fasta"/>
145 <param name="format" value="json"/> 123 <param name="format" value="json"/>
146 <output name="output" ftype="json" value="expected_query_t1.json" /> 124 <output_collection name="output" type="list" count="1">
125 <element name="abundance_test" ftype="json" value="expected_query_t1.json" />
126 </output_collection>
147 </test> 127 </test>
148 <!-- Test 6: query pre-built configured indices --> 128 <!-- Test 5: query pre-built configured index2 -->
149 <test expect_num_outputs="1"> 129 <test expect_num_outputs="1">
150 <conditional name="db_opts"> 130 <conditional name="db_opts">
151 <param name="db_opts_selector" value="db"/> 131 <param name="db_opts_selector" value="db"/>
152 <param name="kmindex" ftype="kmindex" value="index2" /> 132 <param name="kmindex" value="index2" />
153 </conditional> 133 </conditional>
154 <param name="fastx" value="query1.fasta"/> 134 <param name="fastx" value="query1.fasta"/>
155 <param name="format" value="json"/> 135 <param name="format" value="json"/>
156 <output name="output" ftype="json" value="expected_query_t6.json" /> 136 <output_collection name="output" type="list" count="1">
137 <element name="test_index" ftype="json" value="expected_query_t6.json" />
138 </output_collection>
139 </test>
140 <!-- Test 6: using register index, JSON output -->
141 <test expect_num_outputs="1">
142 <conditional name="db_opts">
143 <param name="db_opts_selector" value="db"/>
144 <param name="kmindex" ftype="kmindex" value="register" />
145 </conditional>
146 <param name="fastx" value="query1.fasta"/>
147 <param name="format" value="json"/>
148 <output_collection name="output" type="list" count="2">
149 <element name="index1" ftype="json" value="expected_query2_index1.json" />
150 <element name="index2" ftype="json" value="expected_query2_index2.json" />
151 </output_collection>
152 </test>
153 <!-- Test 7: using register index, matrix output -->
154 <test expect_num_outputs="1">
155 <conditional name="db_opts">
156 <param name="db_opts_selector" value="db"/>
157 <param name="kmindex" ftype="kmindex" value="register" />
158 </conditional>
159 <param name="fastx" value="query1.fasta"/>
160 <param name="format" value="matrix"/>
161 <output_collection name="output_matrix" type="list" count="2">
162 <element name="index1" ftype="tabular" value="expected_query2_index1_register.tsv" />
163 <element name="index2" ftype="tabular" value="expected_query2_index2_register.tsv" />
164 </output_collection>
157 </test> 165 </test>
158 </tests> 166 </tests>
159 <help><![CDATA[ 167 <help><![CDATA[
160 **What it does** 168 **What it does**
161 169
162 kmindex query searches a pre-built k-mer index to find the percentage of shared k-mers between query sequences and indexed samples. 170 kmindex query2 searches a pre-built k-mer index to find the percentage of shared k-mers between query sequences and indexed samples.
163 171
164 **Input** 172 **Input**
165 173
166 - A k-mer index (created by kmindex build) 174 - A k-mer index (created by kmindex build) or kmindex register
167 - Query sequences in FASTA or FASTQ format (can be gzipped) 175 - Query sequences in FASTA or FASTQ format (can be gzipped)
168 176
169 **Output** 177 **Output**
170 178
171 The output format depends on your selection: 179 The output format depends on your selection:
172 180
173 - **JSON**: Detailed results in JSON format 181 - **JSON**: Detailed results in JSON format
174 - **Matrix**: Tab-separated matrix of query-sample similarities 182 - **Matrix**: Tab-separated matrix of query-sample similarities
175 - **JSON vector**: JSON vector format for downstream processing
176
177 183
178 **Parameters** 184 **Parameters**
179 185
180 - **Z-value**: Query with (k+z)-mers instead of k-mers to reduce false positives (Findere algorithm) 186 - **Z-value**: Query with (k+z)-mers instead of k-mers to reduce false positives (Findere algorithm)
181 - **Threshold**: Filter results to show only matches above this similarity threshold (0.0-1.0) 187 - **Threshold**: Filter results to show only matches above this similarity threshold (0.0-1.0)