comparison bmtagger.xml @ 0:55b963dc5f76 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/bmtagger commit e3d492d96b0ffe79370ca090b3f749b0869e8b60
author iuc
date Wed, 12 Nov 2025 12:03:11 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:55b963dc5f76
1 <tool id="bmtagger" name="bmtagger" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>remove contaminant reads</description>
<macros>
    <!-- Version bookkeeping: upstream release, wrapper revision and Galaxy profile. -->
    <token name="@TOOL_VERSION@">3.101</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">25.0</token>

    <!-- Shared content check: the inspected output must have exactly 2668 lines. -->
    <xml name="assert">
        <assert_contents>
            <has_n_lines n="2668"/>
        </assert_contents>
    </xml>
    <!-- One element of an output collection, checked with the shared line-count assertion.
         The decompress token defaults to "false"; callers may override it. -->
    <xml name="element_assert" tokens="name,ftype" token_decompress="false">
        <element name="@NAME@" ftype="@FTYPE@" decompress="@DECOMPRESS@">
            <expand macro="assert"/>
        </element>
    </xml>
</macros>
<!-- Cross-reference to the bio.tools registry entry. -->
<xrefs>
    <xref type="bio.tools">bmtagger</xref>
</xrefs>
<!-- The bmtagger package is requested at the version given by the @TOOL_VERSION@ token. -->
<requirements>
    <requirement type="package" version="@TOOL_VERSION@">bmtagger</requirement>
</requirements>
<!-- Reports the second space-separated field of the "version" line printed by bmtagger.sh -V (stderr discarded). -->
<version_command><![CDATA[bmtagger.sh -V 2> /dev/null | grep version | cut -d" " -f2]]></version_command>
<command detect_errors="exit_code"><![CDATA[
    set -eo pipefail;

    ## ------------------------------------------------------------------
    ## 1. Stage the reads under the fixed names "forward" / "reverse".
    ##    Gzipped inputs are decompressed, all others are symlinked.
    ##    Two flags are recorded for the later steps:
    ##      gz    - input was gzipped, so the result is gzipped again
    ##      fasta - input is FASTA (otherwise FASTQ)
    ## ------------------------------------------------------------------
    #set gz = False
    #set fasta = False
    #if $sequences.type == "single"
        #if $sequences.reads.ext.startswith("fasta")
            #set fasta = True
        #end if
        #if $sequences.reads.ext.endswith(".gz")
            gunzip -c '$sequences.reads' > forward &&
            #set gz = True
        #else
            ln -s '$sequences.reads' forward &&
        #end if
    #else
        ## paired data: the datatype of the forward mate decides for both mates
        #if $sequences.reads.forward.ext.startswith("fasta")
            #set fasta = True
        #end if
        #if $sequences.reads.forward.ext.endswith(".gz")
            gunzip -c '$sequences.reads.forward' > forward &&
            gunzip -c '$sequences.reads.reverse' > reverse &&
            #set gz = True
        #else
            ln -s '$sequences.reads.forward' forward &&
            ln -s '$sequences.reads.reverse' reverse &&
        #end if
    #end if

    ## ------------------------------------------------------------------
    ## 2. Prepare the reference: either the path prefix from the data
    ##    table, or indices built in the job directory from a history FASTA.
    ## ------------------------------------------------------------------
    #if $host.source == "cached"
        #set reference = $host.reference.fields.path
        ## srprism test data is too large (>100MB) to store at IUC
        ## hence we generate it on the fly for tool tests using the
        ## fasta file which we keep in the path referred by the
        ## data table (not needed otherwise)
        #if $test == "true"
            srprism mkindex -i '${host.reference.fields.path}.fa' -o reference.srprism &&
        #end if
    #else
        #if $host.sequence.ext == "fasta.gz"
            gunzip -c '$host.sequence' > reference.fa &&
        #else
            ln -s '$host.sequence' reference.fa &&
        #end if
        ## bmtool creates multi GB file if used with default parameters
        ## -> use much smaller word size for testing
        bmtool -d reference.fa -o reference.bitmask -w #if $test != "" then 10 else 18 # &&
        srprism mkindex -i reference.fa -o reference.srprism &&
        makeblastdb -in reference.fa -dbtype nucl &&
        #set reference = "reference"
    #end if

    ## ------------------------------------------------------------------
    ## 3. Run bmtagger; the resulting read ids are written to host_ids.
    ##    -q tells bmtagger whether the input is FASTA (0) or FASTQ (1).
    ## ------------------------------------------------------------------
    bmtagger.sh
        -q #if $fasta then 0 else 1#
        -1 forward
        #if $sequences.type == "paired"
            -2 reverse
        #end if
        -b '${reference}.bitmask'
        ## cached reference in test mode uses the srprism index built above in the job directory
        #if $test == "" or $host.source != "cached"
            -x '${reference}.srprism'
        #else
            -x reference.srprism
        #end if
        -d '${reference}'
        -o host_ids
    &&

    ## ------------------------------------------------------------------
    ## 4. Extract the reads listed in host_ids from the staged input(s).
    ##    The format switch follows the input format, so that FASTA reads
    ##    are no longer parsed as FASTQ.
    ## ------------------------------------------------------------------
    extract_fullseq host_ids -keep #if $fasta then "-fasta" else "-fastq"#
        #if $sequences.type == "single"
            -single
        #else
            -mate1
        #end if
        'forward'
        #if $gz
            | gzip -c
        #end if
        #if $sequences.type == "single"
            > '$out_single'
        #else
            > '$out_pair.forward'
            &&
            extract_fullseq host_ids -keep #if $fasta then "-fasta" else "-fastq"# -mate2 'reverse'
            #if $gz
                | gzip -c
            #end if
            > '$out_pair.reverse'
        #end if
]]></command>
<inputs>
    <!-- Reads to be filtered: a single dataset or a paired collection. -->
    <conditional name="sequences">
        <param name="type" type="select" label="Sequence type">
            <option value="single">Single end data</option>
            <option value="paired">Paired end data</option>
        </param>
        <when value="single">
            <param name="reads"
                   type="data"
                   format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz"
                   label="Single end reads"/>
        </when>
        <when value="paired">
            <param name="reads"
                   type="data_collection"
                   collection_type="paired"
                   format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz"
                   label="Paired end reads"/>
        </when>
    </conditional>

    <!-- Reference: an entry of the bmtagger data table or a FASTA dataset from the history. -->
    <conditional name="host">
        <param name="source" type="select" label="Host data source">
            <option value="cached">Precomputed indices</option>
            <option value="history">Sequence from History</option>
        </param>
        <when value="cached">
            <param name="reference" type="select" label="Reference">
                <options from_data_table="bmtagger">
                    <filter type="sort_by" column="2"/>
                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                </options>
            </param>
        </when>
        <when value="history">
            <param name="sequence"
                   type="data"
                   format="fasta,fasta.gz"
                   label="Host sequence"
                   help="nucleotide sequence"/>
        </when>
    </conditional>

    <!-- Hidden switch read by the command template; the tool tests in this file set it to "true". -->
    <param name="test" type="hidden"/>
</inputs>
<outputs>
    <!-- Single-end result: only created for single-end input, datatype taken from the reads input. -->
    <data name="out_single" format_source="reads" label="${tool.name} on ${on_string}">
        <filter>sequences["type"] == "single"</filter>
    </data>
    <!-- Paired result: only created for paired input, one dataset per mate. -->
    <collection name="out_pair" type="paired" label="${tool.name} on ${on_string}: pairs">
        <data name="forward" format_source="reads"/>
        <data name="reverse" format_source="reads"/>
        <filter>sequences["type"] == "paired"</filter>
    </collection>
</outputs>
<tests>
    <!-- All tests set the hidden "test" parameter, which the command template reads
         to use a small bmtool word size and to build the srprism index on the fly.
         Tests without a "host" conditional run with the default (cached) reference source.
         Paired inputs are staged as "paired" collections, matching the collection_type
         declared on the "reads" input. -->

    <!-- single input, cached reference -->
    <test expect_num_outputs="1">
        <conditional name="sequences">
            <param name="type" value="single"/>
            <param name="reads" value="host_and_contaminant.fq1.fq" ftype="fastqsanger"/>
        </conditional>
        <param name="test" value="true"/>
        <output name="out_single" ftype="fastqsanger">
            <expand macro="assert"/>
        </output>
    </test>

    <!-- paired input, cached reference -->
    <test expect_num_outputs="3">
        <conditional name="sequences">
            <param name="type" value="paired"/>
            <param name="reads">
                <collection type="paired" name="reads">
                    <element name="forward" value="host_and_contaminant.fq1.fq" ftype="fastqsanger"/>
                    <element name="reverse" value="host_and_contaminant.fq2.fq" ftype="fastqsanger"/>
                </collection>
            </param>
        </conditional>
        <param name="test" value="true"/>
        <output_collection name="out_pair" count="2">
            <expand macro="element_assert" name="forward" ftype="fastqsanger"/>
            <expand macro="element_assert" name="reverse" ftype="fastqsanger"/>
        </output_collection>
    </test>

    <!-- paired gz input, cached reference -->
    <test expect_num_outputs="3">
        <conditional name="sequences">
            <param name="type" value="paired"/>
            <param name="reads">
                <collection type="paired" name="reads">
                    <element name="forward" value="host_and_contaminant.fq1.fq.gz" ftype="fastqsanger.gz"/>
                    <element name="reverse" value="host_and_contaminant.fq2.fq.gz" ftype="fastqsanger.gz"/>
                </collection>
            </param>
        </conditional>
        <param name="test" value="true"/>
        <output_collection name="out_pair" count="2">
            <expand macro="element_assert" name="forward" ftype="fastqsanger.gz" decompress="true"/>
            <expand macro="element_assert" name="reverse" ftype="fastqsanger.gz" decompress="true"/>
        </output_collection>
    </test>

    <!-- single gz input, fasta reference from the history -->
    <test expect_num_outputs="1">
        <conditional name="sequences">
            <param name="type" value="single"/>
            <param name="reads" value="host_and_contaminant.fq1.fq.gz" ftype="fastqsanger.gz"/>
        </conditional>
        <conditional name="host">
            <param name="source" value="history"/>
            <param name="sequence" value="host.fa" ftype="fasta"/>
        </conditional>
        <param name="test" value="true"/>
        <output name="out_single" ftype="fastqsanger.gz" decompress="true">
            <expand macro="assert"/>
        </output>
    </test>

    <!-- paired gz input, gzipped fasta reference from the history -->
    <test expect_num_outputs="3">
        <conditional name="sequences">
            <param name="type" value="paired"/>
            <param name="reads">
                <collection type="paired" name="reads">
                    <element name="forward" value="host_and_contaminant.fq1.fq.gz" ftype="fastqsanger.gz"/>
                    <element name="reverse" value="host_and_contaminant.fq2.fq.gz" ftype="fastqsanger.gz"/>
                </collection>
            </param>
        </conditional>
        <conditional name="host">
            <param name="source" value="history"/>
            <param name="sequence" value="host.fa.gz" ftype="fasta.gz"/>
        </conditional>
        <param name="test" value="true"/>
        <output_collection name="out_pair" count="2">
            <expand macro="element_assert" name="forward" ftype="fastqsanger.gz" decompress="true"/>
            <expand macro="element_assert" name="reverse" ftype="fastqsanger.gz" decompress="true"/>
        </output_collection>
    </test>
</tests>
<help><![CDATA[

.. class:: infomark

**What it does**

Filter contaminant sequences from input FASTA or FASTQ sequences.

This is done by iteratively applying

- bmfilter
- srprism
- blastn (megablast)

Usage
.....

**Input**

FASTA/FASTQ sequences and a reference database.

**Output**

FASTA/FASTQ sequences

]]></help>
<!-- Reference to cite for BMTagger, given as a BibTeX entry. -->
<citations>
    <citation type="bibtex">@article{rotmistrovsky2011bmtagger,
        title={BMTagger: Best Match Tagger for removing human reads from metagenomics datasets},
        author={Rotmistrovsky, Kirill and Agarwala, Richa},
        journal={NCBI/NLM, National Institutes of Health},
        year={2011}
    }</citation>
</citations>
277 </tool>