comparison bakta.xml @ 9:8c7499c6f32a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit 5e03f5cfb2216af80ad3ca499504ebdb92ade2c5
author pimarin
date Mon, 23 Jan 2023 16:29:03 +0000
parents c75629db91ff
children ed4bc0577be6
comparison
equal deleted inserted replaced
8:c75629db91ff 9:8c7499c6f32a
1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description> 2 <description>
3 Genome annotation via alignment-free sequence identification 3 genome annotation via alignment-free sequence identification
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macro.xml</import> 6 <import>macro.xml</import>
7 </macros> 7 </macros>
8 <expand macro='edam'/> 8 <expand macro='edam'/>
9 <expand macro='xrefs'/> 9 <expand macro='xrefs'/>
10 <expand macro="requirements"/> 10 <expand macro="requirements"/>
11 <expand macro="version_command"/> 11 <expand macro="version_command"/>
12 12
13 <command detect_errors="aggressive"><![CDATA[ 13 <command detect_errors="aggressive"><![CDATA[
14 mkdir -p ./database_path/amrfinderplus-db && 14 mkdir ./database_path &&
15 ln -s '$(input_option.bakta_db_select.fields.path)'/* database_path && 15 ln -s '$(input_option.bakta_db_select.fields.path)/'* database_path &&
16 ln -s '$(input_option.amrfinder_db_select.fields.path)/' database_path/amrfinderplus-db/latest && 16 ln -s '$(input_option.amrfinder_db_select.fields.path)' database_path &&
17
17 bakta 18 bakta
18 #*====================================== 19 #*======================================
19 CPU option 20 CPU option
20 ======================================*# 21 ======================================*#
21 --threads \${GALAXY_SLOTS:-1} 22 --threads \${GALAXY_SLOTS:-1}
56 --prodigal-tf '$annotation.prodigal' 57 --prodigal-tf '$annotation.prodigal'
57 #end if 58 #end if
58 #if $annotation.translation_table 59 #if $annotation.translation_table
59 --translation-table '$annotation.translation_table' 60 --translation-table '$annotation.translation_table'
60 #end if 61 #end if
61 --gram '?' 62 #if $annotation.gram
63 --gram '$annotation.gram'
64 #end if
62 $annotation.keep_contig_headers 65 $annotation.keep_contig_headers
63 #if $annotation.replicons 66 #if $annotation.replicons
64 --replicons '$annotation.replicons' 67 --replicons '$annotation.replicons'
65 #end if 68 #end if
66 $annotation.compliant 69 $annotation.compliant
121 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/> 124 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
122 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> 125 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
123 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11"> 126 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
124 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> 127 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
125 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option> 128 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
129 </param>
130 <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
131 <option value="+">Gram+</option>
132 <option value="-">Gram-</option>
133 <option value="?" selected="true">Unknown</option>
126 </param> 134 </param>
127 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> 135 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
128 <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> 136 <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
129 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/> 137 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
130 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> 138 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
205 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter> 213 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
206 </data> 214 </data>
207 </outputs> 215 </outputs>
208 <tests> 216 <tests>
209 <test expect_num_outputs="13"> <!-- TEST_1 database + input --> 217 <test expect_num_outputs="13"> <!-- TEST_1 database + input -->
210 <section name="input_option" > 218 <section name="input_option" >
211 <param name="bakta_db_select" value="V0.1_2022-08-29"/> 219 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
212 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> 220 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
213 <param name="input_file" value="NC_002127.1.fna"/> 221 <param name="input_file" value="NC_002127.1.fna"/>
214 <param name="min_contig_length" value="250"/> 222 <param name="min_contig_length" value="250"/>
215 </section> 223 </section>
216 <section name="output_files"> 224 <section name="output_files">
217 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> 225 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/>
218 </section> 226 </section>
219 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/> 227 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/>
220 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/> 228 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/>
221 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/> 229 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/>
222 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/> 230 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/>
223 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/> 231 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/>
224 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/> 232 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/>
225 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/> 233 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/>
226 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/> 234 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/>
227 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/> 235 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
228 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/> 236 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/>
229 <output name="annotation_plot"> 237 <output name="annotation_plot">
230 <assert_contents> 238 <assert_contents>
231 <has_size value="418991" delta="1000"/> 239 <has_size value="418991" delta="1000"/>
232 </assert_contents> 240 </assert_contents>
233 </output> 241 </output>
234 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/> 242
235 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/> 243 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/>
236 </test> 244 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/>
237 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps --> 245 </test>
238 <section name="input_option" > 246 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
239 <param name="bakta_db_select" value="V0.1_2022-08-29"/> 247 <section name="input_option" >
240 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> 248 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
241 <param name="input_file" value="NC_002127.1.fna"/> 249 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
242 <param name="min_contig_length" value="250"/> 250 <param name="input_file" value="NC_002127.1.fna"/>
243 </section> 251 <param name="min_contig_length" value="250"/>
244 <section name="organism"> 252 </section>
245 <param name="genus" value="Escherichia"/> 253 <section name="organism">
246 <param name="species" value="coli O157:H7"/> 254 <param name="genus" value="Escherichia"/>
247 <param name="strain" value="Sakai"/> 255 <param name="species" value="coli O157:H7"/>
248 <param name="plasmid" value="pOSAK1"/> 256 <param name="strain" value="Sakai"/>
249 </section> 257 <param name="plasmid" value="pOSAK1"/>
250 <section name="annotation"> 258 </section>
251 <param name="keep_contig_headers" value="true"/> 259 <section name="annotation">
252 </section> 260 <param name="--gram" value="-"/>
253 <section name="workflow"> 261 <param name="keep_contig_headers" value="true"/>
254 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/> 262 </section>
255 </section> 263 <section name="workflow">
256 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4"> 264 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/>
257 <assert_contents> 265 </section>
258 <has_text_matching expression="IHHALP_00005"/> 266 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4">
259 </assert_contents> 267 <assert_contents>
260 </output> 268 <has_text_matching expression="IHHALP_00005"/>
261 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4"> 269 </assert_contents>
262 <assert_contents> 270 </output>
263 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> 271 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4">
264 </assert_contents> 272 <assert_contents>
265 </output> 273 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
266 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/> 274 </assert_contents>
267 <output name="annotation_plot"> 275 </output>
268 <assert_contents> 276 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
269 <has_size value="418991" delta="1000"/>
270 </assert_contents>
271 </output>
272 </test>
273 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps -->
274 <section name="input_option" >
275 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
276 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
277 <param name="input_file" value="NC_002127.1.fna"/>
278 <param name="min_contig_length" value="350"/>
279 </section>
280 <section name="workflow">
281 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
282 </section>
283 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/>
284 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/>
285 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
286 <output name="annotation_plot">
287 <assert_contents>
288 <has_size value="418399" delta="1000"/>
289 </assert_contents>
290 </output>
291 </test>
292 <test expect_num_outputs="4"> <!-- TEST_4 annotations -->
293 <section name="input_option" >
294 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
295 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
296 <param name="input_file" value="NC_002127.1.fna"/>
297 </section>
298 <section name="annotation">
299 <param name="complete" value="true"/>
300 <param name="prodigal" value="prodigal.tf"/>
301 <param name="translation_table" value="4"/>
302 <param name="replicons" value="replicons.tsv"/>
303 <param name="compliant" value="true"/>
304 <param name="proteins" value="user-proteins.faa"/>
305 </section>
306 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
307 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
308 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
309 <output name="annotation_plot"> 277 <output name="annotation_plot">
310 <assert_contents> 278 <assert_contents>
311 <has_size value="418399" delta="1000"/> 279 <has_size value="418991" delta="1000"/>
312 </assert_contents> 280 </assert_contents>
313 </output> 281 </output>
314 </test> 282 </test>
315 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary --> 283 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps -->
316 <section name="input_option" > 284 <section name="input_option" >
317 <param name="bakta_db_select" value="V0.1_2022-08-29"/> 285 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
318 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> 286 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
319 <param name="input_file" value="NC_002127.1.fna"/> 287 <param name="input_file" value="NC_002127.1.fna"/>
320 </section> 288 <param name="min_contig_length" value="350"/>
321 <section name="annotation"> 289 </section>
322 <param name="complete" value="true"/> 290 <section name="workflow">
323 <param name="translation_table" value="4"/> 291 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
324 </section> 292 </section>
325 <section name="workflow"> 293 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/>
326 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/> 294 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/>
327 </section> 295 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
328 <section name="output_files"> 296 <output name="annotation_plot">
329 <param name="output_selection" value="log_txt,sum_txt"/> 297 <assert_contents>
330 </section> 298 <has_size value="418399" delta="1000"/>
331 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/> 299 </assert_contents>
332 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/> 300 </output>
333 </test> 301 </test>
302 <test expect_num_outputs="4"> <!-- TEST_4 annotations -->
303 <section name="input_option" >
304 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
305 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
306 <param name="input_file" value="NC_002127.1.fna"/>
307 </section>
308 <section name="annotation">
309 <param name="complete" value="true"/>
310 <param name="prodigal" value="prodigal.tf"/>
311 <param name="translation_table" value="4"/>
312 <param name="replicons" value="replicons.tsv"/>
313 <param name="compliant" value="true"/>
314 <param name="proteins" value="user-proteins.faa"/>
315 </section>
316 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
317 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
318 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
319 <output name="annotation_plot">
320 <assert_contents>
321 <has_size value="418399" delta="1000"/>
322 </assert_contents>
323 </output>
324 </test>
325 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
326 <section name="input_option" >
327 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
328 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
329 <param name="input_file" value="NC_002127.1.fna"/>
330 </section>
331 <section name="annotation">
332 <param name="complete" value="true"/>
333 <param name="translation_table" value="4"/>
334 </section>
335 <section name="workflow">
336 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
337 </section>
338 <section name="output_files">
339 <param name="output_selection" value="log_txt,sum_txt"/>
340 </section>
341 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/>
342 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
343 </test>
334 </tests> 344 </tests>
335 <help><![CDATA[**What it does** 345 <help><![CDATA[**What it does**
336 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. 346 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.
337 347
338 *Comprehensive & taxonomy-independent database* 348 *Comprehensive & taxonomy-independent database*
378 388
379 **Annotation options** 389 **Annotation options**
380 1. You can specify if all sequences (chromosome or plasmids) are complete or not 390 1. You can specify if all sequences (chromosome or plasmids) are complete or not
381 2. You can add your own prodigal training file for CDS prediction 391 2. You can add your own prodigal training file for CDS prediction
382 3. The translation table could be modified, default is the 11th for bacteria 392 3. The translation table could be modified, default is the 11th for bacteria
383 4. You can specify if bacteria is gram -/+ or unknown (default value is unknown) 393 4. You can specify if bacteria is gram -/+ or unknown (default value unknown)
384 5. You can keep the name of contig present in the input file 394 5. You can keep the name of contig present in the input file
385 6. You can specify your own replicon table as a TSV/CSV file 395 6. You can specify your own replicon table as a TSV/CSV file
386 7. The compliance option produces annotation files that are ready to submit to public databases 396 7. The compliance option produces annotation files that are ready to submit to public databases
387 such as ENA, GenBank or EMBL 397 such as ENA, GenBank or EMBL
388 8. You can specify a protein sequence file for annotation in GenBank or fasta formats 398 8. You can specify a protein sequence file for annotation in GenBank or fasta formats