Mercurial > repos > pimarin > bakta
view bakta.xml @ 2:ca9e2125c5de draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit fe1cdf884df206d842be4f0768acb06b0bbcf56f"
author | pimarin |
---|---|
date | Wed, 17 Aug 2022 10:29:37 +0000 |
parents | 4d315de96666 |
children | eea334d9988b |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy wrapper for Bakta (annotation of bacterial genomes).
    The version tokens (@TOOL_VERSION@, @VERSION_SUFFIX@, @PROFILE@) and the
    expanded macros (edam, xrefs, requirements, version_command, citations)
    are all defined in macro.xml.
-->
<tool id="bakta" name="Bakta genome annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>
        Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification
    </description>
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="edam"/>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>

    <command detect_errors="aggressive"><![CDATA[
        bakta

        ## CPU: use the slots granted to the job, falling back to a single thread
        --threads \${GALAXY_SLOTS:-1}

        ## Database: path column of the entry selected from the bakta_database data table.
        ## Quoted so that a path containing spaces or shell metacharacters stays one argument.
        --db '$input_option.db_select.fields.path'

        ## Minimum contig length: the user's value when one is given, otherwise the
        ## defaults announced in the parameter help (200 in compliant mode, else 1).
        ## NOTE(review): a value of 0 presumably evaluates as false here and also
        ## falls through to the defaults; confirm before relying on min="0".
        #if $input_option.min_contig_length
            --min-contig-length $input_option.min_contig_length
        #else if $annotation.compliant
            --min-contig-length 200
        #else
            --min-contig-length 1
        #end if

        ## Fixed prefix: the outputs below are collected as bakta_output.* from the working directory
        --prefix bakta_output

        ## Organism options: genus / species / strain / plasmid (only passed when filled in)
        #if $organism.genus
            --genus '$organism.genus'
        #end if
        #if $organism.species
            --species '$organism.species'
        #end if
        #if $organism.strain
            --strain '$organism.strain'
        #end if
        #if $organism.plasmid
            --plasmid '$organism.plasmid'
        #end if

        ## Annotation options: booleans expand to their flag or to nothing
        $annotation.complete
        #if $annotation.prodigal
            --prodigal-tf '$annotation.prodigal'
        #end if
        #if $annotation.translation_table
            --translation-table '$annotation.translation_table'
        #end if
        #if $annotation.gram
            --gram '$annotation.gram'
        #end if
        $annotation.keep_contig_headers
        #if $annotation.replicons
            --replicons '$annotation.replicons'
        #end if
        $annotation.compliant
        #if $annotation.proteins
            --proteins '$annotation.proteins'
        #end if

        ## Workflow options: each boolean expands to its skip flag or to nothing
        $workflow.skip_trna
        $workflow.skip_tmrna
        $workflow.skip_rrna
        $workflow.skip_ncrna
        $workflow.skip_ncrna_region
        $workflow.skip_crispr
        $workflow.skip_cds
        $workflow.skip_sorf
        $workflow.skip_gap
        $workflow.skip_ori

        ## Genome file (positional argument)
        '$input_option.input_file'

        ## Log file: stdout and stderr are both captured into the logfile output
        &> '$logfile'
    ]]></command>

    <inputs>
        <!-- Database and genome input -->
        <section name="input_option" title="Input/Output options" expanded="true">
            <param name="db_select" type="select" label="The bakta database">
                <options from_data_table="bakta_database">
                    <validator message="No bakta database is available" type="no_options"/>
                </options>
            </param>
            <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
            <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
        </section>

        <!-- Organism information -->
        <section name="organism" title="Optional organism options" expanded="false">
            <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
                <validator type="regex">^[A-Z]</validator>
            </param>
            <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"/>
            <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
                <validator type="regex">^[A-Z]</validator>
            </param>
            <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"/>
        </section>

        <!-- Annotation -->
        <section name="annotation" title="Optional annotation">
            <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
            <!-- The parameter keeps its name "prodigal"; the advertised argument now matches the flag the command emits. -->
            <param name="prodigal" argument="--prodigal-tf" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
            <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
                <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
                <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
            </param>
            <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
                <option value="+">Gram+</option>
                <option value="-">Gram-</option>
                <option value="?" selected="true">Unknown</option>
            </param>
            <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
            <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
            <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
            <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
        </section>

        <!-- Workflow: switches that skip individual analysis steps -->
        <section name="workflow" title="Workflow option to skip steps">
            <param name="skip_trna" type="boolean" truevalue="--skip-trna" falsevalue="" label="Skip tRNA detection and annotation" help="(--skip-trna)"/>
            <param name="skip_tmrna" type="boolean" truevalue="--skip-tmrna" falsevalue="" label="Skip tmRNA detection and annotation" help="(--skip-tmrna)"/>
            <param name="skip_rrna" type="boolean" truevalue="--skip-rrna" falsevalue="" label=" Skip rRNA detection and annotation" help="(--skip-rrna)"/>
            <param name="skip_ncrna" type="boolean" truevalue="--skip-ncrna" falsevalue="" label=" Skip ncRNA detection and annotation" help="(--skip-ncrna)"/>
            <param name="skip_ncrna_region" type="boolean" truevalue="--skip-ncrna-region" falsevalue="" label="Skip ncRNA region detection and annotation" help="(--skip-ncrna-region)"/>
            <param name="skip_crispr" type="boolean" truevalue="--skip-crispr" falsevalue="" label="Skip CRISPR array detection and annotation" help="(--skip-crispr)"/>
            <param name="skip_cds" type="boolean" truevalue="--skip-cds" falsevalue="" label="Skip CDS detection and annotation" help="(--skip-cds)"/>
            <param name="skip_sorf" type="boolean" truevalue="--skip-sorf" falsevalue="" label="Skip sORF detection and annotation" help="(--skip-sorf)"/>
            <param name="skip_gap" type="boolean" truevalue="--skip-gap" falsevalue="" label="Skip gap detection and annotation" help="(--skip-gap)"/>
            <param name="skip_ori" type="boolean" truevalue="--skip-ori" falsevalue="" label="Skip oriC/oriT detection and annotation" help="(--skip-ori)"/>
        </section>
    </inputs>

    <outputs>
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/>
        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"/>
        <!-- Typed as gff3 so that GFF-aware tools and visualisations accept it directly. -->
        <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"/>
        <!-- NOTE(review): the gbff and embl files are declared as tabular; confirm the impact on existing workflows before retyping them. -->
        <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"/>
        <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"/>
        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"/>
        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"/>
        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"/>
        <!-- The two hypotheticals outputs are only declared when CDS detection is not skipped. -->
        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv">
            <filter>workflow['skip_cds'] == False</filter>
        </data>
        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa">
            <filter>workflow['skip_cds'] == False</filter>
        </data>
        <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_output.txt"/>
        <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"/>
    </outputs>

    <tests>
        <!-- TEST_1 database + input -->
        <test expect_num_outputs="12">
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="NC_002127.1.fna"/>
            </section>
            <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching n="1" expression="Genome size: 3,306 bp"/>
                    <has_n_lines n="90" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching n="3" expression="contig_1"/>
                    <has_n_lines n="6" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="AGCTATTCCTGGTTTCATATGAAACAAACCATGCCTGTTCTCATGCCAGTAAGTGTAGCA"/>
                    <has_n_lines n="70" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
                    <has_n_lines n="133" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="FIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTFQSDNTVTAKKQQCFVGNSNLQTGQ"/>
                    <has_n_lines n="137" delta="2"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_1/TEST_1.fna">
                <assert_contents>
                    <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
                    <has_n_lines n="57"/>
                </assert_contents>
            </output>
            <output name="annotation_ffn" value="TEST_1/TEST_1.ffn">
                <assert_contents>
                    <has_text_matching expression="TCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGC"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="annotation_faa" value="TEST_1/TEST_1.faa">
                <assert_contents>
                    <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv">
                <assert_contents>
                    <has_text_matching expression="DOGAIA_00010"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCLFIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTF"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="summary_txt" value="TEST_1/TEST_1.txt">
                <assert_contents>
                    <has_text_matching expression="N50: 3306"/>
                    <has_n_lines n="29"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.6524500907441017"/>
                    <has_n_lines n="112" delta="1"/>
                </assert_contents>
            </output>
        </test>

        <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
        <test expect_num_outputs="12">
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="NC_002127.1.fna"/>
                <param name="min_contig_length" value="250"/>
            </section>
            <section name="organism">
                <param name="genus" value="Escherichia"/>
                <param name="species" value="coli O157:H7"/>
                <param name="strain" value="Sakai"/>
                <param name="plasmid" value="pOSAK1"/>
            </section>
            <section name="annotation">
                <!--
                    The gram parameter is addressed by its derived name "gram", not by its command-line flag.
                    NOTE(review): regenerate the TEST_2 expected files if they were produced with the default gram type.
                -->
                <param name="gram" value="-"/>
                <param name="keep_contig_headers" value="true"/>
            </section>
            <section name="workflow">
                <param name="skip_crispr" value="true"/>
                <param name="skip_gap" value="true"/>
            </section>
            <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 3,306 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="DOGAIA_00005"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSV"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
            <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
            <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
            <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
            <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
            <output name="summary_txt" value="TEST_2/TEST_2.txt">
                <assert_contents>
                    <has_text_matching expression="N50: 3306"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.6524500907441017"/>
                </assert_contents>
            </output>
        </test>

        <!-- TEST_3 test all skip steps (skipping CDS removes the two hypotheticals outputs) -->
        <test expect_num_outputs="10">
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="NC_002127.1.fna"/>
                <param name="min_contig_length" value="250"/>
            </section>
            <section name="workflow">
                <param name="skip_trna" value="true"/>
                <param name="skip_tmrna" value="true"/>
                <param name="skip_rrna" value="true"/>
                <param name="skip_ncrna" value="true"/>
                <param name="skip_ncrna_region" value="true"/>
                <param name="skip_crispr" value="true"/>
                <param name="skip_cds" value="true"/>
                <param name="skip_sorf" value="true"/>
                <param name="skip_gap" value="true"/>
                <param name="skip_ori" value="true"/>
            </section>
            <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 3,306 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1">
                <assert_contents>
                    <has_n_lines n="3" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2">
                <assert_contents>
                    <has_n_lines n="67" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
            <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
            <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
            <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
            <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
            <output name="summary_txt" value="TEST_3/TEST_3.txt">
                <assert_contents>
                    <has_text_matching expression="GC: 43.4"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
        </test>

        <!-- TEST_4 annotations -->
        <test expect_num_outputs="12">
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="NC_002127.1.fna"/>
            </section>
            <section name="annotation">
                <param name="complete" value="true"/>
                <param name="translation_table" value="4"/>
                <param name="prodigal" value="prodigal.tf"/>
                <param name="replicons" value="replicons.tsv"/>
                <param name="compliant" value="true"/>
                <param name="proteins" value="user-proteins.faa"/>
            </section>
            <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 3,306 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="mock1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="ID=DOGAIA_00005_gene;locus_tag=DOGAIA_00005"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
            <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
            <output name="summary_txt" value="TEST_4/TEST_4.txt">
                <assert_contents>
                    <has_text_matching expression="CDSs: 3"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.4340592861464005"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
usage: bakta [--db DB] [--min-contig-length MIN_CONTIG_LENGTH] [--prefix PREFIX] [--output OUTPUT]
             [--genus GENUS] [--species SPECIES] [--strain STRAIN] [--plasmid PLASMID]
             [--complete] [--prodigal-tf PRODIGAL_TF] [--translation-table {11,4}] [--gram {+,-,?}] [--locus LOCUS]
             [--locus-tag LOCUS_TAG] [--keep-contig-headers] [--replicons REPLICONS] [--compliant] [--proteins PROTEINS]
             [--skip-trna] [--skip-tmrna] [--skip-rrna] [--skip-ncrna] [--skip-ncrna-region]
             [--skip-crispr] [--skip-cds] [--skip-sorf] [--skip-gap] [--skip-ori]
             [--help] [--verbose] [--threads THREADS] [--tmp-dir TMP_DIR] [--version]
             <genome>

Rapid & standardized annotation of bacterial genomes, MAGs & plasmids

positional arguments:
  <genome>              Genome sequences in (zipped) fasta format

Input / Output:
  --db DB, -d DB        Database path (default = <bakta_path>/db). Can also be provided as BAKTA_DB environment variable.
  --min-contig-length MIN_CONTIG_LENGTH, -m MIN_CONTIG_LENGTH
                        Minimum contig size (default = 1; 200 in compliant mode)
  --prefix PREFIX, -p PREFIX
                        Prefix for output files
  --output OUTPUT, -o OUTPUT
                        Output directory (default = current working directory)

Organism:
  --genus GENUS         Genus name
  --species SPECIES     Species name
  --strain STRAIN       Strain name
  --plasmid PLASMID     Plasmid name

Annotation:
  --complete            All sequences are complete replicons (chromosome/plasmid[s])
  --prodigal-tf PRODIGAL_TF
                        Path to existing Prodigal training file to use for CDS prediction
  --translation-table {11,4}
                        Translation table: 11/4 (default = 11)
  --gram {+,-,?}        Gram type for signal peptide predictions: +/-/? (default = ?)
  --locus LOCUS         Locus prefix (default = 'contig')
  --locus-tag LOCUS_TAG
                        Locus tag prefix (default = autogenerated)
  --keep-contig-headers
                        Keep original contig headers
  --replicons REPLICONS
                        Replicon information table (tsv/csv)
  --compliant           Force Genbank/ENA/DDJB compliance
  --proteins PROTEINS   Fasta file of trusted protein sequences for CDS annotation

Workflow:
  --skip-trna           Skip tRNA detection & annotation
  --skip-tmrna          Skip tmRNA detection & annotation
  --skip-rrna           Skip rRNA detection & annotation
  --skip-ncrna          Skip ncRNA detection & annotation
  --skip-ncrna-region   Skip ncRNA region detection & annotation
  --skip-crispr         Skip CRISPR array detection & annotation
  --skip-cds            Skip CDS detection & annotation
  --skip-sorf           Skip sORF detection & annotation
  --skip-gap            Skip gap detection & annotation
  --skip-ori            Skip oriC/oriT detection & annotation

General:
  --help, -h            Show this help message and exit
  --verbose, -v         Print verbose information
  --threads THREADS, -t THREADS
                        Number of threads to use (default = number of available CPUs)
  --tmp-dir TMP_DIR     Location for temporary files (default = system dependent auto detection)
  --version             show program's version number and exit
    ]]></help>
    <expand macro="citations"/>
</tool>