Mercurial > repos > pimarin > bakta
diff bakta.xml @ 0:4d315de96666 draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit bf30715c881a622947d3d099d7a22e323e2ceef3-dirty"
| author | pimarin |
|---|---|
| date | Wed, 18 May 2022 11:13:45 +0000 |
| parents | |
| children | ca9e2125c5de |
line wrap: on
line diff
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for Bakta (annotation of bacterial genomes).

    The macros expanded below (xrefs, requirements, version_command, citations)
    are imported from macro.xml; the @TOOL_VERSION@, @VERSION_SUFFIX@ and
    @PROFILE@ tokens are presumably defined there as well (verify in macro.xml).
-->
<tool id="bakta" name="Bakta genome annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification</description>
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>

    <!-- All bakta output (stdout and stderr) is redirected into the "logfile" dataset. -->
    <command detect_errors="aggressive"><![CDATA[
        bakta
        #*======================================
                    Bakta database
        ======================================*#
        --db '$input_option.db_select.fields.path'
        #*======================================
                    Minimum contig length
          user value if set, otherwise 200 in
          compliant mode and 1 in normal mode
        ======================================*#
        #if $input_option.min_contig_length
            --min-contig-length $input_option.min_contig_length
        #else if $annotation.compliant
            --min-contig-length 200
        #else
            --min-contig-length 1
        #end if
        --prefix bakta_output
        #*======================================
                    Threads
          restrict bakta to the cores allocated
          by Galaxy (bakta otherwise uses every
          available CPU)
        ======================================*#
        --threads \${GALAXY_SLOTS:-1}
        #*======================================
                    Organism options
            genus/species/strain/plasmid
        ======================================*#
        #if $organism.genus
            --genus '$organism.genus'
        #end if
        #if $organism.species
            --species '$organism.species'
        #end if
        #if $organism.strain
            --strain '$organism.strain'
        #end if
        #if $organism.plasmid
            --plasmid '$organism.plasmid'
        #end if
        #*======================================
                    Annotation options
            gram type, prodigal/protein file
        ======================================*#
        $annotation.complete
        #if $annotation.prodigal
            --prodigal-tf '$annotation.prodigal'
        #end if
        #if $annotation.translation_table
            --translation-table '$annotation.translation_table'
        #end if
        #if $annotation.gram
            --gram '$annotation.gram'
        #end if
        $annotation.keep_contig_headers
        #if $annotation.replicons
            --replicons '$annotation.replicons'
        #end if
        $annotation.compliant
        #if $annotation.proteins
            --proteins '$annotation.proteins'
        #end if
        #*======================================
                    Workflow OPTIONS
            skip some step of the bakta analysis
        ======================================*#
        $workflow.skip_trna
        $workflow.skip_tmrna
        $workflow.skip_rrna
        $workflow.skip_ncrna
        $workflow.skip_ncrna_region
        $workflow.skip_crispr
        $workflow.skip_cds
        $workflow.skip_sorf
        $workflow.skip_gap
        $workflow.skip_ori
        #*======================================
                    Genome file
        ======================================*#
        '$input_option.input_file'
        #*======================================
                    LOG file
        ======================================*#
        &> '$logfile'
    ]]></command>

    <inputs>
        <!-- DB and file INPUT -->
        <section name="input_option" title="Input/Output options" expanded="true">
            <param name="db_select" type="select" label="The bakta database">
                <options from_data_table="bakta_database">
                    <validator message="No bakta database is available" type="no_options"/>
                </options>
            </param>
            <param name="input_file" type="data" format="fasta" label="Select genome in fasta format"/>
            <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
        </section>

        <!-- Organism INFORMATION OPTIONS -->
        <section name="organism" title="Optional organism options" expanded="false">
            <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
                <validator type="regex" message="Genus name must start with an uppercase letter">^[A-Z]</validator>
            </param>
            <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"/>
            <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
                <validator type="regex" message="Strain name must start with an uppercase letter">^[A-Z]</validator>
            </param>
            <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"/>
        </section>

        <!-- ANNOTATION -->
        <section name="annotation" title="Optional annotation">
            <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
            <!-- Declared with "name" rather than "argument": the flag written by the command block is prodigal-tf, not prodigal. -->
            <param name="prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction (--prodigal-tf)"/>
            <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
                <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
                <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
            </param>
            <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
                <option value="+">Gram+</option>
                <option value="-">Gram-</option>
                <option value="?" selected="true">Unknown</option>
            </param>
            <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
            <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
            <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDBJ compliance"/>
            <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
        </section>

        <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
        <section name="workflow" title="Workflow option to skip steps">
            <param name="skip_trna" type="boolean" truevalue="--skip-trna" falsevalue="" label="Skip tRNA detection and annotation" help="(--skip-trna)"/>
            <param name="skip_tmrna" type="boolean" truevalue="--skip-tmrna" falsevalue="" label="Skip tmRNA detection and annotation" help="(--skip-tmrna)"/>
            <param name="skip_rrna" type="boolean" truevalue="--skip-rrna" falsevalue="" label="Skip rRNA detection and annotation" help="(--skip-rrna)"/>
            <param name="skip_ncrna" type="boolean" truevalue="--skip-ncrna" falsevalue="" label="Skip ncRNA detection and annotation" help="(--skip-ncrna)"/>
            <param name="skip_ncrna_region" type="boolean" truevalue="--skip-ncrna-region" falsevalue="" label="Skip ncRNA region detection and annotation" help="(--skip-ncrna-region)"/>
            <param name="skip_crispr" type="boolean" truevalue="--skip-crispr" falsevalue="" label="Skip CRISPR array detection and annotation" help="(--skip-crispr)"/>
            <param name="skip_cds" type="boolean" truevalue="--skip-cds" falsevalue="" label="Skip CDS detection and annotation" help="(--skip-cds)"/>
            <param name="skip_sorf" type="boolean" truevalue="--skip-sorf" falsevalue="" label="Skip sORF detection and annotation" help="(--skip-sorf)"/>
            <param name="skip_gap" type="boolean" truevalue="--skip-gap" falsevalue="" label="Skip gap detection and annotation" help="(--skip-gap)"/>
            <param name="skip_ori" type="boolean" truevalue="--skip-ori" falsevalue="" label="Skip oriC/oriT detection and annotation" help="(--skip-ori)"/>
        </section>
    </inputs>

    <outputs>
        <!-- Every output except the log is collected from the working directory using the "bakta_output" prefix set in the command. -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/>
        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"/>
        <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"/>
        <!-- NOTE(review): the gbff and embl files are flat files rather than tables; "tabular" is kept so existing consumers keep working, consider the genbank/embl datatypes. -->
        <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"/>
        <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"/>
        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"/>
        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"/>
        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"/>
        <!-- The two "hypotheticals" outputs are only created when CDS detection is not skipped. -->
        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv">
            <filter>workflow['skip_cds'] == False</filter>
        </data>
        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa">
            <filter>workflow['skip_cds'] == False</filter>
        </data>
        <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_output.txt"/>
        <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"/>
    </outputs>

    <tests>
        <test expect_num_outputs="12"> <!-- TEST_1 database + input -->
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="NC_002127.1.fna"/>
            </section>
            <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching n="1" expression="Genome size: 3,306 bp"/>
                    <has_n_lines n="90" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching n="3" expression="contig_1"/>
                    <has_n_lines n="6" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="AGCTATTCCTGGTTTCATATGAAACAAACCATGCCTGTTCTCATGCCAGTAAGTGTAGCA"/>
                    <has_n_lines n="70" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
                    <has_n_lines n="133" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="FIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTFQSDNTVTAKKQQCFVGNSNLQTGQ"/>
                    <has_n_lines n="137" delta="2"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_1/TEST_1.fna">
                <assert_contents>
                    <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
                    <has_n_lines n="57"/>
                </assert_contents>
            </output>
            <output name="annotation_ffn" value="TEST_1/TEST_1.ffn">
                <assert_contents>
                    <has_text_matching expression="TCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGC"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="annotation_faa" value="TEST_1/TEST_1.faa">
                <assert_contents>
                    <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv">
                <assert_contents>
                    <has_text_matching expression="DOGAIA_00010"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCLFIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTF"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="summary_txt" value="TEST_1/TEST_1.txt">
                <assert_contents>
                    <has_text_matching expression="N50: 3306"/>
                    <has_n_lines n="29"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.6524500907441017"/>
                    <has_n_lines n="112" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="GCF_000008865.2.fna"/>
                <param name="min_contig_length" value="250"/>
            </section>
            <section name="organism">
                <param name="genus" value="Escherichia"/>
                <param name="species" value="coli O157:H7"/>
                <param name="strain" value="Sakai"/>
                <param name="plasmid" value="pOSAK1"/>
            </section>
            <section name="annotation">
                <!-- NOTE(review): the parameter name is "gram" (derived from its argument attribute); if the TEST_2 expected files were generated with the default "?", regenerate them. -->
                <param name="gram" value="-"/>
                <param name="keep_contig_headers" value="true"/>
            </section>
            <section name="workflow">
                <param name="skip_crispr" value="true"/>
                <param name="skip_gap" value="true"/>
            </section>
            <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 5,501,884 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="RFAM:RF00506"/>
                    <has_n_lines n="5468" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="ID=NC_002695.2;Name=NC_002695.2;Is_circular=true"/>
                    <has_n_lines n="97179" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="172343" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="172350" delta="2"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_2/TEST_2.fna">
                <assert_contents>
                    <has_text_matching expression="NC_002695.2"/>
                    <has_n_lines n="91701"/>
                </assert_contents>
            </output>
            <output name="annotation_ffn" value="TEST_2/TEST_2.ffn">
                <assert_contents>
                    <has_text_matching expression="CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAA"/>
                    <has_n_lines n="10928"/>
                </assert_contents>
            </output>
            <output name="annotation_faa" value="TEST_2/TEST_2.faa">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="10584"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv">
                <assert_contents>
                    <has_text_matching expression="NC_002695.2" n="5285"/>
                    <has_n_lines n="5292"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="10578"/>
                </assert_contents>
            </output>
            <output name="summary_txt" value="TEST_2/TEST_2.txt">
                <assert_contents>
                    <has_text_matching expression="N50: 5498578"/>
                    <has_n_lines n="29"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.505327629590155"/>
                    <has_n_lines n="125400" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="10"> <!-- TEST_3 test all skip steps -->
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="GCF_000008865.2.fna"/>
                <param name="min_contig_length" value="250"/>
            </section>
            <section name="workflow">
                <param name="skip_trna" value="true"/>
                <param name="skip_tmrna" value="true"/>
                <param name="skip_rrna" value="true"/>
                <param name="skip_ncrna" value="true"/>
                <param name="skip_ncrna_region" value="true"/>
                <param name="skip_crispr" value="true"/>
                <param name="skip_cds" value="true"/>
                <param name="skip_sorf" value="true"/>
                <param name="skip_gap" value="true"/>
                <param name="skip_ori" value="true"/>
            </section>
            <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 5,501,884 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="Ile_trna" n="3"/>
                    <has_n_lines n="179" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="RFAM:RF02564" n="57"/>
                    <has_n_lines n="91889" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="RF02564" n="513"/>
                    <has_n_lines n="93592" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="RF02564" n="513"/>
                    <has_n_lines n="93600" delta="2"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_3/TEST_3.fna">
                <assert_contents>
                    <has_text_matching expression="CATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAA"/>
                    <has_n_lines n="91701"/>
                </assert_contents>
            </output>
            <output name="annotation_ffn" value="TEST_3/TEST_3.ffn">
                <assert_contents>
                    <has_text_matching expression="AGGCTTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGG"/>
                    <has_n_lines n="346"/>
                </assert_contents>
            </output>
            <output name="annotation_faa" value="TEST_3/TEST_3.faa">
                <assert_contents>
                    <has_text_matching expression="MIRIISRANSVTSSNEVNRLVTGQIPHD"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="summary_txt" value="TEST_3/TEST_3.txt">
                <assert_contents>
                    <has_text_matching expression="N50: 5498578"/>
                    <has_n_lines n="29"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.004420849294532563"/>
                    <has_n_lines n="3681" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="12"> <!-- TEST_4 annotations -->
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="GCF_000008865.2.fna"/>
            </section>
            <section name="annotation">
                <param name="complete" value="true"/>
                <param name="translation_table" value="4"/>
                <param name="prodigal" value="prodigal.tf"/>
                <param name="replicons" value="replicons.tsv"/>
                <param name="compliant" value="true"/>
                <param name="proteins" value="user-proteins.faa"/>
            </section>
            <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 5,501,884 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="mock1"/>
                    <has_n_lines n="5470" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="ID=BALIOE_00005_gene;locus_tag=BALIOE_00005"/>
                    <has_n_lines n="10942" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="172334" delta="1"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="172342" delta="2"/>
                </assert_contents>
            </output>
            <output name="annotation_fna" value="TEST_4/TEST_4.fna">
                <assert_contents>
                    <has_text_matching expression="ACTTTAACCAATATAGGCATAGCG"/>
                    <has_n_lines n="91701"/>
                </assert_contents>
            </output>
            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn">
                <assert_contents>
                    <has_text_matching expression="CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAA"/>
                    <has_n_lines n="10928"/>
                </assert_contents>
            </output>
            <output name="annotation_faa" value="TEST_4/TEST_4.faa">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="10584"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv">
                <assert_contents>
                    <has_text_matching expression="p2" n="3"/>
                    <has_n_lines n="5292"/>
                </assert_contents>
            </output>
            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa">
                <assert_contents>
                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
                    <has_n_lines n="10578"/>
                </assert_contents>
            </output>
            <output name="summary_txt" value="TEST_4/TEST_4.txt">
                <assert_contents>
                    <has_text_matching expression="CDSs: 5292"/>
                    <has_n_lines n="29"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.8808858929050485"/>
                    <has_n_lines n="125423" delta="1"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

Rapid & standardized annotation of bacterial genomes, MAGs & plasmids.

**Command line reference**

The text below is the usage message of the ``bakta`` command line tool.
Not every option listed there is exposed by this Galaxy wrapper.

::

    usage: bakta [--db DB] [--min-contig-length MIN_CONTIG_LENGTH]
                 [--prefix PREFIX] [--output OUTPUT] [--genus GENUS]
                 [--species SPECIES] [--strain STRAIN] [--plasmid PLASMID]
                 [--complete] [--prodigal-tf PRODIGAL_TF]
                 [--translation-table {11,4}] [--gram {+,-,?}] [--locus LOCUS]
                 [--locus-tag LOCUS_TAG] [--keep-contig-headers]
                 [--replicons REPLICONS] [--compliant] [--proteins PROTEINS]
                 [--skip-trna] [--skip-tmrna] [--skip-rrna] [--skip-ncrna]
                 [--skip-ncrna-region] [--skip-crispr] [--skip-cds] [--skip-sorf]
                 [--skip-gap] [--skip-ori] [--help] [--verbose]
                 [--threads THREADS] [--tmp-dir TMP_DIR] [--version]
                 <genome>

    positional arguments:
      <genome>              Genome sequences in (zipped) fasta format

    Input / Output:
      --db DB, -d DB        Database path (default = <bakta_path>/db). Can also be
                            provided as BAKTA_DB environment variable.
      --min-contig-length MIN_CONTIG_LENGTH, -m MIN_CONTIG_LENGTH
                            Minimum contig size (default = 1; 200 in compliant
                            mode)
      --prefix PREFIX, -p PREFIX
                            Prefix for output files
      --output OUTPUT, -o OUTPUT
                            Output directory (default = current working directory)

    Organism:
      --genus GENUS         Genus name
      --species SPECIES     Species name
      --strain STRAIN       Strain name
      --plasmid PLASMID     Plasmid name

    Annotation:
      --complete            All sequences are complete replicons (chromosome/plasmid[s])
      --prodigal-tf PRODIGAL_TF
                            Path to existing Prodigal training file to use for CDS prediction
      --translation-table {11,4}
                            Translation table: 11/4 (default = 11)
      --gram {+,-,?}        Gram type for signal peptide predictions: +/-/? (default = ?)
      --locus LOCUS         Locus prefix (default = 'contig')
      --locus-tag LOCUS_TAG
                            Locus tag prefix (default = autogenerated)
      --keep-contig-headers
                            Keep original contig headers
      --replicons REPLICONS
                            Replicon information table (tsv/csv)
      --compliant           Force Genbank/ENA/DDBJ compliance
      --proteins PROTEINS   Fasta file of trusted protein sequences for CDS annotation

    Workflow:
      --skip-trna           Skip tRNA detection & annotation
      --skip-tmrna          Skip tmRNA detection & annotation
      --skip-rrna           Skip rRNA detection & annotation
      --skip-ncrna          Skip ncRNA detection & annotation
      --skip-ncrna-region   Skip ncRNA region detection & annotation
      --skip-crispr         Skip CRISPR array detection & annotation
      --skip-cds            Skip CDS detection & annotation
      --skip-sorf           Skip sORF detection & annotation
      --skip-gap            Skip gap detection & annotation
      --skip-ori            Skip oriC/oriT detection & annotation

    General:
      --help, -h            Show this help message and exit
      --verbose, -v         Print verbose information
      --threads THREADS, -t THREADS
                            Number of threads to use (default = number of
                            available CPUs)
      --tmp-dir TMP_DIR     Location for temporary files (default = system
                            dependent auto detection)
      --version             show program's version number and exit
    ]]></help>
    <expand macro="citations"/>
</tool>
