diff bakta.xml @ 0:4d315de96666 draft

"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit bf30715c881a622947d3d099d7a22e323e2ceef3-dirty"
author pimarin
date Wed, 18 May 2022 11:13:45 +0000
parents
children ca9e2125c5de
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bakta.xml	Wed May 18 11:13:45 2022 +0000
@@ -0,0 +1,573 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<tool id="bakta" name="Bakta genome annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>
+        Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification
+    </description>
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+        <expand macro='xrefs'/>
+        <expand macro="requirements"/>
+        <expand macro="version_command"/>
+
+    <command detect_errors="aggressive"><![CDATA[
+        bakta
+        #*======================================
+                    Bakta database
+        ======================================*#
+            --db '$input_option.db_select.fields.path'
+            #if $input_option.min_contig_length
+                --min-contig-length $input_option.min_contig_length
+            #else if $annotation.compliant
+                --min-contig-length 200
+            #else
+                --min-contig-length 1
+            #end if
+            --prefix bakta_output
+        #*======================================
+                  Organism options
+              genus/species/strain/plasmid
+        ======================================*#
+            #if $organism.genus
+                --genus '$organism.genus'
+            #end if
+            #if $organism.species
+                --species '$organism.species'
+            #end if
+            #if $organism.strain
+                --strain '$organism.strain'
+            #end if
+            #if $organism.plasmid
+                --plasmid '$organism.plasmid'
+            #end if
+        #*======================================
+                    Annotation options
+            gram type, prodigal/protein file
+        ======================================*#
+            $annotation.complete
+            #if $annotation.prodigal
+                --prodigal-tf '$annotation.prodigal'
+            #end if
+            #if $annotation.translation_table
+                --translation-table '$annotation.translation_table'
+            #end if
+            #if $annotation.gram
+                --gram '$annotation.gram'
+            #end if
+            $annotation.keep_contig_headers
+            #if $annotation.replicons
+                --replicons '$annotation.replicons'
+            #end if
+            $annotation.compliant
+            #if $annotation.proteins
+                --proteins '$annotation.proteins'
+            #end if
+        #*======================================
+                    Workflow OPTIONS
+         skip some step of the bakta analysis
+        ======================================*#
+            $workflow.skip_trna
+            $workflow.skip_tmrna
+            $workflow.skip_rrna
+            $workflow.skip_ncrna
+            $workflow.skip_ncrna_region
+            $workflow.skip_crispr
+            $workflow.skip_cds
+            $workflow.skip_sorf
+            $workflow.skip_gap
+            $workflow.skip_ori
+        #*======================================
+                        Genome file
+        ======================================*#
+            '$input_option.input_file'
+        #*======================================
+                    LOG file
+        ======================================*#
+            &> '$logfile'
+        ]]></command>
+    <inputs>
+      <!-- DB and file INPUT -->
+        <section name="input_option" title="Input/Output options" expanded="true">
+            <param name="db_select" type="select" label="The bakta database">
+                <options from_data_table="bakta_database">
+                    <validator message="No bakta database is available" type="no_options"/>
+                </options>
+            </param>
+            <param name="input_file" type="data" format="fasta" label="Select genome in fasta format"/>
+            <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
+        </section>
+        <!-- Organism INFORMATION OPTIONS -->
+        <section name="organism" title="Optional organism options" expanded="false">
+            <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
+                <validator type="regex">^[A-Z]</validator>
+            </param>
+            <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"/>
+            <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
+                <validator type="regex">^[A-Z]</validator>
+            </param>
+            <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"/>
+        </section>
+        <!-- ANNOTATION -->
+        <section name="annotation" title="Optional annotation">
+            <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
+            <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
+            <param name="translation_table"  type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
+                <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+                <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
+            </param>
+            <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
+                <option value="+">Gram+</option>
+                <option value="-">Gram-</option>
+                <option value="?" selected="true">Unknown</option>
+            </param>
+            <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
+            <param argument="--replicons" type="data" format="tsv, csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
+            <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
+            <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
+        </section>
+        <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
+        <section name="workflow" title="Workflow option to skip steps">
+            <param name="skip_trna"   type="boolean" truevalue="--skip-trna" falsevalue="" label="Skip tRNA detection and annotation" help="(--skip-trna)"/>
+            <param name="skip_tmrna"  type="boolean" truevalue="--skip-tmrna" falsevalue="" label="Skip tmRNA detection and annotation" help="(--skip-tmrna)"/>
+            <param name="skip_rrna"  type="boolean" truevalue="--skip-rrna" falsevalue="" label=" Skip rRNA detection and annotation" help="(--skip-rrna)"/>
+            <param name="skip_ncrna"  type="boolean" truevalue="--skip-ncrna" falsevalue="" label=" Skip ncRNA detection and annotation" help="(--skip-ncrna)"/>
+            <param name="skip_ncrna_region" type="boolean" truevalue="--skip-ncrna-region" falsevalue="" label="Skip ncRNA region detection and annotation" help="(--skip-ncrna-region)"/>
+            <param name="skip_crispr" type="boolean" truevalue="--skip-crispr" falsevalue="" label="Skip CRISPR array detection and annotation" help="(--skip-crispr)"/>
+            <param name="skip_cds"    type="boolean" truevalue="--skip-cds"  falsevalue="" label="Skip CDS detection and annotation" help="(--skip-cds)"/>
+            <param name="skip_sorf"   type="boolean" truevalue="--skip-sorf" falsevalue="" label="Skip sORF detection and annotation" help="(--skip-sorf)"/>
+            <param name="skip_gap"    type="boolean" truevalue="--skip-gap" falsevalue="" label="Skip gap detection and annotation" help="(--skip-gap)"/>
+            <param name="skip_ori"    type="boolean" truevalue="--skip-ori" falsevalue="" label="Skip oriC/oriT detection and annotation" help="(--skip_ori)"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/>
+        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"/>
+        <data name="annotation_gff3" format="tabular" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"/>
+        <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"/>
+        <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"/>
+        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"/>
+        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"/>
+        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"/>
+        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv">
+            <filter>workflow['skip_cds'] == False</filter>
+        </data>
+        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa">
+          <filter>workflow['skip_cds'] == False</filter>
+        </data>
+        <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_output.txt"/>
+        <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"/>
+    </outputs>
+
+    <tests>
+      <test expect_num_outputs="12"> <!-- TEST_1 database + input -->
+          <section name="input_option" >
+              <param name="db_select" value="test-db-bakta"/>
+              <param name="input_file" value="NC_002127.1.fna"/>
+          </section>
+          <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching n="1" expression="Genome size: 3,306 bp"/>
+                  <has_n_lines n="90" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching n="3" expression="contig_1"/>
+                  <has_n_lines n="6" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="AGCTATTCCTGGTTTCATATGAAACAAACCATGCCTGTTCTCATGCCAGTAAGTGTAGCA"/>
+                  <has_n_lines n="70" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
+                  <has_n_lines n="133" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="FIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTFQSDNTVTAKKQQCFVGNSNLQTGQ"/>
+                  <has_n_lines n="137" delta="2"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_fna" value="TEST_1/TEST_1.fna">
+              <assert_contents>
+                  <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
+                  <has_n_lines n="57"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_ffn" value="TEST_1/TEST_1.ffn">
+              <assert_contents>
+                  <has_text_matching expression="TCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGC"/>
+                  <has_n_lines n="6"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_faa" value="TEST_1/TEST_1.faa">
+              <assert_contents>
+                  <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI"/>
+                  <has_n_lines n="6"/>
+              </assert_contents>
+          </output>
+          <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv">
+              <assert_contents>
+                  <has_text_matching expression="DOGAIA_00010"/>
+                  <has_n_lines n="6"/>
+              </assert_contents>
+          </output>
+          <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa">
+              <assert_contents>
+                  <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCLFIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTF"/>
+                  <has_n_lines n="6"/>
+              </assert_contents>
+          </output>
+          <output name="summary_txt" value="TEST_1/TEST_1.txt">
+              <assert_contents>
+                  <has_text_matching expression="N50: 3306"/>
+                  <has_n_lines n="29"/>
+              </assert_contents>
+            </output>
+          <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="0.6524500907441017"/>
+                  <has_n_lines n="112" delta="1"/>
+              </assert_contents>
+          </output>
+      </test>
+      <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps  -->
+          <section name="input_option" >
+              <param name="db_select" value="test-db-bakta"/>
+              <param name="input_file" value="GCF_000008865.2.fna"/>
+              <param name="min_contig_length" value="250"/>
+          </section>
+          <section name="organism">
+              <param name="genus" value="Escherichia"/>
+              <param name="species" value="coli O157:H7"/>
+              <param name="strain" value="Sakai"/>
+              <param name="plasmid" value="pOSAK1"/>
+          </section>
+          <section name="annotation">
+              <param name="--gram" value="-"/>
+              <param name="keep_contig_headers" value="true"/>
+          </section>
+          <section name="workflow">
+            <param name="skip_crispr" value="true"/>
+              <param name="skip_gap" value="true"/>
+          </section>
+          <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="Genome size: 5,501,884 bp"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="RFAM:RF00506"/>
+                  <has_n_lines n="5468" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="ID=NC_002695.2;Name=NC_002695.2;Is_circular=true"/>
+                  <has_n_lines n="97179" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                  <has_n_lines n="172343" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                  <has_n_lines n="172350" delta="2"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_fna" value="TEST_2/TEST_2.fna">
+              <assert_contents>
+                  <has_text_matching expression="NC_002695.2"/>
+                  <has_n_lines n="91701"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_ffn" value="TEST_2/TEST_2.ffn">
+              <assert_contents>
+                  <has_text_matching expression="CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAA"/>
+                  <has_n_lines n="10928"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_faa" value="TEST_2/TEST_2.faa">
+              <assert_contents>
+                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                  <has_n_lines n="10584"/>
+              </assert_contents>
+          </output>
+          <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv">
+              <assert_contents>
+                  <has_text_matching expression="NC_002695.2" n="5285"/>
+                  <has_n_lines n="5292"/>
+              </assert_contents>
+          </output>
+          <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa">
+              <assert_contents>
+                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                  <has_n_lines n="10578"/>
+              </assert_contents>
+          </output>
+          <output name="summary_txt" value="TEST_2/TEST_2.txt">
+              <assert_contents>
+                  <has_text_matching expression="N50: 5498578"/>
+                  <has_n_lines n="29"/>
+              </assert_contents>
+            </output>
+          <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="0.505327629590155"/>
+                  <has_n_lines n="125400" delta="1"/>
+              </assert_contents>
+          </output>
+      </test>
+      <test expect_num_outputs="10"> <!-- TEST_3 test all skip steps  -->
+          <section name="input_option" >
+              <param name="db_select" value="test-db-bakta"/>
+              <param name="input_file" value="GCF_000008865.2.fna"/>
+              <param name="min_contig_length" value="250"/>
+          </section>
+          <section name="workflow">
+            <param name="skip_trna" value="true"/>
+            <param name="skip_tmrna" value="true"/>
+            <param name="skip_rrna" value="true"/>
+            <param name="skip_ncrna" value="true"/>
+            <param name="skip_ncrna_region" value="true"/>
+            <param name="skip_crispr" value="true"/>
+            <param name="skip_cds" value="true"/>
+            <param name="skip_sorf" value="true"/>
+            <param name="skip_gap" value="true"/>
+            <param name="skip_ori" value="true"/>
+          </section>
+          <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="Genome size: 5,501,884 bp"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="Ile_trna" n="3"/>
+                  <has_n_lines n="179" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="RFAM:RF02564" n="57"/>
+                  <has_n_lines n="91889" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="RF02564" n="513"/>
+                  <has_n_lines n="93592" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="RF02564" n="513"/>
+                  <has_n_lines n="93600" delta="2"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_fna" value="TEST_3/TEST_3.fna">
+              <assert_contents>
+                  <has_text_matching expression="CATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAA"/>
+                  <has_n_lines n="91701"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_ffn" value="TEST_3/TEST_3.ffn">
+              <assert_contents>
+                  <has_text_matching expression="AGGCTTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGG"/>
+                  <has_n_lines n="346"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_faa" value="TEST_3/TEST_3.faa">
+              <assert_contents>
+                  <has_text_matching expression="MIRIISRANSVTSSNEVNRLVTGQIPHD"/>
+                  <has_n_lines n="2"/>
+              </assert_contents>
+          </output>
+          <output name="summary_txt" value="TEST_3/TEST_3.txt">
+              <assert_contents>
+                  <has_text_matching expression="N50: 5498578"/>
+                  <has_n_lines n="29"/>
+              </assert_contents>
+            </output>
+          <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="0.004420849294532563"/>
+                  <has_n_lines n="3681" delta="1"/>
+              </assert_contents>
+          </output>
+        </test>
+        <test expect_num_outputs="12"> <!-- TEST_4 annotations   -->
+            <section name="input_option" >
+                <param name="db_select" value="test-db-bakta"/>
+                <param name="input_file" value="GCF_000008865.2.fna"/>
+            </section>
+            <section name="annotation">
+                <param name="complete" value="true"/>
+                <param name="translation_table" value="4"/>
+                <param name="prodigal" value="prodigal.tf"/>
+                <param name="replicons" value="replicons.tsv"/>
+                <param name="compliant" value="true"/>
+                <param name="proteins" value="user-proteins.faa"/>
+            </section>
+            <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="Genome size: 5,501,884 bp"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2">
+                <assert_contents>
+                    <has_text_matching expression="mock1"/>
+                    <has_n_lines n="5470" delta="1"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
+                <assert_contents>
+                    <has_text_matching expression="ID=BALIOE_00005_gene;locus_tag=BALIOE_00005"/>
+                    <has_n_lines n="10942" delta="1"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                    <has_n_lines n="172334" delta="1"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                    <has_n_lines n="172342" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_fna" value="TEST_4/TEST_4.fna">
+                <assert_contents>
+                    <has_text_matching expression="ACTTTAACCAATATAGGCATAGCG"/>
+                    <has_n_lines n="91701"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn">
+                <assert_contents>
+                    <has_text_matching expression="CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAA"/>
+                    <has_n_lines n="10928"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_faa" value="TEST_4/TEST_4.faa">
+                <assert_contents>
+                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                    <has_n_lines n="10584"/>
+                </assert_contents>
+            </output>
+            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv">
+                <assert_contents>
+                    <has_text_matching expression="p2" n="3"/>
+                    <has_n_lines n="5292"/>
+                </assert_contents>
+            </output>
+            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa">
+                <assert_contents>
+                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
+                    <has_n_lines n="10578"/>
+                </assert_contents>
+            </output>
+            <output name="summary_txt" value="TEST_4/TEST_4.txt">
+                <assert_contents>
+                    <has_text_matching expression="CDSs: 5292"/>
+                    <has_n_lines n="29"/>
+                </assert_contents>
+              </output>
+            <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="0.8808858929050485"/>
+                    <has_n_lines n="125423" delta="1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        usage: bakta [--db DB] [--min-contig-length MIN_CONTIG_LENGTH]
+             [--prefix PREFIX] [--output OUTPUT] [--genus GENUS]
+             [--species SPECIES] [--strain STRAIN] [--plasmid PLASMID]
+             [--complete] [--prodigal-tf PRODIGAL_TF]
+             [--translation-table {11,4}] [--gram {+,-,?}] [--locus LOCUS]
+             [--locus-tag LOCUS_TAG] [--keep-contig-headers]
+             [--replicons REPLICONS] [--compliant] [--proteins PROTEINS]
+             [--skip-trna] [--skip-tmrna] [--skip-rrna] [--skip-ncrna]
+             [--skip-ncrna-region] [--skip-crispr] [--skip-cds] [--skip-sorf]
+             [--skip-gap] [--skip-ori] [--help] [--verbose]
+             [--threads THREADS] [--tmp-dir TMP_DIR] [--version]
+             <genome>
+
+Rapid & standardized annotation of bacterial genomes, MAGs & plasmids
+
+positional arguments:
+  <genome>                      Genome sequences in (zipped) fasta format
+
+Input / Output:
+  --db DB, -d DB        Database path (default = <bakta_path>/db). Can also be
+                        provided as BAKTA_DB environment variable.
+  --min-contig-length MIN_CONTIG_LENGTH, -m MIN_CONTIG_LENGTH
+                        Minimum contig size (default = 1; 200 in compliant
+                        mode)
+  --prefix PREFIX, -p PREFIX
+                        Prefix for output files
+  --output OUTPUT, -o OUTPUT
+                        Output directory (default = current working directory)
+
+Organism:
+  --genus GENUS         Genus name
+  --species SPECIES     Species name
+  --strain STRAIN       Strain name
+  --plasmid PLASMID     Plasmid name
+
+Annotation:
+  --complete                    All sequences are complete replicons (chromosome/plasmid[s])
+  --prodigal-tf PRODIGAL_TF     Path to existing Prodigal training file to use for CDS prediction
+
+  --translation-table {11,4}    Translation table: 11/4 (default = 11)
+  --gram {+,-,?}                Gram type for signal peptide predictions: +/-/? (default = ?)
+  --locus LOCUS                 Locus prefix (default = 'contig')
+  --locus-tag LOCUS_TAG         Locus tag prefix (default = autogenerated)
+  --keep-contig-headers         Keep original contig headers
+  --replicons REPLICONS         Replicon information table (tsv/csv)
+  --compliant                   Force Genbank/ENA/DDJB compliance
+  --proteins PROTEINS   Fasta file of trusted protein sequences for CDS annotation
+
+
+Workflow:
+  --skip-trna           Skip tRNA detection & annotation
+  --skip-tmrna          Skip tmRNA detection & annotation
+  --skip-rrna           Skip rRNA detection & annotation
+  --skip-ncrna          Skip ncRNA detection & annotation
+  --skip-ncrna-region   Skip ncRNA region detection & annotation
+  --skip-crispr         Skip CRISPR array detection & annotation
+  --skip-cds            Skip CDS detection & annotation
+  --skip-sorf           Skip sORF detection & annotation
+  --skip-gap            Skip gap detection & annotation
+  --skip-ori            Skip oriC/oriT detection & annotation
+
+General:
+  --help, -h            Show this help message and exit
+  --verbose, -v         Print verbose information
+  --threads THREADS, -t THREADS
+                        Number of threads to use (default = number of
+                        available CPUs)
+  --tmp-dir TMP_DIR     Location for temporary files (default = system
+                        dependent auto detection)
+  --version             show program's version number and exit
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>