Mercurial > repos > pimarin > bakta

<?xml version="1.0" encoding="UTF-8"?>

<tool id="bakta" name="Bakta genome annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short summary displayed next to the tool name in Galaxy -->
    <description>
        Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification
    </description>
    <!-- The @TOKEN@ placeholders used on the tool element and the macros
         expanded below are presumably defined in macro.xml (not visible here) -->
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="edam"/>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>

    <command detect_errors="aggressive"><![CDATA[
        #*======================================
          Builds a single bakta call from the
          four input sections (input_option,
          organism, annotation, workflow).
          stdout and stderr both end up in the
          "logfile" output dataset.
        ======================================*#
        bakta
        #*======================================
                    CPU option
          GALAXY_SLOTS is resolved by the shell
          at run time, hence the escaped dollar.
        ======================================*#
        --threads \${GALAXY_SLOTS:-1}
        #*======================================
                    Bakta database
          Path taken from the bakta_database
          data table; quoted so that a location
          containing spaces is not word-split.
        ======================================*#
        --db '$input_option.db_select.fields.path'
        #*======================================
                  Minimum contig length
          An explicit user value wins; otherwise
          fall back to 200 in compliant mode and
          to 1 in every other case.
        ======================================*#
        #if $input_option.min_contig_length
            --min-contig-length $input_option.min_contig_length
        #else if $annotation.compliant
            --min-contig-length 200
        #else
            --min-contig-length 1
        #end if
        #*======================================
          Fixed prefix: the outputs section
          collects bakta_output.* from the
          working directory.
        ======================================*#
        --prefix bakta_output
        #*======================================
                  Organism options
              genus/species/strain/plasmid
        ======================================*#
        #if $organism.genus
            --genus '$organism.genus'
        #end if
        #if $organism.species
            --species '$organism.species'
        #end if
        #if $organism.strain
            --strain '$organism.strain'
        #end if
        #if $organism.plasmid
            --plasmid '$organism.plasmid'
        #end if
        #*======================================
                    Annotation options
            gram type, prodigal/protein file
          Boolean params expand to their flag
          (truevalue) or to an empty string.
        ======================================*#
        $annotation.complete
        #if $annotation.prodigal
            --prodigal-tf '$annotation.prodigal'
        #end if
        #if $annotation.translation_table
            --translation-table '$annotation.translation_table'
        #end if
        #if $annotation.gram
            --gram '$annotation.gram'
        #end if
        $annotation.keep_contig_headers
        #if $annotation.replicons
            --replicons '$annotation.replicons'
        #end if
        $annotation.compliant
        #if $annotation.proteins
            --proteins '$annotation.proteins'
        #end if
        #*======================================
                    Workflow OPTIONS
         skip some step of the bakta analysis
        ======================================*#
        $workflow.skip_trna
        $workflow.skip_tmrna
        $workflow.skip_rrna
        $workflow.skip_ncrna
        $workflow.skip_ncrna_region
        $workflow.skip_crispr
        $workflow.skip_cds
        $workflow.skip_sorf
        $workflow.skip_gap
        $workflow.skip_ori
        #*======================================
                    Genome file
        ======================================*#
        '$input_option.input_file'
        #*======================================
                    LOG file
          POSIX redirection is used on purpose:
          the bash-only "&>" form would, under a
          plain sh, put bakta in the background
          and leave the log empty.
        ======================================*#
        > '$logfile' 2>&1
        ]]></command>
    <inputs>
        <!-- DB and file INPUT -->
        <section name="input_option" title="Input/Output options" expanded="true">
            <!-- Options are read from the bakta_database data table; the tool cannot run without at least one entry -->
            <param name="db_select" type="select" label="The bakta database">
                <options from_data_table="bakta_database">
                    <validator message="No bakta database is available" type="no_options"/>
                </options>
            </param>
            <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
            <!-- min is 1: the command template treats 0 like an unset value, so accepting 0 here would silently be replaced by the default -->
            <param name="min_contig_length" type="integer" optional="true" min="1" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
        </section>
        <!-- Organism INFORMATION OPTIONS -->
        <section name="organism" title="Optional organism options" expanded="false">
            <!-- When filled in, genus and strain must start with an upper-case letter -->
            <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
                <validator type="regex">^[A-Z]</validator>
            </param>
            <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"/>
            <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
                <validator type="regex">^[A-Z]</validator>
            </param>
            <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"/>
        </section>
        <!-- ANNOTATION -->
        <section name="annotation" title="Optional annotation">
            <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
            <!-- The param keeps the name "prodigal" used by the command template and the tests; the argument now shows the real bakta flag -->
            <param name="prodigal" argument="--prodigal-tf" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
            <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
                <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
                <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
            </param>
            <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
                <option value="+">Gram+</option>
                <option value="-">Gram-</option>
                <option value="?" selected="true">Unknown</option>
            </param>
            <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
            <!-- "tabular" is accepted in addition to tsv/csv so that ordinary tabular history datasets can be selected -->
            <param argument="--replicons" type="data" format="tabular,tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help="Replicon information table in tabular (tsv) or csv format"/>
            <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDBJ compliance"/>
            <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
        </section>
        <!-- PARAMETER FOR WORKFLOW ANALYSIS: each checked box adds the matching skip flag to the command line -->
        <section name="workflow" title="Workflow option to skip steps">
            <param name="skip_trna" type="boolean" truevalue="--skip-trna" falsevalue="" label="Skip tRNA detection and annotation" help="(--skip-trna)"/>
            <param name="skip_tmrna" type="boolean" truevalue="--skip-tmrna" falsevalue="" label="Skip tmRNA detection and annotation" help="(--skip-tmrna)"/>
            <param name="skip_rrna" type="boolean" truevalue="--skip-rrna" falsevalue="" label="Skip rRNA detection and annotation" help="(--skip-rrna)"/>
            <param name="skip_ncrna" type="boolean" truevalue="--skip-ncrna" falsevalue="" label="Skip ncRNA detection and annotation" help="(--skip-ncrna)"/>
            <param name="skip_ncrna_region" type="boolean" truevalue="--skip-ncrna-region" falsevalue="" label="Skip ncRNA region detection and annotation" help="(--skip-ncrna-region)"/>
            <param name="skip_crispr" type="boolean" truevalue="--skip-crispr" falsevalue="" label="Skip CRISPR array detection and annotation" help="(--skip-crispr)"/>
            <param name="skip_cds" type="boolean" truevalue="--skip-cds" falsevalue="" label="Skip CDS detection and annotation" help="(--skip-cds)"/>
            <param name="skip_sorf" type="boolean" truevalue="--skip-sorf" falsevalue="" label="Skip sORF detection and annotation" help="(--skip-sorf)"/>
            <param name="skip_gap" type="boolean" truevalue="--skip-gap" falsevalue="" label="Skip gap detection and annotation" help="(--skip-gap)"/>
            <param name="skip_ori" type="boolean" truevalue="--skip-ori" falsevalue="" label="Skip oriC/oriT detection and annotation" help="(--skip-ori)"/>
        </section>
    </inputs>
    <outputs>
        <!-- Combined stdout/stderr of bakta, written directly by the command -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/>
        <!-- Every other dataset is collected from the working directory, where bakta writes bakta_output.* (fixed prefix set in the command) -->
        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"/>
        <!-- gff3 is a specialisation of tabular in Galaxy, so existing tabular connections keep working -->
        <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"/>
        <!-- NOTE(review): the GenBank and EMBL flat files are not tabular data; the declared format is kept for
             compatibility with existing workflows, consider switching to the genbank / embl datatypes -->
        <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"/>
        <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"/>
        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"/>
        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"/>
        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"/>
        <!-- The two hypotheticals outputs are only created when CDS detection is not skipped -->
        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv">
            <filter>workflow['skip_cds'] == False</filter>
        </data>
        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa">
            <filter>workflow['skip_cds'] == False</filter>
        </data>
        <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_output.txt"/>
        <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"/>
    </outputs>

    <tests>
      <test expect_num_outputs="12">
          <!-- TEST_1: database + input only, every option left at its default -->
          <section name="input_option">
              <param name="db_select" value="test-db-bakta"/>
              <param name="input_file" value="NC_002127.1.fna"/>
          </section>
          <!-- Run log -->
          <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4">
              <assert_contents>
                  <has_text_matching n="1" expression="Genome size: 3,306 bp"/>
                  <has_n_lines n="90" delta="1"/>
              </assert_contents>
          </output>
          <!-- Annotation tables and flat files -->
          <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2">
              <assert_contents>
                  <has_text_matching n="3" expression="contig_1"/>
                  <has_n_lines n="6" delta="1"/>
              </assert_contents>
          </output>
          <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2">
              <assert_contents>
                  <has_text_matching expression="AGCTATTCCTGGTTTCATATGAAACAAACCATGCCTGTTCTCATGCCAGTAAGTGTAGCA"/>
                  <has_n_lines n="70" delta="1"/>
              </assert_contents>
          </output>
          <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4">
              <assert_contents>
                  <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
                  <has_n_lines n="133" delta="1"/>
              </assert_contents>
          </output>
          <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2">
              <assert_contents>
                  <has_text_matching expression="FIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTFQSDNTVTAKKQQCFVGNSNLQTGQ"/>
                  <has_n_lines n="137" delta="2"/>
              </assert_contents>
          </output>
          <!-- Sequence outputs -->
          <output name="annotation_fna" value="TEST_1/TEST_1.fna">
              <assert_contents>
                  <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
                  <has_n_lines n="57"/>
              </assert_contents>
          </output>
          <output name="annotation_ffn" value="TEST_1/TEST_1.ffn">
              <assert_contents>
                  <has_text_matching expression="TCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGC"/>
                  <has_n_lines n="6"/>
              </assert_contents>
          </output>
          <output name="annotation_faa" value="TEST_1/TEST_1.faa">
              <assert_contents>
                  <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI"/>
                  <has_n_lines n="6"/>
              </assert_contents>
          </output>
          <!-- Hypothetical proteins (present because CDS detection is not skipped) -->
          <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv">
              <assert_contents>
                  <has_text_matching expression="DOGAIA_00010"/>
                  <has_n_lines n="6"/>
              </assert_contents>
          </output>
          <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa">
              <assert_contents>
                  <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCLFIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTF"/>
                  <has_n_lines n="6"/>
              </assert_contents>
          </output>
          <!-- Summary and machine-readable result -->
          <output name="summary_txt" value="TEST_1/TEST_1.txt">
              <assert_contents>
                  <has_text_matching expression="N50: 3306"/>
                  <has_n_lines n="29"/>
              </assert_contents>
          </output>
          <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
              <assert_contents>
                  <has_text_matching expression="0.6524500907441017"/>
                  <has_n_lines n="112" delta="1"/>
              </assert_contents>
          </output>
      </test>
      <test expect_num_outputs="12">
          <!-- TEST_2: explicit minimum contig length, organism information, two annotation options and two skipped steps -->
          <section name="input_option">
              <param name="db_select" value="test-db-bakta"/>
              <param name="input_file" value="NC_002127.1.fna"/>
              <param name="min_contig_length" value="250"/>
          </section>
          <section name="organism">
              <param name="genus" value="Escherichia"/>
              <param name="species" value="coli O157:H7"/>
              <param name="strain" value="Sakai"/>
              <param name="plasmid" value="pOSAK1"/>
          </section>
          <section name="annotation">
              <!-- The input is named "gram" (derived from its argument attribute); the former name with
                   leading dashes matched no input, so the gram setting was never applied.
                   NOTE(review): the expected files under TEST_2 may need regenerating if they were
                   produced while the setting was ignored. -->
              <param name="gram" value="-"/>
              <param name="keep_contig_headers" value="true"/>
          </section>
          <section name="workflow">
              <param name="skip_crispr" value="true"/>
              <param name="skip_gap" value="true"/>
          </section>
          <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
              <assert_contents>
                  <has_text_matching expression="Genome size: 3,306 bp"/>
              </assert_contents>
          </output>
          <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
              <assert_contents>
                  <has_text_matching expression="DOGAIA_00005"/>
              </assert_contents>
          </output>
          <!-- The original contig header is expected here because keep_contig_headers is enabled -->
          <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
              <assert_contents>
                  <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
              </assert_contents>
          </output>
          <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
              <assert_contents>
                  <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSV"/>
              </assert_contents>
          </output>
          <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
              <assert_contents>
                  <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
              </assert_contents>
          </output>
          <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
          <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
          <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
          <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
          <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
          <output name="summary_txt" value="TEST_2/TEST_2.txt">
              <assert_contents>
                  <has_text_matching expression="N50: 3306"/>
              </assert_contents>
          </output>
          <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
              <assert_contents>
                  <has_text_matching expression="0.6524500907441017"/>
              </assert_contents>
          </output>
      </test>
      <test expect_num_outputs="10">
          <!-- TEST_3: every skip option enabled; the two hypotheticals outputs are filtered out
               because CDS detection is skipped, hence 10 outputs instead of 12 -->
          <section name="input_option">
              <param name="db_select" value="test-db-bakta"/>
              <param name="input_file" value="NC_002127.1.fna"/>
              <param name="min_contig_length" value="250"/>
          </section>
          <section name="workflow">
              <param name="skip_trna" value="true"/>
              <param name="skip_tmrna" value="true"/>
              <param name="skip_rrna" value="true"/>
              <param name="skip_ncrna" value="true"/>
              <param name="skip_ncrna_region" value="true"/>
              <param name="skip_crispr" value="true"/>
              <param name="skip_cds" value="true"/>
              <param name="skip_sorf" value="true"/>
              <param name="skip_gap" value="true"/>
              <param name="skip_ori" value="true"/>
          </section>
          <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
              <assert_contents>
                  <has_text_matching expression="Genome size: 3,306 bp"/>
              </assert_contents>
          </output>
          <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1">
              <assert_contents>
                  <has_n_lines n="3" delta="1"/>
              </assert_contents>
          </output>
          <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2">
              <assert_contents>
                  <has_n_lines n="67" delta="1"/>
              </assert_contents>
          </output>
          <!-- The remaining outputs are compared against the stored expected files -->
          <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
          <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
          <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
          <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
          <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
          <output name="summary_txt" value="TEST_3/TEST_3.txt">
              <assert_contents>
                  <has_text_matching expression="GC: 43.4"/>
              </assert_contents>
          </output>
          <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
      </test>
        <test expect_num_outputs="12">
            <!-- TEST_4: annotation section options (complete, translation table 4, prodigal training file,
                 replicon table, compliant mode and user supplied proteins) -->
            <section name="input_option">
                <param name="db_select" value="test-db-bakta"/>
                <param name="input_file" value="NC_002127.1.fna"/>
            </section>
            <section name="annotation">
                <param name="complete" value="true"/>
                <param name="translation_table" value="4"/>
                <param name="prodigal" value="prodigal.tf"/>
                <param name="replicons" value="replicons.tsv"/>
                <param name="compliant" value="true"/>
                <param name="proteins" value="user-proteins.faa"/>
            </section>
            <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="Genome size: 3,306 bp"/>
                </assert_contents>
            </output>
            <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="mock1"/>
                </assert_contents>
            </output>
            <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
                <assert_contents>
                    <has_text_matching expression="ID=DOGAIA_00005_gene;locus_tag=DOGAIA_00005"/>
                </assert_contents>
            </output>
            <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
                </assert_contents>
            </output>
            <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
                </assert_contents>
            </output>
            <!-- Sequence and hypotheticals outputs are compared against the stored expected files -->
            <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
            <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
            <output name="summary_txt" value="TEST_4/TEST_4.txt">
                <assert_contents>
                    <has_text_matching expression="CDSs: 3"/>
                </assert_contents>
            </output>
            <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
                <assert_contents>
                    <has_text_matching expression="0.4340592861464005"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
        usage: bakta [--db DB] [--min-contig-length MIN_CONTIG_LENGTH]
             [--prefix PREFIX] [--output OUTPUT] [--genus GENUS]
             [--species SPECIES] [--strain STRAIN] [--plasmid PLASMID]
             [--complete] [--prodigal-tf PRODIGAL_TF]
             [--translation-table {11,4}] [--gram {+,-,?}] [--locus LOCUS]
             [--locus-tag LOCUS_TAG] [--keep-contig-headers]
             [--replicons REPLICONS] [--compliant] [--proteins PROTEINS]
             [--skip-trna] [--skip-tmrna] [--skip-rrna] [--skip-ncrna]
             [--skip-ncrna-region] [--skip-crispr] [--skip-cds] [--skip-sorf]
             [--skip-gap] [--skip-ori] [--help] [--verbose]
             [--threads THREADS] [--tmp-dir TMP_DIR] [--version]
             <genome>

Rapid & standardized annotation of bacterial genomes, MAGs & plasmids

positional arguments:
  <genome>                      Genome sequences in (zipped) fasta format

Input / Output:
  --db DB, -d DB        Database path (default = <bakta_path>/db). Can also be
                        provided as BAKTA_DB environment variable.
  --min-contig-length MIN_CONTIG_LENGTH, -m MIN_CONTIG_LENGTH
                        Minimum contig size (default = 1; 200 in compliant
                        mode)
  --prefix PREFIX, -p PREFIX
                        Prefix for output files
  --output OUTPUT, -o OUTPUT
                        Output directory (default = current working directory)

Organism:
  --genus GENUS         Genus name
  --species SPECIES     Species name
  --strain STRAIN       Strain name
  --plasmid PLASMID     Plasmid name

Annotation:
  --complete                    All sequences are complete replicons (chromosome/plasmid[s])
  --prodigal-tf PRODIGAL_TF     Path to existing Prodigal training file to use for CDS prediction

  --translation-table {11,4}    Translation table: 11/4 (default = 11)
  --gram {+,-,?}                Gram type for signal peptide predictions: +/-/? (default = ?)
  --locus LOCUS                 Locus prefix (default = 'contig')
  --locus-tag LOCUS_TAG         Locus tag prefix (default = autogenerated)
  --keep-contig-headers         Keep original contig headers
  --replicons REPLICONS         Replicon information table (tsv/csv)
  --compliant                   Force Genbank/ENA/DDBJ compliance
  --proteins PROTEINS           Fasta file of trusted protein sequences for CDS annotation


Workflow:
  --skip-trna           Skip tRNA detection & annotation
  --skip-tmrna          Skip tmRNA detection & annotation
  --skip-rrna           Skip rRNA detection & annotation
  --skip-ncrna          Skip ncRNA detection & annotation
  --skip-ncrna-region   Skip ncRNA region detection & annotation
  --skip-crispr         Skip CRISPR array detection & annotation
  --skip-cds            Skip CDS detection & annotation
  --skip-sorf           Skip sORF detection & annotation
  --skip-gap            Skip gap detection & annotation
  --skip-ori            Skip oriC/oriT detection & annotation

General:
  --help, -h            Show this help message and exit
  --verbose, -v         Print verbose information
  --threads THREADS, -t THREADS
                        Number of threads to use (default = number of
                        available CPUs)
  --tmp-dir TMP_DIR     Location for temporary files (default = system
                        dependent auto detection)
  --version             show program's version number and exit


    ]]></help>
    <expand macro="citations"/>
</tool>
author	pimarin
date	Wed, 17 Aug 2022 10:29:37 +0000
parents	4d315de96666
children	eea334d9988b