diff bakta.xml @ 2:ca9e2125c5de draft

"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit fe1cdf884df206d842be4f0768acb06b0bbcf56f"
author pimarin
date Wed, 17 Aug 2022 10:29:37 +0000
parents 4d315de96666
children eea334d9988b
line wrap: on
line diff
--- a/bakta.xml	Wed May 18 11:47:02 2022 +0000
+++ b/bakta.xml	Wed Aug 17 10:29:37 2022 +0000
@@ -7,84 +7,89 @@
     <macros>
         <import>macro.xml</import>
     </macros>
-        <expand macro='xrefs'/>
-        <expand macro="requirements"/>
-        <expand macro="version_command"/>
+    <expand macro='edam'/>
+    <expand macro='xrefs'/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
 
     <command detect_errors="aggressive"><![CDATA[
         bakta
         #*======================================
+                    CPU option
+        ======================================*#
+        --threads \${GALAXY_SLOTS:-1}
+        #*======================================
                     Bakta database
         ======================================*#
-            --db '$input_option.db_select.fields.path'
-            #if $input_option.min_contig_length
-                --min-contig-length $input_option.min_contig_length
-            #else if $annotation.compliant
-                --min-contig-length 200
-            #else
-                --min-contig-length 1
-            #end if
-            --prefix bakta_output
+        --db '$input_option.db_select.fields.path'
+        #if $input_option.min_contig_length
+            --min-contig-length $input_option.min_contig_length
+        #else if $annotation.compliant
+            --min-contig-length 200
+        #else
+            --min-contig-length 1
+        #end if
+        --prefix bakta_output
         #*======================================
                   Organism options
               genus/species/strain/plasmid
         ======================================*#
-            #if $organism.genus
-                --genus '$organism.genus'
-            #end if
-            #if $organism.species
-                --species '$organism.species'
-            #end if
-            #if $organism.strain
-                --strain '$organism.strain'
-            #end if
-            #if $organism.plasmid
-                --plasmid '$organism.plasmid'
-            #end if
+        #if $organism.genus
+            --genus '$organism.genus'
+        #end if
+        #if $organism.species
+            --species '$organism.species'
+        #end if
+        #if $organism.strain
+            --strain '$organism.strain'
+        #end if
+        #if $organism.plasmid
+            --plasmid '$organism.plasmid'
+        #end if
         #*======================================
                     Annotation options
             gram type, prodigal/protein file
         ======================================*#
-            $annotation.complete
-            #if $annotation.prodigal
-                --prodigal-tf '$annotation.prodigal'
-            #end if
-            #if $annotation.translation_table
-                --translation-table '$annotation.translation_table'
-            #end if
-            #if $annotation.gram
-                --gram '$annotation.gram'
-            #end if
-            $annotation.keep_contig_headers
-            #if $annotation.replicons
-                --replicons '$annotation.replicons'
-            #end if
-            $annotation.compliant
-            #if $annotation.proteins
-                --proteins '$annotation.proteins'
-            #end if
+        $annotation.complete
+        #if $annotation.prodigal
+            --prodigal-tf '$annotation.prodigal'
+        #end if
+        #if $annotation.translation_table
+            --translation-table '$annotation.translation_table'
+        #end if
+        #if $annotation.gram
+            --gram '$annotation.gram'
+        #end if
+        $annotation.keep_contig_headers
+        #if $annotation.replicons
+            --replicons '$annotation.replicons'
+        #end if
+        $annotation.compliant
+        #if $annotation.proteins
+            --proteins '$annotation.proteins'
+        #end if
         #*======================================
                     Workflow OPTIONS
          skip some step of the bakta analysis
         ======================================*#
-            $workflow.skip_trna
-            $workflow.skip_tmrna
-            $workflow.skip_rrna
-            $workflow.skip_ncrna
-            $workflow.skip_ncrna_region
-            $workflow.skip_crispr
-            $workflow.skip_cds
-            $workflow.skip_sorf
-            $workflow.skip_gap
-            $workflow.skip_ori
+        $workflow.skip_trna
+        $workflow.skip_tmrna
+        $workflow.skip_rrna
+        $workflow.skip_ncrna
+        $workflow.skip_ncrna_region
+        $workflow.skip_crispr
+        $workflow.skip_cds
+        $workflow.skip_sorf
+        $workflow.skip_gap
+        $workflow.skip_ori
         #*======================================
-                        Genome file
+                    Genome file
         ======================================*#
-            '$input_option.input_file'
+        '$input_option.input_file'
         #*======================================
                     LOG file
         ======================================*#
-            &> '$logfile'
+        &> '$logfile'
         ]]></command>
     <inputs>
       <!-- DB and file INPUT -->
@@ -94,7 +99,7 @@
                     <validator message="No bakta database is available" type="no_options"/>
                 </options>
             </param>
-            <param name="input_file" type="data" format="fasta" label="Select genome in fasta format"/>
+            <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
             <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
         </section>
         <!-- Organism INFORMATION OPTIONS -->
@@ -241,7 +246,7 @@
       <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps  -->
           <section name="input_option" >
               <param name="db_select" value="test-db-bakta"/>
-              <param name="input_file" value="GCF_000008865.2.fna"/>
+              <param name="input_file" value="NC_002127.1.fna"/>
               <param name="min_contig_length" value="250"/>
           </section>
           <section name="organism">
@@ -260,80 +265,49 @@
           </section>
           <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
               <assert_contents>
-                  <has_text_matching expression="Genome size: 5,501,884 bp"/>
+                  <has_text_matching expression="Genome size: 3,306 bp"/>
               </assert_contents>
           </output>
           <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
               <assert_contents>
-                  <has_text_matching expression="RFAM:RF00506"/>
-                  <has_n_lines n="5468" delta="1"/>
+                  <has_text_matching expression="DOGAIA_00005"/>
               </assert_contents>
           </output>
           <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
               <assert_contents>
-                  <has_text_matching expression="ID=NC_002695.2;Name=NC_002695.2;Is_circular=true"/>
-                  <has_n_lines n="97179" delta="1"/>
+                  <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
               </assert_contents>
           </output>
-          <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="4">
+          <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
               <assert_contents>
-                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                  <has_n_lines n="172343" delta="1"/>
+                  <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSV"/>
               </assert_contents>
           </output>
           <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
               <assert_contents>
-                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                  <has_n_lines n="172350" delta="2"/>
-              </assert_contents>
-          </output>
-          <output name="annotation_fna" value="TEST_2/TEST_2.fna">
-              <assert_contents>
-                  <has_text_matching expression="NC_002695.2"/>
-                  <has_n_lines n="91701"/>
-              </assert_contents>
-          </output>
-          <output name="annotation_ffn" value="TEST_2/TEST_2.ffn">
-              <assert_contents>
-                  <has_text_matching expression="CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAA"/>
-                  <has_n_lines n="10928"/>
+                  <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
               </assert_contents>
           </output>
-          <output name="annotation_faa" value="TEST_2/TEST_2.faa">
-              <assert_contents>
-                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                  <has_n_lines n="10584"/>
-              </assert_contents>
-          </output>
-          <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv">
-              <assert_contents>
-                  <has_text_matching expression="NC_002695.2" n="5285"/>
-                  <has_n_lines n="5292"/>
-              </assert_contents>
-          </output>
-          <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa">
-              <assert_contents>
-                  <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                  <has_n_lines n="10578"/>
-              </assert_contents>
-          </output>
+          <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
+          <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
+          <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
+          <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
+          <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
           <output name="summary_txt" value="TEST_2/TEST_2.txt">
               <assert_contents>
-                  <has_text_matching expression="N50: 5498578"/>
-                  <has_n_lines n="29"/>
+                  <has_text_matching expression="N50: 3306"/>
               </assert_contents>
             </output>
           <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
               <assert_contents>
-                  <has_text_matching expression="0.505327629590155"/>
-                  <has_n_lines n="125400" delta="1"/>
+                  <has_text_matching expression="0.6524500907441017"/>
               </assert_contents>
           </output>
       </test>
       <test expect_num_outputs="10"> <!-- TEST_3 test all skip steps  -->
           <section name="input_option" >
               <param name="db_select" value="test-db-bakta"/>
-              <param name="input_file" value="GCF_000008865.2.fna"/>
+              <param name="input_file" value="NC_002127.1.fna"/>
               <param name="min_contig_length" value="250"/>
           </section>
           <section name="workflow">
@@ -350,68 +324,35 @@
           </section>
           <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
               <assert_contents>
-                  <has_text_matching expression="Genome size: 5,501,884 bp"/>
+                  <has_text_matching expression="Genome size: 3,306 bp"/>
               </assert_contents>
           </output>
-          <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="2">
+          <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1">
               <assert_contents>
-                  <has_text_matching expression="Ile_trna" n="3"/>
-                  <has_n_lines n="179" delta="1"/>
+                  <has_n_lines n="3" delta="1"/>
               </assert_contents>
           </output>
           <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2">
               <assert_contents>
-                  <has_text_matching expression="RFAM:RF02564" n="57"/>
-                  <has_n_lines n="91889" delta="1"/>
-              </assert_contents>
-          </output>
-          <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="4">
-              <assert_contents>
-                  <has_text_matching expression="RF02564" n="513"/>
-                  <has_n_lines n="93592" delta="1"/>
-              </assert_contents>
-          </output>
-          <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4">
-              <assert_contents>
-                  <has_text_matching expression="RF02564" n="513"/>
-                  <has_n_lines n="93600" delta="2"/>
-              </assert_contents>
-          </output>
-          <output name="annotation_fna" value="TEST_3/TEST_3.fna">
-              <assert_contents>
-                  <has_text_matching expression="CATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAA"/>
-                  <has_n_lines n="91701"/>
+                  <has_n_lines n="67" delta="1"/>
               </assert_contents>
           </output>
-          <output name="annotation_ffn" value="TEST_3/TEST_3.ffn">
-              <assert_contents>
-                  <has_text_matching expression="AGGCTTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGG"/>
-                  <has_n_lines n="346"/>
-              </assert_contents>
-          </output>
-          <output name="annotation_faa" value="TEST_3/TEST_3.faa">
-              <assert_contents>
-                  <has_text_matching expression="MIRIISRANSVTSSNEVNRLVTGQIPHD"/>
-                  <has_n_lines n="2"/>
-              </assert_contents>
-          </output>
+          <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
+          <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
+          <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
+          <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
+          <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
           <output name="summary_txt" value="TEST_3/TEST_3.txt">
               <assert_contents>
-                  <has_text_matching expression="N50: 5498578"/>
-                  <has_n_lines n="29"/>
+                  <has_text_matching expression="GC: 43.4"/>
               </assert_contents>
             </output>
-          <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4">
-              <assert_contents>
-                  <has_text_matching expression="0.004420849294532563"/>
-                  <has_n_lines n="3681" delta="1"/>
-              </assert_contents>
-          </output>
+          <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
         </test>
         <test expect_num_outputs="12"> <!-- TEST_4 annotations   -->
             <section name="input_option" >
                 <param name="db_select" value="test-db-bakta"/>
-                <param name="input_file" value="GCF_000008865.2.fna"/>
+                <param name="input_file" value="NC_002127.1.fna"/>
             </section>
             <section name="annotation">
                 <param name="complete" value="true"/>
@@ -423,73 +364,42 @@
             </section>
             <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
                 <assert_contents>
-                    <has_text_matching expression="Genome size: 5,501,884 bp"/>
+                    <has_text_matching expression="Genome size: 3,306 bp"/>
                 </assert_contents>
             </output>
             <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2">
                 <assert_contents>
                     <has_text_matching expression="mock1"/>
-                    <has_n_lines n="5470" delta="1"/>
                 </assert_contents>
             </output>
             <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
                 <assert_contents>
-                    <has_text_matching expression="ID=BALIOE_00005_gene;locus_tag=BALIOE_00005"/>
-                    <has_n_lines n="10942" delta="1"/>
+                    <has_text_matching expression="ID=DOGAIA_00005_gene;locus_tag=DOGAIA_00005"/>
                 </assert_contents>
             </output>
             <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
                 <assert_contents>
-                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                    <has_n_lines n="172334" delta="1"/>
+                    <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
                 </assert_contents>
             </output>
             <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
                 <assert_contents>
-                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                    <has_n_lines n="172342" delta="2"/>
-                </assert_contents>
-            </output>
-            <output name="annotation_fna" value="TEST_4/TEST_4.fna">
-                <assert_contents>
-                    <has_text_matching expression="ACTTTAACCAATATAGGCATAGCG"/>
-                    <has_n_lines n="91701"/>
-                </assert_contents>
-            </output>
-            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn">
-                <assert_contents>
-                    <has_text_matching expression="CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAA"/>
-                    <has_n_lines n="10928"/>
+                    <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
                 </assert_contents>
             </output>
-            <output name="annotation_faa" value="TEST_4/TEST_4.faa">
-                <assert_contents>
-                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                    <has_n_lines n="10584"/>
-                </assert_contents>
-            </output>
-            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv">
-                <assert_contents>
-                    <has_text_matching expression="p2" n="3"/>
-                    <has_n_lines n="5292"/>
-                </assert_contents>
-            </output>
-            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa">
-                <assert_contents>
-                    <has_text_matching expression="LFILTATGNMSLCGLKKESLTAASELVTCRE"/>
-                    <has_n_lines n="10578"/>
-                </assert_contents>
-            </output>
+            <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
+            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
+            <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
+            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
+            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
             <output name="summary_txt" value="TEST_4/TEST_4.txt">
                 <assert_contents>
-                    <has_text_matching expression="CDSs: 5292"/>
-                    <has_n_lines n="29"/>
+                    <has_text_matching expression="CDSs: 3"/>
                 </assert_contents>
               </output>
             <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
                 <assert_contents>
-                    <has_text_matching expression="0.8808858929050485"/>
-                    <has_n_lines n="125423" delta="1"/>
+                    <has_text_matching expression="0.4340592861464005"/>
                 </assert_contents>
             </output>
         </test>