changeset 11:272a67da0113 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/gtdbtk commit 1fb19cc0b5b0db43fcb899afc97cba727a3d6a8b
author iuc
date Wed, 10 Dec 2025 18:58:28 +0000
parents 86e44bb25380
children 3a16872c1088
files gtdbtk_classify_wf.xml macros.xml test-data/gtdbtk_database_versioned.loc.test
diffstat 3 files changed, 24 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/gtdbtk_classify_wf.xml	Mon Sep 15 08:23:32 2025 +0000
+++ b/gtdbtk_classify_wf.xml	Wed Dec 10 18:58:28 2025 +0000
@@ -23,11 +23,11 @@
 --genome_dir input_dir
 --extension '$ext'
 --out_dir output_dir
---cpus \${GALAXY_SLOTS:-4}
+--cpus "\${GALAXY_SLOTS:-4}"
 --min_perc_aa $advanced.min_perc_aa
 $advanced.force
 --min_af $advanced.min_af
-
+$full_tree
 ## Required unless mash_db is available:
 --skip_ani_screen
 
@@ -39,7 +39,7 @@
         <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/>
         <param name="gtdbtk_db" type="select" label="GTDB-Tk database" help="This version of GTDB-Tk requires GTDB version 220. Please contact your service administrator if this version is not available to select.">
             <options from_data_table="gtdbtk_database_versioned">
-                <filter type="regexp" column="version" value="^220$"/>
+                <filter type="regexp" column="version" value="^22(0|6)$"/>
                 <validator type="no_options" message="No locally cached GTDB-Tk database is available"/>
             </options>
         </param>
@@ -47,6 +47,7 @@
             <param argument="--min_perc_aa" type="integer" min="0" max="100" value="10" label="Exclude genomes that do not have at least this percentage of AA in the MSA" help="Inclusive bound"/>
             <param argument="--force" type="boolean" truevalue="--force" falsevalue="" checked="false" label="Continue processing if an error occurs on a single genome?"/>
             <param argument="--min_af" type="float" min="0" max="1" value="0.65" label="Minimum alignment fraction to consider closest genome"/>
+            <param argument="--full_tree" type="boolean" truevalue="--full_tree" falsevalue="" checked="false" label="Use the full tree" help="Use the unsplit bacterial tree for the classify step; this is the original GTDB-Tk approach (version &lt; 2) and requires more than 320 GB of RAM to load the reference tree (default: False)"/>
             <param name="output_process_log" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Output process log file?"/>
         </section>
     </inputs>
@@ -68,13 +69,15 @@
         </collection>
     </outputs>
     <tests>
-        <!-- The commented test here is valid if we could store the GTDB-Tk database -->
-        <!--
-        <test expect_num_outputs="4">
-            <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
-            <param name="gtdbtk_db" value="gtdbtk214"/>
+        <!-- The test below passes against the mock DB at https://data.ace.uq.edu.au/public/gtdb/data/releases/release226/226.0/auxillary_files/gtdbtk_package/mockup_db/ but is kept commented out because even the mock DB is still rather large -->
+        <!-- <test expect_num_outputs="4">
+            <param name="input" location="https://data.ace.uq.edu.au/public/gtdb/data/releases/release226/226.0/auxillary_files/gtdbtk_package/mockup_db/test_genome_G021783475.fna.gz" ftype="fasta.gz"/>
+            <param name="gtdbtk_db" value="gtdbtk226"/>
+            <section name="advanced">
+                <param name="full_tree" value="true"/>
+            </section>
             <output_collection name="output_summary" type="list" count="1">
-                <element name="gtdbtk.ar53.summary" ftype="tsv">
+                <element name="gtdbtk.bac120.summary" ftype="tsv">
                     <assert_contents>
                         <has_text text="user_genome"/>
                     </assert_contents>
@@ -88,7 +91,7 @@
                 </element>
                 <element name="gtdbtk.bac120.markers_summary" ftype="tsv">
                     <assert_contents>
-                        <has_text text="genome_1_fna_gz"/>
+                        <has_text text="test_genome_G021783475"/>
                     </assert_contents>
                 </element>
                 <element name="gtdbtk.failed_genomes" ftype="tsv">
@@ -98,31 +101,31 @@
                 </element>
                 <element name="gtdbtk.translation_table_summary" ftype="tsv">
                     <assert_contents>
-                        <has_text text="genome_1_fna_gz"/>
+                        <has_text text="test_genome_G021783475"/>
                     </assert_contents>
                 </element>
             </output_collection>
             <output_collection name="output_classify" type="list" count="1">
-                <element name="gtdbtk.ar53.classify" ftype="newick">
+                <element name="gtdbtk.bac120.classify" ftype="newick">
                     <assert_contents>
                         <has_text text="GB_GCA_"/>
                     </assert_contents>
                 </element>
             </output_collection>
             <output_collection name="output_align" type="list" count="2">
-                <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true">
+                <element name="gtdbtk.bac120.msa" ftype="fasta.gz" decompress="true">
                     <assert_contents>
-                        <has_text text="GB_GCA_000008085"/>
+                        <has_text text="GB_GCA_"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true">
+                <element name="gtdbtk.bac120.user_msa" ftype="fasta.gz" decompress="true">
                     <assert_contents>
-                        <has_text text="genome_1_fna_gz"/>
+                        <has_text text="test_genome_G021783475"/>
                     </assert_contents>
                 </element>
             </output_collection>
-        </test>
-        -->
+        </test> -->
+
         <!-- GTDB-Tk databases are far too large to test currently -->
         <test expect_failure="true">
             <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
--- a/macros.xml	Mon Sep 15 08:23:32 2025 +0000
+++ b/macros.xml	Wed Dec 10 18:58:28 2025 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">2.5.2</token>
+    <token name="@TOOL_VERSION@">2.6.0</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">22.05</token>
     <xml name="requirements">
--- a/test-data/gtdbtk_database_versioned.loc.test	Mon Sep 15 08:23:32 2025 +0000
+++ b/test-data/gtdbtk_database_versioned.loc.test	Wed Dec 10 18:58:28 2025 +0000
@@ -23,4 +23,5 @@
 #drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 pplacer/
 #drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 radii/
 #drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 taxonomy/
-gtdbtk214	GTDB-Tk database	214	${__HERE__}/gtdbtk214/
+gtdbtk214	GTDB-Tk database 214	214	${__HERE__}/gtdbtk214/
+gtdbtk226	GTDB-Tk database 226	226	${__HERE__}/226.0-data/