Mercurial > repos > iuc > gtdbtk_classify_wf
changeset 11:272a67da0113 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/gtdbtk commit 1fb19cc0b5b0db43fcb899afc97cba727a3d6a8b
| author | iuc |
|---|---|
| date | Wed, 10 Dec 2025 18:58:28 +0000 |
| parents | 86e44bb25380 |
| children | 3a16872c1088 |
| files | gtdbtk_classify_wf.xml macros.xml test-data/gtdbtk_database_versioned.loc.test |
| diffstat | 3 files changed, 24 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/gtdbtk_classify_wf.xml Mon Sep 15 08:23:32 2025 +0000 +++ b/gtdbtk_classify_wf.xml Wed Dec 10 18:58:28 2025 +0000 @@ -23,11 +23,11 @@ --genome_dir input_dir --extension '$ext' --out_dir output_dir ---cpus \${GALAXY_SLOTS:-4} +--cpus "\${GALAXY_SLOTS:-4}" --min_perc_aa $advanced.min_perc_aa $advanced.force --min_af $advanced.min_af - +$full_tree ## Required unless mash_db is available: --skip_ani_screen @@ -39,7 +39,7 @@ <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/> <param name="gtdbtk_db" type="select" label="GTDB-Tk database" help="This version of GTDB-Tk requires GTDB version 220. Please contact your service administrator if this version is not available to select."> <options from_data_table="gtdbtk_database_versioned"> - <filter type="regexp" column="version" value="^220$"/> + <filter type="regexp" column="version" value="^22(0|6)$"/> <validator type="no_options" message="No locally cached GTDB-Tk database is available"/> </options> </param> @@ -47,6 +47,7 @@ <param argument="--min_perc_aa" type="integer" min="0" max="100" value="10" label="Exclude genomes that do not have at least this percentage of AA in the MSA" help="Inclusive bound"/> <param argument="--force" type="boolean" truevalue="--force" falsevalue="" checked="false" label="Continue processing if an error occurs on a single genome?"/> <param argument="--min_af" type="float" min="0" max="1" value="0.65" label="Minimum alignment fraction to consider closest genome"/> + <param argument="--full_tree" type="boolean" truevalue="--full_tree" falsevalue="" checked="false" label="Use the full tree" help="Use the unsplit bacterial tree for the classify step; this is the original GTDB-Tk approach (version &lt; 2) and requires more than 320 GB of RAM to load the reference tree (default: False)"/> <param name="output_process_log" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Output process log file?"/> </section> </inputs> @@ 
-68,13 +69,15 @@ </collection> </outputs> <tests> - <!-- The commented test here is valid if we could store the GTDB-Tk database --> - <!-- - <test expect_num_outputs="4"> - <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> - <param name="gtdbtk_db" value="gtdbtk214"/> + <!-- Test works with mock DB https://data.ace.uq.edu.au/public/gtdb/data/releases/release226/226.0/auxillary_files/gtdbtk_package/mockup_db/ which is still a bit large --> + <!-- <test expect_num_outputs="4"> + <param name="input" location="https://data.ace.uq.edu.au/public/gtdb/data/releases/release226/226.0/auxillary_files/gtdbtk_package/mockup_db/test_genome_G021783475.fna.gz" ftype="fasta.gz"/> + <param name="gtdbtk_db" value="gtdbtk226"/> + <section name="advanced"> + <param name="full_tree" value="true"/> + </section> <output_collection name="output_summary" type="list" count="1"> - <element name="gtdbtk.ar53.summary" ftype="tsv"> + <element name="gtdbtk.bac120.summary" ftype="tsv"> <assert_contents> <has_text text="user_genome"/> </assert_contents> @@ -88,7 +91,7 @@ </element> <element name="gtdbtk.bac120.markers_summary" ftype="tsv"> <assert_contents> - <has_text text="genome_1_fna_gz"/> + <has_text text="test_genome_G021783475"/> </assert_contents> </element> <element name="gtdbtk.failed_genomes" ftype="tsv"> @@ -98,31 +101,31 @@ </element> <element name="gtdbtk.translation_table_summary" ftype="tsv"> <assert_contents> - <has_text text="genome_1_fna_gz"/> + <has_text text="test_genome_G021783475"/> </assert_contents> </element> </output_collection> <output_collection name="output_classify" type="list" count="1"> - <element name="gtdbtk.ar53.classify" ftype="newick"> + <element name="gtdbtk.bac120.classify" ftype="newick"> <assert_contents> <has_text text="GB_GCA_"/> </assert_contents> </element> </output_collection> <output_collection name="output_align" type="list" count="2"> - <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true"> + <element 
name="gtdbtk.bac120.msa" ftype="fasta.gz" decompress="true"> <assert_contents> - <has_text text="GB_GCA_000008085"/> + <has_text text="GB_GCA_"/> </assert_contents> </element> - <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true"> + <element name="gtdbtk.bac120.user_msa" ftype="fasta.gz" decompress="true"> <assert_contents> - <has_text text="genome_1_fna_gz"/> + <has_text text="test_genome_G021783475"/> </assert_contents> </element> </output_collection> - </test> - --> + </test> --> + <!-- GTDB-Tk databases are far too large to test currently --> <test expect_failure="true"> <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
--- a/macros.xml Mon Sep 15 08:23:32 2025 +0000 +++ b/macros.xml Wed Dec 10 18:58:28 2025 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">2.5.2</token> + <token name="@TOOL_VERSION@">2.6.0</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">22.05</token> <xml name="requirements">
--- a/test-data/gtdbtk_database_versioned.loc.test Mon Sep 15 08:23:32 2025 +0000 +++ b/test-data/gtdbtk_database_versioned.loc.test Wed Dec 10 18:58:28 2025 +0000 @@ -23,4 +23,5 @@ #drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 pplacer/ #drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 radii/ #drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 taxonomy/ -gtdbtk214 GTDB-Tk database 214 ${__HERE__}/gtdbtk214/ +gtdbtk214 GTDB-Tk database 214 214 ${__HERE__}/gtdbtk214/ +gtdbtk226 GTDB-Tk database 226 226 ${__HERE__}/226.0-data/
