changeset 2:151e8f99baaa draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gtdbtk commit 85683b4edaf9ec44550ff0de861023f794bf35f2
author iuc
date Fri, 13 Sep 2024 11:15:22 +0000
parents ae36206eeeba
children 84e250e98564
files gtdbtk_classify_wf.xml macros.xml test-data/gtdbtk_database.loc test-data/gtdbtk_database_metadata_versioned.loc tool-data/gtdbtk_database.loc.sample tool-data/gtdbtk_database_metadata_versioned.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 8 files changed, 91 insertions(+), 89 deletions(-) [+]
line wrap: on
line diff
--- a/gtdbtk_classify_wf.xml	Thu Feb 16 23:35:41 2023 +0000
+++ b/gtdbtk_classify_wf.xml	Fri Sep 13 11:15:22 2024 +0000
@@ -3,15 +3,15 @@
     <macros>
         <import>macros.xml</import>
     </macros>
+    <xrefs>
+        <xref type="bio.tools">GTDB-Tk</xref>
+    </xrefs>
     <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
 #import re
 
 mkdir input_dir &&
 mkdir output_dir &&
-mkdir output_tsv_dir &&
-mkdir output_newick_dir &&
-mkdir output_fasta_dir &&
 #for $i in $input:
     ## gtdbtk uses the file extension to determine the input format.
     #set ext = "." + $i.ext
@@ -38,7 +38,8 @@
     <inputs>
         <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/>
         <param name="gtdbtk_db" type="select" label="GTDB-Tk database" help="This version of GTDB-Tk required GTDB version R207_v2. Please contact your service administrator if this version is not available to select.">
-            <options from_data_table="gtdbtk_database">
+            <options from_data_table="gtdbtk_database_versioned">
+                <filter type="regexp" column="version" value="^v2(07|14)$"/>
                 <validator type="no_options" message="No locally cached GTDB-Tk database is available"/>
             </options>
         </param>
@@ -51,40 +52,40 @@
     </inputs>
     <outputs>
         <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)">
-            <filter>advanced['output_process_log'] == 'yes'</filter>
+            <filter>advanced['output_process_log']</filter>
         </data>
-        <collection name="output_tsv" type="list" format="tsv" label="${tool.name} on ${on_string} (tsv)">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="output_dir"/>
+        <collection name="output_align" type="list" format="fasta.gz" label="${tool.name} on ${on_string} (align)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta\.gz" ext="fasta.gz" directory="output_dir/align"/>
         </collection>
-        <collection name="output_newick" type="list" format="newick" label="${tool.name} on ${on_string} (newick)">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tree" ext="newick" directory="output_dir"/>
+        <collection name="output_identfy" type="list" format="tsv" label="${tool.name} on ${on_string} (identify)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="output_dir/identify"/>
         </collection>
-        <collection name="output_fasta" type="list" format="fasta" label="${tool.name} on ${on_string} (fasta)">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" ext="fasta" directory="output_dir"/>
+        <collection name="output_classify" type="list" format="newick" label="${tool.name} on ${on_string} (classify)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tree" ext="newick" directory="output_dir/classify"/>
+        </collection>
+        <collection name="output_summary" type="list" format="tsv" label="${tool.name} on ${on_string} (summary)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="output_dir"/>
         </collection>
     </outputs>
     <tests>
         <!-- The commented test here is valid if we could store the GTDB-Tk database -->
         <!--
-        <test expect_num_outputs="3">
+        <test expect_num_outputs="4">
             <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
-            <param name="gtdbtk_db" value="gtdbtk202"/>
-            <output_collection name="output_tsv" type="list" count="6">
-                <element name="gtdbtk.ar122.filtered" ftype="tsv">
+            <param name="gtdbtk_db" value="gtdbtk214"/>
+            <output_collection name="output_summary" type="list" count="1">
+                <element name="gtdbtk.ar53.summary" ftype="tsv">
                     <assert_contents>
-                        <has_size value="0"/>
+                        <has_text text="user_genome"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar122.markers_summary" ftype="tsv">
+            </output_collection>
+            <output_collection name="output_identfy" type="list" count="4">
+                <element name="gtdbtk.ar53.markers_summary" ftype="tsv">
                     <assert_contents>
                         <has_text text="number_unique_genes"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar122.summary" ftype="tsv">
-                    <assert_contents>
-                        <has_text text="genome_1_fna_gz"/>
-                    </assert_contents>
-                </element>
                 <element name="gtdbtk.bac120.markers_summary" ftype="tsv">
                     <assert_contents>
                         <has_text text="genome_1_fna_gz"/>
@@ -101,20 +102,20 @@
                     </assert_contents>
                 </element>
             </output_collection>
-            <output_collection name="output_newick" type="list" count="1">
-                <element name="gtdbtk.ar122.classify" ftype="newick">
+            <output_collection name="output_classify" type="list" count="1">
+                <element name="gtdbtk.ar53.classify" ftype="newick">
                     <assert_contents>
                         <has_text text="GB_GCA_"/>
                     </assert_contents>
                 </element>
             </output_collection>
-            <output_collection name="output_fasta" type="list" count="2">
-                <element name="gtdbtk.ar122.msa" ftype="fasta">
+            <output_collection name="output_align" type="list" count="2">
+                <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true">
                     <assert_contents>
                         <has_text text="GB_GCA_000008085"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar122.user_msa" ftype="fasta">
+                <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true">
                     <assert_contents>
                         <has_text text="genome_1_fna_gz"/>
                     </assert_contents>
@@ -125,7 +126,7 @@
         <!-- GTDB-Tk databases are far too large to test currently -->
         <test expect_failure="true">
             <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
-            <param name="gtdbtk_db" value="gtdbtk202"/>
+            <param name="gtdbtk_db" value="gtdbtk214"/>
             <assert_stderr>
                 <has_text text="Fatal error: Exit code 1"/>
             </assert_stderr>
@@ -158,3 +159,4 @@
     ]]></help>
     <expand macro="citations"/>
 </tool>
+
--- a/macros.xml	Thu Feb 16 23:35:41 2023 +0000
+++ b/macros.xml	Fri Sep 13 11:15:22 2024 +0000
@@ -1,7 +1,7 @@
 <macros>
-    <token name="@TOOL_VERSION@">2.2.2</token>
-    <token name="@VERSION_SUFFIX@">0</token>
-    <token name="@PROFILE@">20.09</token>
+    <token name="@TOOL_VERSION@">2.3.2</token>
+    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@PROFILE@">22.05</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">gtdbtk</requirement>
--- a/test-data/gtdbtk_database.loc	Thu Feb 16 23:35:41 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-# This is a sample file distributed with Galaxy that enables tools
-# to use a directory of GTDB-Tk databases.  The gtdbtk_databases.loc
-# file has this format (longer white space characters are TAB characters):
-#
-# <unique_build_id> <display_name>  <directory_path>
-#
-# So, for example, if you have the gtdbtk 202 stored in 
-# /depot/data2/galaxy/gtdbtk/202/, 
-# then the gtdbtk_databases.loc entry would look like this:
-#
-# release202    gtdbtk database release 202 /depot/data2/galaxy/gtdbtk/release202
-#
-# and your /depot/data2/galaxy/gtdbtk/release202 directory
-# would contain GTDB-Tk database files for release 202, sommething like this:
-#
-#drwxr-sr-x  3 gvk G-824019    4096 Apr 20  2021 fastani/
-#-rw-r--r--  1 gvk G-824019 4810764 Apr 22  2021 manifest.tsv
-#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 markers/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 masks/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 metadata/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 21  2021 mrca_red/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 msa/
-#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 pplacer/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 radii/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 taxonomy/
-gtdbtk202	GTDB-Tk database v202	${__HERE__}/gtdbtk202
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtdbtk_database_metadata_versioned.loc	Fri Sep 13 11:15:22 2024 +0000
@@ -0,0 +1,26 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use a directory of GTDB-Tk databases.  The gtdbtk_databases.loc
+# file has this format (longer white space characters are TAB characters):
+#
+# <unique_build_id> <display_name>   <version> <directory_path>
+#
+# So, for example, if you have the gtdbtk 202 stored in
+# /depot/data2/galaxy/gtdbtk/release202/,
+# then the gtdbtk_databases.loc entry would look like this:
+#
+# release202    gtdbtk database release 202    v202    /depot/data2/galaxy/gtdbtk/release202
+#
+# and your /depot/data2/galaxy/gtdbtk/release202 directory
+# would contain GTDB-Tk database files for release 202, something like this:
+#
+#drwxr-sr-x  3 gvk G-824019    4096 Apr 20  2021 fastani/
+#-rw-r--r--  1 gvk G-824019 4810764 Apr 22  2021 manifest.tsv
+#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 markers/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 masks/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 metadata/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 21  2021 mrca_red/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 msa/
+#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 pplacer/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 radii/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 taxonomy/
+gtdbtk214	GTDB-Tk database v214	v214	${__HERE__}/gtdbtk214/
--- a/tool-data/gtdbtk_database.loc.sample	Thu Feb 16 23:35:41 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-# This is a sample file distributed with Galaxy that enables tools
-# to use a directory of GTDB-Tk databases.  The gtdbtk_databases.loc
-# file has this format (longer white space characters are TAB characters):
-#
-# <unique_build_id> <display_name>  <directory_path>
-#
-# So, for example, if you have the gtdbtk 202 stored in 
-# /depot/data2/galaxy/gtdbtk/202/, 
-# then the gtdbtk_databases.loc entry would look like this:
-#
-# release202    gtdbtk database release 202 /depot/data2/galaxy/gtdbtk/release202
-#
-# and your /depot/data2/galaxy/gtdbtk/release202 directory
-# would contain GTDB-Tk database files for release 202, sommething like this:
-#
-#drwxr-sr-x  3 gvk G-824019    4096 Apr 20  2021 fastani/
-#-rw-r--r--  1 gvk G-824019 4810764 Apr 22  2021 manifest.tsv
-#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 markers/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 masks/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 metadata/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 21  2021 mrca_red/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 msa/
-#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 pplacer/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 radii/
-#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 taxonomy/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gtdbtk_database_metadata_versioned.loc.sample	Fri Sep 13 11:15:22 2024 +0000
@@ -0,0 +1,25 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use a directory of GTDB-Tk databases.  The gtdbtk_databases.loc
+# file has this format (longer white space characters are TAB characters):
+#
+# <unique_build_id> <display_name>   <version> <directory_path>
+#
+# So, for example, if you have the gtdbtk 202 stored in
+# /depot/data2/galaxy/gtdbtk/release202/,
+# then the gtdbtk_databases.loc entry would look like this:
+#
+# release202    gtdbtk database release 202    v202    /depot/data2/galaxy/gtdbtk/release202
+#
+# and your /depot/data2/galaxy/gtdbtk/release202 directory
+# would contain GTDB-Tk database files for release 202, something like this:
+#
+#drwxr-sr-x  3 gvk G-824019    4096 Apr 20  2021 fastani/
+#-rw-r--r--  1 gvk G-824019 4810764 Apr 22  2021 manifest.tsv
+#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 markers/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 masks/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 metadata/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 21  2021 mrca_red/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 msa/
+#drwxr-sr-x  4 gvk G-824019    4096 Apr 21  2021 pplacer/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 radii/
+#drwxr-sr-x  2 gvk G-824019    4096 Apr 20  2021 taxonomy/
--- a/tool_data_table_conf.xml.sample	Thu Feb 16 23:35:41 2023 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Sep 13 11:15:22 2024 +0000
@@ -1,7 +1,7 @@
 <tables>
     <!-- Locations of GTDB-Tk database versions 202 and higher -->
-    <table name="gtdbtk_database" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/gtdbtk_database.loc" />
+    <table name="gtdbtk_database_versioned" comment_char="#">
+        <columns>value, name, version, path</columns>
+        <file path="tool-data/gtdbtk_database_metadata_versioned.loc" />
     </table>
 </tables>
--- a/tool_data_table_conf.xml.test	Thu Feb 16 23:35:41 2023 +0000
+++ b/tool_data_table_conf.xml.test	Fri Sep 13 11:15:22 2024 +0000
@@ -1,7 +1,7 @@
 <tables>
-    <!-- Location of databases for gtdbtk version 202 and higher -->
-    <table name="gtdbtk_database" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="${__HERE__}/test-data/gtdbtk_database.loc" />
+    <!-- Locations of GTDB-Tk database versions 202 and higher -->
+    <table name="gtdbtk_database_versioned" comment_char="#">
+        <columns>value, name, version, path</columns>
+        <file path="${__HERE__}/test-data/gtdbtk_database_metadata_versioned.loc" />
     </table>
 </tables>