diff gtdbtk_classify_wf.xml @ 2:151e8f99baaa draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gtdbtk commit 85683b4edaf9ec44550ff0de861023f794bf35f2
author iuc
date Fri, 13 Sep 2024 11:15:22 +0000
parents ae36206eeeba
children 96b18c2e0b1d
line wrap: on
line diff
--- a/gtdbtk_classify_wf.xml	Thu Feb 16 23:35:41 2023 +0000
+++ b/gtdbtk_classify_wf.xml	Fri Sep 13 11:15:22 2024 +0000
@@ -3,15 +3,15 @@
     <macros>
         <import>macros.xml</import>
     </macros>
+    <xrefs> <!-- external registry cross-references for this tool -->
+        <xref type="bio.tools">GTDB-Tk</xref> <!-- bio.tools identifier -->
+    </xrefs>
     <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
 #import re
 
 mkdir input_dir &&
 mkdir output_dir &&
-mkdir output_tsv_dir &&
-mkdir output_newick_dir &&
-mkdir output_fasta_dir &&
 #for $i in $input:
     ## gtdbtk uses the file extension to determine the input format.
     #set ext = "." + $i.ext
@@ -38,7 +38,8 @@
     <inputs>
         <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/>
         <param name="gtdbtk_db" type="select" label="GTDB-Tk database" help="This version of GTDB-Tk required GTDB version R207_v2. Please contact your service administrator if this version is not available to select.">
-            <options from_data_table="gtdbtk_database">
+            <options from_data_table="gtdbtk_database_versioned">
+                <filter type="regexp" column="3" value="^v2(07|14)$"/>
                 <validator type="no_options" message="No locally cached GTDB-Tk database is available"/>
             </options>
         </param>
@@ -51,40 +52,40 @@
     </inputs>
     <outputs>
         <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)">
-            <filter>advanced['output_process_log'] == 'yes'</filter>
+            <filter>advanced['output_process_log']</filter>
         </data>
-        <collection name="output_tsv" type="list" format="tsv" label="${tool.name} on ${on_string} (tsv)">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="output_dir"/>
+        <collection name="output_align" type="list" format="fasta.gz" label="${tool.name} on ${on_string} (align)"> <!-- gzipped FASTA files discovered in output_dir/align -->
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta\.gz" ext="fasta.gz" directory="output_dir/align"/> <!-- both dots escaped so each matches a literal '.', not any character -->
         </collection>
-        <collection name="output_newick" type="list" format="newick" label="${tool.name} on ${on_string} (newick)">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tree" ext="newick" directory="output_dir"/>
+        <collection name="output_identfy" type="list" format="tsv" label="${tool.name} on ${on_string} (identify)"> <!-- NOTE(review): name is spelled "output_identfy" while the label says "identify"; renaming would change the output's public name, confirm before fixing -->
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="output_dir/identify"/> <!-- element name is the file name up to ".tsv" -->
         </collection>
-        <collection name="output_fasta" type="list" format="fasta" label="${tool.name} on ${on_string} (fasta)">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" ext="fasta" directory="output_dir"/>
+        <collection name="output_classify" type="list" format="newick" label="${tool.name} on ${on_string} (classify)"> <!-- ".tree" files from output_dir/classify, stored with the newick datatype -->
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tree" ext="newick" directory="output_dir/classify"/> <!-- element name is the file name up to ".tree" -->
+        </collection>
+        <collection name="output_summary" type="list" format="tsv" label="${tool.name} on ${on_string} (summary)"> <!-- ".tsv" files matched in output_dir itself -->
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="output_dir"/> <!-- element name is the file name up to ".tsv" -->
         </collection>
     </outputs>
     <tests>
         <!-- The commented test here is valid if we could store the GTDB-Tk database -->
         <!--
-        <test expect_num_outputs="3">
+        <test expect_num_outputs="4">
             <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
-            <param name="gtdbtk_db" value="gtdbtk202"/>
-            <output_collection name="output_tsv" type="list" count="6">
-                <element name="gtdbtk.ar122.filtered" ftype="tsv">
+            <param name="gtdbtk_db" value="gtdbtk214"/>
+            <output_collection name="output_summary" type="list" count="1">
+                <element name="gtdbtk.ar53.summary" ftype="tsv">
                     <assert_contents>
-                        <has_size value="0"/>
+                        <has_text text="user_genome"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar122.markers_summary" ftype="tsv">
+            </output_collection>
+            <output_collection name="output_identfy" type="list" count="4">
+                <element name="gtdbtk.ar53.markers_summary" ftype="tsv">
                     <assert_contents>
                         <has_text text="number_unique_genes"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar122.summary" ftype="tsv">
-                    <assert_contents>
-                        <has_text text="genome_1_fna_gz"/>
-                    </assert_contents>
-                </element>
                 <element name="gtdbtk.bac120.markers_summary" ftype="tsv">
                     <assert_contents>
                         <has_text text="genome_1_fna_gz"/>
@@ -101,20 +102,20 @@
                     </assert_contents>
                 </element>
             </output_collection>
-            <output_collection name="output_newick" type="list" count="1">
-                <element name="gtdbtk.ar122.classify" ftype="newick">
+            <output_collection name="output_classify" type="list" count="1">
+                <element name="gtdbtk.ar53.classify" ftype="newick">
                     <assert_contents>
                         <has_text text="GB_GCA_"/>
                     </assert_contents>
                 </element>
             </output_collection>
-            <output_collection name="output_fasta" type="list" count="2">
-                <element name="gtdbtk.ar122.msa" ftype="fasta">
+            <output_collection name="output_align" type="list" count="2">
+                <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true">
                     <assert_contents>
                         <has_text text="GB_GCA_000008085"/>
                     </assert_contents>
                 </element>
-                <element name="gtdbtk.ar122.user_msa" ftype="fasta">
+                <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true">
                     <assert_contents>
                         <has_text text="genome_1_fna_gz"/>
                     </assert_contents>
@@ -125,7 +126,7 @@
         <!-- GTDB-Tk databases are far too large to test currently -->
         <test expect_failure="true">
             <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/>
-            <param name="gtdbtk_db" value="gtdbtk202"/>
+            <param name="gtdbtk_db" value="gtdbtk214"/>
             <assert_stderr>
                 <has_text text="Fatal error: Exit code 1"/>
             </assert_stderr>
@@ -158,3 +159,4 @@
     ]]></help>
     <expand macro="citations"/>
 </tool>
+