Mercurial > repos > iuc > gtdbtk_classify_wf
changeset 2:151e8f99baaa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gtdbtk commit 85683b4edaf9ec44550ff0de861023f794bf35f2
| author | iuc |
|---|---|
| date | Fri, 13 Sep 2024 11:15:22 +0000 |
| parents | ae36206eeeba |
| children | 84e250e98564 |
| files | gtdbtk_classify_wf.xml macros.xml test-data/gtdbtk_database.loc test-data/gtdbtk_database_metadata_versioned.loc tool-data/gtdbtk_database.loc.sample tool-data/gtdbtk_database_metadata_versioned.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
| diffstat | 8 files changed, 91 insertions(+), 89 deletions(-) [+] |
line wrap: on
line diff
--- a/gtdbtk_classify_wf.xml Thu Feb 16 23:35:41 2023 +0000 +++ b/gtdbtk_classify_wf.xml Fri Sep 13 11:15:22 2024 +0000 @@ -3,15 +3,15 @@ <macros> <import>macros.xml</import> </macros> + <xrefs> + <xref type="bio.tools">GTDB-Tk</xref> + </xrefs> <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ #import re mkdir input_dir && mkdir output_dir && -mkdir output_tsv_dir && -mkdir output_newick_dir && -mkdir output_fasta_dir && #for $i in $input: ## gtdbtk uses the file extension to determine the input format. #set ext = "." + $i.ext @@ -38,7 +38,8 @@ <inputs> <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/> <param name="gtdbtk_db" type="select" label="GTDB-Tk database" help="This version of GTDB-Tk required GTDB version R207_v2. Please contact your service administrator if this version is not available to select."> - <options from_data_table="gtdbtk_database"> + <options from_data_table="gtdbtk_database_versioned"> + <filter type="regexp" column="3" value="^v2(07|14)$"/> <validator type="no_options" message="No locally cached GTDB-Tk database is available"/> </options> </param> @@ -51,40 +52,40 @@ </inputs> <outputs> <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)"> - <filter>advanced['output_process_log'] == 'yes'</filter> + <filter>advanced['output_process_log']</filter> </data> - <collection name="output_tsv" type="list" format="tsv" label="${tool.name} on ${on_string} (tsv)"> - <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir"/> + <collection name="output_align" type="list" format="fasta.gz" label="${tool.name} on ${on_string} (align)"> + <discover_datasets pattern="(?P<designation>.+)\.fasta.gz" ext="fasta.gz" directory="output_dir/align"/> </collection> - <collection name="output_newick" type="list" format="newick" label="${tool.name} on ${on_string} (newick)"> - <discover_datasets 
pattern="(?P<designation>.+)\.tree" ext="newick" directory="output_dir"/> + <collection name="output_identfy" type="list" format="tsv" label="${tool.name} on ${on_string} (identify)"> + <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir/identify"/> </collection> - <collection name="output_fasta" type="list" format="fasta" label="${tool.name} on ${on_string} (fasta)"> - <discover_datasets pattern="(?P<designation>.+)\.fasta" ext="fasta" directory="output_dir"/> + <collection name="output_classify" type="list" format="newick" label="${tool.name} on ${on_string} (classify)"> + <discover_datasets pattern="(?P<designation>.+)\.tree" ext="newick" directory="output_dir/classify"/> + </collection> + <collection name="output_summary" type="list" format="tsv" label="${tool.name} on ${on_string} (summary)"> + <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir"/> </collection> </outputs> <tests> <!-- The commented test here is valid if we could store the GTDB-Tk database --> <!-- - <test expect_num_outputs="3"> + <test expect_num_outputs="4"> <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> - <param name="gtdbtk_db" value="gtdbtk202"/> - <output_collection name="output_tsv" type="list" count="6"> - <element name="gtdbtk.ar122.filtered" ftype="tsv"> + <param name="gtdbtk_db" value="gtdbtk214"/> + <output_collection name="output_summary" type="list" count="1"> + <element name="gtdbtk.ar53.summary" ftype="tsv"> <assert_contents> - <has_size value="0"/> + <has_text text="user_genome"/> </assert_contents> </element> - <element name="gtdbtk.ar122.markers_summary" ftype="tsv"> + </output_collection> + <output_collection name="output_identfy" type="list" count="4"> + <element name="gtdbtk.ar53.markers_summary" ftype="tsv"> <assert_contents> <has_text text="number_unique_genes"/> </assert_contents> </element> - <element name="gtdbtk.ar122.summary" ftype="tsv"> - <assert_contents> - <has_text 
text="genome_1_fna_gz"/> - </assert_contents> - </element> <element name="gtdbtk.bac120.markers_summary" ftype="tsv"> <assert_contents> <has_text text="genome_1_fna_gz"/> @@ -101,20 +102,20 @@ </assert_contents> </element> </output_collection> - <output_collection name="output_newick" type="list" count="1"> - <element name="gtdbtk.ar122.classify" ftype="newick"> + <output_collection name="output_classify" type="list" count="1"> + <element name="gtdbtk.ar53.classify" ftype="newick"> <assert_contents> <has_text text="GB_GCA_"/> </assert_contents> </element> </output_collection> - <output_collection name="output_fasta" type="list" count="2"> - <element name="gtdbtk.ar122.msa" ftype="fasta"> + <output_collection name="output_align" type="list" count="2"> + <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true"> <assert_contents> <has_text text="GB_GCA_000008085"/> </assert_contents> </element> - <element name="gtdbtk.ar122.user_msa" ftype="fasta"> + <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true"> <assert_contents> <has_text text="genome_1_fna_gz"/> </assert_contents> @@ -125,7 +126,7 @@ <!-- GTDB-Tk databases are far too large to test currently --> <test expect_failure="true"> <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> - <param name="gtdbtk_db" value="gtdbtk202"/> + <param name="gtdbtk_db" value="gtdbtk214"/> <assert_stderr> <has_text text="Fatal error: Exit code 1"/> </assert_stderr> @@ -158,3 +159,4 @@ ]]></help> <expand macro="citations"/> </tool> +
--- a/macros.xml Thu Feb 16 23:35:41 2023 +0000 +++ b/macros.xml Fri Sep 13 11:15:22 2024 +0000 @@ -1,7 +1,7 @@ <macros> - <token name="@TOOL_VERSION@">2.2.2</token> - <token name="@VERSION_SUFFIX@">0</token> - <token name="@PROFILE@">20.09</token> + <token name="@TOOL_VERSION@">2.3.2</token> + <token name="@VERSION_SUFFIX@">2</token> + <token name="@PROFILE@">22.05</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">gtdbtk</requirement>
--- a/test-data/gtdbtk_database.loc Thu Feb 16 23:35:41 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -# This is a sample file distributed with Galaxy that enables tools -# to use a directory of GTDB-Tk databases. The gtdbtk_databases.loc -# file has this format (longer white space characters are TAB characters): -# -# <unique_build_id> <display_name> <directory_path> -# -# So, for example, if you have the gtdbtk 202 stored in -# /depot/data2/galaxy/gtdbtk/202/, -# then the gtdbtk_databases.loc entry would look like this: -# -# release202 gtdbtk database release 202 /depot/data2/galaxy/gtdbtk/release202 -# -# and your /depot/data2/galaxy/gtdbtk/release202 directory -# would contain GTDB-Tk database files for release 202, sommething like this: -# -#drwxr-sr-x 3 gvk G-824019 4096 Apr 20 2021 fastani/ -#-rw-r--r-- 1 gvk G-824019 4810764 Apr 22 2021 manifest.tsv -#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 markers/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 masks/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 metadata/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 21 2021 mrca_red/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 msa/ -#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 pplacer/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 radii/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 taxonomy/ -gtdbtk202 GTDB-Tk database v202 ${__HERE__}/gtdbtk202
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtdbtk_database_metadata_versioned.loc Fri Sep 13 11:15:22 2024 +0000 @@ -0,0 +1,26 @@ +# This is a sample file distributed with Galaxy that enables tools +# to use a directory of GTDB-Tk databases. The gtdbtk_databases.loc +# file has this format (longer white space characters are TAB characters): +# +# <unique_build_id> <display_name> <version> <directory_path> +# +# So, for example, if you have the gtdbtk 202 stored in +# /depot/data2/galaxy/gtdbtk/202/, +# then the gtdbtk_databases.loc entry would look like this: +# +# release202 gtdbtk database release 202 /depot/data2/galaxy/gtdbtk/release202 +# +# and your /depot/data2/galaxy/gtdbtk/release202 directory +# would contain GTDB-Tk database files for release 202, something like this: +# +#drwxr-sr-x 3 gvk G-824019 4096 Apr 20 2021 fastani/ +#-rw-r--r-- 1 gvk G-824019 4810764 Apr 22 2021 manifest.tsv +#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 markers/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 masks/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 metadata/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 21 2021 mrca_red/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 msa/ +#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 pplacer/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 radii/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 taxonomy/ +gtdbtk214 GTDB-Tk database v214 ${__HERE__}/gtdbtk214/
--- a/tool-data/gtdbtk_database.loc.sample Thu Feb 16 23:35:41 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -# This is a sample file distributed with Galaxy that enables tools -# to use a directory of GTDB-Tk databases. The gtdbtk_databases.loc -# file has this format (longer white space characters are TAB characters): -# -# <unique_build_id> <display_name> <directory_path> -# -# So, for example, if you have the gtdbtk 202 stored in -# /depot/data2/galaxy/gtdbtk/202/, -# then the gtdbtk_databases.loc entry would look like this: -# -# release202 gtdbtk database release 202 /depot/data2/galaxy/gtdbtk/release202 -# -# and your /depot/data2/galaxy/gtdbtk/release202 directory -# would contain GTDB-Tk database files for release 202, sommething like this: -# -#drwxr-sr-x 3 gvk G-824019 4096 Apr 20 2021 fastani/ -#-rw-r--r-- 1 gvk G-824019 4810764 Apr 22 2021 manifest.tsv -#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 markers/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 masks/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 metadata/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 21 2021 mrca_red/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 msa/ -#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 pplacer/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 radii/ -#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 taxonomy/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gtdbtk_database_metadata_versioned.loc.sample Fri Sep 13 11:15:22 2024 +0000 @@ -0,0 +1,25 @@ +# This is a sample file distributed with Galaxy that enables tools +# to use a directory of GTDB-Tk databases. The gtdbtk_databases.loc +# file has this format (longer white space characters are TAB characters): +# +# <unique_build_id> <display_name> <version> <directory_path> +# +# So, for example, if you have the gtdbtk 202 stored in +# /depot/data2/galaxy/gtdbtk/202/, +# then the gtdbtk_databases.loc entry would look like this: +# +# release202 gtdbtk database release 202 /depot/data2/galaxy/gtdbtk/release202 +# +# and your /depot/data2/galaxy/gtdbtk/release202 directory +# would contain GTDB-Tk database files for release 202, something like this: +# +#drwxr-sr-x 3 gvk G-824019 4096 Apr 20 2021 fastani/ +#-rw-r--r-- 1 gvk G-824019 4810764 Apr 22 2021 manifest.tsv +#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 markers/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 masks/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 metadata/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 21 2021 mrca_red/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 msa/ +#drwxr-sr-x 4 gvk G-824019 4096 Apr 21 2021 pplacer/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 radii/ +#drwxr-sr-x 2 gvk G-824019 4096 Apr 20 2021 taxonomy/
--- a/tool_data_table_conf.xml.sample Thu Feb 16 23:35:41 2023 +0000 +++ b/tool_data_table_conf.xml.sample Fri Sep 13 11:15:22 2024 +0000 @@ -1,7 +1,7 @@ <tables> <!-- Locations of GTDB-Tk database versions 202 and higher --> - <table name="gtdbtk_database" comment_char="#"> - <columns>value, name, path</columns> - <file path="tool-data/gtdbtk_database.loc" /> + <table name="gtdbtk_database_versioned" comment_char="#"> + <columns>value, name, version, path</columns> + <file path="tool-data/gtdbtk_database_metadata_versioned.loc" /> </table> </tables>
--- a/tool_data_table_conf.xml.test Thu Feb 16 23:35:41 2023 +0000 +++ b/tool_data_table_conf.xml.test Fri Sep 13 11:15:22 2024 +0000 @@ -1,7 +1,7 @@ <tables> - <!-- Location of databases for gtdbtk version 202 and higher --> - <table name="gtdbtk_database" comment_char="#"> - <columns>value, name, path</columns> - <file path="${__HERE__}/test-data/gtdbtk_database.loc" /> + <!-- Locations of GTDB-Tk database versions 202 and higher --> + <table name="gtdbtk_database_versioned" comment_char="#"> + <columns>value, name, version, path</columns> + <file path="${__HERE__}/test-data/gtdbtk_database_metadata_versioned.loc" /> </table> </tables>
