changeset 0:2ea34d422df4 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_sylph_database commit 6ac2834ca0a21512c9986e2fb8d0dcda1c4b0e18
author bgruening
date Fri, 23 May 2025 09:47:40 +0000
parents
children
files data_manager/data_manager_sylph_database.xml data_manager_conf.xml test-data/sylph_databases.loc.test tool-data/sylph_databases.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 6 files changed, 221 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_sylph_database.xml	Fri May 23 09:47:40 2025 +0000
@@ -0,0 +1,165 @@
+<tool id="data_manager_sylph_database" name="Download pre-built sylph databases" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="@PROFILE@">
+    <description></description>
+    <macros>
+        <token name="@TOOL_VERSION@">0.8.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@PROFILE@">23.0</token>
+        <token name="@DB_VERSION@">1</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">sylph</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+#set $file = str($db_link).split('/')[-1]
+
+mkdir -p '$out_file.extra_files_path' &&
+wget $db_link &&
+mv '$file' '$out_file.extra_files_path' &&
+cp '$dmjson' '$out_file'
+    ]]></command>
+    <configfiles>
+        <configfile name="dmjson"><![CDATA[
+#from datetime import date
+#set $file = str($db_link).split('/')[-1]
+
+{
+    "data_tables":{
+    "sylph_databases":[
+    {
+        "value": "sylph_downloaded_#echo date.today().strftime('%d%m%Y')#_${file}",
+        "name": "sylph database ${file}",
+        "path": "${file}",
+        "clade": "${clades}",
+        "sylph_tax_identifier": "${name}",
+        "version": "@DB_VERSION@"
+    }
+    ]
+}
+        }]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="db_type">
+            <param argument="clades" type="select" label="Type of Databases">
+                <option value="prokaryote">Prokaryote databases</option>
+                <option value="eukaryote">Eukaryote databases</option>
+                <option value="virus">Virus databases</option>
+            </param>
+            <when value="prokaryote">
+                <conditional name="db_name">
+                    <param argument="name" type="select" label="Database">
+                        <option value="GTDB_r220">GTDB r220</option>
+                        <option value="GTDB_r214">GTDB r214</option>
+                        <option value="OceanDNA">Ocean DNA</option>
+                        <option value="SoilSMAG">Soil MAGs</option>
+                    </param>
+                    <when value="GTDB_r220">
+                        <param name="db_link" type="select" label="Sylph prokaryote databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/gtdb-r220-c200-dbv1.syldb" selected="true">GTDB-r220 (April 2024) -c 200, more sensitive database</option>
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/gtdb-r220-c1000-dbv1.syldb">GTDB-r220 (April 2024) -c 1000 more efficient, less sensitive database</option>
+                        </param>
+                    </when>
+                    <when value="GTDB_r214">
+                        <param name="db_link" type="select" label="Sylph prokaryote databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/v0.3-c200-gtdb-r214.syldb">GTDB-r214 (April 2023) -c 200, more sensitive database</option>
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/v0.3-c1000-gtdb-r214.syldb">GTDB-r214 (April 2023) -c 1000 more efficient, less sensitive database</option>
+                        </param>
+                    </when>
+                    <when value="OceanDNA">
+                        <param name="db_link" type="select" label="Sylph prokaryote databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/OceanDNA-c200-v0.3.syldb">OceanDNA - ocean MAGs from Nishimura and Yoshizawa, -c 200, more sensitive database</option>
+                        </param>
+                    </when>
+                    <when value="SoilSMAG">
+                        <param name="db_link" type="select" label="Sylph prokaryote databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/SMAG-c200-v0.3.syldb">SoilSMAG (SMAG) from Ma et al. -c 200, more sensitive database</option>        
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+            <when value="eukaryote">
+                <conditional name="db_name">
+                    <param argument="name" type="select" label="Database">
+                        <option value="FungiRefSeq-2024-07-25">Refseq fungi 2024-07-25</option>
+                        <option value="TaraEukaryoticSMAG">TARA eukaryotic SMAGs</option>
+                    </param>
+                    <when value="FungiRefSeq-2024-07-25">
+                        <param name="db_link" type="select" label="Sylph eukaryote databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/fungi-refseq-2024-07-25-c200-v0.3.syldb" selected="true">FungiRefSeq-2024-07-25 - Refseq fungi representative genomes collected on 2024-07-25</option>
+                        </param>
+                    </when>
+                    <when value="TaraEukaryoticSMAG">
+                        <param name="db_link" type="select" label="Sylph eukaryote databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/tara-eukmags-c200-v0.3.syldb">TaraEukaryoticSMAG - TARA eukaryotic SMAGs from Delmont et al. -c 200, more sensitive database</option>   
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+            <when value="virus">
+                <conditional name="db_name">
+                    <param argument="name" type="select" label="Database">
+                        <option value="IMGVR_4.1">IMG/VR 4.1</option>
+                    </param>
+                    <when value="IMGVR_4.1">
+                        <param name="db_link" type="select" label="Sylph virus databases">
+                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/imgvr_c200_v0.3.0.syldb" selected="true">IMGVR_4.1 high-confidence viral OTU genomes, -c 200, more sensitive database</option>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" label="${tool.name}"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <conditional name="db_type">
+                <param name="clades" value="eukaryote"/>
+                <conditional name="db_name">
+                    <param name="name" value="FungiRefSeq-2024-07-25"/>
+                    <param name="db_link" value="http://faust.compbio.cs.cmu.edu/sylph-stuff/fungi-refseq-2024-07-25-c200-v0.3.syldb"/>
+                </conditional>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text='"sylph_databases":'/>
+                    <has_text_matching expression='"value": "sylph_downloaded_[0-9]{8}_fungi-refseq-2024-07-25-c200-v0.3.syldb"'/>
+                    <has_text text='"name": "sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb"'/>
+                    <has_text_matching expression='"path": "fungi-refseq-2024-07-25-c200-v0.3.syldb"'/>
+                    <has_text text='"clade": "eukaryote"'/>
+                    <has_text text='"sylph_tax_identifier": "FungiRefSeq-2024-07-25"'/>
+                    <has_text text='"version": "@DB_VERSION@"'/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="db_type">
+                <param name="clades" value="prokaryote"/>
+                <conditional name="db_name">
+                    <param name="name" value="OceanDNA"/>
+                    <param name="db_link" value="http://faust.compbio.cs.cmu.edu/sylph-stuff/OceanDNA-c200-v0.3.syldb"/>
+                </conditional>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text='"sylph_databases":'/>
+                    <has_text_matching expression='"value": "sylph_downloaded_[0-9]{8}_OceanDNA-c200-v0.3.syldb"'/>
+                    <has_text text='"name": "sylph database OceanDNA-c200-v0.3.syldb"'/>
+                    <has_text_matching expression='"path": "OceanDNA-c200-v0.3.syldb"'/>
+                    <has_text text='"clade": "prokaryote"'/>
+                    <has_text text='"sylph_tax_identifier": "OceanDNA"'/>
+                    <has_text text='"version": "@DB_VERSION@"'/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+This tool downloads pre-built sylph databases.
+-c: the compression parameter. Memory/runtime scale like 1/c; higher c is faster but less sensitive at low coverage.
+Default c = 200. The -c for genomes must be than sup or = the -c for reads (strict sup is allowed)
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/s41587-024-02412-y</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Fri May 23 09:47:40 2025 +0000
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_sylph_database.xml" id="data_manager_sylph_database">
+        <data_table name="sylph_databases">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value"/>  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="name"/>  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="path" output_ref="out_file">
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">sylph/${path}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/sylph/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+                <column name="clade"/>
+                <column name="sylph_tax_identifier"/>
+                <column name="version"/>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sylph_databases.loc.test	Fri May 23 09:47:40 2025 +0000
@@ -0,0 +1,12 @@
+#This is a tab separated file describing the location of StarAMR databases 
+#used for the StarAMR tool
+#
+#file has this format (white space characters are TAB characters)
+#
+#The columns are:
+#value	name	path    version
+#
+#For example
+#staramr_downloaded_20241004_resfinder_d1e607b_pointfinder_694919f_plasmidfinder_3e77502	ResFinder: tag 4.6.0, commit d1e607b, 2024-08-06 - PointFinder: tag 4.1.1, commit 694919f, 2024-08-08 - PlasmidFinder: commit 3e77502, 2024-03-07 (downloaded 20241004)	/path/to/data
+sylph_downloaded_22052025_fungi-refseq-2024-07-25-c200-v0.3.syldb	sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb	/tmp/tmpwo1n9334/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb	eukaryote	FungiRefSeq-2024-07-25	1
+sylph_downloaded_22052025_OceanDNA-c200-v0.3.syldb	sylph database OceanDNA-c200-v0.3.syldb	/tmp/tmpwo1n9334/galaxy-dev/tool-data/sylph/OceanDNA-c200-v0.3.syldb	prokaryote	OceanDNA	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/sylph_databases.loc.sample	Fri May 23 09:47:40 2025 +0000
@@ -0,0 +1,10 @@
+#This is a tab separated file describing the location of StarAMR databases 
+#used for the StarAMR tool
+#
+#file has this format (white space characters are TAB characters)
+#
+#The columns are:
+#value	name	path    version
+#
+#For example
+#staramr_downloaded_20241004_resfinder_d1e607b_pointfinder_694919f_plasmidfinder_3e77502	ResFinder: tag 4.6.0, commit d1e607b, 2024-08-06 - PointFinder: tag 4.1.1, commit 694919f, 2024-08-08 - PlasmidFinder: commit 3e77502, 2024-03-07 (downloaded 20241004)	/path/to/data
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri May 23 09:47:40 2025 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="sylph_databases" comment_char="#">
+        <columns>value, name, path, clade, sylph_tax_identifier, version</columns>
+        <file path="tool-data/sylph_databases.loc"/>
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri May 23 09:47:40 2025 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="sylph_databases" comment_char="#">
+        <columns>value, name, path, clade, sylph_tax_identifier, version</columns>
+        <file path="${__HERE__}/test-data/sylph_databases.loc.test"/>
+    </table>
+</tables>
\ No newline at end of file