Repository 'data_manager_fetch_plasmidfinder'
hg clone https://eddie.galaxyproject.org/repos/pimarin/data_manager_fetch_plasmidfinder

Changeset 0:8ad77a9dc91d (2023-02-14)
Next changeset 1:9ef108e34063 (2023-02-15)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit f3383fdb9a17d1b69d05547cdb96534a5f762bec-dirty
added:
data_manager/macro.xml
data_manager/plasmidfinder_fetch_database.py
data_manager/plasmidfinder_fetch_database.xml
data_manager_conf.xml
plasmidfinder_database.loc
test-data/plasmidfinder.loc.test
test-data/plasmidfinder_test_data_manager_2.1.json
tool-data/plasmidfinder_database.loc
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 8ad77a9dc91d data_manager/macro.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macro.xml Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,13 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.1.6</token>
+    <token name="@REQUESTS_VERSION@">2.27.1</token>
+    <token name="@PYTHON_VERSION@">3.8</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@PYTHON_VERSION@">python</requirement>
+            <requirement type="package" version="@REQUESTS_VERSION@">requests</requirement>
+        </requirements>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 8ad77a9dc91d data_manager/plasmidfinder_fetch_database.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/plasmidfinder_fetch_database.py Tue Feb 14 14:25:43 2023 +0000
[
b'@@ -0,0 +1,212 @@\n+import argparse\n+import json\n+import os\n+import tarfile\n+from datetime import datetime\n+from pathlib import Path\n+\n+import requests\n+\n+\n+class GetPlasmidfinderDataManager:\n+    """\n+    Create the json file with database information for galaxy data manager\n+    """\n+\n+    def __init__(self,\n+                 plasmidfinder_database="plasmidfinder_database",\n+                 db_name="plasmidfinder-db",\n+                 plasmidfinder_version="latest"):\n+        self.data_table_name = plasmidfinder_database\n+        self._db_name = db_name\n+        self._plasmidfinder_version = plasmidfinder_version\n+        self._plasmidfinder_date_version = None\n+        self.data_table_entry = None\n+        self.plasmidfinder_table_list = None\n+        self._commit_number = None\n+\n+    def get_data_table_format(self):\n+        """\n+        Skeleton of a data_table format\n+        return: a data table formatted for json output\n+        """\n+        self.data_table_entry = {\n+            "data_tables": {\n+                self.data_table_name: {}\n+            }\n+        }\n+        return self.data_table_entry\n+\n+    def get_data_manager(self):\n+        """\n+        Create the empty data table format and add all the information into\n+        Commit number is added if latest is required instead of version number\n+        return: The data table with database information\n+        """\n+        self.plasmidfinder_table_list = self.get_data_table_format()\n+        if self._plasmidfinder_version == "latest":\n+            version_value = self._commit_number\n+        else:\n+            version_value = self._plasmidfinder_version\n+        plasmidfinder_value = f"plasmidfinder_{self._commit_number}" \\\n+                              f"_{self._plasmidfinder_date_version}"\n+        plasmidfinder_name = f"{version_value}" \\\n+                             f"_{self._plasmidfinder_date_version}"\n+        data_info = 
dict(value=plasmidfinder_value,\n+                         name=plasmidfinder_name,\n+                         date=self._plasmidfinder_date_version,\n+                         path=self._db_name)\n+        self.plasmidfinder_table_list["data_tables"][self.data_table_name] = [data_info]\n+        return self.plasmidfinder_table_list\n+\n+\n+class DownloadPlasmidfinderDatabase(GetPlasmidfinderDataManager):\n+    """\n+    Download the plasmidfinder database from the bitbucket repository.\n+    Build the data manager info for galaxy\n+    """\n+\n+    def __init__(self,\n+                 output_dir=Path.cwd(),\n+                 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/",\n+                 db_name="plasmidfinder-db",\n+                 db_tmp="tmp_database",\n+                 plasmidfinder_version="latest",\n+                 json_file_path=None,\n+                 date_version=datetime.now().strftime("%Y-%m-%d")):\n+\n+        super().__init__()\n+        self.json_file_path = json_file_path\n+        self._output_dir = output_dir\n+        self._plasmidfinder_url = plasmidfinder_url\n+        self._temporary_folder = db_tmp\n+        self._db_name = db_name\n+        self._db_name_tar = f\'{db_name}.gz\'\n+        self._plasmidfinder_version = plasmidfinder_version\n+        self._plasmidfinder_date_version = date_version\n+        self._commit_number = None\n+\n+    def extract_db_commit(self, request_header, title_name="content-disposition"):\n+        """\n+        Extract the commit if to add the information as identifier of the download\n+        @request_header: a request object obtained from requests.get()\n+        @title_name: the tag to search in the header of the requests object\n+        return: the value of the commit\n+        """\n+        db_info = request_header.headers[title_name]\n+        commit_number = db_info.split("-")[2].split(".")[0]\n+        return commit_number\n+\n+    def untar_files(self, 
file_path: Path, extracted_path_output: Path):\n+        """\n+        untar the download archive\n+        @file_path: i'..b' the version choosen by user.\n+        This method could be upgraded simply by adding the new versions\n+        """\n+        if self._plasmidfinder_version == "latest":\n+            self._plasmidfinder_url = f"{self._plasmidfinder_url}master.gz"\n+        elif self._plasmidfinder_version == "2.1":\n+            self._plasmidfinder_url = f"{self._plasmidfinder_url}1307168.gz"\n+\n+    def download_database(self):\n+        """\n+        Download the plasmidfinder database using requests lib\n+        Make the directory and temporary directory for download\n+        Untar the download files\n+        """\n+        self._output_dir = Path(self._output_dir)\n+        self.choose_db_version()\n+        try:\n+            request_info = requests.get(self._plasmidfinder_url)\n+            request_info.raise_for_status()\n+            self._commit_number = self.extract_db_commit(request_info)\n+            output_tar_path = self._output_dir.joinpath(self._temporary_folder)\n+            output_tar_path_file = output_tar_path.joinpath(self._db_name_tar)\n+            output_path = self._output_dir.joinpath(self._db_name)\n+            os.makedirs(output_tar_path)\n+            os.makedirs(output_path)\n+            with open(output_tar_path_file, \'wb\') as output_dir:\n+                output_dir.write(request_info.content)\n+            untar_output = self.untar_files(file_path=output_tar_path_file, extracted_path_output=output_tar_path.joinpath(self._db_name))\n+\n+            self.moove_download_files(older_path=untar_output, new_path=output_path)\n+        except requests.exceptions.HTTPError as http_error:\n+            print(f"Requests Error: {http_error}")\n+            print(f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}")\n+\n+    def moove_download_files(self, older_path, new_path, expression_search="*fsa"):\n+  
      """\n+        Clean downloaded data by mooving fasta files in the final folder\n+        @older_path: previous path where the files are located\n+        @new_path: final path where files will be mooved\n+        @expression_search: keep only file with this expression\n+        """\n+        fasta_files = Path(older_path).rglob(expression_search)\n+        file_list_paths = [file for file in fasta_files if file.is_file()]\n+        [self.keep_filename(pathname=path, output_path=new_path) for path in file_list_paths]\n+\n+    def keep_filename(self, pathname, output_path):\n+        """\n+        Moove files\n+        @pathname: previous path\n+        @output_path: final path\n+        """\n+        Path.replace(pathname, output_path.joinpath(pathname.name))\n+\n+    def read_json_input_file(self):\n+        """\n+        Import the json file\n+        """\n+        with open(self.json_file_path) as fh:\n+            params = json.load(fh)\n+        target_dir = params[\'output_data\'][0][\'extra_files_path\']\n+        os.makedirs(target_dir)\n+        self._output_dir = target_dir\n+\n+    def write_json_infos(self):\n+        """\n+        Write in the imported json file\n+        """\n+        with open(self.json_file_path, \'w\') as fh:\n+            json.dump(self.get_data_manager(), fh, sort_keys=True)\n+\n+\n+def parse_arguments():\n+    """\n+    List of arguments provided by the user\n+    return: parsed arguments\n+    """\n+    # parse options and arguments\n+    arg_parser = argparse.ArgumentParser()\n+    arg_parser.add_argument("data_manager_json",\n+                            help="json file from galaxy")\n+    arg_parser.add_argument("-v", "--db_version",\n+                            help="version of the plasmidfinder (latest or 2.1)")\n+    return arg_parser.parse_args()\n+\n+\n+def main():\n+    all_args = parse_arguments()\n+    plasmidfinder_download = DownloadPlasmidfinderDatabase(json_file_path=all_args.data_manager_json, 
plasmidfinder_version=all_args.db_version)\n+    plasmidfinder_download.read_json_input_file()\n+    plasmidfinder_download.download_database()\n+    plasmidfinder_download.write_json_infos()\n+\n+\n+if __name__ == \'__main__\':\n+    main()\n'
b
diff -r 000000000000 -r 8ad77a9dc91d data_manager/plasmidfinder_fetch_database.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/plasmidfinder_fetch_database.xml Tue Feb 14 14:25:43 2023 +0000
[
@@ -0,0 +1,48 @@
+<tool id="data_manager_fetch_plasmidfinder" name="plasmidfinder_datamanager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>plasmidfinder database builder</description>
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+      <![CDATA[
+        python '$__tool_directory__/plasmidfinder_fetch_database.py'
+        --db_version '$database_select'
+        '$output_file'
+      ]]></command>
+    <inputs>
+        <param name="database_select" type="select" label="Database version" help="Choose a database version to download (default latest version)">
+            <option value="latest" selected="true">Latest available version</option>
+            <option value="2.1">V2.1_2019-08-28</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <!-- Test_1 DB latest -->
+        <test expect_num_outputs="1">
+            <param name="database_select" value="latest"/>
+            <output name="output_file" value="plasmidfinder_test_data_manager_latest.json">
+                <assert_contents>
+                    <has_text_matching expression='\{"data_tables": \{"plasmidfinder_database": \[\{"date": "\d\d\d\d-\d\d-\d\d", "name": "b1c9ddac0e61_\d\d\d\d-\d\d-\d\d", "path": "plasmidfinder-db", "value": "plasmidfinder_b1c9ddac0e61_\d\d\d\d-\d\d-\d\d"' />
+                </assert_contents>
+            </output>
+        </test>
+         <!-- Test_2 DB 2.1 -->
+        <test expect_num_outputs="1">
+            <param name="database_select" value="2.1"/>
+            <output name="output_file" value="plasmidfinder_test_data_manager_2.1.json">
+                <assert_contents>
+                    <has_text_matching expression='"data_tables": \{"plasmidfinder_database": \[\{"date": "\d\d\d\d-\d\d-\d\d", "name": "2\.1_\d\d\d\d-\d\d-\d\d", "path": "plasmidfinder-db", "value": "plasmidfinder_1307168b1ce7_\d\d\d\d-\d\d-\d\d"' />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Download plasmidfinder database from the bitbucket repository
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1007/978-1-4939-9877-7_20</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 8ad77a9dc91d data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/plasmidfinder_fetch_database.xml" id="plasmidfinder_fetch_database" version="@TOOL_VERSION@">
+        <data_table name="plasmidfinder_database">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <column name="date" />
+                <column name="path" output_ref="output_file">
+                    <move type="directory" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">plasmidfinder-db/${name}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/plasmidfinder-db/${name}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
b
diff -r 000000000000 -r 8ad77a9dc91d plasmidfinder_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plasmidfinder_database.loc Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,8 @@
+# this is a tab separated file describing the location of plasmidfinder database
+# The name was obtained by merging date of download and commit number
+# the columns are:
+# value, name, date, path
+#
+# for example
+#plasmidfinder_9002e7282dd0_2022-12-20 9002e7282dd0_2022-12-20 2022-12-20 plasmidfinder-db
+#plasmidfinder_1307168b1ce7_2022-12-20 2.1_2022-12-20 2022-12-20 plasmidfinder-db
b
diff -r 000000000000 -r 8ad77a9dc91d test-data/plasmidfinder.loc.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plasmidfinder.loc.test Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,12 @@
+plasmidfinder_9002e7282dd0_2022-12-20 9002e7282dd0_2022-12-20 2022-12-20 plasmidfinder-db
+plasmidfinder_1307168b1ce7_2022-12-20 2.1_2022-12-20 2022-12-20 plasmidfinder-db
+plasmidfinder_c18e08c17a59_2023-01-27 c18e08c17a59_2023-01-27 2023-01-27 /tmp/tmp9g7s9mjt/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-01-27
+plasmidfinder_1307168b1ce7_2023-01-27 2.1_2023-01-27 2023-01-27 /tmp/tmp9g7s9mjt/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-01-27
+plasmidfinder_c18e08c17a59_2023-01-27 c18e08c17a59_2023-01-27 2023-01-27 /tmp/tmpdy82xyjr/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-01-27
+plasmidfinder_1307168b1ce7_2023-01-27 2.1_2023-01-27 2023-01-27 /tmp/tmpdy82xyjr/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-01-27
+plasmidfinder_c18e08c17a59_2023-01-27 c18e08c17a59_2023-01-27 2023-01-27 /tmp/tmp6_mld4pi/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-01-27
+plasmidfinder_1307168b1ce7_2023-01-27 2.1_2023-01-27 2023-01-27 /tmp/tmp6_mld4pi/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-01-27
+plasmidfinder_c18e08c17a59_2023-02-07 c18e08c17a59_2023-02-07 2023-02-07 /tmp/tmp70ecz0oo/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-07
+plasmidfinder_1307168b1ce7_2023-02-07 2.1_2023-02-07 2023-02-07 /tmp/tmp70ecz0oo/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-07
+plasmidfinder_c18e08c17a59_2023-02-14 c18e08c17a59_2023-02-14 2023-02-14 /tmp/tmpvbz0fwsy/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-14
+plasmidfinder_1307168b1ce7_2023-02-14 2.1_2023-02-14 2023-02-14 /tmp/tmpvbz0fwsy/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-14
b
diff -r 000000000000 -r 8ad77a9dc91d test-data/plasmidfinder_test_data_manager_2.1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plasmidfinder_test_data_manager_2.1.json Tue Feb 14 14:25:43 2023 +0000
[
@@ -0,0 +1,1 @@
+{"data_tables": {"plasmidfinder_database": [{"date": "2023-01-27", "name": "2.1_2023-01-27", "path": "plasmidfinder-db", "value": "plasmidfinder_1307168b1ce7_2023-01-27"}]}}
\ No newline at end of file
b
diff -r 000000000000 -r 8ad77a9dc91d tool-data/plasmidfinder_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/plasmidfinder_database.loc Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,8 @@
+# this is a tab separated file describing the location of plasmidfinder database
+# The name was obtained by merging date of download and commit number
+# the columns are:
+# value, name, date, path
+#
+# for example
+#plasmidfinder_9002e7282dd0_2022-12-20 9002e7282dd0_2022-12-20 2022-12-20 plasmidfinder-db
+#plasmidfinder_1307168b1ce7_2022-12-20 2.1_2022-12-20 2022-12-20 plasmidfinder-db
b
diff -r 000000000000 -r 8ad77a9dc91d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,8 @@
+
+<tables>
+    <!-- Locations of plasmidfinder database in the required format -->
+    <table name="plasmidfinder_database" comment_char="#">
+        <columns>value, name, date, path</columns>
+        <file path="tool-data/plasmidfinder_database.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 8ad77a9dc91d tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Feb 14 14:25:43 2023 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of plasmidfinder database in the required format -->
+    <table name="plasmidfinder_database" comment_char="#">
+        <columns>value, name, date, path</columns>
+        <file path="${__HERE__}/test-data/plasmidfinder.loc.test"/>
+    </table>
+</tables>