Repository 'data_manager_fetch_plasmidfinder'
hg clone https://eddie.galaxyproject.org/repos/pimarin/data_manager_fetch_plasmidfinder

Changeset 5:60cfd33bc2fb (2023-07-24)
Previous changeset 4:e05fd47bcca6 (2023-02-17) Next changeset 6:4e933a39094f (2023-08-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit 0a3992c5be846fc9f18b7ca18f0adcd78f5b9396-dirty
modified:
data_manager/macro.xml
data_manager/plasmidfinder_fetch_database.py
data_manager/plasmidfinder_fetch_database.xml
data_manager_conf.xml
test-data/plasmidfinder.loc.test
test-data/plasmidfinder_test_data_manager_2.1.json
removed:
test-data/plasmidfinder_test_data_manager_latest.json
diff -r e05fd47bcca6 -r 60cfd33bc2fb data_manager/macro.xml
--- a/data_manager/macro.xml Fri Feb 17 14:07:54 2023 +0000
+++ b/data_manager/macro.xml Mon Jul 24 10:00:33 2023 +0000
@@ -1,13 +1,16 @@
 <macros>
     <token name="@TOOL_VERSION@">2.1.6</token>
-    <token name="@REQUESTS_VERSION@">2.27.1</token>
-    <token name="@PYTHON_VERSION@">3.8</token>
+    <token name="@GIT_PYTHON_VERSION@">3.1.31</token>
+    <token name="@GIT_VERSION@">2.34.1</token>
+    <token name="@GIT_DB@">4.0.10</token>
+    <token name="@PYTHON_VERSION@">3.11.3</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">21.05</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@PYTHON_VERSION@">python</requirement>
-            <requirement type="package" version="@REQUESTS_VERSION@">requests</requirement>
+            <requirement type="package" version="@GIT_PYTHON_VERSION@">gitpython</requirement>
+            <requirement type="package" version="@GIT_VERSION@">git</requirement>
         </requirements>
     </xml>
 </macros>
diff -r e05fd47bcca6 -r 60cfd33bc2fb data_manager/plasmidfinder_fetch_database.py
--- a/data_manager/plasmidfinder_fetch_database.py Fri Feb 17 14:07:54 2023 +0000
+++ b/data_manager/plasmidfinder_fetch_database.py Mon Jul 24 10:00:33 2023 +0000
@@ -1,11 +1,11 @@
 import argparse
 import json
 import os
-import tarfile
-from datetime import datetime
+import time
 from pathlib import Path
 
-import requests
+
+import git
 
 
 class GetPlasmidfinderDataManager:
@@ -15,7 +15,7 @@
 
     def __init__(self,
                  plasmidfinder_database="plasmidfinder_database",
-                 db_name="plasmidfinder-db",
+                 db_name="plasmidfinder_database",
                  plasmidfinder_version="latest"):
         self.data_table_name = plasmidfinder_database
         self._db_name = db_name
@@ -68,12 +68,12 @@
 
     def __init__(self,
                  output_dir=Path.cwd(),
-                 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/",
-                 db_name="plasmidfinder-db",
+                 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/src/master",
+                 db_name="plasmidfinder_database",
                  db_tmp="tmp_database",
                  plasmidfinder_version="latest",
                  json_file_path=None,
-                 date_version=datetime.now().strftime("%Y-%m-%d")):
+                 date_version=None):
 
         super().__init__()
         self.json_file_path = json_file_path
@@ -86,86 +86,29 @@
         self._plasmidfinder_date_version = date_version
         self._commit_number = None
 
-    def extract_db_commit(self, request_header, title_name="content-disposition"):
-        """
-        Extract the commit if to add the information as identifier of the download
-        @request_header: a request object obtained from requests.get()
-        @title_name: the tag to search in the header of the requests object
-        return: the value of the commit
-        """
-        db_info = request_header.headers[title_name]
-        commit_number = db_info.split("-")[2].split(".")[0]
-        return commit_number
+    def git_clone(self):
+        git.Repo.clone_from(url=self._plasmidfinder_url, to_path=self._output_dir)
+        self._plasmidfinder_repository = git.Repo(path=self._output_dir)
 
-    def untar_files(self, file_path: Path, extracted_path_output: Path):
-        """
-        untar the download archive
-        @file_path: input path of the tar.gz file
-        @extracted_path_output: output path of the extract folder
-        return: the path of the output
-        """
-        try:
-            with file_path.open('rb') as fh_in, \
-                    tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
-                tar_file.extractall(path=extracted_path_output)
-                print(f'Untar the database in {extracted_path_output}')
-                return extracted_path_output
-        except OSError:
-            os.sys.exit(f'ERROR: Could not extract {file_path}')
+    def get_commit_number(self):
+        sha = self._plasmidfinder_repository.head.commit.hexsha
+        short_sha = self._plasmidfinder_repository.git.rev_parse(sha, short=7)
+        self._commit_number = short_sha
 
-    def choose_db_version(self):
-        """
-        Update the url link depending on the version choosen by user.
-        This method could be upgraded simply by adding the new versions
-        """
-        if self._plasmidfinder_version == "latest":
-            self._plasmidfinder_url = f"{self._plasmidfinder_url}master.gz"
-        elif self._plasmidfinder_version == "2.1":
-            self._plasmidfinder_url = f"{self._plasmidfinder_url}1307168.gz"
+    def get_commit_date(self):
+        self._plasmidfinder_date_version = time.strftime("%Y_%m_%d", time.gmtime(self._plasmidfinder_repository.head.commit.committed_date))
 
     def download_database(self):
         """
-        Download the plasmidfinder database using requests lib
-        Make the directory and temporary directory for download
-        Untar the download files
+        Download the plasmidfinder database using git lib
+        Extract commit and commit date
         """
         self._output_dir = Path(self._output_dir)
-        self.choose_db_version()
-        try:
-            request_info = requests.get(self._plasmidfinder_url)
-            request_info.raise_for_status()
-            self._commit_number = self.extract_db_commit(request_info)
-            output_tar_path = self._output_dir.joinpath(self._temporary_folder)
-            output_tar_path_file = output_tar_path.joinpath(self._db_name_tar)
-            output_path = self._output_dir.joinpath(self._db_name)
-            os.makedirs(output_tar_path)
-            os.makedirs(output_path)
-            with open(output_tar_path_file, 'wb') as output_dir:
-                output_dir.write(request_info.content)
-            untar_output = self.untar_files(file_path=output_tar_path_file, extracted_path_output=output_tar_path.joinpath(self._db_name))
-            self.moove_download_files(source=untar_output, destination=output_path)
-        except requests.exceptions.HTTPError as http_error:
-            print(f"Requests Error: {http_error}")
-            print(f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}")
-
-    def moove_download_files(self, source, destination, expression_search="*"):
-        """
-        Clean downloaded data by mooving fasta files in the final folder
-        @older_path: previous path where the files are located
-        @new_path: final path where files will be mooved
-        @expression_search: keep only file with this expression
-        """
-        fasta_files = Path(source).rglob(expression_search)
-        file_list_paths = [file for file in fasta_files if file.is_file()]
-        [self.keep_filename(pathname=path, output_path=destination) for path in file_list_paths]
-
-    def keep_filename(self, pathname, output_path):
-        """
-        Moove files
-        @pathname: previous path
-        @output_path: final path
-        """
-        Path.replace(pathname, output_path.joinpath(pathname.name))
+        self.git_clone()
+        if self._plasmidfinder_version != "latest":
+            self._plasmidfinder_repository.git.checkout(self._plasmidfinder_version)
+        self.get_commit_number()
+        self.get_commit_date()
 
     def read_json_input_file(self):
         """
diff -r e05fd47bcca6 -r 60cfd33bc2fb data_manager/plasmidfinder_fetch_database.xml
--- a/data_manager/plasmidfinder_fetch_database.xml Fri Feb 17 14:07:54 2023 +0000
+++ b/data_manager/plasmidfinder_fetch_database.xml Mon Jul 24 10:00:33 2023 +0000
@@ -1,4 +1,3 @@
-
 <tool id="data_manager_fetch_plasmidfinder" name="plasmidfinder_datamanager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>plasmidfinder database builder</description>
     <macros>
@@ -10,38 +9,30 @@
         python '$__tool_directory__/plasmidfinder_fetch_database.py'
         --db_version '$database_select'
         '$output_file'
+        && cat '$output_file'
       ]]></command>
     <inputs>
         <param name="database_select" type="select" label="Database version" help="Choose a database version to download (default latest version)">
             <option value="latest" selected="true">Latest available version</option>
             <option value="2.1">V2.1_2019-08-28</option>
-        </param>param>
+        </param>
     </inputs>
     <outputs>
         <data name="output_file" format="data_manager_json"/>
     </outputs>
     <tests>
-        <!-- Test_1 DB latest -->
+        <!-- Test_2 DB 2.1 -->
         <test expect_num_outputs="1">
-            <param name="database_select" value="latest"/>
-            <output name="output_file" value="plasmidfinder_test_data_manager_latest.json">
-                <assert_contents>
-                    <has_text_matching expression="\{&quot;data_tables&quot;\:\ {&quot;plasmidfinder_database&quot;\:\ \[\{&quot;date&quot;\:\ &quot;\d\d\d\d-\d\d-\d\d&quot;,\ &quot;name&quot;\:\ &quot;\w\w\w\w\w\w\w\w\w\w\w\w_\d\d\d\d-\d\d-\d\d&quot;,\ &quot;path&quot;\:\ &quot;plasmidfinder_database&quot;,\ &quot;value&quot;\:\ &quot;plasmidfinder_\w\w\w\w\w\w\w\w\w\w\w\w_\d\d\d\d-\d\d-\d\d&quot;\}\]\}\}"/>
-                </assert_contents>
-            </output>
-        </test>
-         <!-- Test_2 DB 2.1 -->
-        <test expect_num_outputs="1">
-            <param name="database_select" value="2.1"/>
-            <output name="output_file" value="plasmidfinder_test_data_manager_2.1.json">
-                <assert_contents>
-                    <has_text_matching expression="\{&quot;data_tables&quot;\:\ {&quot;plasmidfinder_database&quot;:\ \[\{&quot;date&quot;\:\ &quot;\d\d\d\d-\d\d-\d\d&quot;,\ &quot;name&quot;\:\ &quot;2.1_\d\d\d\d-\d\d-\d\d&quot;,\ &quot;path&quot;\:\ &quot;plasmidfinder_database&quot;,\ &quot;value&quot;\:\ &quot;plasmidfinder_1307168b1ce7_\d\d\d\d-\d\d-\d\d&quot;\}\]\}\}"/>
-                </assert_contents>
-            </output>
+           <param name="database_select" value="2.1"/>
+           <output name="output_file" value="plasmidfinder_test_data_manager_2.1.json">
+               <assert_contents>
+                   <has_text text="2019_08_28"/>
+               </assert_contents>
+           </output>
         </test>
     </tests>
     <help><![CDATA[
-        Download plasmidfinder database from the bitbucket repository
+        Download plasmidfinder database from the bitbucket repository: https://bitbucket.org/genomicepidemiology/plasmidfinder_db/src/master/
     ]]></help>
     <citations>
         <citation type="doi">10.1007/978-1-4939-9877-7_20</citation>
diff -r e05fd47bcca6 -r 60cfd33bc2fb data_manager_conf.xml
--- a/data_manager_conf.xml Fri Feb 17 14:07:54 2023 +0000
+++ b/data_manager_conf.xml Mon Jul 24 10:00:33 2023 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/plasmidfinder_fetch_database.xml" id="plasmidfinder_fetch_database" version="@TOOL_VERSION@">
+    <data_manager tool_file="data_manager/plasmidfinder_fetch_database.xml" id="plasmidfinder_fetch_database">
         <data_table name="plasmidfinder_database">
             <output>
                 <column name="value" />
@@ -9,9 +9,9 @@
                 <column name="path" output_ref="output_file">
                     <move type="directory" relativize_symlinks="True">
                         <source>${path}</source>
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">plasmidfinder-db/${name}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">plasmidfinder_database/${name}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/plasmidfinder-db/${name}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/plasmidfinder_database/${name}</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
diff -r e05fd47bcca6 -r 60cfd33bc2fb test-data/plasmidfinder.loc.test
--- a/test-data/plasmidfinder.loc.test Fri Feb 17 14:07:54 2023 +0000
+++ b/test-data/plasmidfinder.loc.test Mon Jul 24 10:00:33 2023 +0000
@@ -1,10 +1,9 @@
-plasmidfinder_c18e08c17a59_2023-02-15 c18e08c17a59_2023-02-15 2023-02-15 plasmidfinder_database
-plasmidfinder_1307168b1ce7_2023-02-15 2.1_2023-02-15 2023-02-15 plasmidfinder_database
-plasmidfinder_c18e08c17a59_2023-02-15 c18e08c17a59_2023-02-15 2023-02-15 /tmp/tmp19jlq5v4/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-15
-plasmidfinder_1307168b1ce7_2023-02-15 2.1_2023-02-15 2023-02-15 /tmp/tmp19jlq5v4/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-15
-plasmidfinder_c18e08c17a59_2023-02-17 c18e08c17a59_2023-02-17 2023-02-17 /tmp/tmpkf9b8oam/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-17
-plasmidfinder_1307168b1ce7_2023-02-17 2.1_2023-02-17 2023-02-17 /tmp/tmpkf9b8oam/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-17
-plasmidfinder_c18e08c17a59_2023-02-17 c18e08c17a59_2023-02-17 2023-02-17 /tmp/tmpxhdyhsl6/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-17
-plasmidfinder_1307168b1ce7_2023-02-17 2.1_2023-02-17 2023-02-17 /tmp/tmpxhdyhsl6/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-17
-plasmidfinder_c18e08c17a59_2023-02-17 c18e08c17a59_2023-02-17 2023-02-17 /tmp/tmpt_c58x8j/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-17
-plasmidfinder_1307168b1ce7_2023-02-17 2.1_2023-02-17 2023-02-17 /tmp/tmpt_c58x8j/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-17
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmptllq5k0r/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmpnshbzr8f/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmpgfqg6r8x/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmpam2pha1m/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmp_tzl62ye/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmpu0batsz4/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmp72f81ayd/galaxy-dev/tool-data/plasmidfinder-db/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmp73run5j6/galaxy-dev/tool-data/plasmidfinder_database/2.1_2019_08_28
+plasmidfinder_1307168_2019_08_28 2.1_2019_08_28 2019_08_28 /tmp/tmpngljkg08/galaxy-dev/tool-data/plasmidfinder_database/2.1_2019_08_28
diff -r e05fd47bcca6 -r 60cfd33bc2fb test-data/plasmidfinder_test_data_manager_2.1.json
--- a/test-data/plasmidfinder_test_data_manager_2.1.json Fri Feb 17 14:07:54 2023 +0000
+++ b/test-data/plasmidfinder_test_data_manager_2.1.json Mon Jul 24 10:00:33 2023 +0000
@@ -1,1 +1,1 @@
-{"data_tables": {"plasmidfinder_database": [{"date": "2023-02-15", "name": "2.1_2023-02-15", "path": "plasmidfinder_database", "value": "plasmidfinder_1307168b1ce7_2023-02-15"}]}}
\ No newline at end of file
+{"data_tables": {"plasmidfinder_database": [{"date": "2019_08_28", "name": "2.1_2019_08_28", "path": "plasmidfinder_database", "value": "plasmidfinder_1307168_2019_08_28"}]}}
\ No newline at end of file
diff -r e05fd47bcca6 -r 60cfd33bc2fb test-data/plasmidfinder_test_data_manager_latest.json
--- a/test-data/plasmidfinder_test_data_manager_latest.json Fri Feb 17 14:07:54 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"plasmidfinder_database": [{"date": "2023-02-15", "name": "c18e08c17a59_2023-02-15", "path": "plasmidfinder_database", "value": "plasmidfinder_c18e08c17a59_2023-02-15"}]}}
\ No newline at end of file