# HG changeset patch
# User pimarin
# Date 1676384743 0
# Node ID 8ad77a9dc91d5d37aa1657452a3289faa33f6698
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit f3383fdb9a17d1b69d05547cdb96534a5f762bec-dirty
diff -r 000000000000 -r 8ad77a9dc91d data_manager/macro.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macro.xml Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,13 @@
+
+ 2.1.6
+ 2.27.1
+ 3.8
+ 0
+ 21.05
+
+
+ python
+ requests
+
+
+
diff -r 000000000000 -r 8ad77a9dc91d data_manager/plasmidfinder_fetch_database.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/plasmidfinder_fetch_database.py Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,212 @@
+import argparse
+import json
+import os
+import tarfile
+from datetime import datetime
+from pathlib import Path
+
+import requests
+
+
+class GetPlasmidfinderDataManager:
+ """
+ Create the json file with database information for galaxy data manager
+ """
+
+ def __init__(self,
+ plasmidfinder_database="plasmidfinder_database",
+ db_name="plasmidfinder-db",
+ plasmidfinder_version="latest"):
+ self.data_table_name = plasmidfinder_database
+ self._db_name = db_name
+ self._plasmidfinder_version = plasmidfinder_version
+ self._plasmidfinder_date_version = None
+ self.data_table_entry = None
+ self.plasmidfinder_table_list = None
+ self._commit_number = None
+
+ def get_data_table_format(self):
+ """
+ Skeleton of a data_table format
+ return: a data table formatted for json output
+ """
+ self.data_table_entry = {
+ "data_tables": {
+ self.data_table_name: {}
+ }
+ }
+ return self.data_table_entry
+
+ def get_data_manager(self):
+ """
+ Create the empty data table format and add all the information into
+ Commit number is added if latest is required instead of version number
+ return: The data table with database information
+ """
+ self.plasmidfinder_table_list = self.get_data_table_format()
+ if self._plasmidfinder_version == "latest":
+ version_value = self._commit_number
+ else:
+ version_value = self._plasmidfinder_version
+ plasmidfinder_value = f"plasmidfinder_{self._commit_number}" \
+ f"_{self._plasmidfinder_date_version}"
+ plasmidfinder_name = f"{version_value}" \
+ f"_{self._plasmidfinder_date_version}"
+ data_info = dict(value=plasmidfinder_value,
+ name=plasmidfinder_name,
+ date=self._plasmidfinder_date_version,
+ path=self._db_name)
+ self.plasmidfinder_table_list["data_tables"][self.data_table_name] = [data_info]
+ return self.plasmidfinder_table_list
+
+
+class DownloadPlasmidfinderDatabase(GetPlasmidfinderDataManager):
+ """
+ Download the plasmidfinder database from the bitbucket repository.
+ Build the data manager info for galaxy
+ """
+
+ def __init__(self,
+ output_dir=Path.cwd(),
+ plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/",
+ db_name="plasmidfinder-db",
+ db_tmp="tmp_database",
+ plasmidfinder_version="latest",
+ json_file_path=None,
+ date_version=datetime.now().strftime("%Y-%m-%d")):
+
+ super().__init__()
+ self.json_file_path = json_file_path
+ self._output_dir = output_dir
+ self._plasmidfinder_url = plasmidfinder_url
+ self._temporary_folder = db_tmp
+ self._db_name = db_name
+ self._db_name_tar = f'{db_name}.gz'
+ self._plasmidfinder_version = plasmidfinder_version
+ self._plasmidfinder_date_version = date_version
+ self._commit_number = None
+
+ def extract_db_commit(self, request_header, title_name="content-disposition"):
+ """
+ Extract the commit if to add the information as identifier of the download
+ @request_header: a request object obtained from requests.get()
+ @title_name: the tag to search in the header of the requests object
+ return: the value of the commit
+ """
+ db_info = request_header.headers[title_name]
+ commit_number = db_info.split("-")[2].split(".")[0]
+ return commit_number
+
+ def untar_files(self, file_path: Path, extracted_path_output: Path):
+ """
+ untar the download archive
+ @file_path: input path of the tar.gz file
+ @extracted_path_output: output path of the extract folder
+ return: the path of the output
+ """
+ try:
+ with file_path.open('rb') as fh_in, \
+ tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
+ tar_file.extractall(path=extracted_path_output)
+ print(f'Untar the database in {extracted_path_output}')
+ return extracted_path_output
+ except OSError:
+ os.sys.exit(f'ERROR: Could not extract {file_path}')
+
+ def choose_db_version(self):
+ """
+ Update the url link depending on the version choosen by user.
+ This method could be upgraded simply by adding the new versions
+ """
+ if self._plasmidfinder_version == "latest":
+ self._plasmidfinder_url = f"{self._plasmidfinder_url}master.gz"
+ elif self._plasmidfinder_version == "2.1":
+ self._plasmidfinder_url = f"{self._plasmidfinder_url}1307168.gz"
+
+ def download_database(self):
+ """
+ Download the plasmidfinder database using requests lib
+ Make the directory and temporary directory for download
+ Untar the download files
+ """
+ self._output_dir = Path(self._output_dir)
+ self.choose_db_version()
+ try:
+ request_info = requests.get(self._plasmidfinder_url)
+ request_info.raise_for_status()
+ self._commit_number = self.extract_db_commit(request_info)
+ output_tar_path = self._output_dir.joinpath(self._temporary_folder)
+ output_tar_path_file = output_tar_path.joinpath(self._db_name_tar)
+ output_path = self._output_dir.joinpath(self._db_name)
+ os.makedirs(output_tar_path)
+ os.makedirs(output_path)
+ with open(output_tar_path_file, 'wb') as output_dir:
+ output_dir.write(request_info.content)
+ untar_output = self.untar_files(file_path=output_tar_path_file, extracted_path_output=output_tar_path.joinpath(self._db_name))
+
+ self.moove_download_files(older_path=untar_output, new_path=output_path)
+ except requests.exceptions.HTTPError as http_error:
+ print(f"Requests Error: {http_error}")
+ print(f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}")
+
+ def moove_download_files(self, older_path, new_path, expression_search="*fsa"):
+ """
+ Clean downloaded data by mooving fasta files in the final folder
+ @older_path: previous path where the files are located
+ @new_path: final path where files will be mooved
+ @expression_search: keep only file with this expression
+ """
+ fasta_files = Path(older_path).rglob(expression_search)
+ file_list_paths = [file for file in fasta_files if file.is_file()]
+ [self.keep_filename(pathname=path, output_path=new_path) for path in file_list_paths]
+
+ def keep_filename(self, pathname, output_path):
+ """
+ Moove files
+ @pathname: previous path
+ @output_path: final path
+ """
+ Path.replace(pathname, output_path.joinpath(pathname.name))
+
+ def read_json_input_file(self):
+ """
+ Import the json file
+ """
+ with open(self.json_file_path) as fh:
+ params = json.load(fh)
+ target_dir = params['output_data'][0]['extra_files_path']
+ os.makedirs(target_dir)
+ self._output_dir = target_dir
+
+ def write_json_infos(self):
+ """
+ Write in the imported json file
+ """
+ with open(self.json_file_path, 'w') as fh:
+ json.dump(self.get_data_manager(), fh, sort_keys=True)
+
+
+def parse_arguments():
+ """
+ List of arguments provided by the user
+ return: parsed arguments
+ """
+ # parse options and arguments
+ arg_parser = argparse.ArgumentParser()
+ arg_parser.add_argument("data_manager_json",
+ help="json file from galaxy")
+ arg_parser.add_argument("-v", "--db_version",
+ help="version of the plasmidfinder (latest or 2.1)")
+ return arg_parser.parse_args()
+
+
+def main():
+ all_args = parse_arguments()
+ plasmidfinder_download = DownloadPlasmidfinderDatabase(json_file_path=all_args.data_manager_json, plasmidfinder_version=all_args.db_version)
+ plasmidfinder_download.read_json_input_file()
+ plasmidfinder_download.download_database()
+ plasmidfinder_download.write_json_infos()
+
+
+if __name__ == '__main__':
+ main()
diff -r 000000000000 -r 8ad77a9dc91d data_manager/plasmidfinder_fetch_database.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/plasmidfinder_fetch_database.xml Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,48 @@
+
+ plasmidfinder database builder
+
+ macro.xml
+
+
+
+
+
+
+
+
+ param>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1007/978-1-4939-9877-7_20
+
+
diff -r 000000000000 -r 8ad77a9dc91d data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 8ad77a9dc91d plasmidfinder_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plasmidfinder_database.loc Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,8 @@
+# this is a tab separated file describing the location of plasmidfinder database
+# The name was obtained by merging date of download and commit number
+# the columns are:
+# value, name, date, path
+#
+# for example
+#plasmidfinder_9002e7282dd0_2022-12-20 9002e7282dd0_2022-12-20 2022-12-20 plasmidfinder-db
+#plasmidfinder_1307168b1ce7_2022-12-20 2.1_2022-12-20 2022-12-20 plasmidfinder-db
diff -r 000000000000 -r 8ad77a9dc91d test-data/plasmidfinder.loc.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plasmidfinder.loc.test Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,12 @@
+plasmidfinder_9002e7282dd0_2022-12-20 9002e7282dd0_2022-12-20 2022-12-20 plasmidfinder-db
+plasmidfinder_1307168b1ce7_2022-12-20 2.1_2022-12-20 2022-12-20 plasmidfinder-db
+plasmidfinder_c18e08c17a59_2023-01-27 c18e08c17a59_2023-01-27 2023-01-27 /tmp/tmp9g7s9mjt/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-01-27
+plasmidfinder_1307168b1ce7_2023-01-27 2.1_2023-01-27 2023-01-27 /tmp/tmp9g7s9mjt/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-01-27
+plasmidfinder_c18e08c17a59_2023-01-27 c18e08c17a59_2023-01-27 2023-01-27 /tmp/tmpdy82xyjr/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-01-27
+plasmidfinder_1307168b1ce7_2023-01-27 2.1_2023-01-27 2023-01-27 /tmp/tmpdy82xyjr/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-01-27
+plasmidfinder_c18e08c17a59_2023-01-27 c18e08c17a59_2023-01-27 2023-01-27 /tmp/tmp6_mld4pi/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-01-27
+plasmidfinder_1307168b1ce7_2023-01-27 2.1_2023-01-27 2023-01-27 /tmp/tmp6_mld4pi/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-01-27
+plasmidfinder_c18e08c17a59_2023-02-07 c18e08c17a59_2023-02-07 2023-02-07 /tmp/tmp70ecz0oo/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-07
+plasmidfinder_1307168b1ce7_2023-02-07 2.1_2023-02-07 2023-02-07 /tmp/tmp70ecz0oo/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-07
+plasmidfinder_c18e08c17a59_2023-02-14 c18e08c17a59_2023-02-14 2023-02-14 /tmp/tmpvbz0fwsy/galaxy-dev/tool-data/plasmidfinder-db/c18e08c17a59_2023-02-14
+plasmidfinder_1307168b1ce7_2023-02-14 2.1_2023-02-14 2023-02-14 /tmp/tmpvbz0fwsy/galaxy-dev/tool-data/plasmidfinder-db/2.1_2023-02-14
diff -r 000000000000 -r 8ad77a9dc91d test-data/plasmidfinder_test_data_manager_2.1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plasmidfinder_test_data_manager_2.1.json Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"plasmidfinder_database": [{"date": "2023-01-27", "name": "2.1_2023-01-27", "path": "plasmidfinder-db", "value": "plasmidfinder_1307168b1ce7_2023-01-27"}]}}
\ No newline at end of file
diff -r 000000000000 -r 8ad77a9dc91d tool-data/plasmidfinder_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/plasmidfinder_database.loc Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,8 @@
+# this is a tab separated file describing the location of plasmidfinder database
+# The name was obtained by merging date of download and commit number
+# the columns are:
+# value, name, date, path
+#
+# for example
+#plasmidfinder_9002e7282dd0_2022-12-20 9002e7282dd0_2022-12-20 2022-12-20 plasmidfinder-db
+#plasmidfinder_1307168b1ce7_2022-12-20 2.1_2022-12-20 2022-12-20 plasmidfinder-db
diff -r 000000000000 -r 8ad77a9dc91d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,8 @@
+
+
+
+
+ value, name, date, path
+
+
+
diff -r 000000000000 -r 8ad77a9dc91d tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Feb 14 14:25:43 2023 +0000
@@ -0,0 +1,7 @@
+
+
+
+ value, name, date, path
+
+
+