comparison data_manager/plasmidfinder_fetch_database.py @ 5:60cfd33bc2fb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit 0a3992c5be846fc9f18b7ca18f0adcd78f5b9396-dirty
author pimarin
date Mon, 24 Jul 2023 10:00:33 +0000
parents e05fd47bcca6
children f99089461adb
comparison
equal deleted inserted replaced
4:e05fd47bcca6 5:60cfd33bc2fb
1 import argparse 1 import argparse
2 import json 2 import json
3 import os 3 import os
4 import tarfile 4 import time
5 from datetime import datetime
6 from pathlib import Path 5 from pathlib import Path
7 6
8 import requests 7
8 import git
9 9
10 10
11 class GetPlasmidfinderDataManager: 11 class GetPlasmidfinderDataManager:
12 """ 12 """
13 Create the json file with database information for galaxy data manager 13 Create the json file with database information for galaxy data manager
14 """ 14 """
15 15
16 def __init__(self, 16 def __init__(self,
17 plasmidfinder_database="plasmidfinder_database", 17 plasmidfinder_database="plasmidfinder_database",
18 db_name="plasmidfinder-db", 18 db_name="plasmidfinder_database",
19 plasmidfinder_version="latest"): 19 plasmidfinder_version="latest"):
20 self.data_table_name = plasmidfinder_database 20 self.data_table_name = plasmidfinder_database
21 self._db_name = db_name 21 self._db_name = db_name
22 self._plasmidfinder_version = plasmidfinder_version 22 self._plasmidfinder_version = plasmidfinder_version
23 self._plasmidfinder_date_version = None 23 self._plasmidfinder_date_version = None
66 Build the data manager info for galaxy 66 Build the data manager info for galaxy
67 """ 67 """
68 68
69 def __init__(self, 69 def __init__(self,
70 output_dir=Path.cwd(), 70 output_dir=Path.cwd(),
71 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/", 71 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/src/master",
72 db_name="plasmidfinder-db", 72 db_name="plasmidfinder_database",
73 db_tmp="tmp_database", 73 db_tmp="tmp_database",
74 plasmidfinder_version="latest", 74 plasmidfinder_version="latest",
75 json_file_path=None, 75 json_file_path=None,
76 date_version=datetime.now().strftime("%Y-%m-%d")): 76 date_version=None):
77 77
78 super().__init__() 78 super().__init__()
79 self.json_file_path = json_file_path 79 self.json_file_path = json_file_path
80 self._output_dir = output_dir 80 self._output_dir = output_dir
81 self._plasmidfinder_url = plasmidfinder_url 81 self._plasmidfinder_url = plasmidfinder_url
84 self._db_name_tar = f'{db_name}.gz' 84 self._db_name_tar = f'{db_name}.gz'
85 self._plasmidfinder_version = plasmidfinder_version 85 self._plasmidfinder_version = plasmidfinder_version
86 self._plasmidfinder_date_version = date_version 86 self._plasmidfinder_date_version = date_version
87 self._commit_number = None 87 self._commit_number = None
88 88
89 def extract_db_commit(self, request_header, title_name="content-disposition"): 89 def git_clone(self):
90 """ 90 git.Repo.clone_from(url=self._plasmidfinder_url, to_path=self._output_dir)
91 Extract the commit id to add the information as identifier of the download 91 self._plasmidfinder_repository = git.Repo(path=self._output_dir)
92 @request_header: a request object obtained from requests.get()
93 @title_name: the tag to search in the header of the requests object
94 return: the value of the commit
95 """
96 db_info = request_header.headers[title_name]
97 commit_number = db_info.split("-")[2].split(".")[0]
98 return commit_number
99 92
100 def untar_files(self, file_path: Path, extracted_path_output: Path): 93 def get_commit_number(self):
101 """ 94 sha = self._plasmidfinder_repository.head.commit.hexsha
102 untar the download archive 95 short_sha = self._plasmidfinder_repository.git.rev_parse(sha, short=7)
103 @file_path: input path of the tar.gz file 96 self._commit_number = short_sha
104 @extracted_path_output: output path of the extract folder
105 return: the path of the output
106 """
107 try:
108 with file_path.open('rb') as fh_in, \
109 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
110 tar_file.extractall(path=extracted_path_output)
111 print(f'Untar the database in {extracted_path_output}')
112 return extracted_path_output
113 except OSError:
114 os.sys.exit(f'ERROR: Could not extract {file_path}')
115 97
116 def choose_db_version(self): 98 def get_commit_date(self):
117 """ 99 self._plasmidfinder_date_version = time.strftime("%Y_%m_%d", time.gmtime(self._plasmidfinder_repository.head.commit.committed_date))
118 Update the url link depending on the version chosen by the user.
119 This method could be upgraded simply by adding the new versions
120 """
121 if self._plasmidfinder_version == "latest":
122 self._plasmidfinder_url = f"{self._plasmidfinder_url}master.gz"
123 elif self._plasmidfinder_version == "2.1":
124 self._plasmidfinder_url = f"{self._plasmidfinder_url}1307168.gz"
125 100
126 def download_database(self): 101 def download_database(self):
127 """ 102 """
128 Download the plasmidfinder database using requests lib 103 Download the plasmidfinder database using git lib
129 Make the directory and temporary directory for download 104 Extract commit and commit date
130 Untar the download files
131 """ 105 """
132 self._output_dir = Path(self._output_dir) 106 self._output_dir = Path(self._output_dir)
133 self.choose_db_version() 107 self.git_clone()
134 try: 108 if self._plasmidfinder_version != "latest":
135 request_info = requests.get(self._plasmidfinder_url) 109 self._plasmidfinder_repository.git.checkout(self._plasmidfinder_version)
136 request_info.raise_for_status() 110 self.get_commit_number()
137 self._commit_number = self.extract_db_commit(request_info) 111 self.get_commit_date()
138 output_tar_path = self._output_dir.joinpath(self._temporary_folder)
139 output_tar_path_file = output_tar_path.joinpath(self._db_name_tar)
140 output_path = self._output_dir.joinpath(self._db_name)
141 os.makedirs(output_tar_path)
142 os.makedirs(output_path)
143 with open(output_tar_path_file, 'wb') as output_dir:
144 output_dir.write(request_info.content)
145 untar_output = self.untar_files(file_path=output_tar_path_file, extracted_path_output=output_tar_path.joinpath(self._db_name))
146 self.moove_download_files(source=untar_output, destination=output_path)
147 except requests.exceptions.HTTPError as http_error:
148 print(f"Requests Error: {http_error}")
149 print(f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}")
150
151 def moove_download_files(self, source, destination, expression_search="*"):
152 """
153 Clean downloaded data by moving fasta files in the final folder
154 @source: previous path where the files are located
155 @destination: final path where files will be moved
156 @expression_search: keep only file with this expression
157 """
158 fasta_files = Path(source).rglob(expression_search)
159 file_list_paths = [file for file in fasta_files if file.is_file()]
160 [self.keep_filename(pathname=path, output_path=destination) for path in file_list_paths]
161
162 def keep_filename(self, pathname, output_path):
163 """
164 Move files
165 @pathname: previous path
166 @output_path: final path
167 """
168 Path.replace(pathname, output_path.joinpath(pathname.name))
169 112
170 def read_json_input_file(self): 113 def read_json_input_file(self):
171 """ 114 """
172 Import the json file 115 Import the json file
173 """ 116 """