Mercurial > repos > pimarin > data_manager_fetch_plasmidfinder
comparison data_manager/plasmidfinder_fetch_database.py @ 0:8ad77a9dc91d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit f3383fdb9a17d1b69d05547cdb96534a5f762bec-dirty
author | pimarin |
---|---|
date | Tue, 14 Feb 2023 14:25:43 +0000 |
parents | |
children | 9ef108e34063 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8ad77a9dc91d |
---|---|
1 import argparse | |
2 import json | |
3 import os | |
4 import tarfile | |
5 from datetime import datetime | |
6 from pathlib import Path | |
7 | |
8 import requests | |
9 | |
10 | |
class GetPlasmidfinderDataManager:
    """
    Assemble the data table description that is dumped as json for the
    galaxy data manager.
    """

    def __init__(self,
                 plasmidfinder_database="plasmidfinder_database",
                 db_name="plasmidfinder-db",
                 plasmidfinder_version="latest"):
        # Public state: table name and the structures built from it.
        self.data_table_name = plasmidfinder_database
        self.data_table_entry = None
        self.plasmidfinder_table_list = None
        # Internal state; date and commit stay None until something sets them.
        self._db_name = db_name
        self._plasmidfinder_version = plasmidfinder_version
        self._plasmidfinder_date_version = None
        self._commit_number = None

    def get_data_table_format(self):
        """
        Build the empty data table skeleton and remember it on the instance
        return: {"data_tables": {<table name>: {}}}
        """
        skeleton = {"data_tables": {self.data_table_name: {}}}
        self.data_table_entry = skeleton
        return skeleton

    def get_data_manager(self):
        """
        Fill a fresh skeleton with the single entry describing the database.
        The entry name starts with the commit number when the version is
        "latest", otherwise with the requested version number.
        return: the data table holding a one-element list of entries
        """
        table = self.get_data_table_format()
        self.plasmidfinder_table_list = table
        date_stamp = self._plasmidfinder_date_version
        label = (self._commit_number
                 if self._plasmidfinder_version == "latest"
                 else self._plasmidfinder_version)
        row = {
            "value": f"plasmidfinder_{self._commit_number}_{date_stamp}",
            "name": f"{label}_{date_stamp}",
            "date": date_stamp,
            "path": self._db_name,
        }
        table["data_tables"][self.data_table_name] = [row]
        return table
61 | |
62 | |
class DownloadPlasmidfinderDatabase(GetPlasmidfinderDataManager):
    """
    Download the plasmidfinder database from the bitbucket repository.
    Build the data manager info for galaxy
    """

    # Archive name appended to the base url for each supported version.
    # Supporting a new release only requires a new entry here.
    _VERSION_ARCHIVES = {
        "latest": "master.gz",
        "2.1": "1307168.gz",
    }
    # Seconds to wait on the server before giving up, so that a stalled
    # connection cannot hang the job forever.
    _REQUEST_TIMEOUT = 60

    def __init__(self,
                 output_dir=None,
                 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/",
                 db_name="plasmidfinder-db",
                 db_tmp="tmp_database",
                 plasmidfinder_version="latest",
                 json_file_path=None,
                 date_version=None):
        """
        @output_dir: folder receiving the database (default: the current
                     working directory at call time)
        @plasmidfinder_url: base url the archive name is appended to
        @db_name: name of the final database folder
        @db_tmp: name of the folder used for the download and extraction
        @plasmidfinder_version: "latest" or a version listed in
                                _VERSION_ARCHIVES; None means "latest"
        @json_file_path: json file read for the output folder and
                         overwritten with the data table
        @date_version: date stored in the data table (default: today's
                       date as YYYY-MM-DD at call time)
        """
        # The command line hands over None when no version is requested.
        if plasmidfinder_version is None:
            plasmidfinder_version = "latest"
        super().__init__(db_name=db_name,
                         plasmidfinder_version=plasmidfinder_version)
        self.json_file_path = json_file_path
        # Defaults are resolved here rather than in the signature, where
        # they would be frozen once when the class is defined.
        self._output_dir = Path.cwd() if output_dir is None else output_dir
        if date_version is None:
            date_version = datetime.now().strftime("%Y-%m-%d")
        self._plasmidfinder_date_version = date_version
        # The base url is kept apart so that choose_db_version() can be
        # called more than once without stacking archive names.
        self._plasmidfinder_base_url = plasmidfinder_url
        self._plasmidfinder_url = plasmidfinder_url
        self._temporary_folder = db_tmp
        self._db_name_tar = f'{db_name}.gz'

    def extract_db_commit(self, request_header, title_name="content-disposition"):
        """
        Extract the commit id used as identifier of the download
        @request_header: a request object obtained from requests.get()
        @title_name: the tag to search in the header of the requests object
        return: the value of the commit
        """
        db_info = request_header.headers[title_name]
        # Takes the third "-" separated field up to its first ".".
        # NOTE(review): assumes the header value ends with something like
        # "<owner>-<repository>-<commit>.tar.gz" - confirm against the server.
        commit_number = db_info.split("-")[2].split(".")[0]
        return commit_number

    def untar_files(self, file_path: Path, extracted_path_output: Path):
        """
        untar the download archive
        @file_path: input path of the tar.gz file
        @extracted_path_output: output path of the extract folder
        return: the path of the output
        Exit with an error message when the archive cannot be read
        """
        try:
            with file_path.open('rb') as fh_in, \
                    tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
                if hasattr(tarfile, "data_filter"):
                    # The archive comes from the network: when this Python
                    # supports extraction filters, refuse members that
                    # would be written outside the destination folder.
                    tar_file.extractall(path=extracted_path_output,
                                        filter="data")
                else:
                    tar_file.extractall(path=extracted_path_output)
        except (OSError, EOFError, tarfile.TarError) as error:
            # A corrupt archive raises tarfile.TarError, which is not an
            # OSError, so both families are handled.
            raise SystemExit(f'ERROR: Could not extract {file_path}') from error
        print(f'Untar the database in {extracted_path_output}')
        return extracted_path_output

    def choose_db_version(self):
        """
        Update the url link depending on the version choosen by user.
        This method could be upgraded simply by adding the new versions
        to _VERSION_ARCHIVES.
        Exit with an error message when the version is not supported
        """
        try:
            archive_name = self._VERSION_ARCHIVES[self._plasmidfinder_version]
        except KeyError:
            supported = ", ".join(sorted(self._VERSION_ARCHIVES))
            raise SystemExit(
                f"ERROR: Unsupported plasmidfinder version "
                f"'{self._plasmidfinder_version}' (supported: {supported})"
            ) from None
        self._plasmidfinder_url = f"{self._plasmidfinder_base_url}{archive_name}"

    def download_database(self):
        """
        Download the plasmidfinder database using requests lib
        Make the directory and temporary directory for download
        Untar the download files
        Exit with an error message when the download fails
        """
        self._output_dir = Path(self._output_dir)
        self.choose_db_version()
        try:
            request_info = requests.get(self._plasmidfinder_url,
                                        timeout=self._REQUEST_TIMEOUT)
            request_info.raise_for_status()
        except requests.exceptions.RequestException as request_error:
            # Stop here: going on would write a data table entry that
            # points to a database which was never downloaded.
            raise SystemExit(
                f"Requests Error: {request_error}\n"
                f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}"
            ) from request_error
        self._commit_number = self.extract_db_commit(request_info)
        output_tar_path = self._output_dir.joinpath(self._temporary_folder)
        output_tar_path_file = output_tar_path.joinpath(self._db_name_tar)
        output_path = self._output_dir.joinpath(self._db_name)
        # exist_ok so that a folder which is already there is reused
        # instead of aborting the run.
        output_tar_path.mkdir(parents=True, exist_ok=True)
        output_path.mkdir(parents=True, exist_ok=True)
        with open(output_tar_path_file, 'wb') as archive_handle:
            archive_handle.write(request_info.content)
        untar_output = self.untar_files(
            file_path=output_tar_path_file,
            extracted_path_output=output_tar_path.joinpath(self._db_name))
        self.moove_download_files(older_path=untar_output, new_path=output_path)

    def moove_download_files(self, older_path, new_path, expression_search="*fsa"):
        """
        Clean downloaded data by mooving fasta files in the final folder
        @older_path: previous path where the files are located
        @new_path: final path where files will be mooved
        @expression_search: keep only file with this expression
        """
        # The matches are listed before anything is moved, so the folder
        # tree is not modified while it is being walked.
        file_list_paths = [file for file in Path(older_path).rglob(expression_search)
                           if file.is_file()]
        for path in file_list_paths:
            self.keep_filename(pathname=path, output_path=new_path)

    def keep_filename(self, pathname, output_path):
        """
        Moove a file into a folder, keeping its file name
        @pathname: previous path
        @output_path: final path (folder)
        """
        source = Path(pathname)
        source.replace(Path(output_path).joinpath(source.name))

    def read_json_input_file(self):
        """
        Import the json file, create the folder given by the
        extra_files_path of its first output and use it as output folder
        """
        with open(self.json_file_path) as fh:
            params = json.load(fh)
        target_dir = params['output_data'][0]['extra_files_path']
        os.makedirs(target_dir, exist_ok=True)
        self._output_dir = target_dir

    def write_json_infos(self):
        """
        Overwrite the imported json file with the data table information
        """
        with open(self.json_file_path, 'w') as fh:
            json.dump(self.get_data_manager(), fh, sort_keys=True)
187 | |
188 | |
def parse_arguments(argv=None):
    """
    List of arguments provided by the user
    @argv: list of arguments to parse; None (default) parses the
           command line of the running process
    return: parsed arguments, with db_version set to "latest" when the
            option is not given
    """
    # parse options and arguments
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("data_manager_json",
                            help="json file from galaxy")
    # Without a default the attribute would be None, which is not a
    # version the download step knows how to fetch.
    arg_parser.add_argument("-v", "--db_version",
                            default="latest",
                            help="version of the plasmidfinder (latest or 2.1)")
    return arg_parser.parse_args(argv)
201 | |
202 | |
def main():
    """
    Entry point: parse the command line, then read the json file,
    download the database and write the data table back, in that order.
    """
    cli = parse_arguments()
    downloader = DownloadPlasmidfinderDatabase(
        json_file_path=cli.data_manager_json,
        plasmidfinder_version=cli.db_version,
    )
    pipeline = (
        downloader.read_json_input_file,
        downloader.download_database,
        downloader.write_json_infos,
    )
    for step in pipeline:
        step()
209 | |
210 | |
# Run the data manager only when executed as a script, not on import.
if __name__ == "__main__":
    main()