Mercurial > repos > pimarin > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 16:718080c30a76 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit e7c35e529ae95a2c9f2ecd87a3716e6866feabfa
author | pimarin |
---|---|
date | Fri, 04 Nov 2022 15:46:06 +0000 |
parents | ac4d5e1d1421 |
children | ecb7d3d41366 |
comparison
equal
deleted
inserted
replaced
15:ac4d5e1d1421 | 16:718080c30a76 |
---|---|
1 import argparse | 1 import argparse |
2 import hashlib | 2 import hashlib |
3 import json | 3 import json |
4 import os | 4 import os |
5 import subprocess as sp | |
6 import sys | 5 import sys |
7 import tarfile | 6 import tarfile |
8 from datetime import datetime | 7 from datetime import datetime |
9 from pathlib import Path | 8 from pathlib import Path |
10 | 9 |
11 import requests | 10 import requests |
11 | |
12 | |
12 class GetBaktaDatabaseInfo: | 13 class GetBaktaDatabaseInfo: |
13 """ | 14 """ |
14 Extract bakta database information to make a json file for data_manager | 15 Extract bakta database information to make a json file for data_manager |
15 """ | 16 """ |
16 | 17 |
62 if db_version == "latest": | 63 if db_version == "latest": |
63 db_date_list = [] | 64 db_date_list = [] |
64 for db_dic in versions: | 65 for db_dic in versions: |
65 db_date_list.append(datetime.strptime(db_dic["date"], | 66 db_date_list.append(datetime.strptime(db_dic["date"], |
66 '%Y-%m-%d').date()) | 67 '%Y-%m-%d').date()) |
67 max(db_date_list) | |
68 filtered_version = next(item for item in versions | 68 filtered_version = next(item for item in versions |
69 if max(db_date_list)) | 69 if max(db_date_list)) |
70 elif db_version == "test": | 70 elif db_version == "test": |
71 filtered_version = {"date": "date_test", | 71 filtered_version = {"date": "date_test", |
72 "major": "0", | 72 "major": "0", |
94 self.db_version = db_version | 94 self.db_version = db_version |
95 return filtered_version | 95 return filtered_version |
96 | 96 |
97 def get_data_manager(self, bakta_database_info, output_path): | 97 def get_data_manager(self, bakta_database_info, output_path): |
98 self.bakta_table_list = self.get_data_table_format() | 98 self.bakta_table_list = self.get_data_table_format() |
99 bakta_value = f"bakta_{bakta_database_info['major']}." \ | 99 bakta_value = f"V{bakta_database_info['major']}." \ |
100 f"{bakta_database_info['minor']}" | 100 f"{bakta_database_info['minor']}_" \ |
101 data_info = dict(value=bakta_value, | 101 f"{bakta_database_info['date']}" |
102 dbkey=bakta_database_info['date'], | 102 tool_version = str(f"{bakta_database_info['software-min']['major']}." |
103 database_record=bakta_database_info['record'], | 103 f"{bakta_database_info['software-min']['minor']}") |
104 bakta_version=str( | 104 data_info = dict(value=bakta_database_info['record'], |
105 f"{bakta_database_info['software-min']['major']}." | 105 dbkey=bakta_value, |
106 f"{bakta_database_info['software-min']['minor']}" | 106 bakta_version=tool_version, |
107 ), | 107 path="db") |
108 path=output_path) | 108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] |
109 self.bakta_table_list["data_tables"][self.data_table_name] = data_info | |
110 return self.bakta_table_list | 109 return self.bakta_table_list |
111 | 110 |
112 | 111 |
113 class InstallBaktaDatabase(GetBaktaDatabaseInfo): | 112 class InstallBaktaDatabase(GetBaktaDatabaseInfo): |
114 """ | 113 """ |
146 except IOError: | 145 except IOError: |
147 print(f'ERROR: Could not download file from Zenodo!' | 146 print(f'ERROR: Could not download file from Zenodo!' |
148 f' url={self.db_url}, path={self.tarball_name}') | 147 f' url={self.db_url}, path={self.tarball_name}') |
149 | 148 |
150 def untar(self): | 149 def untar(self): |
151 db_path = Path(self.db_dir).joinpath(self.db_name) | 150 db_path = Path(self.db_dir).as_posix() |
152 try: | 151 try: |
153 with self.tarball_path.open('rb') as fh_in, \ | 152 with self.tarball_path.open('rb') as fh_in, \ |
154 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: | 153 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: |
155 tar_file.extractall(path=str(db_path)) | 154 tar_file.extractall(path=db_path) |
156 print(f'Untar the database in {db_path}') | 155 print(f'Untar the database in {db_path}') |
157 return db_path.as_posix() | 156 return db_path |
158 except OSError: | 157 except OSError: |
159 sys.exit(f'ERROR: Could not extract {self.tarball_name} ' | 158 sys.exit(f'ERROR: Could not extract {self.tarball_name} ' |
160 f'to {self.db_name}') | 159 f'to {self.db_name}') |
161 | 160 |
162 def calc_md5_sum(self, buffer_size=1048576): | 161 def calc_md5_sum(self, buffer_size=1048576): |
173 else: | 172 else: |
174 print(f"Error: corrupt database file! " | 173 print(f"Error: corrupt database file! " |
175 f"calculated md5 = {md5.hexdigest()}" | 174 f"calculated md5 = {md5.hexdigest()}" |
176 f" different from {self.md5} ") | 175 f" different from {self.md5} ") |
177 | 176 |
177 | |
178 """ | |
179 This method downloads the amrfinderplus database needed by bakta. | |
180 Deprecated: use the amrfinderplus data_manager instead. | |
178 def update_amrfinderplus_db(self): | 181 def update_amrfinderplus_db(self): |
179 amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" | 182 amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" |
180 if self.db_version == "test": | 183 if self.db_version == "test": |
181 cmd = [ | 184 cmd = [ |
182 'amrfinder_update', | 185 'amrfinder_update', |
198 print(f"ERROR: AMRFinderPlus failed! " | 201 print(f"ERROR: AMRFinderPlus failed! " |
199 f"command: 'amrfinder_update --force_update" | 202 f"command: 'amrfinder_update --force_update" |
200 f" --database {amrfinderplus_db_path}'") | 203 f" --database {amrfinderplus_db_path}'") |
201 else: | 204 else: |
202 print("AMRFinderPlus database download") | 205 print("AMRFinderPlus database download") |
206 """ | |
203 | 207 |
204 | 208 |
205 def parse_arguments(): | 209 def parse_arguments(): |
206 # parse options and arguments | 210 # parse options and arguments |
207 arg_parser = argparse.ArgumentParser() | 211 arg_parser = argparse.ArgumentParser() |
219 | 223 |
220 def main(): | 224 def main(): |
221 all_args = parse_arguments() | 225 all_args = parse_arguments() |
222 | 226 |
223 with open(all_args.data_manager_json) as fh: | 227 with open(all_args.data_manager_json) as fh: |
224 data_manager_input = json.load(fh) | 228 params = json.load(fh) |
225 | 229 target_dir = params['output_data'][0]['extra_files_path'] |
226 target_dir = data_manager_input['output_data'][0]['extra_files_path'] | |
227 os.makedirs(target_dir) | 230 os.makedirs(target_dir) |
228 | |
229 # init the class to download bakta db | 231 # init the class to download bakta db |
230 bakta_upload = InstallBaktaDatabase() | 232 bakta_upload = InstallBaktaDatabase() |
231 # extract the version | 233 # extract the version |
232 if all_args.test is True: | 234 if all_args.test is True: |
233 bakta_db = bakta_upload.fetch_db_versions( | 235 bakta_db = bakta_upload.fetch_db_versions( |
240 # download the database | 242 # download the database |
241 bakta_upload.download() | 243 bakta_upload.download() |
242 # check md5 sum | 244 # check md5 sum |
243 bakta_upload.calc_md5_sum() | 245 bakta_upload.calc_md5_sum() |
244 # untar db | 246 # untar db |
245 bakta_extracted_path = bakta_upload.untar() | 247 bakta_upload.untar() |
246 # update for amrfinderplus | |
247 bakta_upload.update_amrfinderplus_db() | |
248 # make the data_manager metadata | 248 # make the data_manager metadata |
249 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) | 249 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=target_dir) |
250 with open(all_args.data_manager_json, 'w') as fh: | 250 with open(all_args.data_manager_json, 'w') as fh: |
251 json.dump(bakta_data_manager, fh, sort_keys=True) | 251 json.dump(bakta_data_manager, fh, sort_keys=True) |
252 | 252 |
253 | 253 |
254 if __name__ == '__main__': | 254 if __name__ == '__main__': |