comparison data_manager/bakta_build_database.py @ 16:718080c30a76 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit e7c35e529ae95a2c9f2ecd87a3716e6866feabfa
author pimarin
date Fri, 04 Nov 2022 15:46:06 +0000
parents ac4d5e1d1421
children ecb7d3d41366
comparison
equal deleted inserted replaced
15:ac4d5e1d1421 16:718080c30a76
1 import argparse 1 import argparse
2 import hashlib 2 import hashlib
3 import json 3 import json
4 import os 4 import os
5 import subprocess as sp
6 import sys 5 import sys
7 import tarfile 6 import tarfile
8 from datetime import datetime 7 from datetime import datetime
9 from pathlib import Path 8 from pathlib import Path
10 9
11 import requests 10 import requests
11
12
12 class GetBaktaDatabaseInfo: 13 class GetBaktaDatabaseInfo:
13 """ 14 """
14 Extract bakta database information to make a json file for data_manager 15 Extract bakta database information to make a json file for data_manager
15 """ 16 """
16 17
62 if db_version == "latest": 63 if db_version == "latest":
63 db_date_list = [] 64 db_date_list = []
64 for db_dic in versions: 65 for db_dic in versions:
65 db_date_list.append(datetime.strptime(db_dic["date"], 66 db_date_list.append(datetime.strptime(db_dic["date"],
66 '%Y-%m-%d').date()) 67 '%Y-%m-%d').date())
67 max(db_date_list)
68 filtered_version = next(item for item in versions 68 filtered_version = next(item for item in versions
69 if max(db_date_list)) 69 if max(db_date_list))
70 elif db_version == "test": 70 elif db_version == "test":
71 filtered_version = {"date": "date_test", 71 filtered_version = {"date": "date_test",
72 "major": "0", 72 "major": "0",
94 self.db_version = db_version 94 self.db_version = db_version
95 return filtered_version 95 return filtered_version
96 96
97 def get_data_manager(self, bakta_database_info, output_path): 97 def get_data_manager(self, bakta_database_info, output_path):
98 self.bakta_table_list = self.get_data_table_format() 98 self.bakta_table_list = self.get_data_table_format()
99 bakta_value = f"bakta_{bakta_database_info['major']}." \ 99 bakta_value = f"V{bakta_database_info['major']}." \
100 f"{bakta_database_info['minor']}" 100 f"{bakta_database_info['minor']}_" \
101 data_info = dict(value=bakta_value, 101 f"{bakta_database_info['date']}"
102 dbkey=bakta_database_info['date'], 102 tool_version = str(f"{bakta_database_info['software-min']['major']}."
103 database_record=bakta_database_info['record'], 103 f"{bakta_database_info['software-min']['minor']}")
104 bakta_version=str( 104 data_info = dict(value=bakta_database_info['record'],
105 f"{bakta_database_info['software-min']['major']}." 105 dbkey=bakta_value,
106 f"{bakta_database_info['software-min']['minor']}" 106 bakta_version=tool_version,
107 ), 107 path="db")
108 path=output_path) 108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info]
109 self.bakta_table_list["data_tables"][self.data_table_name] = data_info
110 return self.bakta_table_list 109 return self.bakta_table_list
111 110
112 111
113 class InstallBaktaDatabase(GetBaktaDatabaseInfo): 112 class InstallBaktaDatabase(GetBaktaDatabaseInfo):
114 """ 113 """
146 except IOError: 145 except IOError:
147 print(f'ERROR: Could not download file from Zenodo!' 146 print(f'ERROR: Could not download file from Zenodo!'
148 f' url={self.db_url}, path={self.tarball_name}') 147 f' url={self.db_url}, path={self.tarball_name}')
149 148
150 def untar(self): 149 def untar(self):
151 db_path = Path(self.db_dir).joinpath(self.db_name) 150 db_path = Path(self.db_dir).as_posix()
152 try: 151 try:
153 with self.tarball_path.open('rb') as fh_in, \ 152 with self.tarball_path.open('rb') as fh_in, \
154 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: 153 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
155 tar_file.extractall(path=str(db_path)) 154 tar_file.extractall(path=db_path)
156 print(f'Untar the database in {db_path}') 155 print(f'Untar the database in {db_path}')
157 return db_path.as_posix() 156 return db_path
158 except OSError: 157 except OSError:
159 sys.exit(f'ERROR: Could not extract {self.tarball_name} ' 158 sys.exit(f'ERROR: Could not extract {self.tarball_name} '
160 f'to {self.db_name}') 159 f'to {self.db_name}')
161 160
162 def calc_md5_sum(self, buffer_size=1048576): 161 def calc_md5_sum(self, buffer_size=1048576):
173 else: 172 else:
174 print(f"Error: corrupt database file! " 173 print(f"Error: corrupt database file! "
175 f"calculated md5 = {md5.hexdigest()}" 174 f"calculated md5 = {md5.hexdigest()}"
176 f" different from {self.md5} ") 175 f" different from {self.md5} ")
177 176
177
178 """
179 This is the method to download the amrfinderplus database need by bakta.
180 Deprecated to use the amrfinderplus data_manager
178 def update_amrfinderplus_db(self): 181 def update_amrfinderplus_db(self):
179 amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" 182 amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db"
180 if self.db_version == "test": 183 if self.db_version == "test":
181 cmd = [ 184 cmd = [
182 'amrfinder_update', 185 'amrfinder_update',
198 print(f"ERROR: AMRFinderPlus failed! " 201 print(f"ERROR: AMRFinderPlus failed! "
199 f"command: 'amrfinder_update --force_update" 202 f"command: 'amrfinder_update --force_update"
200 f" --database {amrfinderplus_db_path}'") 203 f" --database {amrfinderplus_db_path}'")
201 else: 204 else:
202 print("AMRFinderPlus database download") 205 print("AMRFinderPlus database download")
206 """
203 207
204 208
205 def parse_arguments(): 209 def parse_arguments():
206 # parse options and arguments 210 # parse options and arguments
207 arg_parser = argparse.ArgumentParser() 211 arg_parser = argparse.ArgumentParser()
219 223
220 def main(): 224 def main():
221 all_args = parse_arguments() 225 all_args = parse_arguments()
222 226
223 with open(all_args.data_manager_json) as fh: 227 with open(all_args.data_manager_json) as fh:
224 data_manager_input = json.load(fh) 228 params = json.load(fh)
225 229 target_dir = params['output_data'][0]['extra_files_path']
226 target_dir = data_manager_input['output_data'][0]['extra_files_path']
227 os.makedirs(target_dir) 230 os.makedirs(target_dir)
228
229 # init the class to download bakta db 231 # init the class to download bakta db
230 bakta_upload = InstallBaktaDatabase() 232 bakta_upload = InstallBaktaDatabase()
231 # extract the version 233 # extract the version
232 if all_args.test is True: 234 if all_args.test is True:
233 bakta_db = bakta_upload.fetch_db_versions( 235 bakta_db = bakta_upload.fetch_db_versions(
240 # download the database 242 # download the database
241 bakta_upload.download() 243 bakta_upload.download()
242 # check md5 sum 244 # check md5 sum
243 bakta_upload.calc_md5_sum() 245 bakta_upload.calc_md5_sum()
244 # untar db 246 # untar db
245 bakta_extracted_path = bakta_upload.untar() 247 bakta_upload.untar()
246 # update for amrfinderplus
247 bakta_upload.update_amrfinderplus_db()
248 # make the data_manager metadata 248 # make the data_manager metadata
249 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) 249 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=target_dir)
250 with open(all_args.data_manager_json, 'w') as fh: 250 with open(all_args.data_manager_json, 'w') as fh:
251 json.dump(bakta_data_manager, fh, sort_keys=True) 251 json.dump(bakta_data_manager, fh, sort_keys=True)
252 252
253 253
254 if __name__ == '__main__': 254 if __name__ == '__main__':