Mercurial > repos > pimarin > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 16:718080c30a76 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit e7c35e529ae95a2c9f2ecd87a3716e6866feabfa
| author | pimarin |
|---|---|
| date | Fri, 04 Nov 2022 15:46:06 +0000 |
| parents | ac4d5e1d1421 |
| children | ecb7d3d41366 |
comparison
equal
deleted
inserted
replaced
| 15:ac4d5e1d1421 | 16:718080c30a76 |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import hashlib | 2 import hashlib |
| 3 import json | 3 import json |
| 4 import os | 4 import os |
| 5 import subprocess as sp | |
| 6 import sys | 5 import sys |
| 7 import tarfile | 6 import tarfile |
| 8 from datetime import datetime | 7 from datetime import datetime |
| 9 from pathlib import Path | 8 from pathlib import Path |
| 10 | 9 |
| 11 import requests | 10 import requests |
| 11 | |
| 12 | |
| 12 class GetBaktaDatabaseInfo: | 13 class GetBaktaDatabaseInfo: |
| 13 """ | 14 """ |
| 14 Extract bakta database information to make a json file for data_manager | 15 Extract bakta database information to make a json file for data_manager |
| 15 """ | 16 """ |
| 16 | 17 |
| 62 if db_version == "latest": | 63 if db_version == "latest": |
| 63 db_date_list = [] | 64 db_date_list = [] |
| 64 for db_dic in versions: | 65 for db_dic in versions: |
| 65 db_date_list.append(datetime.strptime(db_dic["date"], | 66 db_date_list.append(datetime.strptime(db_dic["date"], |
| 66 '%Y-%m-%d').date()) | 67 '%Y-%m-%d').date()) |
| 67 max(db_date_list) | |
| 68 filtered_version = next(item for item in versions | 68 filtered_version = next(item for item in versions |
| 69 if max(db_date_list)) | 69 if max(db_date_list)) |
| 70 elif db_version == "test": | 70 elif db_version == "test": |
| 71 filtered_version = {"date": "date_test", | 71 filtered_version = {"date": "date_test", |
| 72 "major": "0", | 72 "major": "0", |
| 94 self.db_version = db_version | 94 self.db_version = db_version |
| 95 return filtered_version | 95 return filtered_version |
| 96 | 96 |
| 97 def get_data_manager(self, bakta_database_info, output_path): | 97 def get_data_manager(self, bakta_database_info, output_path): |
| 98 self.bakta_table_list = self.get_data_table_format() | 98 self.bakta_table_list = self.get_data_table_format() |
| 99 bakta_value = f"bakta_{bakta_database_info['major']}." \ | 99 bakta_value = f"V{bakta_database_info['major']}." \ |
| 100 f"{bakta_database_info['minor']}" | 100 f"{bakta_database_info['minor']}_" \ |
| 101 data_info = dict(value=bakta_value, | 101 f"{bakta_database_info['date']}" |
| 102 dbkey=bakta_database_info['date'], | 102 tool_version = str(f"{bakta_database_info['software-min']['major']}." |
| 103 database_record=bakta_database_info['record'], | 103 f"{bakta_database_info['software-min']['minor']}") |
| 104 bakta_version=str( | 104 data_info = dict(value=bakta_database_info['record'], |
| 105 f"{bakta_database_info['software-min']['major']}." | 105 dbkey=bakta_value, |
| 106 f"{bakta_database_info['software-min']['minor']}" | 106 bakta_version=tool_version, |
| 107 ), | 107 path="db") |
| 108 path=output_path) | 108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] |
| 109 self.bakta_table_list["data_tables"][self.data_table_name] = data_info | |
| 110 return self.bakta_table_list | 109 return self.bakta_table_list |
| 111 | 110 |
| 112 | 111 |
| 113 class InstallBaktaDatabase(GetBaktaDatabaseInfo): | 112 class InstallBaktaDatabase(GetBaktaDatabaseInfo): |
| 114 """ | 113 """ |
| 146 except IOError: | 145 except IOError: |
| 147 print(f'ERROR: Could not download file from Zenodo!' | 146 print(f'ERROR: Could not download file from Zenodo!' |
| 148 f' url={self.db_url}, path={self.tarball_name}') | 147 f' url={self.db_url}, path={self.tarball_name}') |
| 149 | 148 |
| 150 def untar(self): | 149 def untar(self): |
| 151 db_path = Path(self.db_dir).joinpath(self.db_name) | 150 db_path = Path(self.db_dir).as_posix() |
| 152 try: | 151 try: |
| 153 with self.tarball_path.open('rb') as fh_in, \ | 152 with self.tarball_path.open('rb') as fh_in, \ |
| 154 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: | 153 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: |
| 155 tar_file.extractall(path=str(db_path)) | 154 tar_file.extractall(path=db_path) |
| 156 print(f'Untar the database in {db_path}') | 155 print(f'Untar the database in {db_path}') |
| 157 return db_path.as_posix() | 156 return db_path |
| 158 except OSError: | 157 except OSError: |
| 159 sys.exit(f'ERROR: Could not extract {self.tarball_name} ' | 158 sys.exit(f'ERROR: Could not extract {self.tarball_name} ' |
| 160 f'to {self.db_name}') | 159 f'to {self.db_name}') |
| 161 | 160 |
| 162 def calc_md5_sum(self, buffer_size=1048576): | 161 def calc_md5_sum(self, buffer_size=1048576): |
| 173 else: | 172 else: |
| 174 print(f"Error: corrupt database file! " | 173 print(f"Error: corrupt database file! " |
| 175 f"calculated md5 = {md5.hexdigest()}" | 174 f"calculated md5 = {md5.hexdigest()}" |
| 176 f" different from {self.md5} ") | 175 f" different from {self.md5} ") |
| 177 | 176 |
| 177 | |
| 178 """ | |
| 179 This is the method to download the amrfinderplus database needed by bakta. | |
| 180 Deprecated: use the amrfinderplus data_manager instead. | |
| 178 def update_amrfinderplus_db(self): | 181 def update_amrfinderplus_db(self): |
| 179 amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" | 182 amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" |
| 180 if self.db_version == "test": | 183 if self.db_version == "test": |
| 181 cmd = [ | 184 cmd = [ |
| 182 'amrfinder_update', | 185 'amrfinder_update', |
| 198 print(f"ERROR: AMRFinderPlus failed! " | 201 print(f"ERROR: AMRFinderPlus failed! " |
| 199 f"command: 'amrfinder_update --force_update" | 202 f"command: 'amrfinder_update --force_update" |
| 200 f" --database {amrfinderplus_db_path}'") | 203 f" --database {amrfinderplus_db_path}'") |
| 201 else: | 204 else: |
| 202 print("AMRFinderPlus database download") | 205 print("AMRFinderPlus database download") |
| 206 """ | |
| 203 | 207 |
| 204 | 208 |
| 205 def parse_arguments(): | 209 def parse_arguments(): |
| 206 # parse options and arguments | 210 # parse options and arguments |
| 207 arg_parser = argparse.ArgumentParser() | 211 arg_parser = argparse.ArgumentParser() |
| 219 | 223 |
| 220 def main(): | 224 def main(): |
| 221 all_args = parse_arguments() | 225 all_args = parse_arguments() |
| 222 | 226 |
| 223 with open(all_args.data_manager_json) as fh: | 227 with open(all_args.data_manager_json) as fh: |
| 224 data_manager_input = json.load(fh) | 228 params = json.load(fh) |
| 225 | 229 target_dir = params['output_data'][0]['extra_files_path'] |
| 226 target_dir = data_manager_input['output_data'][0]['extra_files_path'] | |
| 227 os.makedirs(target_dir) | 230 os.makedirs(target_dir) |
| 228 | |
| 229 # init the class to download bakta db | 231 # init the class to download bakta db |
| 230 bakta_upload = InstallBaktaDatabase() | 232 bakta_upload = InstallBaktaDatabase() |
| 231 # extract the version | 233 # extract the version |
| 232 if all_args.test is True: | 234 if all_args.test is True: |
| 233 bakta_db = bakta_upload.fetch_db_versions( | 235 bakta_db = bakta_upload.fetch_db_versions( |
| 240 # download the database | 242 # download the database |
| 241 bakta_upload.download() | 243 bakta_upload.download() |
| 242 # check md5 sum | 244 # check md5 sum |
| 243 bakta_upload.calc_md5_sum() | 245 bakta_upload.calc_md5_sum() |
| 244 # untar db | 246 # untar db |
| 245 bakta_extracted_path = bakta_upload.untar() | 247 bakta_upload.untar() |
| 246 # update for amrfinderplus | |
| 247 bakta_upload.update_amrfinderplus_db() | |
| 248 # make the data_manager metadata | 248 # make the data_manager metadata |
| 249 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) | 249 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=target_dir) |
| 250 with open(all_args.data_manager_json, 'w') as fh: | 250 with open(all_args.data_manager_json, 'w') as fh: |
| 251 json.dump(bakta_data_manager, fh, sort_keys=True) | 251 json.dump(bakta_data_manager, fh, sort_keys=True) |
| 252 | 252 |
| 253 | 253 |
| 254 if __name__ == '__main__': | 254 if __name__ == '__main__': |
