Mercurial > repos > pimarin > data_manager_bakta
diff data_manager/bakta_build_database.py @ 16:718080c30a76 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit e7c35e529ae95a2c9f2ecd87a3716e6866feabfa
author | pimarin |
---|---|
date | Fri, 04 Nov 2022 15:46:06 +0000 |
parents | ac4d5e1d1421 |
children | ecb7d3d41366 |
line wrap: on
line diff
--- a/data_manager/bakta_build_database.py Wed Nov 02 16:21:11 2022 +0000 +++ b/data_manager/bakta_build_database.py Fri Nov 04 15:46:06 2022 +0000 @@ -2,13 +2,14 @@ import hashlib import json import os -import subprocess as sp import sys import tarfile from datetime import datetime from pathlib import Path import requests + + class GetBaktaDatabaseInfo: """ Extract bakta database information to make a json file for data_manager @@ -64,7 +65,6 @@ for db_dic in versions: db_date_list.append(datetime.strptime(db_dic["date"], '%Y-%m-%d').date()) - max(db_date_list) filtered_version = next(item for item in versions if max(db_date_list)) elif db_version == "test": @@ -96,17 +96,16 @@ def get_data_manager(self, bakta_database_info, output_path): self.bakta_table_list = self.get_data_table_format() - bakta_value = f"bakta_{bakta_database_info['major']}." \ - f"{bakta_database_info['minor']}" - data_info = dict(value=bakta_value, - dbkey=bakta_database_info['date'], - database_record=bakta_database_info['record'], - bakta_version=str( - f"{bakta_database_info['software-min']['major']}." - f"{bakta_database_info['software-min']['minor']}" - ), - path=output_path) - self.bakta_table_list["data_tables"][self.data_table_name] = data_info + bakta_value = f"V{bakta_database_info['major']}." \ + f"{bakta_database_info['minor']}_" \ + f"{bakta_database_info['date']}" + tool_version = str(f"{bakta_database_info['software-min']['major']}." + f"{bakta_database_info['software-min']['minor']}") + data_info = dict(value=bakta_database_info['record'], + dbkey=bakta_value, + bakta_version=tool_version, + path="db") + self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] return self.bakta_table_list @@ -148,13 +147,13 @@ f' url={self.db_url}, path={self.tarball_name}') def untar(self): - db_path = Path(self.db_dir).joinpath(self.db_name) + db_path = Path(self.db_dir).as_posix() try: with self.tarball_path.open('rb') as fh_in, \ tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: - tar_file.extractall(path=str(db_path)) + tar_file.extractall(path=db_path) print(f'Untar the database in {db_path}') - return db_path.as_posix() + return db_path except OSError: sys.exit(f'ERROR: Could not extract {self.tarball_name} ' f'to {self.db_name}') @@ -175,6 +174,10 @@ f"calculated md5 = {md5.hexdigest()}" f" different from {self.md5} ") + +""" +This is the method to download the amrfinderplus database need by bakta. +Deprecated to use the amrfinderplus data_manager def update_amrfinderplus_db(self): amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" if self.db_version == "test": @@ -200,6 +203,7 @@ f" --database {amrfinderplus_db_path}'") else: print("AMRFinderPlus database download") +""" def parse_arguments(): @@ -221,11 +225,9 @@ all_args = parse_arguments() with open(all_args.data_manager_json) as fh: - data_manager_input = json.load(fh) - - target_dir = data_manager_input['output_data'][0]['extra_files_path'] + params = json.load(fh) + target_dir = params['output_data'][0]['extra_files_path'] os.makedirs(target_dir) - # init the class to download bakta db bakta_upload = InstallBaktaDatabase() # extract the version @@ -242,11 +244,9 @@ # check md5 sum bakta_upload.calc_md5_sum() # untar db - bakta_extracted_path = bakta_upload.untar() - # update for amrfinderplus - bakta_upload.update_amrfinderplus_db() + bakta_upload.untar() # make the data_manager metadata - bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) + bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=target_dir) with open(all_args.data_manager_json, 'w') as fh: json.dump(bakta_data_manager, fh, sort_keys=True)