Mercurial > repos > pimarin > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 11:c50e8b4f74d7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit 572bbceeba9a22cdb591d526abda362595f8f0c4
author | pimarin |
---|---|
date | Wed, 02 Nov 2022 11:06:36 +0000 |
parents | 9c65e5da43e2 |
children | bcac3aa1f494 |
comparison
equal
deleted
inserted
replaced
10:9c65e5da43e2 | 11:c50e8b4f74d7 |
---|---|
6 import sys | 6 import sys |
7 import tarfile | 7 import tarfile |
8 from datetime import datetime | 8 from datetime import datetime |
9 from pathlib import Path | 9 from pathlib import Path |
10 | 10 |
11 import bakta.constants as bc | |
12 import bakta.utils as bu | |
13 import requests | 11 import requests |
14 from alive_progress import alive_bar | 12 from alive_progress import alive_bar |
15 | 13 |
16 | 14 |
17 class GetBaktaDatabaseInfo: | 15 class GetBaktaDatabaseInfo: |
27 self.db_url = None | 25 self.db_url = None |
28 self.data_table_entry = None | 26 self.data_table_entry = None |
29 self.data_table_name = data_table_name | 27 self.data_table_name = data_table_name |
30 self.db_name = db_name | 28 self.db_name = db_name |
31 self.db_version = db_version | 29 self.db_version = db_version |
30 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' | |
32 | 31 |
33 def get_data_table_format(self): | 32 def get_data_table_format(self): |
34 """ | 33 """ |
35 Build a data table format for galaxy | 34 Build a data table format for galaxy |
36 using the bakta database information | 35 using the bakta database information |
55 db_version: a string of the version number | 54 db_version: a string of the version number |
56 in the galaxy wrapper list or just latest | 55 in the galaxy wrapper list or just latest |
57 return: info for the select or the latest bakta db version | 56 return: info for the select or the latest bakta db version |
58 """ | 57 """ |
59 try: | 58 try: |
60 with requests.get(bc.DB_VERSIONS_URL) as resp: | 59 with requests.get(self.DB_VERSIONS_URL) as resp: |
61 versions = json.loads(resp.content) | 60 versions = json.loads(resp.content) |
62 except IOError as e: | 61 except IOError as e: |
63 print(e, file=sys.stderr) | 62 print(e, file=sys.stderr) |
64 raise e | 63 raise e |
65 else: | 64 else: |
106 dbkey=bakta_database_info['date'], | 105 dbkey=bakta_database_info['date'], |
107 database_record=bakta_database_info['record'], | 106 database_record=bakta_database_info['record'], |
108 bakta_version=str( | 107 bakta_version=str( |
109 f"{bakta_database_info['software-min']['major']}." | 108 f"{bakta_database_info['software-min']['major']}." |
110 f"{bakta_database_info['software-min']['minor']}" | 109 f"{bakta_database_info['software-min']['minor']}" |
111 ), path=output_path) | 110 ), |
111 path=output_path) | |
112 self.bakta_table_list["data_tables"][self.data_table_name] = data_info | 112 self.bakta_table_list["data_tables"][self.data_table_name] = data_info |
113 return self.bakta_table_list | 113 return self.bakta_table_list |
114 | 114 |
115 | 115 |
116 class InstallBaktaDatabase(GetBaktaDatabaseInfo): | 116 class InstallBaktaDatabase(GetBaktaDatabaseInfo): |
128 self.md5 = None | 128 self.md5 = None |
129 self.db_dir = db_dir | 129 self.db_dir = db_dir |
130 self.db_name = db_name | 130 self.db_name = db_name |
131 self.tarball_name = tarball_name | 131 self.tarball_name = tarball_name |
132 self.tarball_path = None | 132 self.tarball_path = None |
133 bu.test_dependency(bu.DEPENDENCY_AMRFINDERPLUS) | |
134 | 133 |
135 def download(self): | 134 def download(self): |
136 self.db_name = f'{self.db_name}_{self.db_version}' | 135 self.db_name = f'{self.db_name}_{self.db_version}' |
137 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) | 136 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) |
138 try: | 137 try: |
219 help='Select the database version ' | 218 help='Select the database version ' |
220 '(major and minor eg. 4.0),' | 219 '(major and minor eg. 4.0),' |
221 'default is the latest version', | 220 'default is the latest version', |
222 default="latest", | 221 default="latest", |
223 required=True) | 222 required=True) |
223 arg_parser.add_argument("-t", "--test", action='store_true', | |
224 help="option to test the script with an empty database") | |
224 return arg_parser.parse_args() | 225 return arg_parser.parse_args() |
225 | 226 |
226 | 227 |
227 def main(): | 228 def main(): |
228 all_args = parse_arguments() | 229 all_args = parse_arguments() |
234 os.makedirs(target_dir) | 235 os.makedirs(target_dir) |
235 | 236 |
236 # init the class to download bakta db | 237 # init the class to download bakta db |
237 bakta_upload = InstallBaktaDatabase() | 238 bakta_upload = InstallBaktaDatabase() |
238 # extract the version | 239 # extract the version |
239 bakta_db = bakta_upload.fetch_db_versions( | 240 if all_args.test is True: |
240 db_version=all_args.database_version) | 241 bakta_db = bakta_upload.fetch_db_versions( |
242 db_version="test") | |
243 else: | |
244 bakta_db = bakta_upload.fetch_db_versions( | |
245 db_version=all_args.database_version) | |
241 # update the path for galaxy | 246 # update the path for galaxy |
242 bakta_upload.db_dir = target_dir | 247 bakta_upload.db_dir = target_dir |
243 # download the database | 248 # download the database |
244 print(bakta_db) | |
245 bakta_upload.download() | 249 bakta_upload.download() |
246 # check md5 sum | 250 # check md5 sum |
247 bakta_upload.calc_md5_sum() | 251 bakta_upload.calc_md5_sum() |
248 # untar db | 252 # untar db |
249 bakta_extracted_path = bakta_upload.untar() | 253 bakta_extracted_path = bakta_upload.untar() |
250 # update for amrfinderplus | 254 # update for amrfinderplus |
251 bakta_upload.update_amrfinderplus_db() | 255 bakta_upload.update_amrfinderplus_db() |
252 # make the data_manager metadata | 256 # make the data_manager metadata |
253 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) | 257 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) |
254 with open(all_args.data_manager_json, 'w') as fh: | 258 with open(all_args.data_manager_json, 'w') as fh: |
255 json.dump(bakta_data_manager, fh, indent=2, sort_keys=True) | 259 json.dump(bakta_data_manager, fh, sort_keys=True) |
256 | 260 |
257 | 261 |
258 if __name__ == '__main__': | 262 if __name__ == '__main__': |
259 main() | 263 main() |