comparison data_manager/bakta_build_database.py @ 11:c50e8b4f74d7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit 572bbceeba9a22cdb591d526abda362595f8f0c4
author pimarin
date Wed, 02 Nov 2022 11:06:36 +0000
parents 9c65e5da43e2
children bcac3aa1f494
comparison
equal deleted inserted replaced
10:9c65e5da43e2 11:c50e8b4f74d7
6 import sys 6 import sys
7 import tarfile 7 import tarfile
8 from datetime import datetime 8 from datetime import datetime
9 from pathlib import Path 9 from pathlib import Path
10 10
11 import bakta.constants as bc
12 import bakta.utils as bu
13 import requests 11 import requests
14 from alive_progress import alive_bar 12 from alive_progress import alive_bar
15 13
16 14
17 class GetBaktaDatabaseInfo: 15 class GetBaktaDatabaseInfo:
27 self.db_url = None 25 self.db_url = None
28 self.data_table_entry = None 26 self.data_table_entry = None
29 self.data_table_name = data_table_name 27 self.data_table_name = data_table_name
30 self.db_name = db_name 28 self.db_name = db_name
31 self.db_version = db_version 29 self.db_version = db_version
30 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json'
32 31
33 def get_data_table_format(self): 32 def get_data_table_format(self):
34 """ 33 """
35 Build a data table format for galaxy 34 Build a data table format for galaxy
36 using the bakta database information 35 using the bakta database information
55 db_version: a string of the version number 54 db_version: a string of the version number
56 in the galaxy wrapper list or just latest 55 in the galaxy wrapper list or just latest
57 return: info for the select or the latest bakta db version 56 return: info for the select or the latest bakta db version
58 """ 57 """
59 try: 58 try:
60 with requests.get(bc.DB_VERSIONS_URL) as resp: 59 with requests.get(self.DB_VERSIONS_URL) as resp:
61 versions = json.loads(resp.content) 60 versions = json.loads(resp.content)
62 except IOError as e: 61 except IOError as e:
63 print(e, file=sys.stderr) 62 print(e, file=sys.stderr)
64 raise e 63 raise e
65 else: 64 else:
106 dbkey=bakta_database_info['date'], 105 dbkey=bakta_database_info['date'],
107 database_record=bakta_database_info['record'], 106 database_record=bakta_database_info['record'],
108 bakta_version=str( 107 bakta_version=str(
109 f"{bakta_database_info['software-min']['major']}." 108 f"{bakta_database_info['software-min']['major']}."
110 f"{bakta_database_info['software-min']['minor']}" 109 f"{bakta_database_info['software-min']['minor']}"
111 ), path=output_path) 110 ),
111 path=output_path)
112 self.bakta_table_list["data_tables"][self.data_table_name] = data_info 112 self.bakta_table_list["data_tables"][self.data_table_name] = data_info
113 return self.bakta_table_list 113 return self.bakta_table_list
114 114
115 115
116 class InstallBaktaDatabase(GetBaktaDatabaseInfo): 116 class InstallBaktaDatabase(GetBaktaDatabaseInfo):
128 self.md5 = None 128 self.md5 = None
129 self.db_dir = db_dir 129 self.db_dir = db_dir
130 self.db_name = db_name 130 self.db_name = db_name
131 self.tarball_name = tarball_name 131 self.tarball_name = tarball_name
132 self.tarball_path = None 132 self.tarball_path = None
133 bu.test_dependency(bu.DEPENDENCY_AMRFINDERPLUS)
134 133
135 def download(self): 134 def download(self):
136 self.db_name = f'{self.db_name}_{self.db_version}' 135 self.db_name = f'{self.db_name}_{self.db_version}'
137 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) 136 bakta_path = Path(self.db_dir).joinpath(self.tarball_name)
138 try: 137 try:
219 help='Select the database version ' 218 help='Select the database version '
220 '(major and minor eg. 4.0),' 219 '(major and minor eg. 4.0),'
221 'default is the latest version', 220 'default is the latest version',
222 default="latest", 221 default="latest",
223 required=True) 222 required=True)
223 arg_parser.add_argument("-t", "--test", action='store_true',
224 help="option to test the script with an empty database")
224 return arg_parser.parse_args() 225 return arg_parser.parse_args()
225 226
226 227
227 def main(): 228 def main():
228 all_args = parse_arguments() 229 all_args = parse_arguments()
234 os.makedirs(target_dir) 235 os.makedirs(target_dir)
235 236
236 # init the class to download bakta db 237 # init the class to download bakta db
237 bakta_upload = InstallBaktaDatabase() 238 bakta_upload = InstallBaktaDatabase()
238 # extract the version 239 # extract the version
239 bakta_db = bakta_upload.fetch_db_versions( 240 if all_args.test is True:
240 db_version=all_args.database_version) 241 bakta_db = bakta_upload.fetch_db_versions(
242 db_version="test")
243 else:
244 bakta_db = bakta_upload.fetch_db_versions(
245 db_version=all_args.database_version)
241 # update the path for galaxy 246 # update the path for galaxy
242 bakta_upload.db_dir = target_dir 247 bakta_upload.db_dir = target_dir
243 # download the database 248 # download the database
244 print(bakta_db)
245 bakta_upload.download() 249 bakta_upload.download()
246 # check md5 sum 250 # check md5 sum
247 bakta_upload.calc_md5_sum() 251 bakta_upload.calc_md5_sum()
248 # untar db 252 # untar db
249 bakta_extracted_path = bakta_upload.untar() 253 bakta_extracted_path = bakta_upload.untar()
250 # update for amrfinderplus 254 # update for amrfinderplus
251 bakta_upload.update_amrfinderplus_db() 255 bakta_upload.update_amrfinderplus_db()
252 # make the data_manager metadata 256 # make the data_manager metadata
253 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) 257 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path)
254 with open(all_args.data_manager_json, 'w') as fh: 258 with open(all_args.data_manager_json, 'w') as fh:
255 json.dump(bakta_data_manager, fh, indent=2, sort_keys=True) 259 json.dump(bakta_data_manager, fh, sort_keys=True)
256 260
257 261
258 if __name__ == '__main__': 262 if __name__ == '__main__':
259 main() 263 main()