comparison data_manager/bakta_build_database.py @ 5:ba7b35caf55c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit e0ce56ac52cff0e8f85e546440d28ca46853b11d
author iuc
date Thu, 20 Jun 2024 19:13:47 +0000
parents b71b550553b2
children
comparison
equal deleted inserted replaced
4:b71b550553b2 5:ba7b35caf55c
1 import argparse 1 import argparse
2 import hashlib 2 import hashlib
3 import json 3 import json
4 import os
5 import re 4 import re
6 import shutil
7 import sys 5 import sys
8 import tarfile 6 import tarfile
9 from datetime import datetime 7 from datetime import datetime
10 from pathlib import Path 8 from pathlib import Path
11 9
32 self.data_table_entry = None 30 self.data_table_entry = None
33 self.data_table_name = data_table_name 31 self.data_table_name = data_table_name
34 self.tar_name = tarball_name 32 self.tar_name = tarball_name
35 self.db_version = db_version 33 self.db_version = db_version
36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" 34 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"
37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" 35 self.DB_TEST_URL = "https://zenodo.org/record/11381156/files/db-versions.json"
38 self.test_mode = test_mode 36 self.test_mode = test_mode
39 37
40 def get_database_type(self): 38 def get_database_type(self):
41 self.light_db = bool(re.search(pattern="light", string=self.db_version)) 39 self.light_db = bool(re.search(pattern="light", string=self.db_version))
42 self.db_version = self.db_version.split(sep="_")[0] 40 self.db_version = self.db_version.split(sep="_")[0]
135 def download(self): 133 def download(self):
136 bakta_path = Path(self.db_dir).joinpath(self.tar_name) 134 bakta_path = Path(self.db_dir).joinpath(self.tar_name)
137 try: 135 try:
138 with bakta_path.open("wb") as fh_out, requests.get( 136 with bakta_path.open("wb") as fh_out, requests.get(
139 self.db_url, stream=True) as resp: 137 self.db_url, stream=True) as resp:
140 total_length = resp.headers.get("content-length") 138 # total_length = resp.headers.get("content-length")
141 if total_length is None: # no content length header 139 for data in resp.iter_content(chunk_size=1024 * 1024):
142 for data in resp.iter_content(chunk_size=1024 * 1024): 140 fh_out.write(data)
143 fh_out.write(data)
144 else:
145 for data in resp.iter_content(chunk_size=1024 * 1024):
146 fh_out.write(data)
147 print(f"Download bakta database {self.db_version}") 141 print(f"Download bakta database {self.db_version}")
148 self.tarball_path = bakta_path 142 self.tarball_path = bakta_path
149 except IOError: 143 except IOError:
150 print( 144 print(
151 f"ERROR: Could not download file from Zenodo!" 145 f"ERROR: Could not download file from Zenodo!"
158 with self.tarball_path.open("rb") as fh_in, tarfile.open( 152 with self.tarball_path.open("rb") as fh_in, tarfile.open(
159 fileobj=fh_in, mode="r:gz" 153 fileobj=fh_in, mode="r:gz"
160 ) as tar_file: 154 ) as tar_file:
161 tar_file.extractall(path=db_path) 155 tar_file.extractall(path=db_path)
162 print(f"Untar the database in {db_path}") 156 print(f"Untar the database in {db_path}")
163
164 if not self.test_mode:
165 self.moove_files(db_path=db_path)
166
167 except OSError: 157 except OSError:
168 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") 158 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}")
169 159 if not self.test_mode:
170 def moove_files(self, db_path): 160 self.move_files(db_path=db_path)
171 if os.path.isdir(db_path.joinpath("db-light")): 161 self.db_dir = db_path.resolve()
162
163 def delete_folder(self, path):
164 for sub in path.iterdir():
165 if sub.is_dir() and sub.name != "latest":
166 self.delete_folder(sub)
167 else:
168 sub.unlink()
169 path.rmdir()
170
171 def move_files(self, db_path):
172 if db_path.joinpath("db-light").is_dir():
172 input_dir = db_path.joinpath("db-light") 173 input_dir = db_path.joinpath("db-light")
173 elif os.path.isdir(db_path.joinpath("db")): 174 elif db_path.joinpath("db").is_dir():
174 input_dir = db_path.joinpath("db") 175 input_dir = db_path.joinpath("db")
175 file_list = os.listdir(input_dir)
176 output_dir = db_path 176 output_dir = db_path
177 for file in file_list: 177 for file in input_dir.iterdir():
178 input = input_dir.joinpath(file) 178 if file.is_file(): # to avoid moving amrfinder-plus folder
179 output = output_dir.joinpath(file) 179 output = output_dir.joinpath(file.name)
180 shutil.move(input, output) 180 file.rename(output)
181 self.delete_folder(input_dir)
181 182
182 def calc_md5_sum(self, buffer_size=1048576): 183 def calc_md5_sum(self, buffer_size=1048576):
183 tarball_path = Path(self.db_dir).joinpath(self.tar_name) 184 tarball_path = Path(self.db_dir).joinpath(self.tar_name)
184 md5 = hashlib.md5() 185 md5 = hashlib.md5()
185 with tarball_path.open("rb") as fh: 186 with tarball_path.open("rb") as fh:
221 222
222 def main(): 223 def main():
223 all_args = parse_arguments() 224 all_args = parse_arguments()
224 with open(all_args.data_manager_json) as fh: 225 with open(all_args.data_manager_json) as fh:
225 params = json.load(fh) 226 params = json.load(fh)
226 target_dir = params["output_data"][0]["extra_files_path"] 227 target_dir = Path(params["output_data"][0]["extra_files_path"])
227 os.makedirs(target_dir) 228 target_dir.mkdir(parents=True, exist_ok=True)
228 # init the class to download bakta db 229 # init the class to download bakta db
229 bakta_upload = InstallBaktaDatabase( 230 bakta_upload = InstallBaktaDatabase(
230 test_mode=all_args.test, db_version=all_args.database_version 231 test_mode=all_args.test, db_version=all_args.database_version
231 ) 232 )
232 bakta_db = bakta_upload.fetch_db_versions() 233 bakta_db = bakta_upload.fetch_db_versions()
233 # update the path for galaxy 234 # update the path for galaxy
234 bakta_upload.db_dir = target_dir 235 bakta_upload.db_dir = target_dir.absolute()
235 # download the database 236 # download the database
236 bakta_upload.download() 237 bakta_upload.download()
237 # check md5 sum 238 # check md5 sum
238 bakta_upload.calc_md5_sum() 239 bakta_upload.calc_md5_sum()
239 # untar db 240 # untar db