Mercurial > repos > iuc > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 5:ba7b35caf55c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit e0ce56ac52cff0e8f85e546440d28ca46853b11d
| author | iuc |
|---|---|
| date | Thu, 20 Jun 2024 19:13:47 +0000 |
| parents | b71b550553b2 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:b71b550553b2 | 5:ba7b35caf55c |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import hashlib | 2 import hashlib |
| 3 import json | 3 import json |
| 4 import os | |
| 5 import re | 4 import re |
| 6 import shutil | |
| 7 import sys | 5 import sys |
| 8 import tarfile | 6 import tarfile |
| 9 from datetime import datetime | 7 from datetime import datetime |
| 10 from pathlib import Path | 8 from pathlib import Path |
| 11 | 9 |
| 32 self.data_table_entry = None | 30 self.data_table_entry = None |
| 33 self.data_table_name = data_table_name | 31 self.data_table_name = data_table_name |
| 34 self.tar_name = tarball_name | 32 self.tar_name = tarball_name |
| 35 self.db_version = db_version | 33 self.db_version = db_version |
| 36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" | 34 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" |
| 37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" | 35 self.DB_TEST_URL = "https://zenodo.org/record/11381156/files/db-versions.json" |
| 38 self.test_mode = test_mode | 36 self.test_mode = test_mode |
| 39 | 37 |
| 40 def get_database_type(self): | 38 def get_database_type(self): |
| 41 self.light_db = bool(re.search(pattern="light", string=self.db_version)) | 39 self.light_db = bool(re.search(pattern="light", string=self.db_version)) |
| 42 self.db_version = self.db_version.split(sep="_")[0] | 40 self.db_version = self.db_version.split(sep="_")[0] |
| 135 def download(self): | 133 def download(self): |
| 136 bakta_path = Path(self.db_dir).joinpath(self.tar_name) | 134 bakta_path = Path(self.db_dir).joinpath(self.tar_name) |
| 137 try: | 135 try: |
| 138 with bakta_path.open("wb") as fh_out, requests.get( | 136 with bakta_path.open("wb") as fh_out, requests.get( |
| 139 self.db_url, stream=True) as resp: | 137 self.db_url, stream=True) as resp: |
| 140 total_length = resp.headers.get("content-length") | 138 # total_length = resp.headers.get("content-length") |
| 141 if total_length is None: # no content length header | 139 for data in resp.iter_content(chunk_size=1024 * 1024): |
| 142 for data in resp.iter_content(chunk_size=1024 * 1024): | 140 fh_out.write(data) |
| 143 fh_out.write(data) | |
| 144 else: | |
| 145 for data in resp.iter_content(chunk_size=1024 * 1024): | |
| 146 fh_out.write(data) | |
| 147 print(f"Download bakta database {self.db_version}") | 141 print(f"Download bakta database {self.db_version}") |
| 148 self.tarball_path = bakta_path | 142 self.tarball_path = bakta_path |
| 149 except IOError: | 143 except IOError: |
| 150 print( | 144 print( |
| 151 f"ERROR: Could not download file from Zenodo!" | 145 f"ERROR: Could not download file from Zenodo!" |
| 158 with self.tarball_path.open("rb") as fh_in, tarfile.open( | 152 with self.tarball_path.open("rb") as fh_in, tarfile.open( |
| 159 fileobj=fh_in, mode="r:gz" | 153 fileobj=fh_in, mode="r:gz" |
| 160 ) as tar_file: | 154 ) as tar_file: |
| 161 tar_file.extractall(path=db_path) | 155 tar_file.extractall(path=db_path) |
| 162 print(f"Untar the database in {db_path}") | 156 print(f"Untar the database in {db_path}") |
| 163 | |
| 164 if not self.test_mode: | |
| 165 self.moove_files(db_path=db_path) | |
| 166 | |
| 167 except OSError: | 157 except OSError: |
| 168 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") | 158 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") |
| 169 | 159 if not self.test_mode: |
| 170 def moove_files(self, db_path): | 160 self.move_files(db_path=db_path) |
| 171 if os.path.isdir(db_path.joinpath("db-light")): | 161 self.db_dir = db_path.resolve() |
| 162 | |
| 163 def delete_folder(self, path): | |
| 164 for sub in path.iterdir(): | |
| 165 if sub.is_dir() and sub.name != "latest": | |
| 166 self.delete_folder(sub) | |
| 167 else: | |
| 168 sub.unlink() | |
| 169 path.rmdir() | |
| 170 | |
| 171 def move_files(self, db_path): | |
| 172 if db_path.joinpath("db-light").is_dir(): | |
| 172 input_dir = db_path.joinpath("db-light") | 173 input_dir = db_path.joinpath("db-light") |
| 173 elif os.path.isdir(db_path.joinpath("db")): | 174 elif db_path.joinpath("db").is_dir(): |
| 174 input_dir = db_path.joinpath("db") | 175 input_dir = db_path.joinpath("db") |
| 175 file_list = os.listdir(input_dir) | |
| 176 output_dir = db_path | 176 output_dir = db_path |
| 177 for file in file_list: | 177 for file in input_dir.iterdir(): |
| 178 input = input_dir.joinpath(file) | 178 if file.is_file(): # to avoid moving amrfinder-plus folder |
| 179 output = output_dir.joinpath(file) | 179 output = output_dir.joinpath(file.name) |
| 180 shutil.move(input, output) | 180 file.rename(output) |
| 181 self.delete_folder(input_dir) | |
| 181 | 182 |
| 182 def calc_md5_sum(self, buffer_size=1048576): | 183 def calc_md5_sum(self, buffer_size=1048576): |
| 183 tarball_path = Path(self.db_dir).joinpath(self.tar_name) | 184 tarball_path = Path(self.db_dir).joinpath(self.tar_name) |
| 184 md5 = hashlib.md5() | 185 md5 = hashlib.md5() |
| 185 with tarball_path.open("rb") as fh: | 186 with tarball_path.open("rb") as fh: |
| 221 | 222 |
| 222 def main(): | 223 def main(): |
| 223 all_args = parse_arguments() | 224 all_args = parse_arguments() |
| 224 with open(all_args.data_manager_json) as fh: | 225 with open(all_args.data_manager_json) as fh: |
| 225 params = json.load(fh) | 226 params = json.load(fh) |
| 226 target_dir = params["output_data"][0]["extra_files_path"] | 227 target_dir = Path(params["output_data"][0]["extra_files_path"]) |
| 227 os.makedirs(target_dir) | 228 target_dir.mkdir(parents=True, exist_ok=True) |
| 228 # init the class to download bakta db | 229 # init the class to download bakta db |
| 229 bakta_upload = InstallBaktaDatabase( | 230 bakta_upload = InstallBaktaDatabase( |
| 230 test_mode=all_args.test, db_version=all_args.database_version | 231 test_mode=all_args.test, db_version=all_args.database_version |
| 231 ) | 232 ) |
| 232 bakta_db = bakta_upload.fetch_db_versions() | 233 bakta_db = bakta_upload.fetch_db_versions() |
| 233 # update the path for galaxy | 234 # update the path for galaxy |
| 234 bakta_upload.db_dir = target_dir | 235 bakta_upload.db_dir = target_dir.absolute() |
| 235 # download the database | 236 # download the database |
| 236 bakta_upload.download() | 237 bakta_upload.download() |
| 237 # check md5 sum | 238 # check md5 sum |
| 238 bakta_upload.calc_md5_sum() | 239 bakta_upload.calc_md5_sum() |
| 239 # untar db | 240 # untar db |
