Mercurial > repos > greg > data_manager_gtdbtk_database_installer
comparison data_manager/gtdbtk_database_installer.py @ 1:7093598fa300 draft
Uploaded
author | greg |
---|---|
date | Tue, 15 Mar 2022 19:31:23 +0000 |
parents | 3ab83cb7e2d2 |
children | e4fe7259c4e9 |
comparison
equal
deleted
inserted
replaced
0:3ab83cb7e2d2 | 1:7093598fa300 |
---|---|
7 import tarfile | 7 import tarfile |
8 from urllib.request import Request, urlopen | 8 from urllib.request import Request, urlopen |
9 from urllib.parse import urlparse | 9 from urllib.parse import urlparse |
10 | 10 |
11 | 11 |
12 def url_download(url, work_dir): | 12 def url_download(url, target_directory): |
13 url_parts = urlparse(url) | 13 url_parts = urlparse(url) |
14 file_path = os.path.abspath(os.path.join(work_dir, os.path.basename(url_parts.path))) | 14 tarball = os.path.abspath(os.path.join(target_directory, os.path.basename(url_parts.path))) |
15 src = None | 15 src = None |
16 dst = None | 16 dst = None |
17 try: | 17 try: |
18 req = Request(url) | 18 req = Request(url) |
19 src = urlopen(req) | 19 src = urlopen(req) |
20 with open(file_path, 'wb') as dst: | 20 with open(tarball, 'wb') as dst: |
21 while True: | 21 while True: |
22 chunk = src.read(2**10) | 22 chunk = src.read(2**10) |
23 if chunk: | 23 if chunk: |
24 dst.write(chunk) | 24 dst.write(chunk) |
25 else: | 25 else: |
26 break | 26 break |
27 except Exception as e: | 27 except Exception as e: |
28 sys.exit(str(e)) | 28 sys.exit(str(e)) |
29 finally: | 29 finally: |
30 if src: | 30 if src is not None: |
31 src.close() | 31 src.close() |
32 if tarfile.is_tarfile(file_path): | 32 if tarfile.is_tarfile(tarball): |
33 fh = tarfile.open(file_path, 'r:*') | 33 fh = tarfile.open(tarball, 'r:*') |
34 else: | 34 else: |
35 return file_path | 35 return tarball |
36 fh.extractall(work_dir) | 36 fh.extractall(target_directory) |
37 os.remove(file_path) | 37 fh.close() |
38 return work_dir | 38 os.remove(tarball) |
 | 39 return target_directory |
39 | 40 |
40 | 41 |
41 def download(database_id, database_name, url, out_file): | 42 def download(database_id, database_name, url, out_file): |
42 | 43 |
43 with open(out_file) as fh: | 44 with open(out_file) as fh: |
44 params = json.load(fh) | 45 params = json.load(fh) |
45 | 46 |
46 work_dir = params['output_data'][0]['extra_files_path'] | 47 target_directory = params['output_data'][0]['extra_files_path'] |
47 os.makedirs(work_dir) | 48 os.makedirs(target_directory) |
48 file_path = url_download(url, work_dir) | 49 file_path = url_download(url, target_directory) |
49 | 50 |
50 data_manager_json = {"data_tables": {}} | 51 data_manager_json = {"data_tables": {}} |
51 data_manager_entry = {} | 52 data_manager_entry = {} |
52 data_manager_entry['value'] = database_id | 53 data_manager_entry['value'] = database_id |
53 data_manager_entry['name'] = database_name | 54 data_manager_entry['name'] = database_name |
54 data_manager_entry['path'] = file_path | 55 data_manager_entry['db_path'] = file_path |
55 data_manager_json["data_tables"]["gtdbtk_database"] = data_manager_entry | 56 data_manager_json["data_tables"]["gtdbtk_database"] = data_manager_entry |
56 | 57 |
57 with open(out_file, 'w') as fh: | 58 with open(out_file, 'w') as fh: |
58 json.dump(data_manager_json, fh, sort_keys=True) | 59 json.dump(data_manager_json, fh, sort_keys=True) |
59 | 60 |