comparison data_manager/gtdbtk_database_installer.py @ 1:7093598fa300 draft

Uploaded
author greg
date Tue, 15 Mar 2022 19:31:23 +0000
parents 3ab83cb7e2d2
children e4fe7259c4e9
comparison
equal deleted inserted replaced
0:3ab83cb7e2d2 1:7093598fa300
7 import tarfile 7 import tarfile
8 from urllib.request import Request, urlopen 8 from urllib.request import Request, urlopen
9 from urllib.parse import urlparse 9 from urllib.parse import urlparse
10 10
11 11
12 def url_download(url, work_dir): 12 def url_download(url, target_directory):
13 url_parts = urlparse(url) 13 url_parts = urlparse(url)
14 file_path = os.path.abspath(os.path.join(work_dir, os.path.basename(url_parts.path))) 14 tarball = os.path.abspath(os.path.join(target_directory, os.path.basename(url_parts.path)))
15 src = None 15 src = None
16 dst = None 16 dst = None
17 try: 17 try:
18 req = Request(url) 18 req = Request(url)
19 src = urlopen(req) 19 src = urlopen(req)
20 with open(file_path, 'wb') as dst: 20 with open(tarball, 'wb') as dst:
21 while True: 21 while True:
22 chunk = src.read(2**10) 22 chunk = src.read(2**10)
23 if chunk: 23 if chunk:
24 dst.write(chunk) 24 dst.write(chunk)
25 else: 25 else:
26 break 26 break
27 except Exception as e: 27 except Exception as e:
28 sys.exit(str(e)) 28 sys.exit(str(e))
29 finally: 29 finally:
30 if src: 30 if src is not None:
31 src.close() 31 src.close()
32 if tarfile.is_tarfile(file_path): 32 if tarfile.is_tarfile(tarball):
33 fh = tarfile.open(file_path, 'r:*') 33 fh = tarfile.open(tarball, 'r:*')
34 else: 34 else:
35 return file_path 35 return tarball
36 fh.extractall(work_dir) 36 fh.extractall(target_directory)
37 os.remove(file_path) 37 fh.close()
38 return work_dir 38 os.remove(tarball)
39 return target_directory
39 40
40 41
41 def download(database_id, database_name, url, out_file): 42 def download(database_id, database_name, url, out_file):
42 43
43 with open(out_file) as fh: 44 with open(out_file) as fh:
44 params = json.load(fh) 45 params = json.load(fh)
45 46
46 work_dir = params['output_data'][0]['extra_files_path'] 47 target_directory = params['output_data'][0]['extra_files_path']
47 os.makedirs(work_dir) 48 os.makedirs(target_directory)
48 file_path = url_download(url, work_dir) 49 file_path = url_download(url, target_directory)
49 50
50 data_manager_json = {"data_tables": {}} 51 data_manager_json = {"data_tables": {}}
51 data_manager_entry = {} 52 data_manager_entry = {}
52 data_manager_entry['value'] = database_id 53 data_manager_entry['value'] = database_id
53 data_manager_entry['name'] = database_name 54 data_manager_entry['name'] = database_name
54 data_manager_entry['path'] = file_path 55 data_manager_entry['db_path'] = file_path
55 data_manager_json["data_tables"]["gtdbtk_database"] = data_manager_entry 56 data_manager_json["data_tables"]["gtdbtk_database"] = data_manager_entry
56 57
57 with open(out_file, 'w') as fh: 58 with open(out_file, 'w') as fh:
58 json.dump(data_manager_json, fh, sort_keys=True) 59 json.dump(data_manager_json, fh, sort_keys=True)
59 60