annotate data_manager/data_manager_cat.py @ 1:5efcedfa6157 draft default tip

"planemo upload commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"
author iuc
date Sun, 22 Nov 2020 12:43:10 +0000
parents 3db8d67192d2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
2 from __future__ import print_function
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
3
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
4 import argparse
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
5 import json
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
6 import os.path
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
7 import subprocess
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
8 import sys
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
9 import tarfile
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
10 import tempfile
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
11 import zipfile
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
12 try:
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
13 # For Python 3.0 and later
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
14 from urllib.request import urlopen
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
15 except ImportError:
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
16 # Fall back to Python 2 imports
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
17 from urllib2 import urlopen
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
18
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
19
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
def url_download(url, workdir):
    """Download ``url`` into ``workdir`` and unpack it if it is an archive.

    The payload is first written to ``download.dat`` inside ``workdir``.
    If that file is a tar archive (any compression ``tarfile`` detects) or
    a zip archive, its members are extracted into ``workdir`` and
    ``download.dat`` is deleted.  Any other payload is left in place as
    ``download.dat``.

    :param url: location passed to ``urlopen``.
    :param workdir: existing directory receiving the download and any
        extracted files.
    :returns: ``None``.
    """
    file_path = os.path.join(workdir, 'download.dat')
    chunk_size = 2 ** 16
    src = urlopen(url)
    try:
        with open(file_path, 'wb') as dst:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: src.read(chunk_size), b''):
                dst.write(chunk)
    finally:
        # Closed by hand: the Python 2 urlopen() result is not a context
        # manager.
        src.close()

    if tarfile.is_tarfile(file_path):
        with tarfile.open(file_path, 'r:*') as archive:
            if hasattr(tarfile, 'data_filter'):
                # The archive comes from a caller-supplied URL, so refuse
                # members that would land outside workdir when the running
                # Python supports extraction filters.
                archive.extractall(workdir, filter='data')
            else:
                # NOTE(review): older interpreters extract member paths
                # unchecked; only point this at trusted archives.
                archive.extractall(workdir)
    elif zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as archive:
            archive.extractall(workdir)
    else:
        # Not an archive: keep the raw download for the caller.
        return
    os.remove(file_path)
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
44
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
45
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
def cat_prepare(install_dir, db_dir=None, tax_dir=None):
    """Run ``CAT prepare`` with ``install_dir`` as the working directory.

    When both ``db_dir`` and ``tax_dir`` are given the command is run with
    ``--existing -d db_dir -t tax_dir``; otherwise it is run with
    ``--fresh -q``.

    :param install_dir: directory the command is executed in.
    :param db_dir: optional database folder passed with ``-d``.
    :param tax_dir: optional taxonomy folder passed with ``-t``.
    :returns: ``None`` when the command exits with status 0.
    :raises Exception: when the command exits non-zero; the message holds
        the command line, the return code and the captured stdout/stderr.
    """
    if db_dir and tax_dir:
        cmd = ['CAT', 'prepare', '--existing', '-d', db_dir, '-t', tax_dir]
    else:
        cmd = ['CAT', 'prepare', '--fresh', '-q']
    # Output is spooled to temp files rather than pipes so a chatty run
    # cannot fill a pipe buffer; the with-block closes them on every path.
    with tempfile.NamedTemporaryFile() as cmd_stdout, \
            tempfile.NamedTemporaryFile() as cmd_stderr:
        return_code = subprocess.call(cmd, shell=False, cwd=install_dir,
                                      stdout=cmd_stdout, stderr=cmd_stderr)
        if return_code:
            # The child wrote through the same file offset, so rewind
            # before reading or the captured output comes back empty.
            cmd_stdout.seek(0)
            cmd_stderr.seek(0)
            msg = "stdout:\n%s\nstderr:\n%s" % (
                cmd_stdout.read().decode('utf-8', 'replace'),
                cmd_stderr.read().decode('utf-8', 'replace'))
            raise Exception('Error: (%s), returncode=%s %s'
                            % (' '.join(cmd), return_code, msg))
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
62
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
63
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
def _find_cat_dirs(install_path):
    """Find the directory under ``install_path`` holding both CAT folders.

    Walks ``install_path`` and returns ``(parent, cat_db, tax_db)`` for the
    first directory that directly contains a sub-directory whose name ends
    with ``CAT_database`` and one whose name ends with ``taxonomy``.
    Returns ``(None, None, None)`` when no directory contains both.
    """
    for root, dirs, _ in os.walk(install_path):
        # Reset per directory so the pair always shares one parent.
        cat_db = None
        tax_db = None
        for dname in dirs:
            if dname.endswith('CAT_database'):
                cat_db = dname
            elif dname.endswith('taxonomy'):
                tax_db = dname
        if cat_db and tax_db:
            return root, cat_db, tax_db
    return None, None, None


def main():
    """Command-line entry point: build a CAT database data table entry.

    Three modes, chosen from the parsed arguments:

    * ``--database_folder`` and ``--taxonomy_folder`` both given: run
      ``cat_prepare`` on those folders from the current directory.
    * ``--install_path`` with ``--db_url``: download and unpack the URL
      into the install path.
    * ``--install_path`` alone: run a fresh ``cat_prepare`` in the install
      path.

    In the two install-path modes the install path is then searched for
    the database and taxonomy directories.  The resulting ``cat_database``
    data table entry is written as JSON to ``--config_file``.

    Exits with status 1 when neither mode's arguments are supplied or when
    the database/taxonomy directories cannot be found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_file', required=True)
    parser.add_argument('--install_path', default=None)
    parser.add_argument('--db_url', default=None)
    parser.add_argument('--database_folder', default=None)
    parser.add_argument('--taxonomy_folder', default=None)
    args = parser.parse_args()

    if args.database_folder and args.taxonomy_folder:
        # Strip trailing separators so basename()/dirname() see the folder
        # name rather than an empty final path component.
        database_folder = args.database_folder.rstrip(os.sep)
        taxonomy_folder = args.taxonomy_folder.rstrip(os.sep)
        cat_path = os.path.dirname(database_folder)
        cat_db = os.path.basename(database_folder)
        tax_db = os.path.basename(taxonomy_folder)
        cat_prepare(os.getcwd(),
                    db_dir=args.database_folder,
                    tax_dir=args.taxonomy_folder)
    elif not args.install_path:
        # A string argument is printed to stderr and yields exit status 1.
        sys.exit('Either --install_path or both --database_folder and '
                 '--taxonomy_folder must be given')
    else:
        if not os.path.exists(args.install_path):
            os.makedirs(args.install_path)
        if args.db_url:
            url_download(args.db_url, args.install_path)
        else:
            cat_prepare(args.install_path)
        cat_path, cat_db, tax_db = _find_cat_dirs(args.install_path)
        if cat_path is None:
            sys.exit('No directory containing both a *CAT_database and a '
                     '*taxonomy folder was found under %s'
                     % args.install_path)

    cat_dir = os.path.basename(cat_path.rstrip(os.sep))
    data_table_entry = dict(value=cat_dir, name=cat_dir,
                            database_folder=os.path.join(cat_dir, cat_db),
                            taxonomy_folder=os.path.join(cat_dir, tax_db))
    dm_dict = {'data_tables': {'cat_database': [data_table_entry]}}
    # save info to json file
    with open(args.config_file, 'w') as fh:
        json.dump(dm_dict, fh, sort_keys=True)
0
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
114
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
115
3db8d67192d2 "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if __name__ == '__main__':
    main()