annotate get_online_data.py @ 0:deb08c131d50 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
author bgruening
date Wed, 22 May 2019 07:42:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
1 import os
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
2 import urllib.request
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
3 import gzip, tempfile
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
4 import zipfile
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
5 import subprocess
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
6 import shutil
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
7 import argparse
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
8 from io import BytesIO
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
9
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
def unescape(cond_text):
    """Return *cond_text* with sanitizer placeholder tokens turned back into characters.

    Every ``__xx__`` token listed in the table below is replaced by the
    literal character it stands for (for example ``__gt__`` becomes ``>``
    and ``__cn__`` becomes a newline). Text that contains none of the
    tokens is returned unchanged.

    :param cond_text: string that may contain escaped tokens.
    :returns: the string with all known tokens replaced.
    """
    # Literal character -> placeholder token. The table mirrors the
    # substitutions applied upstream; '#' / '__pd__' is included so that
    # URLs carrying a fragment are restored as well.
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }
    # Tokens are substituted one after another in table order.
    for char, token in mapped_chars.items():
        cond_text = cond_text.replace(token, char)
    return cond_text
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
28
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
def get_files(options):
    """Download every URL in ``options.url`` and concatenate the data into ``options.out``.

    ``options.url`` holds one URL per line (after unescaping). Each payload
    is inspected by its first two bytes:

    * gzip data (``1f 8b``) is decompressed and appended to the output;
    * zip data (``PK``) is opened in memory and every member whose file
      extension is in the whitelist is appended, in archive order;
    * anything else is appended unchanged.

    :param options: object with the attributes ``url`` (newline separated
        URLs), ``out`` (path of the output file, overwritten) and
        ``whitelist`` (newline separated extensions such as ``.sdf``, or a
        false value to use the default ``.sdf``/``.smi``/``.inchi``/``.mol``).
    :raises urllib.error.URLError: if a download fails.
    :raises zipfile.BadZipFile: if a payload starts with ``PK`` but is not
        a readable zip archive.
    """
    urls = unescape(options.url)

    if options.whitelist:
        # Extensions of archive members are lower-cased before comparison,
        # so the whitelist is lower-cased too; blank lines are dropped so a
        # trailing newline does not whitelist extension-less files.
        allowed_extensions = [
            ext.strip().lower()
            for ext in unescape(options.whitelist).split('\n')
            if ext.strip()
        ]
    else:
        allowed_extensions = ['.sdf', '.smi', '.inchi', '.mol']

    with open(options.out, 'wb+') as out:
        for url in urls.split('\n'):
            url = url.strip()
            if not url:
                # Blank lines (e.g. a trailing newline) are not URLs.
                continue

            # Fetch once and release the connection before processing.
            with urllib.request.urlopen(urllib.request.Request(url)) as response:
                resp_read = response.read()

            if resp_read[:2] == b'\x1f\x8b':  # magic number of gzip files
                out.write(gzip.decompress(resp_read))
            elif resp_read[:2] == b'PK':  # magic number of zip files
                # Stream the wanted members straight out of the in-memory
                # archive; nothing is written to a temporary directory.
                with zipfile.ZipFile(BytesIO(resp_read), allowZip64=True) as zf:
                    for info in zf.infolist():
                        if info.filename.endswith('/'):
                            continue  # directory entry, no data
                        extension = os.path.splitext(info.filename)[-1].lower()
                        # An empty whitelist accepts every member.
                        if allowed_extensions and extension not in allowed_extensions:
                            continue
                        with zf.open(info) as member:
                            shutil.copyfileobj(member, out)
            else:
                out.write(resp_read)
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
64
deb08c131d50 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
if __name__ == "__main__":
    # Command line entry point: parse the arguments and hand them to
    # get_files(), which performs the downloads and writes the output.
    parser = argparse.ArgumentParser(
        description="Download compressed files and extract files with chosen extensions"
    )
    # --url and --out are mandatory: get_files() cannot work without them,
    # so a missing value is reported as a usage error instead of failing
    # later with an unrelated exception.
    parser.add_argument('--url', dest='url', required=True, help='URL')
    parser.add_argument('--whitelist', dest='whitelist', default=None, help='whitelist')
    parser.add_argument('--out', dest='out', required=True, help='output')

    options = parser.parse_args()
    get_files(options)