Mercurial > repos > bgruening > get_online_data
comparison get_online_data.py @ 0:deb08c131d50 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb
| author | bgruening |
|---|---|
| date | Wed, 22 May 2019 07:42:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:deb08c131d50 |
|---|---|
| 1 import os | |
| 2 import urllib.request | |
| 3 import gzip, tempfile | |
| 4 import zipfile | |
| 5 import subprocess | |
| 6 import shutil | |
| 7 import argparse | |
| 8 from io import BytesIO | |
| 9 | |
def unescape(cond_text):
    """Return *cond_text* with escape tokens turned back into characters.

    Each ``__xx__`` token listed below is replaced by the literal character
    it stands for (for example ``__gt__`` becomes ``>`` and ``__cn__``
    becomes a newline). Text without any token is returned unchanged.
    """
    # (token, character) pairs, applied one after another in this order.
    token_table = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
    )
    restored = cond_text
    for token, character in token_table:
        restored = restored.replace(token, character)
    return restored
| 28 | |
def _copy_zip_members(archive_bytes, allowed_extensions, out):
    """Append the whitelisted members of an in-memory zip archive to *out*.

    The archive is unpacked once into a temporary directory, which is
    removed again even if extraction or copying raises. A member is copied
    when its lower-cased extension is in *allowed_extensions*; an empty
    list allows every member.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(BytesIO(archive_bytes), allowZip64=True) as zf:
            zf.extractall(tmpdir)

        for root, dirs, files in os.walk(tmpdir):
            # Sort in place so the concatenation order does not depend on
            # the order in which the filesystem lists directory entries.
            dirs.sort()
            for filename in sorted(files):
                extension = os.path.splitext(filename)[-1].lower()
                if allowed_extensions and extension not in allowed_extensions:
                    continue
                with open(os.path.join(root, filename), 'rb') as member:
                    shutil.copyfileobj(member, out)


def get_files(options):
    """Download every URL in ``options.url`` and concatenate the data into ``options.out``.

    ``options.url`` holds one URL per line and ``options.whitelist``
    (optional) one file extension per line; both are passed through
    ``unescape`` first. For each download the first two bytes decide how
    the payload is handled:

    * gzip magic number: the decompressed data is written;
    * zip magic number (``PK``): members whose extension is whitelisted
      are written (default whitelist: ``.sdf``, ``.smi``, ``.inchi``,
      ``.mol``);
    * anything else: the payload is written unchanged.

    Blank lines in the URL list are skipped. Errors raised by
    ``urllib.request.urlopen``, ``gzip`` or ``zipfile`` propagate to the
    caller.
    """
    urls = unescape(options.url)

    if options.whitelist:
        # File extensions are lower-cased before the comparison, so the
        # whitelist entries must be lower-cased as well to ever match.
        allowed_extensions = [
            ext.strip().lower()
            for ext in unescape(options.whitelist).split('\n')
        ]
    else:
        allowed_extensions = ['.sdf', '.smi', '.inchi', '.mol']

    with open(options.out, 'wb') as out:
        for url in urls.split('\n'):
            # Drop surrounding whitespace (e.g. a '\r' left over from CRLF
            # input) and ignore empty lines instead of failing in urlopen.
            url = url.strip()
            if not url:
                continue

            request = urllib.request.Request(url)
            with urllib.request.urlopen(request) as response:
                payload = response.read()

            magic = payload[:2]
            if magic == b'\x1f\x8b':  # magic number of gzipped files
                out.write(gzip.decompress(payload))
            elif magic == b'PK':  # magic number of zipped files
                _copy_zip_members(payload, allowed_extensions, out)
            else:
                out.write(payload)
| 64 | |
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to
    # get_files(), which reads ``url``, ``whitelist`` and ``out``.
    parser = argparse.ArgumentParser(
        description="Download compressed files and extract files with chosen extensions"
    )
    # --url and --out are mandatory: without them get_files() would fail
    # on a None value instead of reporting a usage error.
    parser.add_argument('--url', dest='url', required=True, help='URL')
    parser.add_argument('--whitelist', dest='whitelist', default=None, help='whitelist')
    parser.add_argument('--out', dest='out', required=True, help='output')

    options = parser.parse_args()
    get_files(options)
