Mercurial > repos > tduigou > data_manager_cache
comparison data_manager/cache_fetcher.py @ 1:35c33747b9e3 draft
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
| author | tduigou |
|---|---|
| date | Tue, 05 Jul 2022 12:57:38 +0000 |
| parents | e0b92d203870 |
| children | |
comparison
equal
deleted
inserted
replaced
| 0:e0b92d203870 | 1:35c33747b9e3 |
|---|---|
| 8 from tempfile import ( | 8 from tempfile import ( |
| 9 NamedTemporaryFile | 9 NamedTemporaryFile |
| 10 ) | 10 ) |
| 11 | 11 |
| 12 import argparse | 12 import argparse |
| | 13 import json |
| 13 | 14 |
| 14 def download( | 15 def download( |
| 15 url: str, | 16 url: str, |
| 16 file: str = "" | 17 file: str = "" |
| 17 ) -> str: | 18 ) -> str: |
| 34 f = open(file, 'wb') | 35 f = open(file, 'wb') |
| 35 f.write(r.content) | 36 f.write(r.content) |
| 36 f.close() | 37 f.close() |
| 37 return file | 38 return file |
| 38 | 39 |
| | 40 def download_entries(url, filename, workdir): |
| | 41 full_filename = os_path.abspath(os_path.join(workdir, filename+".json.gz")) |
| | 42 |
| | 43 download(url+filename +".json.gz", full_filename) |
| | 44 data_manager_entry = {} |
| | 45 data_manager_entry["value"] = filename |
| | 46 data_manager_entry["name"] = filename |
| | 47 data_manager_entry["path"] = full_filename |
| | 48 |
| | 49 # Make sure that less than 10 requests per second, as required by host (http://bigg.ucsd.edu/data_access) |
| | 50 #if ix % 5 == 0: |
| | 51 # time.sleep(1) |
| | 52 yield data_manager_entry |
| | 53 |
| | 54 |
| 39 parser = argparse.ArgumentParser(description="Download a cache file") | 55 parser = argparse.ArgumentParser(description="Download a cache file") |
| 40 parser.add_argument('-u','--url', required=True, default=None, type=str, help="URL the file is downloaded from") | 56 parser.add_argument('-f','--filename', required=True, default=None, type=str, help="Cache filename to download") |
| 41 parser.add_argument('-o','--outfile', required=True, default=None, type=str, help="A filename where the downloaded file has stored into") | 57 parser.add_argument('-o','--outfile', required=True, default=None, type=str, help="A filename where the downloaded file has stored into") |
| 42 | 58 |
| 43 args = parser.parse_args() | 59 args = parser.parse_args() |
| 44 | 60 |
| 45 url= args.url #"https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" | 61 url= "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" |
| 46 filename= os_path.basename(args.outfile) #"cid_strc.json.gz" | 62 filename= args.filename |
| 47 cache_dir=os_path.dirname(args.outfile) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/' | |
| 48 full_filename=os_path.join(cache_dir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz' | |
| 49 | 63 |
| 50 if not os_path.isdir(cache_dir): | |
| 51 os_mkdir(cache_dir) | |
| 52 | 64 |
| 53 download(url+filename, full_filename) | 65 data_manager_json = {"data_tables": {}} |
| | 66 with open(args.outfile) as fh: |
| | 67 params = json.load(fh) |
| | 68 |
| | 69 workdir = params["output_data"][0]["extra_files_path"] |
| | 70 os_mkdir(workdir) |
| | 71 |
| | 72 #full_filename=os_path.join(workdir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz' |
| | 73 |
| | 74 |
| | 75 entries = list(download_entries(url, filename, workdir)) |
| | 76 |
| | 77 data_manager_json["data_tables"]["cache"] = entries |
| | 78 with open(args.outfile, "w") as fh: |
| | 79 json.dump(data_manager_json, fh, sort_keys=True) |
