comparison data_manager/cache_fetcher.py @ 1:35c33747b9e3 draft

"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
author tduigou
date Tue, 05 Jul 2022 12:57:38 +0000
parents e0b92d203870
children
comparison
equal deleted inserted replaced
0:e0b92d203870 1:35c33747b9e3
8 from tempfile import ( 8 from tempfile import (
9 NamedTemporaryFile 9 NamedTemporaryFile
10 ) 10 )
11 11
12 import argparse 12 import argparse
13 import json
13 14
14 def download( 15 def download(
15 url: str, 16 url: str,
16 file: str = "" 17 file: str = ""
17 ) -> str: 18 ) -> str:
34 f = open(file, 'wb') 35 f = open(file, 'wb')
35 f.write(r.content) 36 f.write(r.content)
36 f.close() 37 f.close()
37 return file 38 return file
38 39
def download_entries(url, filename, workdir):
    """Download one cache archive and yield its data-table entry.

    The archive name is ``filename`` with ``.json.gz`` appended. It is
    fetched from ``url`` followed directly by the archive name (plain string
    concatenation, no separator is inserted) and written inside ``workdir``.

    This is a generator: the download only happens once it is iterated.

    Args:
        url: Base URL the archive name is appended to.
        filename: Archive name without the ``.json.gz`` suffix; also used
            as the entry's ``value`` and ``name``.
        workdir: Directory that receives the downloaded file.

    Yields:
        A single dict with the keys ``value``, ``name`` and ``path``;
        ``path`` is the absolute path of the downloaded file.
    """
    archive_name = filename + ".json.gz"
    local_path = os_path.abspath(os_path.join(workdir, archive_name))

    download(url + archive_name, local_path)

    yield {
        "value": filename,
        "name": filename,
        "path": local_path,
    }
53
54
39 parser = argparse.ArgumentParser(description="Download a cache file") 55 parser = argparse.ArgumentParser(description="Download a cache file")
40 parser.add_argument('-u','--url', required=True, default=None, type=str, help="URL the file is downloaded from") 56 parser.add_argument('-f','--filename', required=True, default=None, type=str, help="Cache filename to download")
41 parser.add_argument('-o','--outfile', required=True, default=None, type=str, help="A filename where the downloaded file has stored into") 57 parser.add_argument('-o','--outfile', required=True, default=None, type=str, help="A filename where the downloaded file has stored into")
42 58
43 args = parser.parse_args() 59 args = parser.parse_args()
44 60
45 url= args.url #"https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" 61 url= "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"
46 filename= os_path.basename(args.outfile) #"cid_strc.json.gz" 62 filename= args.filename
47 cache_dir=os_path.dirname(args.outfile) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/'
48 full_filename=os_path.join(cache_dir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz'
49 63
50 if not os_path.isdir(cache_dir):
51 os_mkdir(cache_dir)
52 64
53 download(url+filename, full_filename) 65 data_manager_json = {"data_tables": {}}
66 with open(args.outfile) as fh:
67 params = json.load(fh)
68
69 workdir = params["output_data"][0]["extra_files_path"]
70 os_mkdir(workdir)
71
72 #full_filename=os_path.join(workdir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz'
73
74
75 entries = list(download_entries(url, filename, workdir))
76
77 data_manager_json["data_tables"]["cache"] = entries
78 with open(args.outfile, "w") as fh:
79 json.dump(data_manager_json, fh, sort_keys=True)