Mercurial > repos > tduigou > data_manager_cache
changeset 1:35c33747b9e3 draft
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
| author | tduigou |
|---|---|
| date | Tue, 05 Jul 2022 12:57:38 +0000 |
| parents | e0b92d203870 |
| children | 1822b676bf47 |
| files | data_manager/cache_fetcher.py data_manager/cache_fetcher.xml test-data/cache.json |
| diffstat | 3 files changed, 45 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/cache_fetcher.py Mon Jul 04 13:28:30 2022 +0000 +++ b/data_manager/cache_fetcher.py Tue Jul 05 12:57:38 2022 +0000 @@ -10,6 +10,7 @@ ) import argparse +import json def download( url: str, @@ -36,18 +37,43 @@ f.close() return file +def download_entries(url, filename, workdir): + full_filename = os_path.abspath(os_path.join(workdir, filename+".json.gz")) + + download(url+filename +".json.gz", full_filename) + data_manager_entry = {} + data_manager_entry["value"] = filename + data_manager_entry["name"] = filename + data_manager_entry["path"] = full_filename + + # Make sure that less than 10 requests per second, as required by host (http://bigg.ucsd.edu/data_access) + #if ix % 5 == 0: + # time.sleep(1) + yield data_manager_entry + + parser = argparse.ArgumentParser(description="Download a cache file") -parser.add_argument('-u','--url', required=True, default=None, type=str, help="URL the file is downloaded from") +parser.add_argument('-f','--filename', required=True, default=None, type=str, help="Cache filename to download") parser.add_argument('-o','--outfile', required=True, default=None, type=str, help="A filename where the downloaded file has stored into") args = parser.parse_args() -url= args.url #"https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" -filename= os_path.basename(args.outfile) #"cid_strc.json.gz" -cache_dir=os_path.dirname(args.outfile) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/' -full_filename=os_path.join(cache_dir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz' +url= "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" +filename= args.filename + + +data_manager_json = {"data_tables": {}} +with open(args.outfile) as fh: + params = json.load(fh) -if not os_path.isdir(cache_dir): - os_mkdir(cache_dir) +workdir = params["output_data"][0]["extra_files_path"] +os_mkdir(workdir) + +#full_filename=os_path.join(workdir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz' + -download(url+filename, full_filename) \ No newline at end of file +entries = list(download_entries(url, filename, workdir)) + +data_manager_json["data_tables"]["cache"] = entries +with open(args.outfile, "w") as fh: + json.dump(data_manager_json, fh, sort_keys=True) \ No newline at end of file
--- a/data_manager/cache_fetcher.xml Mon Jul 04 13:28:30 2022 +0000 +++ b/data_manager/cache_fetcher.xml Tue Jul 05 12:57:38 2022 +0000 @@ -3,23 +3,29 @@ <description>Download cache file</description> <requirements> <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="2.28.1">requests</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ python '$__tool_directory__/cache_fetcher.py' --outfile '$out_file' - --url '$url' + --filename '$filename' ]]> </command> <inputs> - <param name="url" type="text" value="https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" label="URL to download the cache file" optional="False" /> + <param name="filename" type="text" value="cid_strc" label="cache file name to download" optional="False" /> </inputs> <outputs> <data name="out_file" format="data_manager_json" /> </outputs> <tests> <test> - <param name="url" value="https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"/> - <output name="out_file" value="cid_strc.json.gz" compare="contains"/> + <param name="filename" value="cid_strc"/> + <output name="out_file"> + <assert_contents> + <has_text text="cid_strc"/> + <has_text text="cid_strc.json.gz"/> + </assert_contents> + </output> </test> </tests> <help><