changeset 1:35c33747b9e3 draft

"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
author tduigou
date Tue, 05 Jul 2022 12:57:38 +0000
parents e0b92d203870
children 1822b676bf47
files data_manager/cache_fetcher.py data_manager/cache_fetcher.xml test-data/cache.json
diffstat 3 files changed, 45 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/cache_fetcher.py	Mon Jul 04 13:28:30 2022 +0000
+++ b/data_manager/cache_fetcher.py	Tue Jul 05 12:57:38 2022 +0000
@@ -10,6 +10,7 @@
 )
 
 import argparse
+import json
 
 def download(
     url: str,
@@ -36,18 +37,43 @@
     f.close()
     return file
 
+def download_entries(url, filename, workdir):
+    full_filename = os_path.abspath(os_path.join(workdir, filename+".json.gz"))
+
+    download(url+filename +".json.gz", full_filename)
+    data_manager_entry = {}
+    data_manager_entry["value"] = filename
+    data_manager_entry["name"] = filename
+    data_manager_entry["path"] = full_filename
+
+    # NOTE(review): request throttling is disabled here. The original remark cited a limit of fewer than
+    # 10 requests per second for http://bigg.ucsd.edu/data_access, but this function makes a single download()
+    # call and `ix` is not defined in it (was: `if ix % 5 == 0: time.sleep(1)`) -- presumably a leftover; confirm.
+    yield data_manager_entry
+
+
 parser = argparse.ArgumentParser(description="Download a cache file")
-parser.add_argument('-u','--url', required=True, default=None, type=str, help="URL the file is downloaded from")
+parser.add_argument('-f','--filename', required=True, default=None, type=str, help="Cache filename to download")
 parser.add_argument('-o','--outfile', required=True, default=None, type=str, help="A filename where the downloaded file has stored into")
 
 args = parser.parse_args()
 
-url= args.url #"https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"
-filename= os_path.basename(args.outfile) #"cid_strc.json.gz"
-cache_dir=os_path.dirname(args.outfile) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/'
-full_filename=os_path.join(cache_dir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz'
+url= "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"
+filename= args.filename
+
+
+data_manager_json = {"data_tables": {}}
+with open(args.outfile) as fh:
+    params = json.load(fh)
 
-if not os_path.isdir(cache_dir):
-    os_mkdir(cache_dir)
+workdir = params["output_data"][0]["extra_files_path"]
+os_mkdir(workdir)
+# NOTE(review): os_mkdir(workdir) is now unconditional, whereas the removed code checked os_path.isdir() first;
+# presumably os_mkdir is os.mkdir and fails on an existing directory -- confirm workdir is always new.
+# The destination path is no longer assembled here: download_entries() joins workdir and filename itself.
 
-download(url+filename, full_filename)
\ No newline at end of file
+entries = list(download_entries(url, filename, workdir))
+
+data_manager_json["data_tables"]["cache"] = entries
+with open(args.outfile, "w") as fh:
+    json.dump(data_manager_json, fh, sort_keys=True)
\ No newline at end of file
--- a/data_manager/cache_fetcher.xml	Mon Jul 04 13:28:30 2022 +0000
+++ b/data_manager/cache_fetcher.xml	Tue Jul 05 12:57:38 2022 +0000
@@ -3,23 +3,29 @@
     <description>Download cache file</description>
     <requirements>
         <requirement type="package" version="3.7">python</requirement>
+        <requirement type="package" version="2.28.1">requests</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
     python '$__tool_directory__/cache_fetcher.py'
     --outfile '$out_file'
-    --url '$url'
+    --filename '$filename'
     ]]>
     </command>
     <inputs>
-        <param name="url" type="text" value="https://gitlab.com/breakthewall/rrCache-data/-/raw/master/" label="URL to download the cache file" optional="False" />
+        <param name="filename" type="text" value="cid_strc" label="cache file name to download" optional="False" />
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" />
     </outputs>
     <tests>
         <test>
-            <param name="url" value="https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"/>
-            <output name="out_file" value="cid_strc.json.gz" compare="contains"/>
+            <param name="filename" value="cid_strc"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="cid_strc"/>
+                    <has_text text="cid_strc.json.gz"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cache.json	Tue Jul 05 12:57:38 2022 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"cache": [{"name": "cid_strc", "path": "cid_strc.json.gz", "value": "cid_strc"}]}}
\ No newline at end of file