Mercurial > repos > iuc > data_manager_omamer
diff data_manager/omamer.py @ 0:d28438704310 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_omamer commit b890cd87ef5cfaa81eda29cc935de224ecb05bb6
| author | iuc |
|---|---|
| date | Wed, 14 Aug 2024 15:42:38 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/omamer.py Wed Aug 14 15:42:38 2024 +0000 @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +import argparse +import json +import os +import sys + +import requests + +# URL for downloading OMAmer datasets +OMAMER_DATASETS_URL = "https://omabrowser.org/All/{dataset}" + +# List of OMAmer data sets with versions +OMAMER_DATASETS = { + "Primates_v2.0.0": "Primates-v2.0.0.h5", + "Viridiplantae_v2.0.0": "Viridiplantae-v2.0.0.h5", + "Metazoa_v2.0.0": "Metazoa-v2.0.0.h5", + "LUCA_v0.2.5": "LUCA-v0.2.5.h5", + "LUCA_v2.0.0": "LUCA-v2.0.0.h5", + "Saccharomyceta": "Saccharomyceta.h5", + "Homininae": "Homininae.h5", +} + +DEFAULT_OUTPUT_DIR = "database_omamer" + + +def download_file(url, dest): + try: + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(dest, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + print(f"Downloaded: {url} to {dest}") + except requests.exceptions.RequestException as e: + print(f"Error downloading {url}: {e}") + sys.exit(1) + + +def main(args): + + with open(args.json) as fh: + params = json.load(fh) + target_directory = params["output_data"][0]["extra_files_path"] + + # Create output directory if none exists + if not os.path.exists(target_directory): + os.makedirs(target_directory) + + # Check if the selected dataset exists + if args.name not in OMAMER_DATASETS: + print(f"Error: Selected dataset '{args.name}' not found.") + sys.exit(1) + + # Download the selected OMAmer dataset + dataset = OMAMER_DATASETS[args.name] + url = OMAMER_DATASETS_URL.format(dataset=dataset) + base_name = os.path.splitext(dataset)[0] + destination_path = os.path.join(target_directory, dataset) + download_file(url, destination_path) + + data_manager_entry = { + "value": dataset, + "name": base_name, + "version": args.version, + "path": dataset, + } + + # Creates a JSON dictionary representing the Data Manager configuration + data_manager_json = {"data_tables": {"omamer": [data_manager_entry]}} + + # Writes this JSON dictionary to the specified output file + with open(args.json, "w") as fh: + json.dump(data_manager_json, fh, indent=2, sort_keys=True) + + +if __name__ == "__main__": + # Set up argparse to specify expected command line arguments + parser = argparse.ArgumentParser(description='Download data for OMAmer') + parser.add_argument('--name', default='Primates', choices=OMAMER_DATASETS.keys(), help='Select dataset to download') + parser.add_argument('--json', help='Path to JSON file') + parser.add_argument("--version", help="Omamer version") + + args = parser.parse_args() + + main(args)
