Mercurial > repos > iuc > data_manager_omamer
comparison data_manager/omamer.py @ 0:d28438704310 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_omamer commit b890cd87ef5cfaa81eda29cc935de224ecb05bb6
| author | iuc |
|---|---|
| date | Wed, 14 Aug 2024 15:42:38 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d28438704310 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import json | |
| 5 import os | |
| 6 import sys | |
| 7 | |
| 8 import requests | |
| 9 | |
# URL template for downloading OMAmer datasets; "{dataset}" is replaced with
# a file name taken from the values of OMAMER_DATASETS.
OMAMER_DATASETS_URL = "https://omabrowser.org/All/{dataset}"

# Known OMAmer datasets. Keys are the identifiers accepted by the --name
# command-line option; values are the .h5 file names substituted into
# OMAMER_DATASETS_URL and used as the local file name of the download.
OMAMER_DATASETS = {
    "Primates_v2.0.0": "Primates-v2.0.0.h5",
    "Viridiplantae_v2.0.0": "Viridiplantae-v2.0.0.h5",
    "Metazoa_v2.0.0": "Metazoa-v2.0.0.h5",
    "LUCA_v0.2.5": "LUCA-v0.2.5.h5",
    "LUCA_v2.0.0": "LUCA-v2.0.0.h5",
    "Saccharomyceta": "Saccharomyceta.h5",
    "Homininae": "Homininae.h5",
}

# NOTE(review): not referenced anywhere in the visible part of this file --
# confirm whether it is still needed.
DEFAULT_OUTPUT_DIR = "database_omamer"
| 25 | |
| 26 | |
def download_file(url, dest, timeout=60):
    """Stream the file at ``url`` into the local file ``dest``.

    Args:
        url: HTTP(S) address of the file to fetch.
        dest: Path of the local file to write (opened in binary mode and
            overwritten if it already exists).
        timeout: Seconds to wait for the server to accept the connection or
            to send the next bytes before giving up. Passed unchanged to
            ``requests.get``; without it a stalled server would block the
            job forever.

    On any ``requests`` error (connection failure, timeout, HTTP error
    status, interrupted transfer) the partially written file is removed,
    the error is reported on stderr and the process exits with status 1.
    """
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(dest, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.exceptions.RequestException as e:
        # Do not leave a truncated file behind that could be mistaken for a
        # complete dataset.
        try:
            os.remove(dest)
        except FileNotFoundError:
            pass
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Error downloading {url}: {e}")
    else:
        print(f"Downloaded: {url} to {dest}")
| 38 | |
| 39 | |
def main(args):
    """Download the selected OMAmer dataset and write the data table entry.

    Args:
        args: Parsed command-line namespace providing ``name`` (a key of
            ``OMAMER_DATASETS``), ``json`` (path of the JSON parameter file,
            which is read and then overwritten with the result) and
            ``version`` (stored verbatim in the data table entry).

    The download directory is taken from
    ``params["output_data"][0]["extra_files_path"]`` in the JSON file.
    Exits with status 1 and a message on stderr when ``args.name`` is not a
    known dataset.
    """
    # Validate first so that an unknown dataset has no filesystem side
    # effects (no directory is created for a run that cannot succeed).
    if args.name not in OMAMER_DATASETS:
        sys.exit(f"Error: Selected dataset '{args.name}' not found.")

    with open(args.json) as fh:
        params = json.load(fh)
    target_directory = params["output_data"][0]["extra_files_path"]

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_directory, exist_ok=True)

    # Download the selected OMAmer dataset
    dataset = OMAMER_DATASETS[args.name]
    url = OMAMER_DATASETS_URL.format(dataset=dataset)
    base_name = os.path.splitext(dataset)[0]
    destination_path = os.path.join(target_directory, dataset)
    download_file(url, destination_path)

    data_manager_entry = {
        "value": dataset,
        "name": base_name,
        "version": args.version,
        # File name only; the file itself was written into target_directory.
        "path": dataset,
    }

    # JSON dictionary holding the new row for the "omamer" data table
    data_manager_json = {"data_tables": {"omamer": [data_manager_entry]}}

    # The input parameter file doubles as the output file: overwrite it
    with open(args.json, "w") as fh:
        json.dump(data_manager_json, fh, indent=2, sort_keys=True)
| 75 | |
| 76 | |
if __name__ == "__main__":
    # Set up argparse to specify expected command line arguments
    parser = argparse.ArgumentParser(description='Download data for OMAmer')
    # argparse does not check a default against ``choices``, so the default
    # must itself be a key of OMAMER_DATASETS; the bare name 'Primates' is
    # not one and was always rejected by main().
    parser.add_argument('--name', default='Primates_v2.0.0', choices=list(OMAMER_DATASETS), help='Select dataset to download')
    # main() opens this file unconditionally, so fail early with a usage
    # message instead of a TypeError when it is omitted.
    parser.add_argument('--json', required=True, help='Path to JSON file')
    parser.add_argument("--version", help="Omamer version")

    args = parser.parse_args()

    main(args)
