Mercurial > repos > matthias > data_manager_dada2
comparison data_manager/data_manager.py @ 0:419037fe1150 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
| author | matthias |
|---|---|
| date | Thu, 07 Mar 2019 09:33:43 -0500 |
| parents | |
| children | b4c303665291 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:419037fe1150 |
|---|---|
| 1 import argparse | |
| 2 import json | |
| 3 import os | |
| 4 import shutil | |
| 5 import sys | |
| 6 import zipfile | |
| 7 try: | |
| 8 # For Python 3.0 and later | |
| 9 from urllib.request import Request, urlopen | |
| 10 except ImportError: | |
| 11 # Fall back to Python 2 imports | |
| 12 from urllib2 import Request, urlopen | |
| 13 | |
# Comma separated taxonomic ranks used for every data set that has no
# dedicated entry in FILE2TAXLEVELS.
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# data set key -> human readable name stored in the data table
FILE2NAME = {
    "silva132": "Silva version 132",
    "silva128": "Silva version 128",
    "rdp16": "RDP trainset 16",
    "rdp14": "RDP trainset 14",
    "gg13.84": "GreenGenes version 13.8",
}

# data set key -> download URL of the taxonomy training set
FILE2TAXURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "gg13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
}

# data set key -> download URL of the species assignment file
# (there is no entry for "gg13.84")
FILE2SPECIESURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
}

# data set key -> taxonomic ranks overriding DEFAULT_TAXLEVELS
# (currently empty, i.e. every data set uses the default)
FILE2TAXLEVELS = {}
| 41 | |
def url_download(url, fname, workdir):
    """
    download url to workdir/fname

    workdir is created if it does not exist yet and an already existing
    workdir/fname is overwritten

    return the path to the resulting file
    """
    file_path = os.path.join(workdir, fname)
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    src = None
    try:
        src = urlopen(Request(url))
        with open(file_path, 'wb') as dst:
            # stream in buffered chunks so that large reference files are
            # never held in memory completely
            shutil.copyfileobj(src, dst)
    finally:
        # src is still None if urlopen itself raised
        if src is not None:
            src.close()
    return file_path
| 67 | |
def main(dataset, outjson):
    """
    download the reference files of a data set and write the data manager json

    dataset: key of the data set (a key of FILE2TAXURL and FILE2NAME)
    outjson: path of the json file; it is read to determine the target
        directory (params['output_data'][0]['extra_files_path']) and is
        overwritten with the resulting data table entries

    The taxonomy file is always downloaded, the species assignment file only
    if FILE2SPECIESURL has an entry for the data set. All files are downloaded
    to ./dada2 and moved to the target directory afterwards.

    raises KeyError for an unknown data set and OSError if the target
    directory can not be created (e.g. because it already exists)
    """
    with open(outjson) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir(target_directory)

    workdir = os.path.join(os.getcwd(), 'dada2')
    path = url_download(FILE2TAXURL[dataset], dataset + ".taxonomy", workdir)

    data_manager_json = {"data_tables": {}}
    # NOTE(review): the recorded path points into workdir although the files
    # are moved to target_directory below -- confirm that the data manager
    # configuration resolves the final location
    data_manager_json["data_tables"]["dada2_taxonomy"] = {
        'value': dataset,
        'name': FILE2NAME[dataset],
        'path': path,
        'taxlevels': FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS),
    }

    # not every data set has a species assignment file
    species_url = FILE2SPECIESURL.get(dataset)
    if species_url:
        path = url_download(species_url, dataset + ".species", workdir)
        data_manager_json["data_tables"]["dada2_species"] = {
            'value': dataset,
            'name': FILE2NAME[dataset],
            'path': path,
        }

    for filename in os.listdir(workdir):
        shutil.move(os.path.join(workdir, filename), target_directory)
    with open(outjson, 'w') as handle:
        handle.write(json.dumps(data_manager_json))
| 99 | |
if __name__ == '__main__':
    # Command line entry point: both options are mandatory, a missing one
    # would otherwise reach main() as None and fail with an unrelated error.
    parser = argparse.ArgumentParser(description='Create data manager json.')
    parser.add_argument('--out', action='store', required=True, help='JSON filename')
    parser.add_argument('--dataset', action='store', required=True, help='Download data set name')
    args = parser.parse_args()

    main(args.dataset, args.out)
