comparison data_manager/fetch_mothur_reference_data.py @ 3:2004bb845685 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit f845716f6ac93500f143a30abef97eaba406344e"
author iuc
date Fri, 25 Jun 2021 09:36:36 +0000
parents 2ffd2cdc5089
children
1 #!/usr/bin/env python 1 #!/usr/bin/env python3
2 # 2 #
3 # Data manager for reference data for the 'mothur_toolsuite' Galaxy tools 3 # Data manager for reference data for the 'mothur_toolsuite' Galaxy tools
4 import io
4 import json 5 import json
5 import optparse 6 import optparse
6 import os 7 import os
7 import shutil 8 import shutil
8 import sys 9 import sys
9 import tarfile 10 import tarfile
10 import tempfile 11 import tempfile
11 import urllib2 12 import urllib.error
13 import urllib.parse
14 import urllib.request
12 import zipfile 15 import zipfile
13 from functools import reduce 16 from functools import reduce
14 17
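The import block above is the core of the Python 2 to 3 port: urllib2 is split across urllib.request, urllib.error and urllib.parse in Python 3. A minimal sketch of the mapping this script relies on (illustration only, not part of the commit):

    import urllib.error
    import urllib.request

    try:
        # urllib2.urlopen(url) becomes urllib.request.urlopen(url)
        with urllib.request.urlopen("https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gs20.zip") as response:
            data = response.read()
    except urllib.error.URLError as exc:
        # urllib2.URLError becomes urllib.error.URLError
        print(f"download failed: {exc}")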
15 # When extracting files from archives, skip names that 18 # When extracting files from archives, skip names that
16 # start with the following strings 19 # start with the following strings
36 "lookup_gs20": { 39 "lookup_gs20": {
37 "GS20": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gs20.zip", ] 40 "GS20": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gs20.zip", ]
38 }, 41 },
39 # RDP reference files 42 # RDP reference files
40 # http://www.mothur.org/wiki/RDP_reference_files 43 # http://www.mothur.org/wiki/RDP_reference_files
44 "RDP_v18": {
45 "16S rRNA RDP training set 18":
46 [
47 "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.rdp.tgz", ],
48 "16S rRNA PDS training set 18":
49 [
50 "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.pds.tgz", ],
51 },
41 "RDP_v16": { 52 "RDP_v16": {
42 "16S rRNA RDP training set 16": 53 "16S rRNA RDP training set 16":
43 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ], 54 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ],
44 "16S rRNA PDS training set 16": 55 "16S rRNA PDS training set 16":
45 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.pds.tgz", ], 56 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.pds.tgz", ],
74 "RDP training set 6": 85 "RDP training set 6":
75 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/rdptrainingset.zip", ], 86 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/rdptrainingset.zip", ],
76 }, 87 },
77 # Silva reference files 88 # Silva reference files
78 # http://www.mothur.org/wiki/Silva_reference_files 89 # http://www.mothur.org/wiki/Silva_reference_files
90 "silva_release_138.1": {
91 "SILVA release 138.1":
92 [
93 "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v138_1.tgz",
94 "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v138_1.tgz", ],
95 },
79 "silva_release_128": { 96 "silva_release_128": {
80 "SILVA release 128": 97 "SILVA release 128":
81 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz", 98 ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz",
82 "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v128.tgz", ], 99 "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v128.tgz", ],
83 }, 100 },
158 NB the directory pointed to by 'extra_files_path' 175 NB the directory pointed to by 'extra_files_path'
159 doesn't exist initially, it is the job of the script 176 doesn't exist initially, it is the job of the script
160 to create it if necessary. 177 to create it if necessary.
161 178
162 """ 179 """
163 params = json.loads(open(jsonfile).read()) 180 with open(jsonfile) as fh:
181 params = json.load(fh)
164 return (params['param_dict'], 182 return (params['param_dict'],
165 params['output_data'][0]['extra_files_path']) 183 params['output_data'][0]['extra_files_path'])
166 184
167 185
168 # Utility functions for creating data table dictionaries 186 # Utility functions for creating data table dictionaries
170 # Example usage: 188 # Example usage:
171 # >>> d = create_data_tables_dict() 189 # >>> d = create_data_tables_dict()
172 # >>> add_data_table(d,'my_data') 190 # >>> add_data_table(d,'my_data')
173 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human')) 191 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
174 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse')) 192 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
175 # >>> print str(json.dumps(d)) 193 # >>> print(json.dumps(d))
176 def create_data_tables_dict(): 194 def create_data_tables_dict():
177 """Return a dictionary for storing data table information 195 """Return a dictionary for storing data table information
178 196
179 Returns a dictionary that can be used with 'add_data_table' 197 Returns a dictionary that can be used with 'add_data_table'
180 and 'add_data_table_entry' to store information about a 198 and 'add_data_table_entry' to store information about a
227 system. 245 system.
228 246
229 Returns the name that the file is saved with. 247 Returns the name that the file is saved with.
230 248
231 """ 249 """
232 print("Downloading %s" % url) 250 print(f"Downloading {url}")
233 if not target: 251 if not target:
234 target = os.path.basename(url) 252 target = os.path.basename(url)
235 if wd: 253 if wd:
236 target = os.path.join(wd, target) 254 target = os.path.join(wd, target)
237 print("Saving to %s" % target) 255 print(f"Saving to {target}")
238 open(target, 'wb').write(urllib2.urlopen(url).read()) 256 with open(target, 'wb') as fh:
257 url_h = urllib.request.urlopen(url)
258 while True:
259 buffer = url_h.read(io.DEFAULT_BUFFER_SIZE)
260 if buffer == b"":
261 break
262 fh.write(buffer)
239 return target 263 return target
240 264
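The rewritten body of download_file streams the response in io.DEFAULT_BUFFER_SIZE chunks rather than reading the whole body into memory at once, which matters for the larger SILVA archives. A shorter equivalent of the same buffered copy, sketched here for comparison (the commit itself spells the loop out):

    import shutil
    import urllib.request

    def download_file_buffered(url, target):
        # shutil.copyfileobj runs the same read/write loop internally
        with urllib.request.urlopen(url) as response, open(target, 'wb') as fh:
            shutil.copyfileobj(response, fh)
        return target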
241 265
242 def unpack_zip_archive(filen, wd=None): 266 def unpack_zip_archive(filen, wd=None):
243 """Extract files from a ZIP archive 267 """Extract files from a ZIP archive
253 Once all the files are extracted the ZIP archive 277 Once all the files are extracted the ZIP archive
254 file is deleted from the file system. 278 file is deleted from the file system.
255 279
256 """ 280 """
257 if not zipfile.is_zipfile(filen): 281 if not zipfile.is_zipfile(filen):
258 print("%s: not ZIP formatted file") 282 print(f"{filen}: not ZIP formatted file")
259 return [filen] 283 return [filen]
260 file_list = [] 284 file_list = []
261 z = zipfile.ZipFile(filen) 285 with zipfile.ZipFile(filen) as z:
262 for name in z.namelist(): 286 for name in z.namelist():
263 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): 287 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False):
264 print("Ignoring %s" % name) 288 print(f"Ignoring {name}")
265 continue 289 continue
266 if wd: 290 if wd:
267 target = os.path.join(wd, name) 291 target = os.path.join(wd, name)
268 else: 292 else:
269 target = name 293 target = name
270 if name.endswith('/'): 294 if name.endswith('/'):
271 # Make directory 295 # Make directory
272 print("Creating dir %s" % target) 296 print(f"Creating dir {target}")
273 try: 297 try:
274 os.makedirs(target) 298 os.makedirs(target)
275 except OSError: 299 except OSError:
276 pass 300 pass
277 else: 301 else:
278 # Extract file 302 # Extract file
279 print("Extracting %s" % name) 303 print("Extracting {target}")
280 try: 304 try:
281 os.makedirs(os.path.dirname(target)) 305 os.makedirs(os.path.dirname(target))
282 except OSError: 306 except OSError:
283 pass 307 pass
284 open(target, 'wb').write(z.read(name)) 308 with open(target, 'wb') as fh:
285 file_list.append(target) 309 fh.write(z.read(name))
286 print("Removing %s" % filen) 310 file_list.append(target)
311 print(f"Removing {filen}")
287 os.remove(filen) 312 os.remove(filen)
288 return file_list 313 return file_list
289 314
290 315
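The reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False) expression used here and in unpack_tar_archive below answers one question: does the member name start with any prefix in IGNORE_PATHS (the list itself sits outside the hunks shown)? An any()-based spelling gives the identical result and reads more directly:

    # equivalent to: reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False)
    def is_ignored(name, ignore_paths):
        return any(name.startswith(prefix) for prefix in ignore_paths)

    # or, inline: name.startswith(tuple(IGNORE_PATHS))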
291 def unpack_tar_archive(filen, wd=None): 316 def unpack_tar_archive(filen, wd=None):
304 file is deleted from the file system. 329 file is deleted from the file system.
305 330
306 """ 331 """
307 file_list = [] 332 file_list = []
308 if not tarfile.is_tarfile(filen): 333 if not tarfile.is_tarfile(filen):
309 print("%s: not TAR file") 334 print(f"{filen}: not TAR file")
310 return [filen] 335 return [filen]
311 t = tarfile.open(filen) 336 with tarfile.open(filen) as t:
312 for name in t.getnames(): 337 for name in t.getnames():
313 # Check for unwanted files 338 # Check for unwanted files
314 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): 339 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False):
315 print("Ignoring %s" % name) 340 print(f"Ignoring {name}")
316 continue 341 continue
317 # Extract file 342 # Extract file
318 print("Extracting %s" % name) 343 print(f"Extracting {name}")
319 t.extract(name, wd) 344 t.extract(name, wd)
320 if wd: 345 if wd:
321 target = os.path.join(wd, name) 346 target = os.path.join(wd, name)
322 else: 347 else:
323 target = name 348 target = name
324 file_list.append(target) 349 file_list.append(target)
325 print("Removing %s" % filen) 350 print(f"Removing {filen}")
326 os.remove(filen) 351 os.remove(filen)
327 return file_list 352 return file_list
328 353
329 354
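Note that tarfile.open() auto-detects compression from the stream, which is why the .tgz reference archives need no explicit mode. On Python 3.12+ the extraction could also use the filter argument added there to reject hostile member paths; a hedged sketch of that variant (not what this commit does):

    import tarfile

    def unpack_tar_archive_safe(filen, wd=None):
        with tarfile.open(filen) as t:  # compression auto-detected
            for name in t.getnames():
                # filter="data" (Python 3.12+) rejects absolute paths,
                # '..' components and special files during extraction
                t.extract(name, path=wd or ".", filter="data")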
330 def unpack_archive(filen, wd=None): 355 def unpack_archive(filen, wd=None):
338 'wd' specifies the working directory to extract 363 'wd' specifies the working directory to extract
339 the files to, otherwise they are extracted to the 364 the files to, otherwise they are extracted to the
340 current working directory. 365 current working directory.
341 366
342 """ 367 """
343 print("Unpack %s" % filen) 368 print(f"Unpack {filen}")
344 ext = os.path.splitext(filen)[1] 369 ext = os.path.splitext(filen)[1]
345 print("Extension: %s" % ext) 370 print(f"Extension: {ext}")
346 if ext == ".zip": 371 if ext == ".zip":
347 return unpack_zip_archive(filen, wd=wd) 372 return unpack_zip_archive(filen, wd=wd)
348 elif ext == ".tgz": 373 elif ext == ".tgz":
349 return unpack_tar_archive(filen, wd=wd) 374 return unpack_tar_archive(filen, wd=wd)
350 else: 375 else:
381 """ 406 """
382 ext = os.path.splitext(filen)[1] 407 ext = os.path.splitext(filen)[1]
383 try: 408 try:
384 return MOTHUR_FILE_TYPES[ext] 409 return MOTHUR_FILE_TYPES[ext]
385 except KeyError: 410 except KeyError:
386 print("WARNING: unknown file type for " + filen + ", skipping") 411 print(f"WARNING: unknown file type for {filen}, skipping")
387 return None 412 return None
388 413
389 414
390 def get_name(filen): 415 def get_name(filen):
391 """Generate a descriptive name based on the file name 416 """Generate a descriptive name based on the file name
414 datasets: a list of dataset names corresponding to keys in 439 datasets: a list of dataset names corresponding to keys in
415 the MOTHUR_REFERENCE_DATA dictionary 440 the MOTHUR_REFERENCE_DATA dictionary
416 """ 441 """
417 # Make working dir 442 # Make working dir
418 wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) 443 wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd())
419 print("Working dir %s" % wd) 444 print(f"Working dir {wd}")
420 # Iterate over all requested reference data URLs 445 # Iterate over all requested reference data URLs
421 for dataset in datasets: 446 for dataset in datasets:
422 print("Handling dataset '%s'" % dataset) 447 print(f"Handling dataset '{dataset}'")
423 for name in MOTHUR_REFERENCE_DATA[dataset]: 448 for name in MOTHUR_REFERENCE_DATA[dataset]:
424 for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): 449 for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd):
425 type_ = identify_type(f) 450 type_ = identify_type(f)
426 entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) 451 name_from_file = os.path.splitext(os.path.basename(f))[0]
427 print("%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f))) 452 entry_name = f"{name_from_file} ({name})"
453 print(f"{type_}\t\'{entry_name}'\t.../{os.path.basename(f)}")
428 if type_ is not None: 454 if type_ is not None:
429 # Move to target dir 455 # Move to target dir
430 ref_data_file = os.path.basename(f) 456 ref_data_file = os.path.basename(f)
431 f1 = os.path.join(target_dir, ref_data_file) 457 f1 = os.path.join(target_dir, ref_data_file)
432 print("Moving %s to %s" % (f, f1)) 458 print(f"Moving {f} to {f1}")
433 os.rename(f, f1) 459 shutil.move(f, f1)
434 # Add entry to data table 460 # Add entry to data table
435 table_name = "mothur_%s" % type_ 461 table_name = f"mothur_{type_}"
436 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) 462 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file))
437 # Remove working dir 463 # Remove working dir
438 print("Removing %s" % wd) 464 print(f"Removing {wd}")
439 shutil.rmtree(wd) 465 shutil.rmtree(wd)
440 466
441 467
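The switch from os.rename to shutil.move when placing files into target_dir is more than cosmetic: os.rename raises OSError when source and destination live on different filesystems, which is common when a tempdir and a Galaxy data directory sit on separate mounts, while shutil.move falls back to a copy followed by a delete. A minimal illustration of that fallback:

    import os
    import shutil

    def move_file(src, dst):
        try:
            os.rename(src, dst)    # fails with EXDEV across filesystems
        except OSError:
            shutil.move(src, dst)  # copies the data, then removes src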
442 def files_from_filesystem_paths(paths): 468 def files_from_filesystem_paths(paths):
443 """Return list of file paths from arbitrary input paths 469 """Return list of file paths from arbitrary input paths
449 """ 475 """
450 # Collect files to add 476 # Collect files to add
451 files = [] 477 files = []
452 for path in paths: 478 for path in paths:
453 path = os.path.abspath(path) 479 path = os.path.abspath(path)
454 print("Examining '%s'..." % path) 480 print(f"Examining '{path}'...")
455 if os.path.isfile(path): 481 if os.path.isfile(path):
456 # Store full path for file 482 # Store full path for file
457 files.append(path) 483 files.append(path)
458 elif os.path.isdir(path): 484 elif os.path.isdir(path):
459 # Descend into directory and collect the files 485 # Descend into directory and collect the files
488 files = files_from_filesystem_paths(paths) 514 files = files_from_filesystem_paths(paths)
489 # Handle each file individually 515 # Handle each file individually
490 for f in files: 516 for f in files:
491 type_ = identify_type(f) 517 type_ = identify_type(f)
492 if type_ is None: 518 if type_ is None:
493 print("%s: unrecognised type, skipped" % f) 519 print(f"{f}: unrecognised type, skipped")
494 continue 520 continue
495 ref_data_file = os.path.basename(f) 521 ref_data_file = os.path.basename(f)
496 target_file = os.path.join(target_dir, ref_data_file) 522 target_file = os.path.join(target_dir, ref_data_file)
497 entry_name = "%s" % os.path.splitext(ref_data_file)[0] 523 entry_name = "%s" % os.path.splitext(ref_data_file)[0]
498 if description: 524 if description:
499 entry_name += " (%s)" % description 525 entry_name += " (%s)" % description
500 print("%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file)) 526 print(f"{type_}\t\'{entry_name}'\t.../{ref_data_file}")
501 # Link to or copy the data 527 # Link to or copy the data
502 if link_to_data: 528 if link_to_data:
503 os.symlink(f, target_file) 529 os.symlink(f, target_file)
504 else: 530 else:
505 shutil.copyfile(f, target_file) 531 shutil.copyfile(f, target_file)
506 # Add entry to data table 532 # Add entry to data table
507 table_name = "mothur_%s" % type_ 533 table_name = f"mothur_{type_}"
508 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) 534 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file))
509 535
510 536
511 if __name__ == "__main__": 537 if __name__ == "__main__":
512 print("Starting...") 538 print("Starting...")
517 parser.add_option('--datasets', action='store', dest='datasets', default='') 543 parser.add_option('--datasets', action='store', dest='datasets', default='')
518 parser.add_option('--paths', action='store', dest='paths', default=[]) 544 parser.add_option('--paths', action='store', dest='paths', default=[])
519 parser.add_option('--description', action='store', dest='description', default='') 545 parser.add_option('--description', action='store', dest='description', default='')
520 parser.add_option('--link', action='store_true', dest='link_to_data') 546 parser.add_option('--link', action='store_true', dest='link_to_data')
521 options, args = parser.parse_args() 547 options, args = parser.parse_args()
522 print("options: %s" % options) 548 print(f"options: {options}")
523 print("args : %s" % args) 549 print(f"args : {args}")
524 550
525 # Check for JSON file 551 # Check for JSON file
526 if len(args) != 1: 552 if len(args) != 1:
527 sys.stderr.write("Need to supply JSON file name") 553 sys.stderr.write("Need to supply JSON file name")
528 sys.exit(1) 554 sys.exit(1)
531 557
532 # Read the input JSON 558 # Read the input JSON
533 params, target_dir = read_input_json(jsonfile) 559 params, target_dir = read_input_json(jsonfile)
534 560
535 # Make the target directory 561 # Make the target directory
536 print("Making %s" % target_dir) 562 print(f"Making {target_dir}")
537 os.mkdir(target_dir) 563 os.mkdir(target_dir)
538 564
539 # Set up data tables dictionary 565 # Set up data tables dictionary
540 data_tables = create_data_tables_dict() 566 data_tables = create_data_tables_dict()
541 add_data_table(data_tables, 'mothur_lookup') 567 add_data_table(data_tables, 'mothur_lookup')
554 # that might have been inserted by Galaxy) 580 # that might have been inserted by Galaxy)
555 paths = options.paths.replace('__cn__', '\n').replace('__cr__', '\r').split() 581 paths = options.paths.replace('__cn__', '\n').replace('__cr__', '\r').split()
556 import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) 582 import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data)
557 # Write output JSON 583 # Write output JSON
558 print("Outputting JSON") 584 print("Outputting JSON")
559 print(json.dumps(data_tables)) 585 with open(jsonfile, 'w') as fh:
560 open(jsonfile, 'w').write(json.dumps(data_tables, sort_keys=True)) 586 json.dump(data_tables, fh, sort_keys=True)
561 print("Done.") 587 print("Done.")
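For orientation, the script's contract with Galaxy is a JSON round trip: it reads 'param_dict' and the first output's 'extra_files_path' from the job's JSON file (read_input_json above), then overwrites that same file with the collected data table entries. A sketch of the shapes involved, with hypothetical values and assuming the conventional data-manager layout (the dict built by create_data_tables_dict sits outside the hunks shown):

    import json

    # Input, as consumed by read_input_json():
    example_input = {
        "param_dict": {"datasets": "RDP_v18"},  # hypothetical parameters
        "output_data": [{"extra_files_path": "/tmp/mothur_refs"}],
    }
    target_dir = example_input["output_data"][0]["extra_files_path"]

    # Output, as written back at the end of main (assumed shape):
    data_tables = {"data_tables": {"mothur_lookup": [
        {"name": "example entry", "value": "example_file.pat"},
    ]}}
    print(json.dumps(data_tables, sort_keys=True))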