Mercurial > repos > pjbriggs > ceas
diff data_manager/data_manager_ceas_fetch_annotations.py @ 4:4e2883bb058d draft
Synchronise version with one on main toolshed (version of 30/06/2015)
| field | value |
|---|---|
| author | pjbriggs |
| date | Wed, 10 Aug 2016 11:04:42 -0400 |
| parents | d9032bb158b9 |
| children | e853be203962 |
line wrap: on
line diff
--- a/data_manager/data_manager_ceas_fetch_annotations.py Wed Apr 22 05:34:06 2015 -0400 +++ b/data_manager/data_manager_ceas_fetch_annotations.py Wed Aug 10 11:04:42 2016 -0400 @@ -3,9 +3,12 @@ import sys import os +import subprocess +import tempfile import optparse import urllib2 import gzip +import shutil from galaxy.util.json import from_json_string, to_json_string @@ -14,8 +17,6 @@ if __name__ == '__main__': #Parse Command Line parser = optparse.OptionParser() - parser.add_option('--download',dest='url',action='store', - type="string",default=None,help='URL to download') options,args = parser.parse_args() print "options: %s" % options print "args : %s" % args @@ -35,23 +36,28 @@ # # We want the values set in the data manager XML dbkey = params['param_dict']['dbkey'] - description = params['param_dict']['description'] + description = args[1].strip() + identifier = params['param_dict']['unique_id'].strip() # Where to put the output file # Nb we have to make this ourselves, it doesn't exist by default target_dir = params['output_data'][0]['extra_files_path'] os.mkdir(target_dir) + method = params['param_dict']['reference_source']['reference_source_selector'] + # Dictionary for returning to data manager data_manager_dict = {} + data_manager_dict['data_tables'] = dict() # Download from URL - if options.url is not None: - print "Downloading: %s" % options.url - annotation_file_name = os.path.basename(options.url) + if method == 'web': + url = params['param_dict']['reference_source']['annotation_url'] + print "Downloading: %s" % url + annotation_file_name = os.path.basename(url) annotation_file_path = os.path.join(target_dir,annotation_file_name) print "Annotation file name: %s" % annotation_file_name print "Annotation file path: %s" % annotation_file_path - open(annotation_file_path,'wb').write(urllib2.urlopen(options.url).read()) + open(annotation_file_path,'wb').write(urllib2.urlopen(url).read()) if annotation_file_name.endswith('.gz'): # Uncompress 
uncompressed_file = annotation_file_path[:-3] @@ -60,15 +66,87 @@ os.remove(annotation_file_path) annotation_file_name = os.path.basename(uncompressed_file) annotation_file_path = uncompressed_file + # Update the identifier and description + if not identifier: + identifier = "%s_ceas_web" % dbkey + if not description: + description = "%s (%s)" % (os.path.splitext(annotation_file_name)[0],dbkey) # Update the output dictionary - data_manager_dict['data_tables'] = dict() data_manager_dict['data_tables']['ceas_annotations'] = { + 'value': identifier, + 'dbkey': dbkey, + 'name': description, + 'path': annotation_file_name, + } + elif method == 'server': + # Pull in a file from the server + filename = params['param_dict']['reference_source']['annotation_filename'] + create_symlink = params['param_dict']['reference_source']['create_symlink'] + print "Canonical gene list file name: %s" % filename + print "Create symlink: %s" % create_symlink + target_filename = os.path.join(target_dir,os.path.basename(filename)) + if create_symlink == 'copy_file': + shutil.copyfile(filename,target_filename) + else: + os.symlink(filename,target_filename) + # Update the identifier and description + if not identifier: + identifier = "%s_%s" % (dbkey, + os.path.splitext(os.path.basename(filename))[0]) + if not description: + description = "%s: %s" % (dbkey, + os.path.splitext(os.path.basename(filename))[0]) + # Update the output dictionary + data_manager_dict['data_tables']['ceas_annotations'] = { + 'value': identifier, 'dbkey': dbkey, 'name': description, - 'value': annotation_file_name, + 'path': os.path.basename(filename), + } + elif method == 'from_wig': + # Make a reference file from a wig file + wig_file = params['param_dict']['reference_source']['wig_file'] + gene_annotation = params['param_dict']['reference_source']['gene_annotation'] + target_filename = os.path.join(target_dir,"%s_%s.%s" % (dbkey, + os.path.basename(wig_file), + gene_annotation)) + print "Wig file: %s" % wig_file + 
print "Gene annotation: %s" % gene_annotation + print "Output file: %s" % os.path.basename(target_filename) + # Make a working directory + working_dir = tempfile.mkdtemp() + # Collect stderr in a file for reporting later + stderr_filen = tempfile.NamedTemporaryFile().name + # Build the command to run + cmd = "build_genomeBG -d %s -g %s -w %s -o %s" % (dbkey, + gene_annotation, + wig_file, + target_filename) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, + stderr=open(stderr_filen,'wb')) + proc.wait() + # Copy stderr to stdout + with open(stderr_filen,'r') as fp: + sys.stdout.write(fp.read()) + # Update identifier and description + if not identifier: + identifier = "%s_%s_%s" % (dbkey, + gene_annotation, + os.path.basename(wig_file)) + if not description: + description = "%s %s from %s" % (dbkey, + gene_annotation, + os.path.basename(wig_file)) + # Update the output dictionary + data_manager_dict['data_tables']['ceas_annotations'] = { + 'value': identifier, + 'dbkey': dbkey, + 'name': description, + 'path': os.path.basename(target_filename), } else: - raise NotImplementedError("Non-download options not implemented") + raise NotImplementedError("Method '%s' not implemented" % method) #save info to json file open(jsonfile,'wb').write(to_json_string(data_manager_dict))