diff data_manager/data_manager_ceas_fetch_annotations.py @ 4:4e2883bb058d draft

Synchronise with the version on the main toolshed (version of 30/06/2015)
author pjbriggs
date Wed, 10 Aug 2016 11:04:42 -0400
parents d9032bb158b9
children e853be203962
line wrap: on
line diff
--- a/data_manager/data_manager_ceas_fetch_annotations.py	Wed Apr 22 05:34:06 2015 -0400
+++ b/data_manager/data_manager_ceas_fetch_annotations.py	Wed Aug 10 11:04:42 2016 -0400
@@ -3,9 +3,12 @@
 
 import sys
 import os
+import subprocess
+import tempfile
 import optparse
 import urllib2
 import gzip
+import shutil
 
 from galaxy.util.json import from_json_string, to_json_string
 
@@ -14,8 +17,6 @@
 if __name__ == '__main__':
     #Parse Command Line
     parser = optparse.OptionParser()
-    parser.add_option('--download',dest='url',action='store',
-                      type="string",default=None,help='URL to download')
     options,args = parser.parse_args()
     print "options: %s" % options
     print "args   : %s" % args
@@ -35,23 +36,28 @@
     #
     # We want the values set in the data manager XML
     dbkey = params['param_dict']['dbkey']
-    description = params['param_dict']['description']
+    description = args[1].strip()
+    identifier = params['param_dict']['unique_id'].strip()
     # Where to put the output file
     # Nb we have to make this ourselves, it doesn't exist by default
     target_dir = params['output_data'][0]['extra_files_path']
     os.mkdir(target_dir)
 
+    method = params['param_dict']['reference_source']['reference_source_selector']
+
     # Dictionary for returning to data manager
     data_manager_dict = {}
+    data_manager_dict['data_tables'] = dict()
 
     # Download from URL
-    if options.url is not None:
-        print "Downloading: %s" % options.url
-        annotation_file_name = os.path.basename(options.url)
+    if method == 'web':
+        url = params['param_dict']['reference_source']['annotation_url']
+        print "Downloading: %s" % url
+        annotation_file_name = os.path.basename(url)
         annotation_file_path = os.path.join(target_dir,annotation_file_name)
         print "Annotation file name: %s" % annotation_file_name
         print "Annotation file path: %s" % annotation_file_path
-        open(annotation_file_path,'wb').write(urllib2.urlopen(options.url).read())
+        open(annotation_file_path,'wb').write(urllib2.urlopen(url).read())
         if annotation_file_name.endswith('.gz'):
             # Uncompress
             uncompressed_file = annotation_file_path[:-3]
@@ -60,15 +66,87 @@
             os.remove(annotation_file_path)
             annotation_file_name = os.path.basename(uncompressed_file)
             annotation_file_path = uncompressed_file
+        # Update the identifier and description
+        if not identifier:
+            identifier = "%s_ceas_web" % dbkey
+        if not description:
+            description = "%s (%s)" % (os.path.splitext(annotation_file_name)[0],dbkey)
         # Update the output dictionary
-        data_manager_dict['data_tables'] = dict()
         data_manager_dict['data_tables']['ceas_annotations'] = {
+            'value': identifier,
+            'dbkey': dbkey,
+            'name': description,
+            'path': annotation_file_name,
+        }
+    elif method == 'server':
+        # Pull in a file from the server
+        filename = params['param_dict']['reference_source']['annotation_filename']
+        create_symlink = params['param_dict']['reference_source']['create_symlink']
+        print "Canonical gene list file name: %s" % filename
+        print "Create symlink: %s" % create_symlink
+        target_filename = os.path.join(target_dir,os.path.basename(filename))
+        if create_symlink == 'copy_file':
+            shutil.copyfile(filename,target_filename)
+        else:
+            os.symlink(filename,target_filename)
+        # Update the identifier and description
+        if not identifier:
+            identifier = "%s_%s" % (dbkey,
+                                    os.path.splitext(os.path.basename(filename))[0])
+        if not description:
+            description = "%s: %s" % (dbkey,
+                                      os.path.splitext(os.path.basename(filename))[0])
+        # Update the output dictionary
+        data_manager_dict['data_tables']['ceas_annotations'] = {
+            'value': identifier,
             'dbkey': dbkey,
             'name': description,
-            'value': annotation_file_name,
+            'path': os.path.basename(filename),
+        }
+    elif method == 'from_wig':
+        # Make a reference file from a wig file
+        wig_file = params['param_dict']['reference_source']['wig_file']
+        gene_annotation = params['param_dict']['reference_source']['gene_annotation']
+        target_filename = os.path.join(target_dir,"%s_%s.%s" % (dbkey,
+                                                                os.path.basename(wig_file),
+                                                                gene_annotation))
+        print "Wig file: %s" % wig_file
+        print "Gene annotation: %s" % gene_annotation
+        print "Output file: %s" % os.path.basename(target_filename)
+        # Make a working directory
+        working_dir = tempfile.mkdtemp()
+        # Collect stderr in a file for reporting later
+        stderr_filen = tempfile.NamedTemporaryFile().name
+        # Build the command to run
+        cmd = "build_genomeBG -d %s -g %s -w %s -o %s" % (dbkey,
+                                                          gene_annotation,
+                                                          wig_file,
+                                                          target_filename)
+        print "Running %s" %  cmd
+        proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir,
+                                stderr=open(stderr_filen,'wb'))
+        proc.wait()
+        # Copy stderr to stdout
+        with open(stderr_filen,'r') as fp:
+            sys.stdout.write(fp.read())
+        # Update identifier and description
+        if not identifier:
+            identifier = "%s_%s_%s" % (dbkey,
+                                       gene_annotation,
+                                       os.path.basename(wig_file))
+        if not description:
+            description = "%s %s from %s" % (dbkey,
+                                             gene_annotation,
+                                             os.path.basename(wig_file))
+        # Update the output dictionary
+        data_manager_dict['data_tables']['ceas_annotations'] = {
+            'value': identifier,
+            'dbkey': dbkey,
+            'name': description,
+            'path': os.path.basename(target_filename),
         }
     else:
-        raise NotImplementedError("Non-download options not implemented")
+        raise NotImplementedError("Method '%s' not implemented" % method)
 
     #save info to json file
     open(jsonfile,'wb').write(to_json_string(data_manager_dict))