annotate data_manager/data_manager_ceas_fetch_annotations.py @ 4:4e2883bb058d draft

Synchronise version with one on main toolshed (version of 30/06/2015)
author pjbriggs
date Wed, 10 Aug 2016 11:04:42 -0400
parents d9032bb158b9
children e853be203962
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
1 #!/usr/bin/env python
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
2 #
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
3
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
4 import sys
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
5 import os
4
4e2883bb058d Synchronise version with one on main toolshed (version of 30/06/2015)
pjbriggs
parents: 1
diff changeset
6 import subprocess
4e2883bb058d Synchronise version with one on main toolshed (version of 30/06/2015)
pjbriggs
parents: 1
diff changeset
7 import tempfile
1
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
8 import optparse
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
9 import urllib2
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
10 import gzip
4
4e2883bb058d Synchronise version with one on main toolshed (version of 30/06/2015)
pjbriggs
parents: 1
diff changeset
11 import shutil
1
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
12
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
13 from galaxy.util.json import from_json_string, to_json_string
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
14
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
15 # Download file from specified URL and put into local subdir
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
16
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
def _download(url, dest_path):
    """Stream the resource at `url` into the local file `dest_path`.

    The response is copied in chunks rather than read into memory in one
    go, and both the response and the output file are always closed.
    """
    response = urllib2.urlopen(url)
    try:
        with open(dest_path, 'wb') as fp:
            shutil.copyfileobj(response, fp)
    finally:
        response.close()


def _gunzip_in_place(gz_path):
    """Uncompress `gz_path` next to itself and delete the archive.

    `gz_path` must end in '.gz'; the uncompressed file has the same path
    with that suffix removed. Returns the path of the uncompressed file.
    """
    uncompressed_path = gz_path[:-3]
    # try/finally rather than 'with': GzipFile is only a context manager
    # from Python 2.7 onwards
    gz = gzip.open(gz_path, 'rb')
    try:
        with open(uncompressed_path, 'wb') as fp:
            shutil.copyfileobj(gz, fp)
    finally:
        gz.close()
    # Remove gzipped file
    os.remove(gz_path)
    return uncompressed_path


def _build_genomebg_command(dbkey, gene_annotation, wig_file, output_file):
    """Return the build_genomeBG invocation as an argument list.

    A list (rather than a shell string) means file names containing
    spaces or shell metacharacters are passed through untouched.
    """
    return ["build_genomeBG",
            "-d", dbkey,
            "-g", gene_annotation,
            "-w", wig_file,
            "-o", output_file]


def _run_build_genomebg(cmd):
    """Run `cmd` in a scratch directory and return its exit status.

    The command's stderr is collected in a temporary file and copied to
    stdout once the command has finished. The scratch directory and the
    stderr file are removed afterwards, whether or not the command
    succeeded.
    """
    # Make a working directory
    working_dir = tempfile.mkdtemp()
    # Collect stderr in a file for reporting later
    stderr_fd, stderr_filen = tempfile.mkstemp()
    try:
        with os.fdopen(stderr_fd, 'wb') as stderr_fp:
            proc = subprocess.Popen(args=cmd, cwd=working_dir,
                                    stderr=stderr_fp)
            returncode = proc.wait()
        # Copy stderr to stdout
        with open(stderr_filen, 'r') as fp:
            sys.stdout.write(fp.read())
    finally:
        os.remove(stderr_filen)
        shutil.rmtree(working_dir, ignore_errors=True)
    return returncode


def main():
    """Fetch or build a CEAS annotation database for a Galaxy data manager.

    Expects two command line arguments: the name of the JSON file written
    by the data manager tool, and the description text (may be empty).
    The annotation file is placed in the 'extra_files_path' directory
    named in the JSON, and the new 'ceas_annotations' data table entry is
    written back to the same JSON file.

    Raises NotImplementedError if the requested reference source is not
    one of 'web', 'server' or 'from_wig'; exits with an error message if
    build_genomeBG returns a non-zero status.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    options, args = parser.parse_args()
    print("options: %s" % options)
    print("args : %s" % args)
    if len(args) != 2:
        parser.error("Need to supply JSON file name and description text")

    # Read the JSON supplied from the data manager tool
    # Results from this program will be returned via the
    # same file
    jsonfile = args[0]
    with open(jsonfile) as fp:
        params = from_json_string(fp.read())
    print("%s" % params)

    # Extract the data from the input JSON
    # See https://wiki.galaxyproject.org/Admin/Tools/DataManagers/HowTo/Define?highlight=%28\bAdmin%2FTools%2FDataManagers\b%29
    # for example of JSON
    #
    # We want the values set in the data manager XML
    dbkey = params['param_dict']['dbkey']
    description = args[1].strip()
    identifier = params['param_dict']['unique_id'].strip()
    # Where to put the output file
    # Nb we have to make this ourselves, it doesn't exist by default
    target_dir = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    reference_source = params['param_dict']['reference_source']
    method = reference_source['reference_source_selector']

    # Each branch below puts a file into target_dir and sets:
    #   path                - file name (no directory) for the data table
    #   default_identifier  - used when no unique id was supplied
    #   default_description - used when no description was supplied
    if method == 'web':
        # Download from URL
        url = reference_source['annotation_url']
        print("Downloading: %s" % url)
        annotation_file_name = os.path.basename(url)
        annotation_file_path = os.path.join(target_dir, annotation_file_name)
        print("Annotation file name: %s" % annotation_file_name)
        print("Annotation file path: %s" % annotation_file_path)
        _download(url, annotation_file_path)
        if annotation_file_name.endswith('.gz'):
            # Uncompress
            annotation_file_path = _gunzip_in_place(annotation_file_path)
            annotation_file_name = os.path.basename(annotation_file_path)
        default_identifier = "%s_ceas_web" % dbkey
        default_description = "%s (%s)" % (
            os.path.splitext(annotation_file_name)[0], dbkey)
        path = annotation_file_name
    elif method == 'server':
        # Pull in a file from the server
        filename = reference_source['annotation_filename']
        create_symlink = reference_source['create_symlink']
        print("Canonical gene list file name: %s" % filename)
        print("Create symlink: %s" % create_symlink)
        target_filename = os.path.join(target_dir, os.path.basename(filename))
        if create_symlink == 'copy_file':
            shutil.copyfile(filename, target_filename)
        else:
            os.symlink(filename, target_filename)
        stem = os.path.splitext(os.path.basename(filename))[0]
        default_identifier = "%s_%s" % (dbkey, stem)
        default_description = "%s: %s" % (dbkey, stem)
        path = os.path.basename(filename)
    elif method == 'from_wig':
        # Make a reference file from a wig file
        wig_file = reference_source['wig_file']
        gene_annotation = reference_source['gene_annotation']
        target_filename = os.path.join(
            target_dir,
            "%s_%s.%s" % (dbkey, os.path.basename(wig_file), gene_annotation))
        print("Wig file: %s" % wig_file)
        print("Gene annotation: %s" % gene_annotation)
        print("Output file: %s" % os.path.basename(target_filename))
        # Build the command to run
        cmd = _build_genomebg_command(dbkey, gene_annotation, wig_file,
                                      target_filename)
        print("Running %s" % " ".join(cmd))
        returncode = _run_build_genomebg(cmd)
        if returncode != 0:
            # Don't register an annotation file that was never produced
            sys.exit("build_genomeBG failed with exit code %s" % returncode)
        default_identifier = "%s_%s_%s" % (dbkey, gene_annotation,
                                           os.path.basename(wig_file))
        default_description = "%s %s from %s" % (dbkey, gene_annotation,
                                                 os.path.basename(wig_file))
        path = os.path.basename(target_filename)
    else:
        raise NotImplementedError("Method '%s' not implemented" % method)

    # Update the identifier and description
    if not identifier:
        identifier = default_identifier
    if not description:
        description = default_description

    # Dictionary for returning to data manager
    data_manager_dict = {
        'data_tables': {
            'ceas_annotations': {
                'value': identifier,
                'dbkey': dbkey,
                'name': description,
                'path': path,
            },
        },
    }

    # save info to json file
    with open(jsonfile, 'wb') as fp:
        fp.write(to_json_string(data_manager_dict))


if __name__ == '__main__':
    main()
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
153