Mercurial > repos > pjbriggs > ceas
annotate data_manager/data_manager_ceas_fetch_annotations.py @ 5:e853be203962 draft
Fix error for Galaxy v16.04 (data manager crashes complaining about missing 'six' Python package)
author | pjbriggs |
---|---|
date | Wed, 10 Aug 2016 11:37:28 -0400 |
parents | 4e2883bb058d |
children |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 # | |
3 | |
4 import sys | |
5 import os | |
4
4e2883bb058d
Synchronise version with one on main toolshed (version of 30/06/2015)
pjbriggs
parents:
1
diff
changeset
|
6 import subprocess |
4e2883bb058d
Synchronise version with one on main toolshed (version of 30/06/2015)
pjbriggs
parents:
1
diff
changeset
|
7 import tempfile |
1 | 8 import optparse |
9 import urllib2 | |
10 import gzip | |
4
4e2883bb058d
Synchronise version with one on main toolshed (version of 30/06/2015)
pjbriggs
parents:
1
diff
changeset
|
11 import shutil |
1 | 12 |
5
e853be203962
Fix error for Galaxy v16.04 (data manager crashes complaining about missing 'six' Python package)
pjbriggs
parents:
4
diff
changeset
|
13 # Convenience functions mapping to JSON conversion |
e853be203962
Fix error for Galaxy v16.04 (data manager crashes complaining about missing 'six' Python package)
pjbriggs
parents:
4
diff
changeset
|
14 # (this idiom borrowed from lib/galaxy/utils/json.py) |
e853be203962
Fix error for Galaxy v16.04 (data manager crashes complaining about missing 'six' Python package)
pjbriggs
parents:
4
diff
changeset
|
15 import json |
e853be203962
Fix error for Galaxy v16.04 (data manager crashes complaining about missing 'six' Python package)
pjbriggs
parents:
4
diff
changeset
|
16 to_json_string = json.dumps |
e853be203962
Fix error for Galaxy v16.04 (data manager crashes complaining about missing 'six' Python package)
pjbriggs
parents:
4
diff
changeset
|
17 from_json_string = json.loads |
1 | 18 |
19 # Download file from specified URL and put into local subdir | |
20 | |
if __name__ == '__main__':
    # Entry point for the CEAS annotation data manager.
    #
    # Usage: <script> JSON_FILE DESCRIPTION
    #
    # JSON_FILE is the parameter file written by the data manager tool;
    # it is read for the input parameters and then overwritten with the
    # resulting data table entry. DESCRIPTION is the display name for the
    # new entry (if blank, a default is generated).
    #
    # The annotation file is placed in the 'extra_files_path' directory
    # named in the JSON, obtained by one of three methods:
    #   'web'      - download from a URL (gunzipped if it ends in '.gz')
    #   'server'   - copy or symlink a file already on the server
    #   'from_wig' - build it from a wig file using 'build_genomeBG'
    # Any other method raises NotImplementedError.
    #
    # NOTE: print() is called with a single pre-formatted string so the
    # output is the same whether or not print is a function.

    # Parse command line
    parser = optparse.OptionParser()
    options, args = parser.parse_args()
    print("options: %s" % options)
    print("args : %s" % args)
    if len(args) != 2:
        # Was 'p.error(...)', which raised NameError ('p' is undefined)
        parser.error("Need to supply JSON file name and description text")

    # Read the JSON supplied from the data manager tool
    # Results from this program will be returned via the
    # same file
    jsonfile = args[0]
    with open(jsonfile) as fp:
        params = from_json_string(fp.read())
    print("%s" % params)

    # Extract the data from the input JSON
    # See https://wiki.galaxyproject.org/Admin/Tools/DataManagers/HowTo/Define
    # for example of JSON
    #
    # We want the values set in the data manager XML
    param_dict = params['param_dict']
    dbkey = param_dict['dbkey']
    description = args[1].strip()
    identifier = param_dict['unique_id'].strip()

    # Where to put the output file
    # Nb we have to make this ourselves, it doesn't exist by default
    # (but tolerate it already being there, e.g. on a re-run)
    target_dir = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    reference_source = param_dict['reference_source']
    method = reference_source['reference_source_selector']

    if method == 'web':
        # Download from URL
        url = reference_source['annotation_url']
        print("Downloading: %s" % url)
        annotation_file_name = os.path.basename(url)
        annotation_file_path = os.path.join(target_dir, annotation_file_name)
        print("Annotation file name: %s" % annotation_file_name)
        print("Annotation file path: %s" % annotation_file_path)
        # Stream to disk rather than holding the whole file in memory,
        # and make sure both ends are closed
        response = urllib2.urlopen(url)
        try:
            with open(annotation_file_path, 'wb') as out:
                shutil.copyfileobj(response, out)
        finally:
            response.close()
        if annotation_file_name.endswith('.gz'):
            # Uncompress (output name is the input minus the '.gz')
            uncompressed_file = annotation_file_path[:-3]
            gz = gzip.open(annotation_file_path, 'rb')
            try:
                with open(uncompressed_file, 'wb') as out:
                    shutil.copyfileobj(gz, out)
            finally:
                gz.close()
            # Remove gzipped file
            os.remove(annotation_file_path)
            annotation_file_name = os.path.basename(uncompressed_file)
            annotation_file_path = uncompressed_file
        # Update the identifier and description
        if not identifier:
            identifier = "%s_ceas_web" % dbkey
        if not description:
            description = "%s (%s)" % (
                os.path.splitext(annotation_file_name)[0], dbkey)
        path = annotation_file_name
    elif method == 'server':
        # Pull in a file from the server
        filename = reference_source['annotation_filename']
        create_symlink = reference_source['create_symlink']
        print("Canonical gene list file name: %s" % filename)
        print("Create symlink: %s" % create_symlink)
        target_filename = os.path.join(target_dir, os.path.basename(filename))
        if create_symlink == 'copy_file':
            shutil.copyfile(filename, target_filename)
        else:
            os.symlink(filename, target_filename)
        # Update the identifier and description
        base_name = os.path.splitext(os.path.basename(filename))[0]
        if not identifier:
            identifier = "%s_%s" % (dbkey, base_name)
        if not description:
            description = "%s: %s" % (dbkey, base_name)
        path = os.path.basename(filename)
    elif method == 'from_wig':
        # Make a reference file from a wig file
        wig_file = reference_source['wig_file']
        gene_annotation = reference_source['gene_annotation']
        target_filename = os.path.join(
            target_dir,
            "%s_%s.%s" % (dbkey, os.path.basename(wig_file), gene_annotation))
        print("Wig file: %s" % wig_file)
        print("Gene annotation: %s" % gene_annotation)
        print("Output file: %s" % os.path.basename(target_filename))
        # Build the command to run as an argument list (no shell), so
        # values taken from the JSON cannot be interpreted by a shell.
        # The output path is made absolute because the command runs
        # with the working directory as its cwd.
        cmd = ["build_genomeBG",
               "-d", dbkey,
               "-g", gene_annotation,
               "-w", wig_file,
               "-o", os.path.abspath(target_filename)]
        print("Running %s" % " ".join(cmd))
        # Make a working directory (removed again when we're done)
        working_dir = tempfile.mkdtemp()
        try:
            # Collect stderr in an anonymous temp file, then copy it
            # to stdout once the command has finished
            with tempfile.TemporaryFile(mode='w+') as stderr_fp:
                proc = subprocess.Popen(cmd, cwd=working_dir,
                                        stderr=stderr_fp)
                proc.wait()
                stderr_fp.seek(0)
                sys.stdout.write(stderr_fp.read())
            # Don't register a data table entry for a failed build
            if proc.returncode != 0:
                sys.stderr.write("build_genomeBG failed (exit status %s)\n"
                                 % proc.returncode)
                sys.exit(proc.returncode)
        finally:
            shutil.rmtree(working_dir, ignore_errors=True)
        # Update identifier and description
        if not identifier:
            identifier = "%s_%s_%s" % (dbkey,
                                       gene_annotation,
                                       os.path.basename(wig_file))
        if not description:
            description = "%s %s from %s" % (dbkey,
                                             gene_annotation,
                                             os.path.basename(wig_file))
        path = os.path.basename(target_filename)
    else:
        raise NotImplementedError("Method '%s' not implemented" % method)

    # Dictionary for returning to data manager
    data_manager_dict = {
        'data_tables': {
            'ceas_annotations': {
                'value': identifier,
                'dbkey': dbkey,
                'name': description,
                'path': path,
            },
        },
    }

    # Save info to json file
    with open(jsonfile, 'w') as fp:
        fp.write(to_json_string(data_manager_dict))
157 |