annotate data_manager/data_manager_ceas_fetch_annotations.py @ 1:d9032bb158b9 draft

Add in data manager for CEAS annotation databases.
author pjbriggs
date Wed, 28 Jan 2015 05:04:46 -0500
parents
children 4e2883bb058d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
1 #!/usr/bin/env python
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
2 #
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
3
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
4 import sys
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
5 import os
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
6 import optparse
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
7 import urllib2
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
8 import gzip
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
9
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
10 from galaxy.util.json import from_json_string, to_json_string
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
11
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
12 # Download file from specified URL and put into local subdir
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
13
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: download a CEAS annotation database from the URL
    # given via --download into the data manager's output directory,
    # gunzip it if the file name ends in '.gz', and write a
    # 'ceas_annotations' data table entry back to the JSON file.
    #
    # Usage: <script> --download URL JSON_FILE DESCRIPTION

    # Local imports: only this block needs them.
    import contextlib
    import shutil

    # Parse command line
    parser = optparse.OptionParser()
    parser.add_option('--download', dest='url', action='store',
                      type="string", default=None, help='URL to download')
    options, args = parser.parse_args()
    print("options: %s" % options)
    print("args : %s" % args)
    if len(args) != 2:
        # parser.error() prints the usage message and exits
        parser.error("Need to supply JSON file name and description text")

    # Read the JSON supplied from the data manager tool
    # Results from this program will be returned via the
    # same file
    jsonfile = args[0]
    with open(jsonfile) as json_in:
        params = from_json_string(json_in.read())
    print("%s" % params)

    # Extract the data from the input JSON
    # See https://wiki.galaxyproject.org/Admin/Tools/DataManagers/HowTo/Define?highlight=%28\bAdmin%2FTools%2FDataManagers\b%29
    # for example of JSON
    #
    # We want the values set in the data manager XML
    dbkey = params['param_dict']['dbkey']
    description = params['param_dict']['description']
    # Where to put the output file
    # Nb we have to make this ourselves, it doesn't exist by default
    target_dir = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_dir):
        # Tolerate a pre-existing directory and create missing parents
        os.makedirs(target_dir)

    # Dictionary for returning to data manager
    data_manager_dict = {}

    # Only downloading from a URL is supported
    if options.url is None:
        raise NotImplementedError("Non-download options not implemented")

    # Download from URL
    print("Downloading: %s" % options.url)
    annotation_file_name = os.path.basename(options.url)
    annotation_file_path = os.path.join(target_dir, annotation_file_name)
    print("Annotation file name: %s" % annotation_file_name)
    print("Annotation file path: %s" % annotation_file_path)
    # Stream the response to disk rather than holding it all in memory;
    # closing() is used because the urllib2 response object is not
    # itself a context manager
    with contextlib.closing(urllib2.urlopen(options.url)) as response:
        with open(annotation_file_path, 'wb') as download_out:
            shutil.copyfileobj(response, download_out)

    if annotation_file_name.endswith('.gz'):
        # Uncompress (drop the trailing '.gz' to get the target name)
        uncompressed_file = annotation_file_path[:-3]
        with contextlib.closing(gzip.open(annotation_file_path, 'rb')) as gz_in:
            with open(uncompressed_file, 'wb') as plain_out:
                shutil.copyfileobj(gz_in, plain_out)
        # Remove gzipped file
        os.remove(annotation_file_path)
        annotation_file_name = os.path.basename(uncompressed_file)
        annotation_file_path = uncompressed_file

    # Update the output dictionary
    data_manager_dict['data_tables'] = dict()
    data_manager_dict['data_tables']['ceas_annotations'] = {
        'dbkey': dbkey,
        'name': description,
        'value': annotation_file_name,
    }

    # Save info to JSON file
    with open(jsonfile, 'wb') as json_out:
        json_out.write(to_json_string(data_manager_dict))
d9032bb158b9 Add in data manager for CEAS annotation databases.
pjbriggs
parents:
diff changeset
75