comparison data_manager/data_manager_rsync.py @ 1:861c071a6cd5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_rsync_g2 commit 8652f36a3a3838dca989426961561e81432acf4f
| field | value |
|---|---|
| author | iuc |
| date | Tue, 04 Apr 2017 17:53:15 -0400 |
| parents | 70afa70bba41 |
| children | cf2f615e2515 |
| 0:70afa70bba41 (before) | 1:861c071a6cd5 (after) |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 #Dan Blankenberg | 2 # Dan Blankenberg |
| 3 | 3 from __future__ import print_function |
| 4 import sys | 4 |
| 5 import datetime | |
| 6 import logging | |
| 7 import optparse | |
| 5 import os | 8 import os |
| 9 import shutil | |
| 10 import subprocess | |
| 6 import tempfile | 11 import tempfile |
| 7 import shutil | 12 from json import ( |
| 8 import optparse | 13 dumps, |
| 9 import urllib2 | 14 loads |
| 10 import subprocess | 15 ) |
| 11 import datetime | |
| 12 from os.path import basename | 16 from os.path import basename |
| 13 from json import loads, dumps | |
| 14 from xml.etree.ElementTree import tostring | 17 from xml.etree.ElementTree import tostring |
| 15 | 18 try: |
| 16 import logging | 19 # For Python 3.0 and later |
| 20 from urllib.request import urlopen | |
| 21 except ImportError: | |
| 22 # Fall back to Python 2 imports | |
| 23 from urllib2 import urlopen | |
| 24 | |
| 17 _log_name = __name__ | 25 _log_name = __name__ |
| 18 if _log_name == '__builtin__': | 26 if _log_name == '__builtin__': |
| 19 _log_name = 'toolshed.installed.g2.rsync.data.manager' | 27 _log_name = 'toolshed.installed.g2.rsync.data.manager' |
| 20 log = logging.getLogger( _log_name ) | 28 log = logging.getLogger( _log_name ) |
| 21 | 29 |
| 25 LOCATION_DIR = "location" | 33 LOCATION_DIR = "location" |
| 26 INDEX_DIR = "indexes" | 34 INDEX_DIR = "indexes" |
| 27 | 35 |
| 28 # Pull the Tool Data Table files from github | 36 # Pull the Tool Data Table files from github |
| 29 # FIXME: These files should be accessible from the rsync server directly. | 37 # FIXME: These files should be accessible from the rsync server directly. |
| 30 TOOL_DATA_TABLE_CONF_XML_URLS = { 'main':"https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/usegalaxy.org/config/tool_data_table_conf.xml", | 38 TOOL_DATA_TABLE_CONF_XML_URLS = {'main': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/usegalaxy.org/config/tool_data_table_conf.xml", |
| 31 'test':"https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/test.galaxyproject.org/config/tool_data_table_conf.xml" } | 39 'test': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/test.galaxyproject.org/config/tool_data_table_conf.xml" } |
| 32 | 40 |
| 33 # Replace data table source entries with local temporary location | 41 # Replace data table source entries with local temporary location |
| 34 GALAXY_DATA_CANONICAL_PATH = "/galaxy/data/" | 42 GALAXY_DATA_CANONICAL_PATH = "/galaxy/data/" |
| 35 TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE = '<file path="%slocation/' % ( GALAXY_DATA_CANONICAL_PATH ) | 43 TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE = '<file path="%slocation/' % ( GALAXY_DATA_CANONICAL_PATH ) |
| 36 TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET = '<file path="%s/' | 44 TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET = '<file path="%s/' |
| 48 # TODO: Make additional handler actions available for tables that can't fit into the the basic | 56 # TODO: Make additional handler actions available for tables that can't fit into the the basic |
| 49 # "take the value of path" as a dir and copy contents. | 57 # "take the value of path" as a dir and copy contents. |
| 50 # e.g. mafs. Although this maf table is goofy and doesn't have path defined in <table> def, | 58 # e.g. mafs. Although this maf table is goofy and doesn't have path defined in <table> def, |
| 51 # but it does exit in the .loc. | 59 # but it does exit in the .loc. |
| 52 | 60 |
| 61 | |
| 53 # --- These methods are called by/within the Galaxy Application | 62 # --- These methods are called by/within the Galaxy Application |
| 54 | |
| 55 def exec_before_job( app, inp_data, out_data, param_dict, tool=None, **kwd ): | 63 def exec_before_job( app, inp_data, out_data, param_dict, tool=None, **kwd ): |
| 56 # Look for any data tables that haven't been defined for this data manager before and dynamically add them to Galaxy | 64 # Look for any data tables that haven't been defined for this data manager before and dynamically add them to Galaxy |
| 57 param_dict = dict( **param_dict ) | 65 param_dict = dict( **param_dict ) |
| 58 param_dict['data_table_entries'] = param_dict.get( 'data_table_entries', [] ) | 66 param_dict['data_table_entries'] = param_dict.get( 'data_table_entries', [] ) |
| 59 if not isinstance( param_dict['data_table_entries'], list ): | 67 if not isinstance( param_dict['data_table_entries'], list ): |
| 65 tool_shed_repository = None | 73 tool_shed_repository = None |
| 66 tdtm = None | 74 tdtm = None |
| 67 data_manager = app.data_managers.get_manager( tool.data_manager_id, None ) | 75 data_manager = app.data_managers.get_manager( tool.data_manager_id, None ) |
| 68 data_table_entries = get_data_table_entries( param_dict ) | 76 data_table_entries = get_data_table_entries( param_dict ) |
| 69 data_tables = load_data_tables_from_url( data_table_class=app.tool_data_tables.__class__ ).get( 'data_tables' ) | 77 data_tables = load_data_tables_from_url( data_table_class=app.tool_data_tables.__class__ ).get( 'data_tables' ) |
| 70 for data_table_name, entries in data_table_entries.iteritems(): | 78 for data_table_name, entries in data_table_entries.items(): |
| 71 #get data table managed by this data Manager | 79 # get data table managed by this data Manager |
| 72 has_data_table = app.tool_data_tables.get_tables().get( data_table_name ) | 80 has_data_table = app.tool_data_tables.get_tables().get( data_table_name ) |
| 73 if has_data_table: | 81 if has_data_table: |
| 74 has_data_table = bool( has_data_table.get_filename_for_source( data_manager, None ) ) | 82 has_data_table = bool( has_data_table.get_filename_for_source( data_manager, None ) ) |
| 75 if not has_data_table: | 83 if not has_data_table: |
| 76 if tdtm is None: | 84 if tdtm is None: |
| 77 from tool_shed.tools import data_table_manager | 85 from tool_shed.tools import data_table_manager |
| 78 tdtm = data_table_manager.ToolDataTableManager( app ) | 86 tdtm = data_table_manager.ToolDataTableManager( app ) |
| 79 target_dir, tool_path, relative_target_dir = tdtm.get_target_install_dir( tool_shed_repository ) | 87 target_dir, tool_path, relative_target_dir = tdtm.get_target_install_dir( tool_shed_repository ) |
| 80 #Dynamically add this data table | 88 # Dynamically add this data table |
| 81 log.debug( "Attempting to dynamically create a missing Tool Data Table named %s." % data_table_name ) | 89 log.debug( "Attempting to dynamically create a missing Tool Data Table named %s." % data_table_name ) |
| 82 data_table = data_tables[data_table_name] | 90 data_table = data_tables[data_table_name] |
| 83 repo_info = tdtm.generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=None ) | 91 repo_info = tdtm.generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=None ) |
| 84 if repo_info is not None: | 92 if repo_info is not None: |
| 85 repo_info = tostring( repo_info ) | 93 repo_info = tostring( repo_info ) |
| 87 tmp_file.write( get_new_xml_definition( app, data_table, data_manager, repo_info, target_dir ) ) | 95 tmp_file.write( get_new_xml_definition( app, data_table, data_manager, repo_info, target_dir ) ) |
| 88 tmp_file.flush() | 96 tmp_file.flush() |
| 89 app.tool_data_tables.add_new_entries_from_config_file( tmp_file.name, None, app.config.shed_tool_data_table_config, persist=True ) | 97 app.tool_data_tables.add_new_entries_from_config_file( tmp_file.name, None, app.config.shed_tool_data_table_config, persist=True ) |
| 90 tmp_file.close() | 98 tmp_file.close() |
| 91 | 99 |
| 100 | |
| 92 def galaxy_code_get_available_data_tables( trans ): | 101 def galaxy_code_get_available_data_tables( trans ): |
| 93 #list of data tables | 102 # list of data tables |
| 94 found_tables = get_available_tables( trans ) | 103 found_tables = get_available_tables( trans ) |
| 95 rval = map( lambda x: ( ( x, x, DEFAULT_SELECTED ) ), found_tables ) | 104 rval = [ ( x, x, DEFAULT_SELECTED ) for x in found_tables] |
| 96 return rval | 105 return rval |
| 106 | |
| 97 | 107 |
| 98 def galaxy_code_get_available_data_tables_entries( trans, dbkey, data_table_names ): | 108 def galaxy_code_get_available_data_tables_entries( trans, dbkey, data_table_names ): |
| 99 #available entries, optionally filtered by dbkey and table names | 109 # available entries, optionally filtered by dbkey and table names |
| 100 if dbkey in [ None, '', '?' ]: | 110 if dbkey in [ None, '', '?' ]: |
| 101 dbkey = None | 111 dbkey = None |
| 102 if data_table_names in [ None, '', '?' ]: | 112 if data_table_names in [ None, '', '?' ]: |
| 103 data_table_names = None | 113 data_table_names = None |
| 104 found_tables = get_available_tables_for_dbkey( trans, dbkey, data_table_names ) | 114 found_tables = get_available_tables_for_dbkey( trans, dbkey, data_table_names ) |
| 105 dbkey_text = '(%s) ' % ( dbkey ) if dbkey else '' | 115 dbkey_text = '(%s) ' % ( dbkey ) if dbkey else '' |
| 106 rval = map( lambda x: ( "%s%s" % ( dbkey_text, x[0] ), dumps( dict( name=x[0].split( ': ' )[0], entry=x[1] ) ).encode( 'base64' ).rstrip(), DEFAULT_SELECTED ), found_tables.items() ) | 116 rval = [( "%s%s" % ( dbkey_text, x[0] ), dumps( dict( name=x[0].split( ': ' )[0], entry=x[1] ) ).encode( 'base64' ).rstrip(), DEFAULT_SELECTED ) for x in found_tables.items()] |
| 107 return rval | 117 return rval |
| 108 | 118 |
| 109 # --- End Galaxy called Methods --- | 119 # --- End Galaxy called Methods --- |
| 110 | 120 |
| 111 | 121 |
| 114 # probably because it doesn't recognize the rsync scheme | 124 # probably because it doesn't recognize the rsync scheme |
| 115 base = base.rstrip( '/' ) | 125 base = base.rstrip( '/' ) |
| 116 url = url.lstrip( '/' ) | 126 url = url.lstrip( '/' ) |
| 117 return "%s/%s" % ( base, url ) | 127 return "%s/%s" % ( base, url ) |
| 118 | 128 |
| 129 | |
| 119 def rsync_list_dir( server, dir=None, skip_names=[] ): | 130 def rsync_list_dir( server, dir=None, skip_names=[] ): |
| 120 #drwxr-xr-x 50 2014/05/16 20:58:11 . | 131 # drwxr-xr-x 50 2014/05/16 20:58:11 . |
| 121 if dir: | 132 if dir: |
| 122 dir = rsync_urljoin( server, dir ) | 133 dir = rsync_urljoin( server, dir ) |
| 123 else: | 134 else: |
| 124 dir = server | 135 dir = server |
| 125 rsync_response = tempfile.NamedTemporaryFile() | 136 rsync_response = tempfile.NamedTemporaryFile() |
| 151 size = line.strip() | 162 size = line.strip() |
| 152 rval[ name ] = dict( name=name, permissions=perms, bytes=size, date=date, time=time ) | 163 rval[ name ] = dict( name=name, permissions=perms, bytes=size, date=date, time=time ) |
| 153 rsync_response.close() | 164 rsync_response.close() |
| 154 return rval | 165 return rval |
| 155 | 166 |
| 167 | |
| 156 def rsync_sync_to_dir( source, target ): | 168 def rsync_sync_to_dir( source, target ): |
| 157 rsync_response = tempfile.NamedTemporaryFile() | 169 rsync_response = tempfile.NamedTemporaryFile() |
| 158 rsync_stderr = tempfile.NamedTemporaryFile() | 170 rsync_stderr = tempfile.NamedTemporaryFile() |
| 159 rsync_cmd = [ RSYNC_CMD, '-avzP', source, target ] | 171 rsync_cmd = [ RSYNC_CMD, '-avzP', source, target ] |
| 160 return_code = subprocess.call( rsync_cmd, stdout=rsync_response, stderr=rsync_stderr ) | 172 return_code = subprocess.call( rsync_cmd, stdout=rsync_response, stderr=rsync_stderr ) |
| 174 | 186 |
| 175 def data_table_needs_refresh( cached_data_table, url ): | 187 def data_table_needs_refresh( cached_data_table, url ): |
| 176 if cached_data_table is None: | 188 if cached_data_table is None: |
| 177 return True, {} | 189 return True, {} |
| 178 if datetime.datetime.now() - cached_data_table.get( 'time_loaded' ) > CACHE_TIME: | 190 if datetime.datetime.now() - cached_data_table.get( 'time_loaded' ) > CACHE_TIME: |
| 179 data_table_text = urllib2.urlopen( url ).read() | 191 data_table_text = urlopen( url ).read() |
| 180 if cached_data_table.get( 'data_table_text', None ) != data_table_text: | 192 if cached_data_table.get( 'data_table_text', None ) != data_table_text: |
| 181 return True, {'data_table_text':data_table_text} | 193 return True, {'data_table_text': data_table_text} |
| 182 loc_file_attrs = rsync_list_dir( RSYNC_SERVER, LOCATION_DIR ) | 194 loc_file_attrs = rsync_list_dir( RSYNC_SERVER, LOCATION_DIR ) |
| 183 if cached_data_table.get( 'loc_file_attrs', None ) != loc_file_attrs: | 195 if cached_data_table.get( 'loc_file_attrs', None ) != loc_file_attrs: |
| 184 return True, {'loc_file_attrs':loc_file_attrs} | 196 return True, {'loc_file_attrs': loc_file_attrs} |
| 185 return False, {} | 197 return False, {} |
| 198 | |
| 186 | 199 |
| 187 def load_data_tables_from_url( url=None, site='main', data_table_class=None ): | 200 def load_data_tables_from_url( url=None, site='main', data_table_class=None ): |
| 188 if not url: | 201 if not url: |
| 189 url = TOOL_DATA_TABLE_CONF_XML_URLS.get( site, None ) | 202 url = TOOL_DATA_TABLE_CONF_XML_URLS.get( site, None ) |
| 190 assert url, ValueError( 'You must provide either a URL or a site=name.' ) | 203 assert url, ValueError( 'You must provide either a URL or a site=name.' ) |
| 191 | 204 |
| 192 cached_data_table = TOOL_DATA_TABLES_LOADED_BY_URL.get( url, None ) | 205 cached_data_table = TOOL_DATA_TABLES_LOADED_BY_URL.get( url, None ) |
| 193 refresh, attribs = data_table_needs_refresh( cached_data_table, url ) | 206 refresh, attribs = data_table_needs_refresh( cached_data_table, url ) |
| 194 if refresh: | 207 if refresh: |
| 195 data_table_text = attribs.get( 'data_table_text' )or urllib2.urlopen( url ).read() | 208 data_table_text = attribs.get( 'data_table_text' )or urlopen( url ).read() |
| 196 loc_file_attrs = attribs.get( 'loc_file_attrs' ) or rsync_list_dir( RSYNC_SERVER, LOCATION_DIR ) | 209 loc_file_attrs = attribs.get( 'loc_file_attrs' ) or rsync_list_dir( RSYNC_SERVER, LOCATION_DIR ) |
| 197 | 210 |
| 198 tmp_dir = tempfile.mkdtemp( prefix='rsync_g2_' ) | 211 tmp_dir = tempfile.mkdtemp( prefix='rsync_g2_' ) |
| 199 tmp_loc_dir = os.path.join( tmp_dir, 'location' ) | 212 tmp_loc_dir = os.path.join( tmp_dir, 'location' ) |
| 200 os.mkdir( tmp_loc_dir ) | 213 os.mkdir( tmp_loc_dir ) |
| 201 rsync_sync_to_dir( rsync_urljoin( RSYNC_SERVER, LOCATION_DIR ), os.path.abspath( tmp_loc_dir ) ) | 214 rsync_sync_to_dir( rsync_urljoin( RSYNC_SERVER, LOCATION_DIR ), os.path.abspath( tmp_loc_dir ) ) |
| 202 | 215 |
| 203 | |
| 204 new_data_table_text = data_table_text.replace( TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE, TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET % ( tmp_loc_dir ) ) | 216 new_data_table_text = data_table_text.replace( TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE, TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET % ( tmp_loc_dir ) ) |
| 205 data_table_fh = tempfile.NamedTemporaryFile( dir=tmp_dir, prefix='rysnc_data_manager_data_table_conf_' ) | 217 data_table_fh = tempfile.NamedTemporaryFile( dir=tmp_dir, prefix='rysnc_data_manager_data_table_conf_' ) |
| 206 data_table_fh.write( new_data_table_text ) | 218 data_table_fh.write( new_data_table_text ) |
| 207 data_table_fh.flush() | 219 data_table_fh.flush() |
| 208 tmp_data_dir = os.path.join( tmp_dir, 'tool-data' ) | 220 tmp_data_dir = os.path.join( tmp_dir, 'tool-data' ) |
| 209 os.mkdir( tmp_data_dir ) | 221 os.mkdir( tmp_data_dir ) |
| 210 data_tables = data_table_class( tmp_data_dir, config_filename=data_table_fh.name ) | 222 data_tables = data_table_class( tmp_data_dir, config_filename=data_table_fh.name ) |
| 211 for name, data_table in data_tables.data_tables.items(): | 223 for name, data_table in list(data_tables.data_tables.items()): |
| 212 if name in EXCLUDE_DATA_TABLES or not data_table_has_path_column( data_table ): | 224 if name in EXCLUDE_DATA_TABLES or not data_table_has_path_column( data_table ): |
| 213 log.debug( 'Removing data table "%s" because it is excluded by name or does not have a defined "path" column.', name ) | 225 log.debug( 'Removing data table "%s" because it is excluded by name or does not have a defined "path" column.', name ) |
| 214 del data_tables.data_tables[name] | 226 del data_tables.data_tables[name] |
| 215 cached_data_table = { 'data_tables': data_tables, 'tmp_dir': tmp_dir, 'data_table_text': data_table_text, 'tmp_loc_dir': tmp_loc_dir, 'loc_file_attrs': loc_file_attrs, 'time_loaded': datetime.datetime.now() } | 227 cached_data_table = { 'data_tables': data_tables, 'tmp_dir': tmp_dir, 'data_table_text': data_table_text, 'tmp_loc_dir': tmp_loc_dir, 'loc_file_attrs': loc_file_attrs, 'time_loaded': datetime.datetime.now() } |
| 216 TOOL_DATA_TABLES_LOADED_BY_URL[ url ] = cached_data_table | 228 TOOL_DATA_TABLES_LOADED_BY_URL[ url ] = cached_data_table |
| 217 #delete the files | 229 # delete the files |
| 218 data_table_fh.close() | 230 data_table_fh.close() |
| 219 cleanup_before_exit( tmp_dir ) | 231 cleanup_before_exit( tmp_dir ) |
| 220 return cached_data_table | 232 return cached_data_table |
| 233 | |
| 221 | 234 |
| 222 def data_table_has_path_column( data_table ): | 235 def data_table_has_path_column( data_table ): |
| 223 col_names = data_table.get_column_name_list() | 236 col_names = data_table.get_column_name_list() |
| 224 for name in PATH_COLUMN_NAMES: | 237 for name in PATH_COLUMN_NAMES: |
| 225 if name in col_names: | 238 if name in col_names: |
| 226 return True | 239 return True |
| 227 return False | 240 return False |
| 228 | 241 |
| 242 | |
| 229 def get_available_tables( trans ): | 243 def get_available_tables( trans ): |
| 230 #list of data tables | 244 # list of data tables |
| 231 data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ ) | 245 data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ ) |
| 232 return data_tables.get( 'data_tables' ).get_tables().keys() | 246 return list(data_tables.get( 'data_tables' ).get_tables().keys()) |
| 247 | |
| 233 | 248 |
| 234 def get_new_xml_definition( app, data_table, data_manager, repo_info=None, location_file_dir=None ): | 249 def get_new_xml_definition( app, data_table, data_manager, repo_info=None, location_file_dir=None ): |
| 235 sub_dict = { 'table_name': data_table.name, 'comment_char': '', 'columns': '', 'file_path': '' } | 250 sub_dict = { 'table_name': data_table.name, 'comment_char': '', 'columns': '', 'file_path': '' } |
| 236 sub_dict.update( data_manager.get_tool_shed_repository_info_dict() ) | 251 sub_dict.update( data_manager.get_tool_shed_repository_info_dict() ) |
| 237 if data_table.comment_char: | 252 if data_table.comment_char: |
| 240 if name is not None: | 255 if name is not None: |
| 241 sub_dict['columns'] = "%s\n%s" % ( sub_dict['columns'], '<column name="%s" index="%s" />' % ( name, i ) ) | 256 sub_dict['columns'] = "%s\n%s" % ( sub_dict['columns'], '<column name="%s" index="%s" />' % ( name, i ) ) |
| 242 location_file_dir = location_file_dir or app.config.galaxy_data_manager_data_path | 257 location_file_dir = location_file_dir or app.config.galaxy_data_manager_data_path |
| 243 for filename in data_table.filenames.keys(): | 258 for filename in data_table.filenames.keys(): |
| 244 sub_dict['file_path'] = basename( filename ) | 259 sub_dict['file_path'] = basename( filename ) |
| 245 sub_dict['file_path'] = os.path.join( location_file_dir, sub_dict['file_path'] ) #os.path.abspath? | 260 sub_dict['file_path'] = os.path.join( location_file_dir, sub_dict['file_path'] ) # os.path.abspath? |
| 246 if not os.path.exists( sub_dict['file_path'] ): | 261 if not os.path.exists( sub_dict['file_path'] ): |
| 247 # Create empty file | 262 # Create empty file |
| 248 open( sub_dict['file_path'], 'wb+' ).close() | 263 open( sub_dict['file_path'], 'wb+' ).close() |
| 249 break | 264 break |
| 250 sub_dict[ 'repo_info' ] = repo_info or '' | 265 sub_dict[ 'repo_info' ] = repo_info or '' |
| 254 <file path="%(file_path)s" /> | 269 <file path="%(file_path)s" /> |
| 255 %(repo_info)s | 270 %(repo_info)s |
| 256 </table></tables> | 271 </table></tables> |
| 257 """ % sub_dict | 272 """ % sub_dict |
| 258 | 273 |
| 274 | |
| 259 def get_available_tables_for_dbkey( trans, dbkey, data_table_names ): | 275 def get_available_tables_for_dbkey( trans, dbkey, data_table_names ): |
| 260 my_data_tables = trans.app.tool_data_tables.get_tables() | |
| 261 data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ ) | 276 data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ ) |
| 262 rval = {} | 277 rval = {} |
| 263 for name, data_table in data_tables.get( 'data_tables' ).get_tables().iteritems(): | 278 for name, data_table in data_tables.get( 'data_tables' ).get_tables().items(): |
| 264 if ( not data_table_names or name in data_table_names ): #name in my_data_tables.keys() and | 279 if ( not data_table_names or name in data_table_names ): |
| 265 #TODO: check that columns are similiar | 280 # TODO: check that columns are similiar |
| 266 if not dbkey: | 281 if not dbkey: |
| 267 entry_getter = data_table.get_named_fields_list() | 282 entry_getter = data_table.get_named_fields_list() |
| 268 else: | 283 else: |
| 269 entry_getter = data_table.get_entries( 'dbkey', dbkey, None, default=[] ) | 284 entry_getter = data_table.get_entries( 'dbkey', dbkey, None, default=[] ) |
| 270 for entry in entry_getter: | 285 for entry in entry_getter: |
| 271 name = "%s: %s" % ( data_table.name, dumps( entry ) ) | 286 name = "%s: %s" % ( data_table.name, dumps( entry ) ) |
| 272 rval[name] = entry | 287 rval[name] = entry |
| 273 return rval | 288 return rval |
| 289 | |
| 274 | 290 |
| 275 def split_path_all( path ): | 291 def split_path_all( path ): |
| 276 rval = [] | 292 rval = [] |
| 277 path = path.rstrip( '/' ) | 293 path = path.rstrip( '/' ) |
| 278 while True: | 294 while True: |
| 284 rval.append( head ) | 300 rval.append( head ) |
| 285 break | 301 break |
| 286 else: | 302 else: |
| 287 break | 303 break |
| 288 rval.reverse() | 304 rval.reverse() |
| 289 return rval | 305 return rval |
| 306 | |
| 290 | 307 |
| 291 def get_data_for_path( path, data_root_dir ): | 308 def get_data_for_path( path, data_root_dir ): |
| 292 # We list dir with a /, but copy data without | 309 # We list dir with a /, but copy data without |
| 293 # listing with / gives a . entry when its a dir | 310 # listing with / gives a . entry when its a dir |
| 294 # cloning without the / will copy that whole directory into the target, | 311 # cloning without the / will copy that whole directory into the target, |
| 295 # instead of just that target's contents | 312 # instead of just that target's contents |
| 296 if path.startswith( GALAXY_DATA_CANONICAL_PATH ): | 313 if path.startswith( GALAXY_DATA_CANONICAL_PATH ): |
| 297 path = path[ len( GALAXY_DATA_CANONICAL_PATH ):] | 314 path = path[ len( GALAXY_DATA_CANONICAL_PATH ):] |
| 298 make_path = path | 315 make_path = path |
| 299 rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), path ) | 316 rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), path ) |
| 300 if rsync_source.endswith( '/' ): | 317 if rsync_source.endswith( '/' ): |
| 301 rsync_source = rsync_source[:-1] | 318 rsync_source = rsync_source[:-1] |
| 302 try: | 319 try: |
| 303 dir_list = rsync_list_dir( rsync_source + "/" ) | 320 dir_list = rsync_list_dir( rsync_source + "/" ) |
| 304 except Exception, e: | 321 except Exception: |
| 305 dir_list = None | 322 dir_list = None |
| 306 while not dir_list or '.' not in dir_list: | 323 while not dir_list or '.' not in dir_list: |
| 307 head, tail = os.path.split( make_path ) | 324 head, tail = os.path.split( make_path ) |
| 308 if not head: | 325 if not head: |
| 309 head = tail | 326 head = tail |
| 310 make_path = head | 327 make_path = head |
| 311 rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), head ) #if we error here, likely due to a connection issue | 328 rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), head ) # if we error here, likely due to a connection issue |
| 312 if rsync_source.endswith( '/' ): | 329 if rsync_source.endswith( '/' ): |
| 313 rsync_source = rsync_source[:-1] | 330 rsync_source = rsync_source[:-1] |
| 314 dir_list = rsync_list_dir( rsync_source + "/" ) | 331 dir_list = rsync_list_dir( rsync_source + "/" ) |
| 315 split_path = split_path_all( make_path ) | 332 split_path = split_path_all( make_path ) |
| 316 target_path = data_root_dir | 333 target_path = data_root_dir |
| 319 if not os.path.exists( target_path ): | 336 if not os.path.exists( target_path ): |
| 320 os.mkdir( target_path ) | 337 os.mkdir( target_path ) |
| 321 rsync_sync_to_dir( rsync_source, target_path ) | 338 rsync_sync_to_dir( rsync_source, target_path ) |
| 322 return path | 339 return path |
| 323 | 340 |
| 341 | |
| 324 def get_data_and_munge_path( data_table_name, data_table_entry, data_root_dir ): | 342 def get_data_and_munge_path( data_table_name, data_table_entry, data_root_dir ): |
| 325 path_cols = [] | 343 path_cols = [] |
| 326 for key, value in data_table_entry.iteritems(): | 344 for key, value in data_table_entry.items(): |
| 327 if key in PATH_COLUMN_NAMES: | 345 if key in PATH_COLUMN_NAMES: |
| 328 path_cols.append( ( key, value ) ) | 346 path_cols.append( ( key, value ) ) |
| 329 found_data = False | |
| 330 if path_cols: | 347 if path_cols: |
| 331 for col_name, value in path_cols: | 348 for col_name, value in path_cols: |
| 332 #GALAXY_DATA_CANONICAL_PATH | |
| 333 if value.startswith( GALAXY_DATA_CANONICAL_PATH ): | 349 if value.startswith( GALAXY_DATA_CANONICAL_PATH ): |
| 334 data_table_entry[col_name] = get_data_for_path( value, data_root_dir ) | 350 data_table_entry[col_name] = get_data_for_path( value, data_root_dir ) |
| 335 found_data = True | |
| 336 else: | 351 else: |
| 337 print 'unable to determine location of rsync data for', data_table_name, data_table_entry | 352 print('unable to determine location of rsync data for', data_table_name, data_table_entry) |
| 338 return data_table_entry | 353 return data_table_entry |
| 339 | 354 |
| 355 | |
| 340 def fulfill_data_table_entries( data_table_entries, data_manager_dict, data_root_dir ): | 356 def fulfill_data_table_entries( data_table_entries, data_manager_dict, data_root_dir ): |
| 341 for data_table_name, entries in data_table_entries.iteritems(): | 357 for data_table_name, entries in data_table_entries.items(): |
| 342 for entry in entries: | 358 for entry in entries: |
| 343 entry = get_data_and_munge_path( data_table_name, entry, data_root_dir ) | 359 entry = get_data_and_munge_path( data_table_name, entry, data_root_dir ) |
| 344 _add_data_table_entry( data_manager_dict, data_table_name, entry ) | 360 _add_data_table_entry( data_manager_dict, data_table_name, entry ) |
| 345 return data_manager_dict | 361 return data_manager_dict |
| 362 | |
| 346 | 363 |
| 347 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): | 364 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): |
| 348 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 365 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) |
| 349 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) | 366 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) |
| 350 data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) | 367 data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) |
| 351 return data_manager_dict | 368 return data_manager_dict |
| 352 | 369 |
| 370 | |
| 353 def cleanup_before_exit( tmp_dir ): | 371 def cleanup_before_exit( tmp_dir ): |
| 354 if tmp_dir and os.path.exists( tmp_dir ): | 372 if tmp_dir and os.path.exists( tmp_dir ): |
| 355 shutil.rmtree( tmp_dir ) | 373 shutil.rmtree( tmp_dir ) |
| 356 | 374 |
| 375 | |
| 357 def get_data_table_entries( params ): | 376 def get_data_table_entries( params ): |
| 358 rval = {} | 377 rval = {} |
| 359 data_table_entries = params.get( 'data_table_entries', None ) | 378 data_table_entries = params.get( 'data_table_entries', None ) |
| 360 if data_table_entries : | 379 if data_table_entries: |
| 361 for entry_text in data_table_entries.split( ',' ): | 380 for entry_text in data_table_entries.split( ',' ): |
| 362 entry_text = entry_text.strip().decode( 'base64' ) | 381 entry_text = entry_text.strip().decode( 'base64' ) |
| 363 entry_dict = loads( entry_text ) | 382 entry_dict = loads( entry_text ) |
| 364 data_table_name = entry_dict['name'] | 383 data_table_name = entry_dict['name'] |
| 365 data_table_entry = entry_dict['entry'] | 384 data_table_entry = entry_dict['entry'] |
| 366 rval[ data_table_name ] = rval.get( data_table_name, [] ) | 385 rval[ data_table_name ] = rval.get( data_table_name, [] ) |
| 367 rval[ data_table_name ].append( data_table_entry ) | 386 rval[ data_table_name ].append( data_table_entry ) |
| 368 return rval | 387 return rval |
| 369 | 388 |
| 389 | |
| 370 def main(): | 390 def main(): |
| 371 #Parse Command Line | |
| 372 parser = optparse.OptionParser() | 391 parser = optparse.OptionParser() |
| 373 (options, args) = parser.parse_args() | 392 (options, args) = parser.parse_args() |
| 374 | 393 |
| 375 filename = args[0] | 394 filename = args[0] |
| 376 | 395 |
| 377 params = loads( open( filename ).read() ) | 396 params = loads( open( filename ).read() ) |
| 378 target_directory = params[ 'output_data' ][0]['extra_files_path'] | 397 target_directory = params[ 'output_data' ][0]['extra_files_path'] |
| 379 os.mkdir( target_directory ) | 398 os.mkdir( target_directory ) |
| 380 data_manager_dict = {} | 399 data_manager_dict = {} |
| 381 | 400 |
| 382 data_table_entries = get_data_table_entries( params['param_dict'] ) | 401 data_table_entries = get_data_table_entries( params['param_dict'] ) |
| 383 | 402 |
| 384 # Populate the data Tables | 403 # Populate the data Tables |
| 385 data_manager_dict = fulfill_data_table_entries( data_table_entries, data_manager_dict, target_directory ) | 404 data_manager_dict = fulfill_data_table_entries( data_table_entries, data_manager_dict, target_directory ) |
| 386 | 405 |
| 387 #save info to json file | 406 # save info to json file |
| 388 open( filename, 'wb' ).write( dumps( data_manager_dict ) ) | 407 open( filename, 'wb' ).write( dumps( data_manager_dict ) ) |
| 389 | 408 |
| 390 if __name__ == "__main__": main() | 409 |
| 410 if __name__ == "__main__": | |
| 411 main() |
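The hunks above are mostly mechanical Python 2/3 compatibility work. The sketches below restate the main patterns as small standalone snippets; any name or value that does not appear in the diff is an illustrative assumption, not part of the commit.

The import change at the top of the file is the standard fallback idiom: try the Python 3 module location first and fall back to the Python 2 one. `fetch_text` here is an illustrative wrapper, not a function from the script:

```python
try:
    # Python 3
    from urllib.request import urlopen
except ImportError:
    # Python 2
    from urllib2 import urlopen

def fetch_text(url):
    # urlopen().read() returns bytes on Python 3 and str on Python 2;
    # decoding explicitly keeps the two interpreters in line.
    return urlopen(url).read().decode('utf-8')
```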
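Two further changes are pure syntax: `print` statements become `print()` calls, made legal on Python 2 by the `from __future__ import print_function` line added at the top, and the Python 2-only `except Exception, e:` form becomes `except Exception:` (the bound name `e` was unused, so it is simply dropped). A minimal sketch with illustrative names:

```python
from __future__ import print_function  # a no-op on Python 3

def try_list_dir(list_dir, source):
    # 'except Exception, e:' is a SyntaxError on Python 3; since the old
    # code never used e, the portable spelling is a bare 'except Exception:'.
    try:
        return list_dir(source + "/")
    except Exception:
        print('unable to list', source)
        return None
```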
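Python 3 turns `dict.items()` and `map()` into lazy views, which drives three recurring edits in the diff: `iteritems()` becomes `items()`, `map(lambda ...)` becomes a list comprehension where Galaxy expects a real list, and the cleanup loop in `load_data_tables_from_url` iterates over `list(...items())` because it deletes keys mid-loop. A toy illustration of the last two points:

```python
data_tables = {'all_fasta': ['path'], 'excluded_table': ['path'], 'weird': []}
EXCLUDE_DATA_TABLES = ['excluded_table']
DEFAULT_SELECTED = False  # assumed value; defined in a hunk elided from this diff

# Deleting from a dict while iterating it raises RuntimeError on Python 3,
# so the loop walks a snapshot taken with list(...).
for name, columns in list(data_tables.items()):
    if name in EXCLUDE_DATA_TABLES or 'path' not in columns:
        del data_tables[name]

# map() returns a lazy iterator on Python 3; the comprehension keeps the
# list that the option-building code expects.
rval = [(name, name, DEFAULT_SELECTED) for name in data_tables]
print(rval)  # [('all_fasta', 'all_fasta', False)]
```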
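One thing the commit does not touch: the `'base64'` string codec used to round-trip table entries through tool parameters (`dumps(...).encode('base64').rstrip()` when building options, `entry_text.strip().decode('base64')` when reading them back) exists only on Python 2 `str`, so those two call sites would still fail under Python 3. A portable sketch of the same round-trip using the `base64` module; note that `b64encode` does not append the trailing newline the old codec did, which is what the `.rstrip()` was trimming:

```python
import base64
from json import dumps, loads

def encode_entry(name, entry):
    # Portable stand-in for dumps(...).encode('base64').rstrip()
    payload = dumps({'name': name, 'entry': entry}).encode('ascii')
    return base64.b64encode(payload).decode('ascii')

def decode_entry(entry_text):
    # Portable stand-in for entry_text.strip().decode('base64')
    return loads(base64.b64decode(entry_text.strip()).decode('ascii'))

token = encode_entry('all_fasta', {'dbkey': 'hg19', 'path': '/galaxy/data/hg19'})
print(decode_entry(token)['entry']['dbkey'])  # hg19
```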
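Most of `rsync_list_dir`'s parsing is elided from this hunk, but the comment and the surviving lines pin down its shape: each line of rsync module-listing output carries permissions, size, date, time, and name, and the result is a dict keyed by name. A sketch consistent with that shape, assuming whitespace-delimited fields; the original parser may handle cases this version does not:

```python
def parse_rsync_listing(listing_text):
    # A listing line looks like:
    #   drwxr-xr-x             50 2014/05/16 20:58:11 .
    # maxsplit=4 keeps names that themselves contain spaces intact.
    rval = {}
    for line in listing_text.splitlines():
        if not line.strip():
            continue
        perms, size, date, time, name = line.split(None, 4)
        rval[name] = dict(name=name, permissions=perms, bytes=size,
                          date=date, time=time)
    return rval
```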
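`rsync_sync_to_dir` shells out with `subprocess.call`, parking stdout and stderr in temporary files; the error handling after the call is elided from this hunk, so the failure branch below is an assumption about intent, not the original code:

```python
import subprocess
import tempfile

RSYNC_CMD = 'rsync'  # assumed value; the constant is defined in an elided hunk

def rsync_sync_to_dir(source, target):
    # Redirect rsync's chatter into temp files so a failure can be
    # reported compactly instead of streaming -avzP progress output.
    with tempfile.NamedTemporaryFile() as out, tempfile.NamedTemporaryFile() as err:
        return_code = subprocess.call([RSYNC_CMD, '-avzP', source, target],
                                      stdout=out, stderr=err)
        if return_code:
            err.seek(0)
            raise Exception('rsync failed (%d): %s'
                            % (return_code, err.read().decode('utf-8', 'replace')))
    return return_code
```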
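Finally, `data_table_needs_refresh` implements a simple time-based cache: the comparison `datetime.datetime.now() - time_loaded > CACHE_TIME` only makes sense if `CACHE_TIME` (defined in an elided hunk) is a `datetime.timedelta`, since subtracting two datetimes yields one. The invalidation check in isolation, with an assumed cache window:

```python
import datetime

CACHE_TIME = datetime.timedelta(minutes=30)  # assumed value

def needs_refresh(cached_data_table):
    # No cache yet: always refresh.
    if cached_data_table is None:
        return True
    # datetime - datetime yields a timedelta, compared against CACHE_TIME.
    age = datetime.datetime.now() - cached_data_table['time_loaded']
    return age > CACHE_TIME
```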
