comparison: keras_deep_learning.py @ 2:e18d9b17c322 (draft)
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c035d399196b3bef9982db4f8e47331411dbb20e
| author | bgruening |
|---|---|
| date | Fri, 09 Aug 2019 13:52:50 -0400 |
| parents | |
| children | 963e449636d3 |
Comparison of revisions 1:f2a391479a01 and 2:e18d9b17c322:
```python
import argparse
import json
import keras
import pandas as pd
import pickle
import six
import warnings

from ast import literal_eval
from keras.models import Sequential, Model
from galaxy_ml.utils import try_get_attr, get_search_params


def _handle_shape(literal):
    """Eval integer or list/tuple of integers from string

    Parameters
    ----------
    literal : str.
    """
    literal = literal.strip()
    if not literal:
        return None
    try:
        return literal_eval(literal)
    except NameError as e:
        print(e)
        return literal


def _handle_regularizer(literal):
    """Construct regularizer from string literal

    Parameters
    ----------
    literal : str. E.g. '(0.1, 0)'
    """
    literal = literal.strip()
    if not literal:
        return None

    l1, l2 = literal_eval(literal)

    if not l1 and not l2:
        return None

    if l1 is None:
        l1 = 0.
    if l2 is None:
        l2 = 0.

    return keras.regularizers.l1_l2(l1=l1, l2=l2)


def _handle_constraint(config):
    """Construct constraint from galaxy tool parameters.
    Assumes a correctly formatted dictionary.

    Parameters
    ----------
    config : dict. E.g.
        "bias_constraint":
            {"constraint_options":
                {"max_value": 1.0,
                 "min_value": 0.0,
                 "axis": "[0, 1, 2]"
                },
             "constraint_type":
                "MinMaxNorm"
            }
    """
    constraint_type = config['constraint_type']
    if constraint_type == 'None':
        return None

    klass = getattr(keras.constraints, constraint_type)
    options = config.get('constraint_options', {})
    if 'axis' in options:
        options['axis'] = literal_eval(options['axis'])

    return klass(**options)


def _handle_lambda(literal):
    # lambda/function strings are not evaluated; always return None
    return None


def _handle_layer_parameters(params):
    """Handle all kinds of layer parameters
    """
    for key, value in six.iteritems(params):
        if value == 'None':
            params[key] = None
            continue

        if type(value) in [int, float, bool]\
                or (type(value) is str and value.isalpha()):
            continue

        if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape',
                   'target_shape', 'dims', 'kernel_size', 'strides',
                   'dilation_rate', 'output_padding', 'cropping', 'size',
                   'padding', 'pool_size', 'axis', 'shared_axes']:
            params[key] = _handle_shape(value)

        elif key.endswith('_regularizer'):
            params[key] = _handle_regularizer(value)

        elif key.endswith('_constraint'):
            params[key] = _handle_constraint(value)

        elif key == 'function':  # No support for lambda/function eval
            params.pop(key)

    return params


def get_sequential_model(config):
    """Construct keras Sequential model from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded by JSON
    """
    model = Sequential()
    input_shape = _handle_shape(config['input_shape'])
    layers = config['layers']
    for layer in layers:
        options = layer['layer_selection']
        layer_type = options.pop('layer_type')
        klass = getattr(keras.layers, layer_type)
        other_options = options.pop('layer_options', {})
        options.update(other_options)

        # parameters need special care
        options = _handle_layer_parameters(options)

        # add input_shape to the first layer only
        if not getattr(model, '_layers') and input_shape is not None:
            options['input_shape'] = input_shape

        model.add(klass(**options))

    return model


def get_functional_model(config):
    """Construct keras functional model from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded by JSON
    """
    layers = config['layers']
    all_layers = []
    for layer in layers:
        options = layer['layer_selection']
        layer_type = options.pop('layer_type')
        klass = getattr(keras.layers, layer_type)
        inbound_nodes = options.pop('inbound_nodes', None)
        other_options = options.pop('layer_options', {})
        options.update(other_options)

        # parameters need special care
        options = _handle_layer_parameters(options)
        # merge layers
        if 'merging_layers' in options:
            idxs = literal_eval(options.pop('merging_layers'))
            merging_layers = [all_layers[i-1] for i in idxs]
            new_layer = klass(**options)(merging_layers)
        # non-input layers
        elif inbound_nodes is not None:
            new_layer = klass(**options)(all_layers[inbound_nodes-1])
        # input layers
        else:
            new_layer = klass(**options)

        all_layers.append(new_layer)

    input_indexes = _handle_shape(config['input_layers'])
    input_layers = [all_layers[i-1] for i in input_indexes]

    output_indexes = _handle_shape(config['output_layers'])
    output_layers = [all_layers[i-1] for i in output_indexes]

    return Model(inputs=input_layers, outputs=output_layers)


def get_batch_generator(config):
    """Construct keras online data generator from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded by JSON
    """
    generator_type = config.pop('generator_type')
    klass = try_get_attr('galaxy_ml.preprocessors', generator_type)

    if generator_type == 'GenomicIntervalBatchGenerator':
        config['ref_genome_path'] = 'to_be_determined'
        config['intervals_path'] = 'to_be_determined'
        config['target_path'] = 'to_be_determined'
        config['features'] = 'to_be_determined'
    else:
        config['fasta_path'] = 'to_be_determined'

    return klass(**config)


def config_keras_model(inputs, outfile):
    """Configure keras model layers and output JSON

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_config`
        tool.
    outfile : str
        Path to galaxy dataset containing keras model JSON.
    """
    model_type = inputs['model_selection']['model_type']
    layers_config = inputs['model_selection']

    if model_type == 'sequential':
        model = get_sequential_model(layers_config)
    else:
        model = get_functional_model(layers_config)

    json_string = model.to_json()

    with open(outfile, 'w') as f:
        f.write(json_string)


def build_keras_model(inputs, outfile, model_json, infile_weights=None,
                      batch_mode=False, outfile_params=None):
    """Build a Keras estimator for the `keras_model_builder` tool

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output.
    """
    with open(model_json, 'r') as f:
        json_model = json.load(f)

    config = json_model['config']

    options = {}

    if json_model['class_name'] == 'Sequential':
        options['model_type'] = 'sequential'
        klass = Sequential
    elif json_model['class_name'] == 'Model':
        options['model_type'] = 'functional'
        klass = Model
    else:
        raise ValueError("Unknown Keras model class: %s"
                         % json_model['class_name'])

    # load prefitted model
    if inputs['mode_selection']['mode_type'] == 'prefitted':
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    # build train model
    else:
        cls_name = inputs['mode_selection']['learning_type']
        klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name)

        options['loss'] = (inputs['mode_selection']
                           ['compile_params']['loss'])
        options['optimizer'] =\
            (inputs['mode_selection']['compile_params']
             ['optimizer_selection']['optimizer_type']).lower()

        options.update((inputs['mode_selection']['compile_params']
                        ['optimizer_selection']['optimizer_options']))

        train_metrics = (inputs['mode_selection']['compile_params']
                         ['metrics']).split(',')
        if train_metrics[-1] == 'none':
            train_metrics = train_metrics[:-1]
        options['metrics'] = train_metrics

        options.update(inputs['mode_selection']['fit_params'])
        options['seed'] = inputs['mode_selection']['random_seed']

        if batch_mode:
            generator = get_batch_generator(inputs['mode_selection']
                                            ['generator_selection'])
            options['data_batch_generator'] = generator
            options['prediction_steps'] = \
                inputs['mode_selection']['prediction_steps']
            options['class_positive_factor'] = \
                inputs['mode_selection']['class_positive_factor']
        estimator = klass(config, **options)
        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            for h_param in hyper_params:
                if h_param[1].endswith('verbose'):
                    h_param[0] = '@'
            df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value'])
            df.to_csv(outfile_params, sep='\t', index=False)

    print(repr(estimator))
    # save model by pickle
    with open(outfile, 'wb') as f:
        pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    warnings.simplefilter('ignore')

    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    aparser.add_argument("-m", "--model_json", dest="model_json")
    aparser.add_argument("-t", "--tool_id", dest="tool_id")
    aparser.add_argument("-w", "--infile_weights", dest="infile_weights")
    aparser.add_argument("-o", "--outfile", dest="outfile")
    aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
    args = aparser.parse_args()

    input_json_path = args.inputs
    with open(input_json_path, 'r') as param_handler:
        inputs = json.load(param_handler)

    tool_id = args.tool_id
    outfile = args.outfile
    outfile_params = args.outfile_params
    model_json = args.model_json
    infile_weights = args.infile_weights

    # for keras_model_config tool
    if tool_id == 'keras_model_config':
        config_keras_model(inputs, outfile)

    # for keras_model_builder tool
    else:
        batch_mode = False
        if tool_id == 'keras_batch_models':
            batch_mode = True

        build_keras_model(inputs=inputs,
                          model_json=model_json,
                          infile_weights=infile_weights,
                          batch_mode=batch_mode,
                          outfile=outfile,
                          outfile_params=outfile_params)
```
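
For orientation, below is a minimal, hypothetical sketch of how the `keras_model_config` path of this script could be exercised outside of Galaxy. The `inputs` dictionary is a hand-written stand-in for the parameter JSON that the Galaxy tool wrapper normally supplies via `--inputs`; its field names (`model_selection`, `layer_selection`, `layer_type`, `layer_options`) are inferred from the parsing code above, and the snippet assumes the functions are importable from `keras_deep_learning.py` with a compatible Keras version installed.

```python
# Hypothetical usage sketch, not part of the tool itself.
# Command-line equivalent (as wired up in the __main__ block above):
#   python keras_deep_learning.py -i inputs.json -t keras_model_config -o model.json
import json

from keras_deep_learning import config_keras_model

# Hand-written stand-in for the Galaxy-generated parameter JSON; the real
# structure is produced by the keras_model_config tool wrapper.
inputs = {
    'model_selection': {
        'model_type': 'sequential',
        'input_shape': '(784,)',
        'layers': [
            {'layer_selection': {
                'layer_type': 'Dense',
                'layer_options': {'units': 32, 'activation': 'relu'}}},
            {'layer_selection': {
                'layer_type': 'Dense',
                'layer_options': {'units': 10, 'activation': 'softmax'}}},
        ],
    }
}

# Build the Sequential model and write its architecture as JSON.
config_keras_model(inputs, 'model.json')

with open('model.json') as f:
    print(json.load(f)['class_name'])  # expected: 'Sequential'
```

The `keras_model_builder` and `keras_batch_models` branches additionally expect the model JSON (`-m`), optional weights (`-w`), and a `mode_selection` section with compile and fit parameters, as consumed by `build_keras_model`.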
