Mercurial > repos > bgruening > sklearn_label_encoder
comparison keras_deep_learning.py @ 0:03155260beb3 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
| author | bgruening |
|---|---|
| date | Fri, 30 Apr 2021 23:36:38 +0000 |
| parents | |
| children | b008b609205e |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:03155260beb3 |
|---|---|
| 1 import argparse | |
| 2 import json | |
| 3 import pickle | |
| 4 import warnings | |
| 5 from ast import literal_eval | |
| 6 | |
| 7 import keras | |
| 8 import pandas as pd | |
| 9 import six | |
| 10 from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr | |
| 11 from keras.models import Model, Sequential | |
| 12 | |
| 13 safe_eval = SafeEval() | |
| 14 | |
| 15 | |
| 16 def _handle_shape(literal): | |
| 17 """ | |
| 18 Eval integer or list/tuple of integers from string | |
| 19 | |
| 20 Parameters: | |
| 21 ----------- | |
| 22 literal : str. | |
| 23 """ | |
| 24 literal = literal.strip() | |
| 25 if not literal: | |
| 26 return None | |
| 27 try: | |
| 28 return literal_eval(literal) | |
| 29 except NameError as e: | |
| 30 print(e) | |
| 31 return literal | |
| 32 | |
| 33 | |
| 34 def _handle_regularizer(literal): | |
| 35 """ | |
| 36 Construct regularizer from string literal | |
| 37 | |
| 38 Parameters | |
| 39 ---------- | |
| 40 literal : str. E.g. '(0.1, 0)' | |
| 41 """ | |
| 42 literal = literal.strip() | |
| 43 if not literal: | |
| 44 return None | |
| 45 | |
| 46 l1, l2 = literal_eval(literal) | |
| 47 | |
| 48 if not l1 and not l2: | |
| 49 return None | |
| 50 | |
| 51 if l1 is None: | |
| 52 l1 = 0.0 | |
| 53 if l2 is None: | |
| 54 l2 = 0.0 | |
| 55 | |
| 56 return keras.regularizers.l1_l2(l1=l1, l2=l2) | |
| 57 | |
| 58 | |
| 59 def _handle_constraint(config): | |
| 60 """ | |
| 61 Construct constraint from galaxy tool parameters. | |
| 62 Suppose correct dictionary format | |
| 63 | |
| 64 Parameters | |
| 65 ---------- | |
| 66 config : dict. E.g. | |
| 67 "bias_constraint": | |
| 68 {"constraint_options": | |
| 69 {"max_value":1.0, | |
| 70 "min_value":0.0, | |
| 71 "axis":"[0, 1, 2]" | |
| 72 }, | |
| 73 "constraint_type": | |
| 74 "MinMaxNorm" | |
| 75 } | |
| 76 """ | |
| 77 constraint_type = config["constraint_type"] | |
| 78 if constraint_type in ("None", ""): | |
| 79 return None | |
| 80 | |
| 81 klass = getattr(keras.constraints, constraint_type) | |
| 82 options = config.get("constraint_options", {}) | |
| 83 if "axis" in options: | |
| 84 options["axis"] = literal_eval(options["axis"]) | |
| 85 | |
| 86 return klass(**options) | |
| 87 | |
| 88 | |
| 89 def _handle_lambda(literal): | |
| 90 return None | |
| 91 | |
| 92 | |
| 93 def _handle_layer_parameters(params): | |
| 94 """ | |
| 95 Access to handle all kinds of parameters | |
| 96 """ | |
| 97 for key, value in six.iteritems(params): | |
| 98 if value in ("None", ""): | |
| 99 params[key] = None | |
| 100 continue | |
| 101 | |
| 102 if type(value) in [int, float, bool] or ( | |
| 103 type(value) is str and value.isalpha() | |
| 104 ): | |
| 105 continue | |
| 106 | |
| 107 if ( | |
| 108 key | |
| 109 in [ | |
| 110 "input_shape", | |
| 111 "noise_shape", | |
| 112 "shape", | |
| 113 "batch_shape", | |
| 114 "target_shape", | |
| 115 "dims", | |
| 116 "kernel_size", | |
| 117 "strides", | |
| 118 "dilation_rate", | |
| 119 "output_padding", | |
| 120 "cropping", | |
| 121 "size", | |
| 122 "padding", | |
| 123 "pool_size", | |
| 124 "axis", | |
| 125 "shared_axes", | |
| 126 ] | |
| 127 and isinstance(value, str) | |
| 128 ): | |
| 129 params[key] = _handle_shape(value) | |
| 130 | |
| 131 elif key.endswith("_regularizer") and isinstance(value, dict): | |
| 132 params[key] = _handle_regularizer(value) | |
| 133 | |
| 134 elif key.endswith("_constraint") and isinstance(value, dict): | |
| 135 params[key] = _handle_constraint(value) | |
| 136 | |
| 137 elif key == "function": # No support for lambda/function eval | |
| 138 params.pop(key) | |
| 139 | |
| 140 return params | |
| 141 | |
| 142 | |
def get_sequential_model(config):
    """
    Build a keras ``Sequential`` model from Galaxy tool parameters.

    Parameters
    ----------
    config : dict
        Galaxy tool parameters loaded from JSON; must provide
        "input_shape" and a "layers" list.
    """
    input_shape = _handle_shape(config["input_shape"])
    model = Sequential()

    for layer_conf in config["layers"]:
        layer_options = layer_conf["layer_selection"]
        layer_cls = getattr(keras.layers, layer_options.pop("layer_type"))
        extra_kwargs = layer_options.pop("kwargs", "")

        # Tool-form strings need conversion into python objects.
        layer_options = _handle_layer_parameters(layer_options)

        if extra_kwargs:
            layer_options.update(safe_eval("dict(" + extra_kwargs + ")"))

        # Only the very first layer receives the input shape.
        if not getattr(model, "_layers") and input_shape is not None:
            layer_options["input_shape"] = input_shape

        model.add(layer_cls(**layer_options))

    return model
| 174 | |
| 175 | |
def get_functional_model(config):
    """
    Build a keras functional ``Model`` from Galaxy tool parameters.

    Parameters
    ----------
    config : dict
        Galaxy tool parameters loaded from JSON; must provide a
        "layers" list plus "input_layers"/"output_layers" indexes
        (1-based, as entered in the tool form).
    """
    built_layers = []

    for layer_conf in config["layers"]:
        layer_options = layer_conf["layer_selection"]
        layer_cls = getattr(keras.layers, layer_options.pop("layer_type"))
        inbound_nodes = layer_options.pop("inbound_nodes", None)
        extra_kwargs = layer_options.pop("kwargs", "")

        # Tool-form strings need conversion into python objects.
        layer_options = _handle_layer_parameters(layer_options)

        if extra_kwargs:
            layer_options.update(safe_eval("dict(" + extra_kwargs + ")"))

        if "merging_layers" in layer_options:
            # Merge layer: connects several earlier layers (1-based).
            indexes = literal_eval(layer_options.pop("merging_layers"))
            sources = [built_layers[i - 1] for i in indexes]
            new_layer = layer_cls(**layer_options)(sources)
        elif inbound_nodes is not None:
            # Ordinary (non-input) layer with a single inbound layer.
            new_layer = layer_cls(**layer_options)(built_layers[inbound_nodes - 1])
        else:
            # Input layer: no inbound connection.
            new_layer = layer_cls(**layer_options)

        built_layers.append(new_layer)

    inputs = [built_layers[i - 1] for i in _handle_shape(config["input_layers"])]
    outputs = [built_layers[i - 1] for i in _handle_shape(config["output_layers"])]

    return Model(inputs=inputs, outputs=outputs)
| 221 | |
| 222 | |
def get_batch_generator(config):
    """
    Construct a keras online data generator from Galaxy tool parameters.

    Parameters
    ----------
    config : dict
        Galaxy tool parameters loaded from JSON. ``generator_type``
        is popped; the rest is passed to the generator class.

    Returns
    -------
    Generator instance, or None when no generator was selected.
    """
    generator_type = config.pop("generator_type")
    if generator_type == "none":
        return None

    generator_cls = try_get_attr("galaxy_ml.preprocessors", generator_type)

    # Placeholder paths: the real dataset paths are filled in later,
    # before fitting.
    if generator_type == "GenomicIntervalBatchGenerator":
        for key in ("ref_genome_path", "intervals_path", "target_path", "features"):
            config[key] = "to_be_determined"
    else:
        config["fasta_path"] = "to_be_determined"

    return generator_cls(**config)
| 246 | |
| 247 | |
def config_keras_model(inputs, outfile):
    """
    Build the requested keras model and write its config as JSON.

    Parameters
    ----------
    inputs : dict
        Galaxy tool parameters loaded from the `keras_model_config`
        tool.
    outfile : str
        Path to the galaxy dataset that receives the model JSON.
    """
    layers_config = inputs["model_selection"]

    if layers_config["model_type"] == "sequential":
        model = get_sequential_model(layers_config)
    else:
        model = get_functional_model(layers_config)

    # Round-trip through json so the output file is pretty-printed.
    with open(outfile, "w") as f:
        json.dump(json.loads(model.to_json()), f, indent=2)
| 272 | |
| 273 | |
def build_keras_model(
    inputs,
    outfile,
    model_json,
    infile_weights=None,
    batch_mode=False,
    outfile_params=None,
):
    """
    Build (or reload) a keras_galaxy estimator and pickle it to disk.

    Used by the `keras_model_builder` tool.

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output.
    """
    with open(model_json, "r") as f:
        json_model = json.load(f)

    config = json_model["config"]

    # `options` collects every keyword argument for the estimator class.
    options = {}

    if json_model["class_name"] == "Sequential":
        options["model_type"] = "sequential"
        klass = Sequential
    elif json_model["class_name"] == "Model":
        options["model_type"] = "functional"
        klass = Model
    else:
        # NOTE(review): "Unknow" typo lives in a runtime message; left as-is.
        raise ValueError("Unknow Keras model class: %s" % json_model["class_name"])

    # load prefitted model: reuse the serialized architecture + weights
    if inputs["mode_selection"]["mode_type"] == "prefitted":
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    # build train model: wrap the architecture in a galaxy_ml estimator
    else:
        # `klass` is rebound here from a keras class to a galaxy_ml
        # estimator class (e.g. KerasGClassifier).
        cls_name = inputs["mode_selection"]["learning_type"]
        klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name)

        options["loss"] = inputs["mode_selection"]["compile_params"]["loss"]
        options["optimizer"] = (
            inputs["mode_selection"]["compile_params"]["optimizer_selection"][
                "optimizer_type"
            ]
        ).lower()

        # Merge optimizer-specific options (lr, momentum, ...) into `options`.
        options.update(
            (
                inputs["mode_selection"]["compile_params"]["optimizer_selection"][
                    "optimizer_options"
                ]
            )
        )

        # "none" as the last metric is a form placeholder meaning "no metric".
        train_metrics = inputs["mode_selection"]["compile_params"]["metrics"]
        if train_metrics[-1] == "none":
            train_metrics = train_metrics[:-1]
        options["metrics"] = train_metrics

        options.update(inputs["mode_selection"]["fit_params"])
        options["seed"] = inputs["mode_selection"]["random_seed"]

        if batch_mode:
            generator = get_batch_generator(
                inputs["mode_selection"]["generator_selection"]
            )
            options["data_batch_generator"] = generator
            options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"]
            options["class_positive_factor"] = inputs["mode_selection"][
                "class_positive_factor"
            ]
        estimator = klass(config, **options)
        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            # "@" marks the parameter as non-searchable in the galaxy UI.
            for h_param in hyper_params:
                if h_param[1].endswith("verbose"):
                    h_param[0] = "@"
            df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"])
            df.to_csv(outfile_params, sep="\t", index=False)

    print(repr(estimator))
    # save model by pickle
    with open(outfile, "wb") as f:
        pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
| 371 | |
| 372 | |
if __name__ == "__main__":
    warnings.simplefilter("ignore")

    # Command-line interface shared by the keras_* galaxy tools.
    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    aparser.add_argument("-m", "--model_json", dest="model_json")
    aparser.add_argument("-t", "--tool_id", dest="tool_id")
    aparser.add_argument("-w", "--infile_weights", dest="infile_weights")
    aparser.add_argument("-o", "--outfile", dest="outfile")
    aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
    args = aparser.parse_args()

    with open(args.inputs, "r") as param_handler:
        inputs = json.load(param_handler)

    if args.tool_id == "keras_model_config":
        # keras_model_config tool: only emit the model JSON.
        config_keras_model(inputs, args.outfile)
    else:
        # keras_model_builder / keras_batch_models tools.
        build_keras_model(
            inputs=inputs,
            model_json=args.model_json,
            infile_weights=args.infile_weights,
            batch_mode=(args.tool_id == "keras_batch_models"),
            outfile=args.outfile,
            outfile_params=args.outfile_params,
        )
