comparison keras_deep_learning.py @ 3:0a1812986bc3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 11:10:37 +0000
parents 38c4f8a98038
children
comparison
equal deleted inserted replaced
2:38c4f8a98038 3:0a1812986bc3
1 import argparse 1 import argparse
2 import json 2 import json
3 import keras 3 import warnings
4 import pandas as pd 4 from ast import literal_eval
5 import pickle 5
6 import six 6 import six
7 import warnings 7 from galaxy_ml.model_persist import dump_model_to_h5
8 8 from galaxy_ml.utils import SafeEval, try_get_attr
9 from ast import literal_eval 9 from tensorflow import keras
10 from keras.models import Sequential, Model 10 from tensorflow.keras.models import Model, Sequential
11 from galaxy_ml.utils import try_get_attr, get_search_params, SafeEval
12
13 11
14 safe_eval = SafeEval() 12 safe_eval = SafeEval()
15 13
16 14
17 def _handle_shape(literal): 15 def _handle_shape(literal):
46 44
47 if not l1 and not l2: 45 if not l1 and not l2:
48 return None 46 return None
49 47
50 if l1 is None: 48 if l1 is None:
51 l1 = 0. 49 l1 = 0.0
52 if l2 is None: 50 if l2 is None:
53 l2 = 0. 51 l2 = 0.0
54 52
55 return keras.regularizers.l1_l2(l1=l1, l2=l2) 53 return keras.regularizers.l1_l2(l1=l1, l2=l2)
56 54
57 55
58 def _handle_constraint(config): 56 def _handle_constraint(config):
70 }, 68 },
71 "constraint_type": 69 "constraint_type":
72 "MinMaxNorm" 70 "MinMaxNorm"
73 } 71 }
74 """ 72 """
75 constraint_type = config['constraint_type'] 73 constraint_type = config["constraint_type"]
76 if constraint_type in ('None', ''): 74 if constraint_type in ("None", ""):
77 return None 75 return None
78 76
79 klass = getattr(keras.constraints, constraint_type) 77 klass = getattr(keras.constraints, constraint_type)
80 options = config.get('constraint_options', {}) 78 options = config.get("constraint_options", {})
81 if 'axis' in options: 79 if "axis" in options:
82 options['axis'] = literal_eval(options['axis']) 80 options["axis"] = literal_eval(options["axis"])
83 81
84 return klass(**options) 82 return klass(**options)
85 83
86 84
87 def _handle_lambda(literal): 85 def _handle_lambda(literal):
88 return None 86 return None
89 87
90 88
91 def _handle_layer_parameters(params): 89 def _handle_layer_parameters(params):
92 """Access to handle all kinds of parameters 90 """Access to handle all kinds of parameters"""
93 """
94 for key, value in six.iteritems(params): 91 for key, value in six.iteritems(params):
95 if value in ('None', ''): 92 if value in ("None", ""):
96 params[key] = None 93 params[key] = None
97 continue 94 continue
98 95
99 if type(value) in [int, float, bool]\ 96 if type(value) in [int, float, bool] or (
100 or (type(value) is str and value.isalpha()): 97 type(value) is str and value.isalpha()
98 ):
101 continue 99 continue
102 100
103 if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', 101 if key in [
104 'target_shape', 'dims', 'kernel_size', 'strides', 102 "input_shape",
105 'dilation_rate', 'output_padding', 'cropping', 'size', 103 "noise_shape",
106 'padding', 'pool_size', 'axis', 'shared_axes'] \ 104 "shape",
107 and isinstance(value, str): 105 "batch_shape",
106 "target_shape",
107 "dims",
108 "kernel_size",
109 "strides",
110 "dilation_rate",
111 "output_padding",
112 "cropping",
113 "size",
114 "padding",
115 "pool_size",
116 "axis",
117 "shared_axes",
118 ] and isinstance(value, str):
108 params[key] = _handle_shape(value) 119 params[key] = _handle_shape(value)
109 120
110 elif key.endswith('_regularizer') and isinstance(value, dict): 121 elif key.endswith("_regularizer") and isinstance(value, dict):
111 params[key] = _handle_regularizer(value) 122 params[key] = _handle_regularizer(value)
112 123
113 elif key.endswith('_constraint') and isinstance(value, dict): 124 elif key.endswith("_constraint") and isinstance(value, dict):
114 params[key] = _handle_constraint(value) 125 params[key] = _handle_constraint(value)
115 126
116 elif key == 'function': # No support for lambda/function eval 127 elif key == "function": # No support for lambda/function eval
117 params.pop(key) 128 params.pop(key)
118 129
119 return params 130 return params
120 131
121 132
125 Parameters: 136 Parameters:
126 ----------- 137 -----------
127 config : dictionary, galaxy tool parameters loaded by JSON 138 config : dictionary, galaxy tool parameters loaded by JSON
128 """ 139 """
129 model = Sequential() 140 model = Sequential()
130 input_shape = _handle_shape(config['input_shape']) 141 input_shape = _handle_shape(config["input_shape"])
131 layers = config['layers'] 142 layers = config["layers"]
132 for layer in layers: 143 for layer in layers:
133 options = layer['layer_selection'] 144 options = layer["layer_selection"]
134 layer_type = options.pop('layer_type') 145 layer_type = options.pop("layer_type")
135 klass = getattr(keras.layers, layer_type) 146 klass = getattr(keras.layers, layer_type)
136 kwargs = options.pop('kwargs', '') 147 kwargs = options.pop("kwargs", "")
137 148
138 # parameters needs special care 149 # parameters needs special care
139 options = _handle_layer_parameters(options) 150 options = _handle_layer_parameters(options)
140 151
141 if kwargs: 152 if kwargs:
142 kwargs = safe_eval('dict(' + kwargs + ')') 153 kwargs = safe_eval("dict(" + kwargs + ")")
143 options.update(kwargs) 154 options.update(kwargs)
144 155
145 # add input_shape to the first layer only 156 # add input_shape to the first layer only
146 if not getattr(model, '_layers') and input_shape is not None: 157 if not model.get_config()["layers"] and input_shape is not None:
147 options['input_shape'] = input_shape 158 options["input_shape"] = input_shape
148 159
149 model.add(klass(**options)) 160 model.add(klass(**options))
150 161
151 return model 162 return model
152 163
156 167
157 Parameters 168 Parameters
158 ----------- 169 -----------
159 config : dictionary, galaxy tool parameters loaded by JSON 170 config : dictionary, galaxy tool parameters loaded by JSON
160 """ 171 """
161 layers = config['layers'] 172 layers = config["layers"]
162 all_layers = [] 173 all_layers = []
163 for layer in layers: 174 for layer in layers:
164 options = layer['layer_selection'] 175 options = layer["layer_selection"]
165 layer_type = options.pop('layer_type') 176 layer_type = options.pop("layer_type")
166 klass = getattr(keras.layers, layer_type) 177 klass = getattr(keras.layers, layer_type)
167 inbound_nodes = options.pop('inbound_nodes', None) 178 inbound_nodes = options.pop("inbound_nodes", None)
168 kwargs = options.pop('kwargs', '') 179 kwargs = options.pop("kwargs", "")
169 180
170 # parameters needs special care 181 # parameters needs special care
171 options = _handle_layer_parameters(options) 182 options = _handle_layer_parameters(options)
172 183
173 if kwargs: 184 if kwargs:
174 kwargs = safe_eval('dict(' + kwargs + ')') 185 kwargs = safe_eval("dict(" + kwargs + ")")
175 options.update(kwargs) 186 options.update(kwargs)
176 187
177 # merge layers 188 # merge layers
178 if 'merging_layers' in options: 189 if "merging_layers" in options:
179 idxs = literal_eval(options.pop('merging_layers')) 190 idxs = literal_eval(options.pop("merging_layers"))
180 merging_layers = [all_layers[i-1] for i in idxs] 191 merging_layers = [all_layers[i - 1] for i in idxs]
181 new_layer = klass(**options)(merging_layers) 192 new_layer = klass(**options)(merging_layers)
182 # non-input layers 193 # non-input layers
183 elif inbound_nodes is not None: 194 elif inbound_nodes is not None:
184 new_layer = klass(**options)(all_layers[inbound_nodes-1]) 195 new_layer = klass(**options)(all_layers[inbound_nodes - 1])
185 # input layers 196 # input layers
186 else: 197 else:
187 new_layer = klass(**options) 198 new_layer = klass(**options)
188 199
189 all_layers.append(new_layer) 200 all_layers.append(new_layer)
190 201
191 input_indexes = _handle_shape(config['input_layers']) 202 input_indexes = _handle_shape(config["input_layers"])
192 input_layers = [all_layers[i-1] for i in input_indexes] 203 input_layers = [all_layers[i - 1] for i in input_indexes]
193 204
194 output_indexes = _handle_shape(config['output_layers']) 205 output_indexes = _handle_shape(config["output_layers"])
195 output_layers = [all_layers[i-1] for i in output_indexes] 206 output_layers = [all_layers[i - 1] for i in output_indexes]
196 207
197 return Model(inputs=input_layers, outputs=output_layers) 208 return Model(inputs=input_layers, outputs=output_layers)
198 209
199 210
200 def get_batch_generator(config): 211 def get_batch_generator(config):
202 213
203 Parameters 214 Parameters
204 ----------- 215 -----------
205 config : dictionary, galaxy tool parameters loaded by JSON 216 config : dictionary, galaxy tool parameters loaded by JSON
206 """ 217 """
207 generator_type = config.pop('generator_type') 218 generator_type = config.pop("generator_type")
208 if generator_type == 'none': 219 if generator_type == "none":
209 return None 220 return None
210 221
211 klass = try_get_attr('galaxy_ml.preprocessors', generator_type) 222 klass = try_get_attr("galaxy_ml.preprocessors", generator_type)
212 223
213 if generator_type == 'GenomicIntervalBatchGenerator': 224 if generator_type == "GenomicIntervalBatchGenerator":
214 config['ref_genome_path'] = 'to_be_determined' 225 config["ref_genome_path"] = "to_be_determined"
215 config['intervals_path'] = 'to_be_determined' 226 config["intervals_path"] = "to_be_determined"
216 config['target_path'] = 'to_be_determined' 227 config["target_path"] = "to_be_determined"
217 config['features'] = 'to_be_determined' 228 config["features"] = "to_be_determined"
218 else: 229 else:
219 config['fasta_path'] = 'to_be_determined' 230 config["fasta_path"] = "to_be_determined"
220 231
221 return klass(**config) 232 return klass(**config)
222 233
223 234
224 def config_keras_model(inputs, outfile): 235 def config_keras_model(inputs, outfile):
225 """ config keras model layers and output JSON 236 """config keras model layers and output JSON
226 237
227 Parameters 238 Parameters
228 ---------- 239 ----------
229 inputs : dict 240 inputs : dict
230 loaded galaxy tool parameters from `keras_model_config` 241 loaded galaxy tool parameters from `keras_model_config`
231 tool. 242 tool.
232 outfile : str 243 outfile : str
233 Path to galaxy dataset containing keras model JSON. 244 Path to galaxy dataset containing keras model JSON.
234 """ 245 """
235 model_type = inputs['model_selection']['model_type'] 246 model_type = inputs["model_selection"]["model_type"]
236 layers_config = inputs['model_selection'] 247 layers_config = inputs["model_selection"]
237 248
238 if model_type == 'sequential': 249 if model_type == "sequential":
239 model = get_sequential_model(layers_config) 250 model = get_sequential_model(layers_config)
240 else: 251 else:
241 model = get_functional_model(layers_config) 252 model = get_functional_model(layers_config)
242 253
243 json_string = model.to_json() 254 json_string = model.to_json()
244 255
245 with open(outfile, 'w') as f: 256 with open(outfile, "w") as f:
246 json.dump(json.loads(json_string), f, indent=2) 257 json.dump(json.loads(json_string), f, indent=2)
247 258
248 259
249 def build_keras_model(inputs, outfile, model_json, infile_weights=None, 260 def build_keras_model(inputs, outfile, model_json, batch_mode=False):
250 batch_mode=False, outfile_params=None): 261 """for `keras_model_builder` tool
251 """ for `keras_model_builder` tool
252 262
253 Parameters 263 Parameters
254 ---------- 264 ----------
255 inputs : dict 265 inputs : dict
256 loaded galaxy tool parameters from `keras_model_builder` tool. 266 loaded galaxy tool parameters from `keras_model_builder` tool.
257 outfile : str 267 outfile : str
258 Path to galaxy dataset containing the keras_galaxy model output. 268 Path to galaxy dataset containing the keras_galaxy model output.
259 model_json : str 269 model_json : str
260 Path to dataset containing keras model JSON. 270 Path to dataset containing keras model JSON.
261 infile_weights : str or None
262 If string, path to dataset containing model weights.
263 batch_mode : bool, default=False 271 batch_mode : bool, default=False
264 Whether to build online batch classifier. 272 Whether to build online batch classifier.
265 outfile_params : str, default=None 273 """
266 File path to search parameters output. 274 with open(model_json, "r") as f:
267 """
268 with open(model_json, 'r') as f:
269 json_model = json.load(f) 275 json_model = json.load(f)
270 276
271 config = json_model['config'] 277 config = json_model["config"]
272 278
273 options = {} 279 options = {}
274 280
275 if json_model['class_name'] == 'Sequential': 281 if json_model["class_name"] == "Sequential":
276 options['model_type'] = 'sequential' 282 options["model_type"] = "sequential"
277 klass = Sequential 283 klass = Sequential
278 elif json_model['class_name'] == 'Model': 284 elif json_model["class_name"] == "Functional":
279 options['model_type'] = 'functional' 285 options["model_type"] = "functional"
280 klass = Model 286 klass = Model
281 else: 287 else:
282 raise ValueError("Unknow Keras model class: %s" 288 raise ValueError("Unknow Keras model class: %s" % json_model["class_name"])
283 % json_model['class_name'])
284 289
285 # load prefitted model 290 # load prefitted model
286 if inputs['mode_selection']['mode_type'] == 'prefitted': 291 if inputs["mode_selection"]["mode_type"] == "prefitted":
287 estimator = klass.from_config(config) 292 # estimator = klass.from_config(config)
288 estimator.load_weights(infile_weights) 293 # estimator.load_weights(infile_weights)
294 raise Exception("Prefitted was deprecated!")
289 # build train model 295 # build train model
290 else: 296 else:
291 cls_name = inputs['mode_selection']['learning_type'] 297 cls_name = inputs["mode_selection"]["learning_type"]
292 klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name) 298 klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name)
293 299
294 options['loss'] = (inputs['mode_selection'] 300 options["loss"] = inputs["mode_selection"]["compile_params"]["loss"]
295 ['compile_params']['loss']) 301 options["optimizer"] = (
296 options['optimizer'] =\ 302 inputs["mode_selection"]["compile_params"]["optimizer_selection"][
297 (inputs['mode_selection']['compile_params'] 303 "optimizer_type"
298 ['optimizer_selection']['optimizer_type']).lower() 304 ]
299 305 ).lower()
300 options.update((inputs['mode_selection']['compile_params'] 306
301 ['optimizer_selection']['optimizer_options'])) 307 options.update(
302 308 (
303 train_metrics = (inputs['mode_selection']['compile_params'] 309 inputs["mode_selection"]["compile_params"]["optimizer_selection"][
304 ['metrics']).split(',') 310 "optimizer_options"
305 if train_metrics[-1] == 'none': 311 ]
306 train_metrics = train_metrics[:-1] 312 )
307 options['metrics'] = train_metrics 313 )
308 314
309 options.update(inputs['mode_selection']['fit_params']) 315 train_metrics = inputs["mode_selection"]["compile_params"]["metrics"]
310 options['seed'] = inputs['mode_selection']['random_seed'] 316 if not isinstance(train_metrics, list): # for older galaxy
317 train_metrics = train_metrics.split(",")
318 if train_metrics[-1] == "none":
319 train_metrics.pop()
320 options["metrics"] = train_metrics
321
322 options.update(inputs["mode_selection"]["fit_params"])
323 options["seed"] = inputs["mode_selection"]["random_seed"]
311 324
312 if batch_mode: 325 if batch_mode:
313 generator = get_batch_generator(inputs['mode_selection'] 326 generator = get_batch_generator(
314 ['generator_selection']) 327 inputs["mode_selection"]["generator_selection"]
315 options['data_batch_generator'] = generator 328 )
316 options['prediction_steps'] = \ 329 options["data_batch_generator"] = generator
317 inputs['mode_selection']['prediction_steps'] 330 options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"]
318 options['class_positive_factor'] = \ 331 options["class_positive_factor"] = inputs["mode_selection"][
319 inputs['mode_selection']['class_positive_factor'] 332 "class_positive_factor"
333 ]
320 estimator = klass(config, **options) 334 estimator = klass(config, **options)
321 if outfile_params:
322 hyper_params = get_search_params(estimator)
323 # TODO: remove this after making `verbose` tunable
324 for h_param in hyper_params:
325 if h_param[1].endswith('verbose'):
326 h_param[0] = '@'
327 df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value'])
328 df.to_csv(outfile_params, sep='\t', index=False)
329 335
330 print(repr(estimator)) 336 print(repr(estimator))
331 # save model by pickle 337 # save model
332 with open(outfile, 'wb') as f: 338 dump_model_to_h5(estimator, outfile, verbose=1)
333 pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) 339
334 340
335 341 if __name__ == "__main__":
336 if __name__ == '__main__': 342 warnings.simplefilter("ignore")
337 warnings.simplefilter('ignore')
338 343
339 aparser = argparse.ArgumentParser() 344 aparser = argparse.ArgumentParser()
340 aparser.add_argument("-i", "--inputs", dest="inputs", required=True) 345 aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
341 aparser.add_argument("-m", "--model_json", dest="model_json") 346 aparser.add_argument("-m", "--model_json", dest="model_json")
342 aparser.add_argument("-t", "--tool_id", dest="tool_id") 347 aparser.add_argument("-t", "--tool_id", dest="tool_id")
343 aparser.add_argument("-w", "--infile_weights", dest="infile_weights")
344 aparser.add_argument("-o", "--outfile", dest="outfile") 348 aparser.add_argument("-o", "--outfile", dest="outfile")
345 aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
346 args = aparser.parse_args() 349 args = aparser.parse_args()
347 350
348 input_json_path = args.inputs 351 input_json_path = args.inputs
349 with open(input_json_path, 'r') as param_handler: 352 with open(input_json_path, "r") as param_handler:
350 inputs = json.load(param_handler) 353 inputs = json.load(param_handler)
351 354
352 tool_id = args.tool_id 355 tool_id = args.tool_id
353 outfile = args.outfile 356 outfile = args.outfile
354 outfile_params = args.outfile_params
355 model_json = args.model_json 357 model_json = args.model_json
356 infile_weights = args.infile_weights
357 358
358 # for keras_model_config tool 359 # for keras_model_config tool
359 if tool_id == 'keras_model_config': 360 if tool_id == "keras_model_config":
360 config_keras_model(inputs, outfile) 361 config_keras_model(inputs, outfile)
361 362
362 # for keras_model_builder tool 363 # for keras_model_builder tool
363 else: 364 else:
364 batch_mode = False 365 batch_mode = False
365 if tool_id == 'keras_batch_models': 366 if tool_id == "keras_batch_models":
366 batch_mode = True 367 batch_mode = True
367 368
368 build_keras_model(inputs=inputs, 369 build_keras_model(
369 model_json=model_json, 370 inputs=inputs, model_json=model_json, batch_mode=batch_mode, outfile=outfile
370 infile_weights=infile_weights, 371 )
371 batch_mode=batch_mode,
372 outfile=outfile,
373 outfile_params=outfile_params)