comparison: optimise_hyperparameters.py @ 5:9ec705bd11cb (draft, default, tip)
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
| author | bgruening |
|---|---|
| date | Sun, 16 Oct 2022 11:51:32 +0000 |
| parents | f0da532be419 |
| children | (none) |
Comparison of revisions 4:f0da532be419 and 5:9ec705bd11cb:
| 1 """ | |
| 2 Find the optimal combination of hyperparameters | |
| 3 """ | |
| 4 | |
| 5 import numpy as np | |
| 6 import utils | |
| 7 from hyperopt import fmin, hp, STATUS_OK, tpe, Trials | |
| 8 from tensorflow.keras.callbacks import EarlyStopping | |
| 9 from tensorflow.keras.layers import Dense, Dropout, Embedding, GRU, SpatialDropout1D | |
| 10 from tensorflow.keras.models import Sequential | |
| 11 from tensorflow.keras.optimizers import RMSprop | |
| 12 | |
| 13 | |
| 14 class HyperparameterOptimisation: | |
| 15 def __init__(self): | |
| 16 """ Init method. """ | |
| 17 | |
| 18 def train_model( | |
| 19 self, | |
| 20 config, | |
| 21 reverse_dictionary, | |
| 22 train_data, | |
| 23 train_labels, | |
| 24 test_data, | |
| 25 test_labels, | |
| 26 tool_tr_samples, | |
| 27 class_weights, | |
| 28 ): | |
| 29 """ | |
| 30 Train a model and report accuracy | |
| 31 """ | |
| 32 # convert items to integer | |
| 33 l_batch_size = list(map(int, config["batch_size"].split(","))) | |
| 34 l_embedding_size = list(map(int, config["embedding_size"].split(","))) | |
| 35 l_units = list(map(int, config["units"].split(","))) | |
| 36 | |
| 37 # convert items to float | |
| 38 l_learning_rate = list(map(float, config["learning_rate"].split(","))) | |
| 39 l_dropout = list(map(float, config["dropout"].split(","))) | |
| 40 l_spatial_dropout = list(map(float, config["spatial_dropout"].split(","))) | |
| 41 l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(","))) | |
| 42 | |
| 43 optimize_n_epochs = int(config["optimize_n_epochs"]) | |
| 44 | |
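        # NOTE: each range option above arrives as a "min,max" string,
        # e.g. "32,256" for batch_size (illustrative values); the two
        # bounds delimit the interval handed to the hyperopt search
        # space defined below.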
        # output dimension: number of tools plus one slot for the padding index 0
        dimensions = len(reverse_dictionary) + 1
        best_model_params = dict()
        early_stopping = EarlyStopping(
            monitor="val_loss",
            mode="min",
            verbose=1,
            min_delta=1e-1,
            restore_best_weights=True,
        )

        # specify the search space for finding the best combination of
        # parameters using Bayesian optimisation
        params = {
            "embedding_size": hp.quniform(
                "embedding_size", l_embedding_size[0], l_embedding_size[1], 1
            ),
            "units": hp.quniform("units", l_units[0], l_units[1], 1),
            "batch_size": hp.quniform(
                "batch_size", l_batch_size[0], l_batch_size[1], 1
            ),
            "learning_rate": hp.loguniform(
                "learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])
            ),
            "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]),
            "spatial_dropout": hp.uniform(
                "spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]
            ),
            "recurrent_dropout": hp.uniform(
                "recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1]
            ),
        }
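        # hp.quniform draws quantised values (step 1) for the integer-valued
        # parameters, hp.loguniform samples the learning rate uniformly on a
        # log scale so small and large rates are explored evenly, and
        # hp.uniform covers the dropout fractions.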

        def create_model(params):
            model = Sequential()
            model.add(
                Embedding(dimensions, int(params["embedding_size"]), mask_zero=True)
            )
            model.add(SpatialDropout1D(params["spatial_dropout"]))
            model.add(
                GRU(
                    int(params["units"]),
                    dropout=params["dropout"],
                    recurrent_dropout=params["recurrent_dropout"],
                    return_sequences=True,
                    activation="elu",
                )
            )
            model.add(Dropout(params["dropout"]))
            model.add(
                GRU(
                    int(params["units"]),
                    dropout=params["dropout"],
                    recurrent_dropout=params["recurrent_dropout"],
                    return_sequences=False,
                    activation="elu",
                )
            )
            model.add(Dropout(params["dropout"]))
            model.add(Dense(2 * dimensions, activation="sigmoid"))
            # tf.keras 2.x takes "learning_rate"; the old "lr" alias is deprecated
            optimizer_rms = RMSprop(learning_rate=params["learning_rate"])
            batch_size = int(params["batch_size"])
            model.compile(
                loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms
            )
            # summary() prints the architecture itself and returns None,
            # so there is no need to wrap it in print()
            model.summary()
            model_fit = model.fit(
                utils.balanced_sample_generator(
                    train_data,
                    train_labels,
                    batch_size,
                    tool_tr_samples,
                    reverse_dictionary,
                ),
                steps_per_epoch=len(train_data) // batch_size,
                epochs=optimize_n_epochs,
                callbacks=[early_stopping],
                validation_data=(test_data, test_labels),
                verbose=2,
                shuffle=True,
            )
            return {
                "loss": model_fit.history["val_loss"][-1],
                "status": STATUS_OK,
                "model": model,
            }
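        # hyperopt minimises the "loss" entry of the dictionary returned by
        # the objective (here the final validation loss); STATUS_OK marks the
        # trial as successful, and the fitted model is stored alongside so
        # the best one can be pulled back out of the Trials object below.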

        # minimize the objective function using the set of parameters above
        trials = Trials()
        learned_params = fmin(
            create_model,
            params,
            trials=trials,
            algo=tpe.suggest,
            max_evals=int(config["max_evals"]),
        )
        # pick the model from the trial with the lowest validation loss
        best_model = trials.results[np.argmin([r["loss"] for r in trials.results])][
            "model"
        ]
        # record the best parameter values found by fmin
        for item in learned_params:
            best_model_params[item] = learned_params[item]
        return best_model_params, best_model
```
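
For orientation, here is a minimal sketch of how this class might be driven. The `config` keys and the "min,max" string format follow the parsing code above, but the concrete range values are illustrative, and the data arguments (`train_data`, `class_weights`, etc.) are placeholders for the arrays produced by the repository's data-preparation step; this is not the tool's actual entry point.

```python
# Hypothetical driver for HyperparameterOptimisation.train_model.
# Range values are illustrative; the data arrays are assumed to come
# from the pipeline's preprocessing step (not shown here).
from optimise_hyperparameters import HyperparameterOptimisation

config = {
    "batch_size": "32,256",         # integer range as "min,max"
    "embedding_size": "32,512",
    "units": "32,512",
    "learning_rate": "0.0001,0.1",  # float range, searched on a log scale
    "dropout": "0.0,0.5",
    "spatial_dropout": "0.0,0.5",
    "recurrent_dropout": "0.0,0.5",
    "optimize_n_epochs": "5",       # epochs per trial
    "max_evals": "20",              # number of hyperopt trials
}

optimiser = HyperparameterOptimisation()
best_params, best_model = optimiser.train_model(
    config,
    reverse_dictionary,  # {index: tool_name} mapping from preprocessing
    train_data,
    train_labels,
    test_data,
    test_labels,
    tool_tr_samples,
    class_weights,
)
print("best hyperparameters:", best_params)
```

Note that `fmin` returns the raw sampled values, so the integer-valued entries in `best_params` (e.g. `batch_size`) come back from `hp.quniform` as floats and need casting before reuse.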
