comparison optimise_hyperparameters.py @ 2:50753817983a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
| author | bgruening |
|---|---|
| date | Sat, 09 May 2020 09:38:04 +0000 |
| parents | 22ebbac136c7 |
| children | 98bc44d17561 |
| 1:275e98795e99 | 2:50753817983a |
|---|---|
| 15 import utils | 15 import utils |
| 16 | 16 |
| 17 | 17 |
| 18 class HyperparameterOptimisation: | 18 class HyperparameterOptimisation: |
| 19 | 19 |
| 20 @classmethod | |
| 21 def __init__(self): | 20 def __init__(self): |
| 22 """ Init method. """ | 21 """ Init method. """ |
| 23 | 22 |
| 24 @classmethod | 23 def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, l_tool_tr_samples, class_weights): |
| 25 def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, class_weights): | |
| 26 """ | 24 """ |
| 27 Train a model and report accuracy | 25 Train a model and report accuracy |
| 28 """ | 26 """ |
| 29 l_recurrent_activations = config["activation_recurrent"].split(",") | |
| 30 l_output_activations = config["activation_output"].split(",") | |
| 31 | |
| 32 # convert items to integer | 27 # convert items to integer |
| 33 l_batch_size = list(map(int, config["batch_size"].split(","))) | 28 l_batch_size = list(map(int, config["batch_size"].split(","))) |
| 34 l_embedding_size = list(map(int, config["embedding_size"].split(","))) | 29 l_embedding_size = list(map(int, config["embedding_size"].split(","))) |
| 35 l_units = list(map(int, config["units"].split(","))) | 30 l_units = list(map(int, config["units"].split(","))) |
| 36 | 31 |
| 39 l_dropout = list(map(float, config["dropout"].split(","))) | 34 l_dropout = list(map(float, config["dropout"].split(","))) |
| 40 l_spatial_dropout = list(map(float, config["spatial_dropout"].split(","))) | 35 l_spatial_dropout = list(map(float, config["spatial_dropout"].split(","))) |
| 41 l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(","))) | 36 l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(","))) |
| 42 | 37 |
| 43 optimize_n_epochs = int(config["optimize_n_epochs"]) | 38 optimize_n_epochs = int(config["optimize_n_epochs"]) |
| 44 validation_split = float(config["validation_share"]) | |
| 45 | 39 |
| 46 # get dimensions | 40 # get dimensions |
| 47 dimensions = len(reverse_dictionary) + 1 | 41 dimensions = len(reverse_dictionary) + 1 |
| 48 best_model_params = dict() | 42 best_model_params = dict() |
| 49 early_stopping = EarlyStopping(monitor='val_loss', mode='min', min_delta=1e-4, verbose=1, patience=1) | 43 early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=1e-1, restore_best_weights=True) |
| 50 | 44 |
| 51 # specify the search space for finding the best combination of parameters using Bayesian optimisation | 45 # specify the search space for finding the best combination of parameters using Bayesian optimisation |
| 52 params = { | 46 params = { |
| 53 "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1), | 47 "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1), |
| 54 "units": hp.quniform("units", l_units[0], l_units[1], 1), | 48 "units": hp.quniform("units", l_units[0], l_units[1], 1), |
| 55 "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1), | 49 "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1), |
| 56 "activation_recurrent": hp.choice("activation_recurrent", l_recurrent_activations), | |
| 57 "activation_output": hp.choice("activation_output", l_output_activations), | |
| 58 "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])), | 50 "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])), |
| 59 "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]), | 51 "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]), |
| 60 "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]), | 52 "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]), |
| 61 "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1]) | 53 "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1]) |
| 62 } | 54 } |
| 63 | 55 |
| 64 def create_model(params): | 56 def create_model(params): |
| 65 model = Sequential() | 57 model = Sequential() |
| 66 model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True)) | 58 model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True)) |
| 67 model.add(SpatialDropout1D(params["spatial_dropout"])) | 59 model.add(SpatialDropout1D(params["spatial_dropout"])) |
| 68 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation=params["activation_recurrent"])) | 60 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation="elu")) |
| 69 model.add(Dropout(params["dropout"])) | 61 model.add(Dropout(params["dropout"])) |
| 70 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation=params["activation_recurrent"])) | 62 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation="elu")) |
| 71 model.add(Dropout(params["dropout"])) | 63 model.add(Dropout(params["dropout"])) |
| 72 model.add(Dense(dimensions, activation=params["activation_output"])) | 64 model.add(Dense(2 * dimensions, activation="sigmoid")) |
| 73 optimizer_rms = RMSprop(lr=params["learning_rate"]) | 65 optimizer_rms = RMSprop(lr=params["learning_rate"]) |
| | 66 batch_size = int(params["batch_size"]) |
| 74 model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms) | 67 model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms) |
| 75 model_fit = model.fit( | 68 print(model.summary()) |
| 76 train_data, | 69 model_fit = model.fit_generator( |
| 77 train_labels, | 70 utils.balanced_sample_generator( |
| 78 batch_size=int(params["batch_size"]), | 71 train_data, |
| | 72 train_labels, |
| | 73 batch_size, |
| | 74 l_tool_tr_samples |
| | 75 ), |
| | 76 steps_per_epoch=len(train_data) // batch_size, |
| 79 epochs=optimize_n_epochs, | 77 epochs=optimize_n_epochs, |
| 80 shuffle="batch", | 78 callbacks=[early_stopping], |
| | 79 validation_data=(test_data, test_labels), |
| 81 verbose=2, | 80 verbose=2, |
| 82 validation_split=validation_split, | 81 shuffle=True |
| 83 callbacks=[early_stopping] | |
| 84 ) | 82 ) |
| 85 return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK} | 83 return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK, 'model': model} |
| 86 # minimize the objective function using the set of parameters above4 | 84 # minimize the objective function using the set of parameters above |
| 87 trials = Trials() | 85 trials = Trials() |
| 88 learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"])) | 86 learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"])) |
| 89 print(learned_params) | 87 best_model = trials.results[np.argmin([r['loss'] for r in trials.results])]['model'] |
| 90 # set the best params with respective values | 88 # set the best params with respective values |
| 91 for item in learned_params: | 89 for item in learned_params: |
| 92 item_val = learned_params[item] | 90 item_val = learned_params[item] |
| 93 if item == 'activation_output': | 91 best_model_params[item] = item_val |
| 94 best_model_params[item] = l_output_activations[item_val] | 92 return best_model_params, best_model |
| 95 elif item == 'activation_recurrent': | |
| 96 best_model_params[item] = l_recurrent_activations[item_val] | |
| 97 else: | |
| 98 best_model_params[item] = item_val | |
| 99 return best_model_params | |
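
The central change in this revision is swapping `model.fit` with `validation_split` for `model.fit_generator` fed by `utils.balanced_sample_generator` and an explicit `validation_data` pair. The generator itself is not part of this diff; the following is a hypothetical sketch of what such a balanced generator might look like. The function name matches the call above, but the body and the shape of `l_tool_tr_samples` are assumptions.

```python
import random

def balanced_sample_generator(train_data, train_labels, batch_size, l_tool_tr_samples):
    """Yield batches forever, sampling tools uniformly so rare tools are not drowned out."""
    # l_tool_tr_samples: assumed mapping of tool id -> indices of that tool's training rows
    tool_ids = list(l_tool_tr_samples.keys())
    while True:  # fit_generator expects an endless generator; steps_per_epoch bounds each epoch
        indices = [random.choice(l_tool_tr_samples[random.choice(tool_ids)])
                   for _ in range(batch_size)]
        yield train_data[indices], train_labels[indices]
```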
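`utils.weighted_loss(class_weights)` is likewise only referenced here. Given the sigmoid output layer, one plausible shape for that factory is a per-class weighted binary cross-entropy; this is a sketch under that assumption, not the repository's actual implementation.

```python
from keras import backend as K

def weighted_loss(class_weights):
    # class_weights: assumed dict of class index -> weight, ordered by class index
    weights = K.constant([class_weights[k] for k in sorted(class_weights)])
    def loss(y_true, y_pred):
        # binary cross-entropy per output unit, scaled by its class weight
        return K.mean(K.binary_crossentropy(y_true, y_pred) * weights, axis=-1)
    return loss
```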
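The other change worth noting is how the best model is recovered: each `create_model` call now returns its fitted model inside the hyperopt result dict, and the lowest-loss trial is picked from `trials.results` after `fmin` returns. A minimal, self-contained illustration of that pattern, with a toy objective standing in for `create_model`:

```python
import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

def objective(params):
    loss = (params["x"] - 2.0) ** 2  # toy loss; the real objective trains a Keras model
    # extra keys in the result dict (here a stand-in "model") are preserved in trials.results
    return {"loss": loss, "status": STATUS_OK, "model": {"x": params["x"]}}

trials = Trials()
learned_params = fmin(objective, {"x": hp.uniform("x", -5.0, 5.0)},
                      algo=tpe.suggest, max_evals=50, trials=trials)

# same retrieval as the diff: argmin over the recorded losses
best_model = trials.results[np.argmin([r["loss"] for r in trials.results])]["model"]
print(learned_params, best_model)
```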
