I want to build a nonlinear regression model using Keras to predict a positive continuous variable. For the model below, how do I select the following hyperparameters?
Code
def dnn_reg():
    """Build and compile the three-hidden-layer regression network.

    The stack is Dense(40) with tanh, Dense(30) with relu and Dense(5) with
    relu. Every hidden layer ends with dropout, and the second and third
    apply batch normalization before their activation. The first layer
    expects 13 input features and the output is a single relu unit.

    Returns:
        The Sequential model, compiled with mean squared error loss and the
        adam optimizer.
    """
    stack = [
        # Hidden layer 1: 13 inputs -> 40 units.
        Dense(40, input_dim=13, kernel_initializer='normal'),
        Activation('tanh'),
        Dropout(0.2),
        # Hidden layer 2: 30 units, normalized before the nonlinearity.
        Dense(30, kernel_initializer='normal'),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.4),
        # Hidden layer 3: 5 units, normalized before the nonlinearity.
        Dense(5, kernel_initializer='normal'),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.4),
        # Output: one unit passed through relu.
        Dense(1, kernel_initializer='normal'),
        Activation('relu'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
I have considered random grid search, but I would rather use hyperopt, which I believe will be faster. I initially implemented the tuning with Hyperas (https://github.com/maxpumperla/hyperas), but Hyperas is not working with the latest version of Keras. I suspect that Keras is evolving quickly and it is difficult for the maintainer to keep Hyperas compatible, so I think using hyperopt directly will be a better option.
PS: I am new to Bayesian optimization for hyperparameter tuning and to hyperopt.
Another approach is to use hyperopt directly:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
import sys
# Placeholder datasets. f_nn reads X.shape and hands these to fit/predict,
# so they have to be replaced with real arrays before the search is run.
X, y = [], []
X_val, y_val = [], []

# The two architecture options; the three-layer option carries the extra
# hyperparameters for its third hidden layer.
_two_layer_net = {'layers': 'two'}
_three_layer_net = {
    'layers': 'three',
    'units3': hp.uniform('units3', 64, 1024),
    'dropout3': hp.uniform('dropout3', .25, .75),
}

# Search space handed to fmin; f_nn receives one sample of it as params.
space = {
    'choice': hp.choice('num_layers', [_two_layer_net, _three_layer_net]),
    'units1': hp.uniform('units1', 64, 1024),
    'units2': hp.uniform('units2', 64, 1024),
    'dropout1': hp.uniform('dropout1', .25, .75),
    'dropout2': hp.uniform('dropout2', .25, .75),
    'batch_size': hp.uniform('batch_size', 28, 128),
    'nb_epochs': 100,
    'optimizer': hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
    'activation': 'relu',
}
def f_nn(params):
    """Hyperopt objective: train one network and score it on validation data.

    Builds a two- or three-hidden-layer binary classifier from one sample of
    the search space, fits it on the module-level ``X``/``y`` and computes
    the ROC AUC of its predictions on ``X_val``/``y_val``.

    Args:
        params: Sampled hyperparameters. Reads 'units1', 'units2',
            'dropout1', 'dropout2', 'batch_size', 'nb_epochs', 'optimizer',
            'activation' and the nested 'choice' dict ('layers', plus
            'units3' and 'dropout3' when 'layers' is 'three').

    Returns:
        dict: 'loss' is the negated AUC, so that minimizing the loss
        maximizes the AUC, and 'status' is STATUS_OK.
    """
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation

    print('Params testing: ', params)

    # hp.uniform samples floats, but layer widths and the batch size have to
    # be integers, so cast them before handing them to Keras.
    units1 = int(params['units1'])
    units2 = int(params['units2'])
    batch_size = int(params['batch_size'])
    arch = params['choice']

    model = Sequential()
    model.add(Dense(units1, input_dim=X.shape[1]))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout1']))

    model.add(Dense(units2, kernel_initializer="glorot_uniform"))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout2']))

    # The third hidden layer exists only in the 'three' branch of the space.
    if arch['layers'] == 'three':
        model.add(Dense(int(arch['units3']),
                        kernel_initializer="glorot_uniform"))
        model.add(Activation(params['activation']))
        model.add(Dropout(arch['dropout3']))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=params['optimizer'])

    model.fit(X, y, epochs=params['nb_epochs'], batch_size=batch_size,
              verbose=0)

    # With a sigmoid output, predict already returns the class probability.
    pred_auc = model.predict(X_val, batch_size=128, verbose=0)
    auc = roc_auc_score(y_val, pred_auc)
    print('AUC:', auc)
    sys.stdout.flush()
    return {'loss': -auc, 'status': STATUS_OK}
# Run 50 evaluations of f_nn over space using the TPE suggestion algorithm.
# The Trials object is passed in so it stays available after the search.
trials = Trials()
best = fmin(
    fn=f_nn,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
)
print('best: ', best)
Source
I've had a lot of success with Hyperas. The following are the things I've learned to make it work.
1) Run it as a Python script from the terminal (not from an IPython notebook).
2) Make sure that you do not have any comments in your code (Hyperas doesn't like comments!).
3) Encapsulate your data and model in functions, as described in the Hyperas README.
Below is an example of a Hyperas script that worked for me (following the instructions above).
from __future__ import print_function
from hyperopt import Trials, STATUS_OK, tpe
from keras.datasets import mnist
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils
import numpy as np
from hyperas import optim
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD , Adam
import tensorflow as tf
from hyperas.distributions import choice, uniform, conditional
__author__ = 'JOnathan Hilgart'
def data():
    """
    Data providing function:

    Loads training_x.npy and training_y.npy from the working directory and
    splits both arrays by row. The first 15000 rows of each form the
    training set and all remaining rows form the test set.

    This function is kept separate from model() so that hyperopt
    does not reload the data for each evaluation run.
    """
    import numpy as np
    split_at = 15000
    features = np.load('training_x.npy')
    targets = np.load('training_y.npy')
    x_train, x_test = features[:split_at, :], features[split_at:, :]
    y_train, y_test = targets[:split_at, :], targets[split_at:, :]
    return x_train, y_train, x_test, y_test
def model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Hyperas template that builds, trains and scores one multilayer
    perceptron. Each expression wrapped in double curly brackets is a
    hyperparameter placeholder giving its search distribution (choice or
    uniform). The placeholders cover the layer widths, dropout rates,
    activations, loss, optimizer and batch size.

    The network takes 2 input features, has four Dense hidden layers each
    followed by dropout, and ends in a 9-unit output layer. It is fit for
    50 epochs on (x_train, y_train), and (x_test, y_test) is used both as
    the validation data during fitting and for the final evaluation.

    NOTE(review): 126 in the width choices is possibly meant to be 128;
    confirm before changing, since it alters the search space.

    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized;
          here it is the negated accuracy on (x_test, y_test)
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    model_mlp = Sequential()
    model_mlp.add(Dense({{choice([32, 64,126, 256, 512, 1024])}},
                        activation='relu', input_shape= (2,)))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
    model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
    model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
    model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense(9))
    model_mlp.add(Activation({{choice(['softmax','linear'])}}))
    model_mlp.compile(loss={{choice(['categorical_crossentropy','mse'])}}, metrics=['accuracy'],
                      optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})
    model_mlp.fit(x_train, y_train,
                  batch_size={{choice([16, 32, 64, 128])}},
                  epochs=50,
                  verbose=2,
                  validation_data=(x_test, y_test))
    score, acc = model_mlp.evaluate(x_test, y_test, verbose=0)
    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model_mlp}
enter code here
if __name__ == '__main__':
    # Script entry point: run the Hyperas search, then evaluate and report
    # the best model that was found.
    # K is imported here because the file-level imports do not bind it.
    from keras import backend as K
    import gc

    gc.collect()

    # The search and the final evaluation both run inside the backend session.
    with K.get_session():
        best_run, best_model = optim.minimize(model=model,
                                              data=data,
                                              algo=tpe.suggest,
                                              max_evals=2,
                                              trials=Trials())
        X_train, Y_train, X_test, Y_test = data()
        print("Evalutation of best performing model:")
        print(best_model.evaluate(X_test, Y_test))
        print("Best performing model chosen hyper-parameters:")
        print(best_run)
The failure is caused by the order of garbage collection at exit: if Python collects the session first, the program exits successfully; if Python collects the SWIG memory (tf_session) first, the program exits with a failure.
You can force Python to delete the session with:
del session
Or, if you are using Keras and cannot get the session instance, you can run the following code at the end of your script:
import gc; gc.collect()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With