How to save Scikit-Learn-Keras Model into a Persistence File (pickle/hd5/json/yaml)

Tags:

I have the following code, using Keras Scikit-Learn Wrapper:

from keras.models import Sequential
from sklearn import datasets
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
import pickle
import numpy as np
import json


def classifier(X, y):
    """
    Build an unfitted KerasClassifier sized for the feature matrix X.

    X must unpack into (rows, columns); the column count is used as the
    input dimension of the first Dense layer. y is accepted for symmetry
    with the caller but is not used when building the model.

    Returns the KerasClassifier wrapper (not yet trained).
    """
    _, NOF_COL = X.shape

    def create_model():
        # create model
        model = Sequential()
        model.add(Dense(12, input_dim=NOF_COL, init='uniform', activation='relu'))
        model.add(Dense(6, init='uniform', activation='relu'))
        model.add(Dense(1, init='uniform', activation='sigmoid'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    # fix the NumPy random seed before building the wrapper
    seed = 7
    np.random.seed(seed)
    model = KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10, verbose=0)
    return model


def main():
    """
    Train the wrapped classifier on a scaled iris split, then try to persist it.

    The pickle section below is the part the question is about: it is kept
    as-is on purpose so the failure can be reproduced.
    """

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    X = preprocessing.scale(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    model_tt = classifier(X_train, y_train)
    model_tt.fit(X_train, y_train)

    #--------------------------------------------------
    # This fails
    #--------------------------------------------------
    # pickle.dump raises PicklingError: the wrapper references the nested
    # create_model function, which pickle cannot look up as
    # __main__.create_model.
    filename = 'finalized_model.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(model_tt, model_file)
    # load the model from disk
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    result = loaded_model.score(X_test, y_test)
    print(result)

    #--------------------------------------------------
    # This also fails
    #--------------------------------------------------
    # from keras.models import load_model
    # model_tt.save('test_model.h5')

    #--------------------------------------------------
    # This works OK
    #--------------------------------------------------
    # print model_tt.score(X_test, y_test)
    # print model_tt.predict_proba(X_test)
    # print model_tt.predict(X_test)


# Output of predict_proba
# 2nd column is the probability that the prediction is 1
# this value is used as final score, which can be used
# with other method as comparison
# [   [ 0.25311464  0.74688536]
#     [ 0.84401423  0.15598579]
#     [ 0.96047372  0.03952631]
#     ...,
#     [ 0.25518912  0.74481088]
#     [ 0.91467732  0.08532269]
#     [ 0.25473493  0.74526507]]

# Output of predict
# [[1]
# [0]
# [0]
# ...,
# [1]
# [0]
# [1]]


if __name__ == '__main__':
    main()

As noted in the code, it fails at this line:

pickle.dump(model_tt, open(filename, 'wb'))

With this error:

pickle.PicklingError: Can't pickle <function create_model at 0x101c09320>: it's not found as __main__.create_model

How can I get around it?

826

asked Nov 03 '16 07:11

neversaint

2 Answers

Edit 1 : Original answer about saving model

With HDF5 :

# saving model: the architecture goes to JSON, the weights to HDF5
json_model = model_tt.model.to_json()
with open('model_architecture.json', 'w') as json_file:
    json_file.write(json_model)
# saving weights
model_tt.model.save_weights('model_weights.h5', overwrite=True)


# loading model
from keras.models import model_from_json

with open('model_architecture.json') as json_file:
    model = model_from_json(json_file.read())
model.load_weights('model_weights.h5')

# dont forget to compile your model
model.compile(loss='binary_crossentropy', optimizer='adam')

Edit 2 : full code example with iris dataset

# Train model and make predictions
import numpy
import pandas
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.utils import np_utils
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load dataset
iris = datasets.load_iris()
X, Y, labels = iris.data, iris.target, iris.target_names
X = preprocessing.scale(X)

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# convert integers to dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(encoded_Y)


def build_model():
    """Create and compile the two-layer network (4 inputs, 3 outputs)."""
    # create model
    model = Sequential()
    model.add(Dense(4, input_dim=4, init='normal', activation='relu'))
    model.add(Dense(3, init='normal', activation='sigmoid'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def save_model(model):
    """Write the architecture to model_architecture.json and the weights to model_weights.h5."""
    # saving model
    json_model = model.to_json()
    with open('model_architecture.json', 'w') as json_file:
        json_file.write(json_model)
    # saving weights
    model.save_weights('model_weights.h5', overwrite=True)


def load_model():
    """Rebuild the model from the saved JSON architecture and HDF5 weights, then compile it."""
    # loading model
    with open('model_architecture.json') as json_file:
        model = model_from_json(json_file.read())
    model.load_weights('model_weights.h5')
    # the loaded model is compiled again before being returned
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model


X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

# build
model = build_model()
model.fit(X_train, Y_train, nb_epoch=200, batch_size=5, verbose=0)

# save
save_model(model)

# load
model = load_model()

# predictions
predictions = model.predict_classes(X_test, verbose=0)
print(predictions)
# reverse encoding
for pred in predictions:
    print(labels[pred])

Please note that I used Keras only, not the wrapper; the wrapper only adds complexity to something simple. Also, the code is deliberately not factored so that you can see the whole picture.

Also, you said you want to output 1 or 0. That is not possible with this dataset because it has 3 output dimensions and classes (Iris-setosa, Iris-versicolor, Iris-virginica). If you had only 2 classes, then your output dimension and classes would be 0 or 1, using a sigmoid output function.

answered Sep 22 '22 19:09

Gaarv

Just adding to gaarv's answer - If you don't require the separation between the model structure (model.to_json()) and the weights (model.save_weights()), you can use one of the following:

Use the built-in `keras.models.save_model` and `keras.models.load_model`, which store everything together in an HDF5 file.
Use pickle to serialize the Model object (or any class that contains references to it) into a file, over the network, or wherever you need.
Unfortunately, Keras doesn't support pickle by default. You can use my patchy solution that adds this missing feature. Working code is here: http://zachmoshe.com/2017/04/03/pickling-keras-models.html

answered Sep 22 '22 19:09

Zach Moshe

Related questions
                            
                                How to redirect to external URL in Django?
                            
                                How to enable intellisense for python in Visual Studio Code with anaconda3?
                            
                                Running Flask dev server in Python 3.6 raises ImportError for SocketServer and ForkingMixIn
                            
                                Is boto3 client thread-safe
                            
                                Label Smoothing in PyTorch
                            
                                Py2exe lxml woes
                            
                                How to iterate over columns of a matrix?
                            
                                Importing modules in Python and __init__.py
                            
                                Using additional command line arguments with gunicorn
                            
                                scipy.special import issue
                            
                                Spaces in Python Dictionary Keys
                            
                                Fill in missing pandas data with previous non-missing value, grouped by key
                            
                                Preventing PyQt to silence exceptions occurring in slots
                            
                                Rounding to significant figures in numpy
                            
                                How to access a sharepoint site via the REST API in Python?
                            
                                python: Is there a downside to using faulthandler?
                            
                                Sorting the order of bars in pandas/matplotlib bar plots
                            
                                AttributeError: '_io.TextIOWrapper' object has no attribute 'next' python
                            
                                Sort by column within multi index level in pandas
                            
                                using pandas.read_csv to read certain columns

Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!

Donate Us With

How to save Scikit-Learn-Keras Model into a Persistence File (pickle/hd5/json/yaml)

Tags:

python

pickle

keras

persistence

scikit-learn

neversaint

People also ask

2 Answers

Gaarv

Zach Moshe

Recent Activity

Donate For Us