Logo Questions Linux Laravel Mysql Ubuntu Git Menu

how to load a tensorflow model and continue training

I want to load a pretrained model and continue training with this model.
Standard code snippet to save a model (pretrain.py):


# tf Graph input: features and labels, both with a free batch dimension.
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

# Variable names handed to multilayer_perceptron(). The checkpoint stores the
# variables under these names, so any script that restores the checkpoint has
# to build the network with the same list.
mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

# Mean softmax cross-entropy over the batch, minimized with Adam.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

# Built after optimizer.minimize() so that the variables the optimizer adds
# already exist when the Saver collects what to checkpoint.
saver = tf.train.Saver()

with tf.Session() as sess:
    # Every variable has to be initialized before the first training step;
    # without this, sess.run(train_op) fails with FailedPreconditionError.
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for _ in range(total_batch):
            batch_x, batch_y = next(train_generator)

            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Accumulate the mean loss over the epoch
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))

    print("Optimization Finished!")
    # Writes the checkpoint files with the prefix 'model' into the working
    # directory (the question later loads 'model.meta' from there).
    saver.save(sess, 'model')
    print("Model saved")

Now load the pretrained model and continue training with it (continue.py).

# continue.py as posted in the question. This version rebuilds the graph in
# code AND imports the saved meta graph; it is the one that raises the
# FailedPreconditionError quoted in the question.

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Same variable names as in pretrain.py.
mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

with tf.Session() as sess:
    # NOTE(review): the graph has already been built above, so
    # import_meta_graph() presumably adds a second copy of the saved graph and
    # returns a Saver bound to that copy. The variables created above (e.g.
    # h1) would then be neither restored nor initialized, which matches the
    # reported "uninitialized value h1" error -- confirm against the TF docs.
    saver = tf.train.import_meta_graph('model.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./')) # search for checkpoint file

    # NOTE: `graph` is assigned here but never used in this snippet.
    graph = tf.get_default_graph()

    # Same training loop as pretrain.py; it runs the train_op/loss_op built
    # above in this script.
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)

            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))

But it shows the following error:

tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value h1 [[Node: h1/read = Identity[T=DT_FLOAT, _class=["loc:@h1"], _device="/job:localhost/replica:0/task:0/cpu:0"](h1)]]

Here are my questions:
1. Many TensorFlow tutorials use get_tensor_by_name() to load weights and biases. Here, I don't want to fetch the weights and biases individually. I just want to load the model and continue training with it.
2. The error says that a tensor is uninitialized. However, I think saver.restore(sess, tf.train.latest_checkpoint('./')) should have loaded the weights and biases successfully.
Here is multilayer_perceptron(), in case it helps to illustrate my questions.

def multilayer_perceptron(x, n_input, n_classes, name, training=True):
    """Build a three-hidden-layer perceptron and return its output logits.

    The hidden layers have 512, 256 and 128 units. Each one is followed by a
    ReLU and a dropout layer (rates 0.5, 0.3 and 0.1 respectively).

    Args:
        x: Input tensor; it is matrix-multiplied with a
            [n_input, 512] weight matrix.
        n_input: Number of input features per example.
        n_classes: Number of output units.
        name: Sequence of eight variable names in the order
            [h1, b1, h2, b2, h3, b3, w_o, b_o], i.e. weights at even
            indices and biases at odd indices. These are the names under
            which the variables are created via tf.get_variable().
        training: Forwarded to tf.layers.dropout() as its `training`
            argument. Defaults to True, the value that used to be
            hard-coded; pass False to switch dropout off.

    Returns:
        The output-layer tensor, with no activation applied.
    """
    n_hidden_1 = 512
    n_hidden_2 = 256
    n_hidden_3 = 128

    # Store layers weight & bias. Creation order is kept as before: all
    # weights first, then all biases.
    weights = {
        'h1' : tf.get_variable(name[0], initializer=tf.random_normal([n_input, n_hidden_1])),
        'h2' : tf.get_variable(name[2], initializer=tf.random_normal([n_hidden_1, n_hidden_2])),
        'h3' : tf.get_variable(name[4], initializer=tf.random_normal([n_hidden_2, n_hidden_3])),
        'w_o': tf.get_variable(name[6], initializer=tf.random_normal([n_hidden_3, n_classes])),
    }
    biases = {
        'b1' : tf.get_variable(name[1], initializer=tf.random_normal([n_hidden_1])),
        'b2' : tf.get_variable(name[3], initializer=tf.random_normal([n_hidden_2])),
        'b3' : tf.get_variable(name[5], initializer=tf.random_normal([n_hidden_3])),
        'b_o': tf.get_variable(name[7], initializer=tf.random_normal([n_classes])),
    }

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x      , weights['h1']), biases['b1']))
    layer_1 = tf.layers.dropout(layer_1, rate=0.5, training=training)
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    layer_2 = tf.layers.dropout(layer_2, rate=0.3, training=training)
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
    layer_3 = tf.layers.dropout(layer_3, rate=0.1, training=training)

    # Linear output layer: raw logits, the loss applies the softmax.
    out_layer = tf.matmul(layer_3, weights['w_o']) + biases['b_o']
    return out_layer
like image 467
Lion Lai Avatar asked Dec 08 '17 06:12

Lion Lai

People also ask

How do you continue training in TensorFlow?

To continue training a loaded model with checkpoints, we simply rerun the model.fit function with the callback still passed. This, however, overwrites the currently saved best model, so make sure to change the checkpoint file path if this is undesired.

How do I save my model in training keras?

A callback that saves the Keras model or model weights at some frequency. The ModelCheckpoint callback is used in conjunction with training using model.fit() to save a model or weights (in a checkpoint file) at some interval, so the model or weights can be loaded later to continue the training from the saved state.

1 Answers

I think I found the answer. The key is that there is no need to call tf.train.import_meta_graph() when the graph has already been rebuilt in code: create a tf.train.Saver() and call saver.restore(sess, tf.train.latest_checkpoint('./')) on it. Here is my code.

# Rebuild exactly the graph that pretrain.py built, so that every variable in
# the checkpoint has a same-named variable here to be restored into.

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

with tf.Session() as sess:
    # A plain Saver over the variables defined above; no import_meta_graph(),
    # because the graph already exists in this script.
    saver = tf.train.Saver()

    # latest_checkpoint() returns None when the directory holds no checkpoint;
    # fail here with a clear message instead of inside saver.restore().
    checkpoint_path = tf.train.latest_checkpoint('./')  # search for checkpoint file
    if checkpoint_path is None:
        raise IOError("No checkpoint found in './'; run pretrain.py first.")

    # Restoring assigns the saved values to the variables, so no separate
    # initializer run is needed before training continues.
    saver.restore(sess, checkpoint_path)

    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for _ in range(total_batch):
            batch_x, batch_y = next(train_generator)

            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Accumulate the mean loss over the epoch
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))
like image 113
Lion Lai Avatar answered Nov 03 '22 01:11

Lion Lai