I want to load a pretrained model and continue training with this model.
Here is a standard code snippet to save a model (pretrain.py):
tf.reset_default_graph()

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))

    print("Optimization Finished!")

    saver.save(sess, 'model')
    print("Model saved")
Now load the pretrained model and continue training with it (continue.py):

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

with tf.Session() as sess:
    saver = tf.train.import_meta_graph('model.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))  # search for the checkpoint file
    graph = tf.get_default_graph()

    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))
But it shows the following error:

tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value h1
     [[Node: h1/read = Identity[T=DT_FLOAT, _class=["loc:@h1"], _device="/job:localhost/replica:0/task:0/cpu:0"](h1)]]
Here are my questions:
1. Many TensorFlow tutorials use get_tensor_by_name() to load weights and biases. Here, I don't want to fetch individual weights and biases; I just want to load the model and continue training with it.
2. The error says a tensor is uninitialized, but I think saver.restore(sess, tf.train.latest_checkpoint('./')) should have loaded the weights and biases successfully.
Here is multilayer_perceptron(), in case it helps to illustrate my questions.
def multilayer_perceptron(x, n_input, n_classes, name):
    n_hidden_1 = 512
    n_hidden_2 = 256
    n_hidden_3 = 128

    # Store layers weight & bias
    weights = {
        'h1' : tf.get_variable(name[0], initializer=tf.random_normal([n_input, n_hidden_1])),
        'h2' : tf.get_variable(name[2], initializer=tf.random_normal([n_hidden_1, n_hidden_2])),
        'h3' : tf.get_variable(name[4], initializer=tf.random_normal([n_hidden_2, n_hidden_3])),
        'w_o': tf.get_variable(name[6], initializer=tf.random_normal([n_hidden_3, n_classes]))
    }
    biases = {
        'b1' : tf.get_variable(name[1], initializer=tf.random_normal([n_hidden_1])),
        'b2' : tf.get_variable(name[3], initializer=tf.random_normal([n_hidden_2])),
        'b3' : tf.get_variable(name[5], initializer=tf.random_normal([n_hidden_3])),
        'b_o': tf.get_variable(name[7], initializer=tf.random_normal([n_classes]))
    }

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    layer_1 = tf.layers.dropout(layer_1, rate=0.5, training=True)  # note: training=True keeps dropout active even at inference
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    layer_2 = tf.layers.dropout(layer_2, rate=0.3, training=True)
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
    layer_3 = tf.layers.dropout(layer_3, rate=0.1, training=True)
    out_layer = tf.matmul(layer_3, weights['w_o']) + biases['b_o']
    return out_layer
In the Keras API, to continue training a loaded model with checkpoints, simply rerun model.fit with the callback still passed. Note that this overwrites the currently saved best model, so change the checkpoint file path if that is undesired.
ModelCheckpoint is a callback that saves the Keras model or model weights at some frequency. It is used in conjunction with model.fit() to save the model or weights to a checkpoint file at some interval, so that the model or weights can be loaded later to continue training from the saved state.
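A minimal sketch of that pattern (the data names x_train, y_train and the path best_model.h5 are hypothetical, not from the question's code):

import tensorflow as tf

# Save the best model seen so far; point this at a new path if you
# don't want to overwrite the previously saved best model.
checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True)

# Resume: load the saved model and keep fitting, with the callback passed again.
model = tf.keras.models.load_model('best_model.h5')
model.fit(x_train, y_train, epochs=10, callbacks=[checkpoint])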
I think I found the answer. The key is that you must not call tf.train.import_meta_graph() when you have already rebuilt the same graph in code: importing the meta graph most likely loads a second copy of every node alongside the one just built, so saver.restore() fills in that imported copy while the train_op being run still belongs to the rebuilt graph, whose variables were never initialized. Instead, create a plain tf.train.Saver() against the rebuilt graph and let saver.restore(sess, tf.train.latest_checkpoint('./')) load the weights into it. Here is my code.
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

with tf.Session() as sess:
    saver = tf.train.Saver()  # build the Saver against the freshly rebuilt graph
    saver.restore(sess, tf.train.latest_checkpoint('./'))  # search for the checkpoint file
    graph = tf.get_default_graph()

    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))