I want to load a pretrained model and continue training with this model.
Here is a standard code snippet to save a model (pretrain.py):
tf.reset_default_graph()

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))

    print("Optimization Finished!")

    saver.save(sess, 'model')
    print("Model saved")
Now load the pretrained model and continue training with it (continue.py):

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

with tf.Session() as sess:
    saver = tf.train.import_meta_graph('model.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))  # search for the checkpoint file
    graph = tf.get_default_graph()

    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))
But it shows the following error:

tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value h1
     [[Node: h1/read = Identity[T=DT_FLOAT, _class=["loc:@h1"], _device="/job:localhost/replica:0/task:0/cpu:0"](h1)]]
Here are my questions:
1. Many TensorFlow tutorials use get_tensor_by_name() to load weights and biases. Here, I don't want to fetch individual weights and biases; I just want to load the model and continue training with it.
2. The error says a tensor is uninitialized, but I think saver.restore(sess, tf.train.latest_checkpoint('./')) should have loaded the weights and biases successfully.
Here is multilayer_perceptron(), in case it helps to illustrate my questions.
def multilayer_perceptron(x, n_input, n_classes, name):
    n_hidden_1 = 512
    n_hidden_2 = 256
    n_hidden_3 = 128

    # Store layers weight & bias
    weights = {
        'h1' : tf.get_variable(name[0], initializer=tf.random_normal([n_input, n_hidden_1])),
        'h2' : tf.get_variable(name[2], initializer=tf.random_normal([n_hidden_1, n_hidden_2])),
        'h3' : tf.get_variable(name[4], initializer=tf.random_normal([n_hidden_2, n_hidden_3])),
        'w_o': tf.get_variable(name[6], initializer=tf.random_normal([n_hidden_3, n_classes]))
    }
    biases = {
        'b1' : tf.get_variable(name[1], initializer=tf.random_normal([n_hidden_1])),
        'b2' : tf.get_variable(name[3], initializer=tf.random_normal([n_hidden_2])),
        'b3' : tf.get_variable(name[5], initializer=tf.random_normal([n_hidden_3])),
        'b_o': tf.get_variable(name[7], initializer=tf.random_normal([n_classes]))
    }

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    layer_1 = tf.layers.dropout(layer_1, rate=0.5, training=True)  # note: training=True keeps dropout active even at inference
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    layer_2 = tf.layers.dropout(layer_2, rate=0.3, training=True)
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
    layer_3 = tf.layers.dropout(layer_3, rate=0.1, training=True)
    out_layer = tf.matmul(layer_3, weights['w_o']) + biases['b_o']
    return out_layer
In the Keras API, to continue training a loaded model with checkpoints, simply rerun model.fit with the callback still passed. Note that this overwrites the currently saved best model, so change the checkpoint file path if that is undesired.
ModelCheckpoint is a callback that saves the Keras model or model weights at some frequency. It is used in conjunction with model.fit() to save the model or weights to a checkpoint file at some interval, so that the model or weights can be loaded later to continue training from the saved state.
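A minimal sketch of that pattern (the data names x_train, y_train and the path best_model.h5 are hypothetical, not from the question's code):

import tensorflow as tf

# Save the best model seen so far; point this at a new path if you
# don't want to overwrite the previously saved best model.
checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True)

# Resume: load the saved model and keep fitting, with the callback passed again.
model = tf.keras.models.load_model('best_model.h5')
model.fit(x_train, y_train, epochs=10, callbacks=[checkpoint])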
I think I found the answer. The key is that you must not call tf.train.import_meta_graph() when you have already rebuilt the same graph in code: importing the meta graph most likely loads a second copy of every node alongside the one just built, so saver.restore() fills in that imported copy while the train_op being run still belongs to the rebuilt graph, whose variables were never initialized. Instead, create a plain tf.train.Saver() against the rebuilt graph and let saver.restore(sess, tf.train.latest_checkpoint('./')) load the weights into it. Here is my code.
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

mlp_layer_name = ['h1', 'b1', 'h2', 'b2', 'h3', 'b3', 'w_o', 'b_o']
logits = multilayer_perceptron(X, n_input, n_classes, mlp_layer_name)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name='loss_op')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op, name='train_op')

with tf.Session() as sess:
    saver = tf.train.Saver()  # build the Saver against the freshly rebuilt graph
    saver.restore(sess, tf.train.latest_checkpoint('./'))  # search for the checkpoint file
    graph = tf.get_default_graph()

    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = next(train_generator)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        print("Epoch: {:3d}, cost = {:.6f}".format(epoch+1, avg_cost))