In the code below l2 surprisingly returns the same value as l1, but since the optimizer is being requested in the list before l2, I expected the loss to be the new loss after training. Can I not request multiple values at the same time from the graph and expect consistent output?
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=[None, 10])
y = tf.placeholder(tf.float32, shape=[None, 2])
weight = tf.Variable(tf.random_uniform((10, 2), dtype=tf.float32))
loss = tf.nn.sigmoid_cross_entropy_with_logits(tf.matmul(x, weight), y)
optimizer = tf.train.AdamOptimizer(0.1).minimize(loss)
with tf.Session() as sess:
tf.initialize_all_variables().run()
X = np.random.rand(1, 10)
Y = np.array([[0, 1]])
# Evaluate loss before running training step
l1 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
print(l1) # 3.32393
# Running the training step
_, l2 = sess.run([optimizer, loss], feed_dict={x: X, y: Y})
print(l2[0][0]) # 3.32393 -- didn't change?
# Evaluate loss again after training step as sanity check
l3 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
print(l3) # 2.71041
No - the order in which you request them in the list has no effect on the evaluation order. For side-effect-having operations such as the optimizer, if you want to guarantee a specific ordering, you need to enforce it using with_dependencies
or similar control-flow constructs. In general, ignoring side-effects, TensorFlow will return results to you by grabbing the node from the graph as soon as it's computed - and, obviously, the loss is computed before the optimizer, since the optimizer requires the loss as one of its input. (Remember that 'loss' is not a variable; it's a tensor; so it's not actually affected by the optimizer step.)
sess.run([loss, optimizer], ...)
and
sess.run([optimizer, loss], ...)
are equivalent.
As Dave points out, the order of arguments to Session.run()
has no effect on the order of evaluation, and the loss
tensor in your example does not have a dependency on the optimizer
op. To add a dependency, you could use tf.control_dependencies()
to add an explicit dependency on the optimizer running before fetching the loss:
with tf.control_dependencies([optimizer]):
loss_after_optimizer = tf.identity(loss)
_, l2 = sess.run([optimizer, loss_after_optimizer], feed_dict={x: X, y: Y})
I've tested logistic regression implemented in tensorflow with three ways of session.run:
all together
res1, res2, res3 = sess.run([op1, op2, op3])
separately
res1 = sess.run(op1)
res2 = sess.run(op2)
res3 = sess.run(op3)
with dependencies
with tf.control_dependencies([op1]):
op2_after = tf.identity(op1)
op3_after = tf.identity(op1)
res1,res2,res3 = session.run([op1, op2_after, op3_after])
set batch size as 10000, the result is:
1: 0.05+ secs < 2: 0.11+ secs < 3: 0.25+ secs
The main difference between 1 and 3 is only one mini-batch. It may not worth it to use 3 instead of 1.
Here is the test code (it is an LR example written by someone else...).
Here is the data
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 2 13:38:14 2017
@author: inse7en
"""
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import time
pickle_file = '/Users/inse7en/Downloads/notMNIST.pickle'
with open(pickle_file, 'rb') as f:
save = pickle.load(f)
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10
def reformat(dataset, labels):
dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
# Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
# This is to expedite the process
train_subset = 10000
# This is a good beta value to start with
beta = 0.01
graph = tf.Graph()
with graph.as_default():
# Input data.
# They're all constants.
tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
tf_train_labels = tf.constant(train_labels[:train_subset])
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
# Variables
# They are variables we want to update and optimize.
weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
biases = tf.Variable(tf.zeros([num_labels]))
# Training computation.
logits = tf.matmul(tf_train_dataset, weights) + biases
# Original loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
# Loss function using L2 Regularization
regularizer = tf.nn.l2_loss(weights)
loss = tf.reduce_mean(loss + beta * regularizer)
# Optimizer.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 50
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
/ predictions.shape[0])
with tf.Session(graph=graph) as session:
# This is a one-time operation which ensures the parameters get initialized as
# we described in the graph: random weights for the matrix, zeros for the
# biases.
tf.initialize_all_variables().run()
print('Initialized')
for step in range(num_steps):
# Run the computations. We tell .run() that we want to run the optimizer,
# and get the loss value and the training predictions returned as numpy
# arrays.
#_, l, predictions = session.run([optimizer, loss, train_prediction])
start_time = time.time()
with tf.control_dependencies([optimizer]):
loss_after_optimizer = tf.identity(loss)
predictions_after = tf.identity(train_prediction)
regularizers_after = tf.identity(regularizer)
_, l, predictions,regularizers = session.run([optimizer, loss_after_optimizer, predictions_after, regularizers_after])
print("--- with dependencies: %s seconds ---" % (time.time() - start_time))
#start_time = time.time()
#opt = session.run(optimizer)
#l = session.run(loss)
#predictions = session.run(train_prediction)
#regularizers = session.run(regularizer)
#print("--- run separately: %s seconds ---" % (time.time() - start_time))
#start_time = time.time()
#_, l, predictions,regularizers = session.run([optimizer, loss, train_prediction, regularizer])
#print("--- all together: %s seconds ---" % (time.time() - start_time))
#if (step % 100 == 0):
#print('Loss at step {}: {}'.format(step, l))
#print('Training accuracy: {:.1f}'.format(accuracy(predictions,
#train_labels[:train_subset, :])))
# Calling .eval() on valid_prediction is basically like calling run(), but
# just to get that one numpy array. Note that it recomputes all its graph
# dependencies.
# You don't have to do .eval above because we already ran the session for the
# train_prediction
#print('Validation accuracy: {:.1f}'.format(accuracy(valid_prediction.eval(),
#valid_labels)))
#print('Test accuracy: {:.1f}'.format(accuracy(test_prediction.eval(), test_labels)))
#print(regularizer)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With