I'm trying to implement an RBM with TensorFlow; here is the code:
rbm.py

""" An RBM implementation for TensorFlow, based closely on the one in Theano """
import tensorflow as tf
import math

def sample_prob(probs):
    """Sample binary values (0/1) from a tensor of Bernoulli probabilities."""
    return tf.nn.relu(
        tf.sign(
            probs - tf.random_uniform(probs.get_shape())))

class RBM(object):
    def __init__(self, name, input_size, output_size):
        with tf.name_scope("rbm_" + name):
            self.weights = tf.Variable(
                tf.truncated_normal([input_size, output_size],
                                    stddev=1.0 / math.sqrt(float(input_size))),
                name="weights")
            self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias")
            self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias")

    def propup(self, visible):
        """Hidden-unit activation probabilities given the visible units."""
        return tf.nn.sigmoid(tf.matmul(visible, self.weights) + self.h_bias)

    def propdown(self, hidden):
        """Visible-unit activation probabilities given the hidden units."""
        return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(self.weights)) + self.v_bias)

    def sample_h_given_v(self, v_sample):
        return sample_prob(self.propup(v_sample))

    def sample_v_given_h(self, h_sample):
        return sample_prob(self.propdown(h_sample))

    def gibbs_hvh(self, h0_sample):
        v_sample = self.sample_v_given_h(h0_sample)
        h_sample = self.sample_h_given_v(v_sample)
        return [v_sample, h_sample]

    def gibbs_vhv(self, v0_sample):
        h_sample = self.sample_h_given_v(v0_sample)
        v_sample = self.sample_v_given_h(h_sample)
        return [h_sample, v_sample]

    def cd1(self, visibles, learning_rate=0.1):
        """One step of contrastive divergence (CD-1); returns the update ops."""
        h_start = self.propup(visibles)
        v_end = self.propdown(h_start)
        h_end = self.propup(v_end)
        w_positive_grad = tf.matmul(tf.transpose(visibles), h_start)
        w_negative_grad = tf.matmul(tf.transpose(v_end), h_end)
        update_w = self.weights.assign_add(
            learning_rate * (w_positive_grad - w_negative_grad))
        update_vb = self.v_bias.assign_add(
            learning_rate * tf.reduce_mean(visibles - v_end, 0))
        update_hb = self.h_bias.assign_add(
            learning_rate * tf.reduce_mean(h_start - h_end, 0))
        return [update_w, update_vb, update_hb]

    def reconstruction_error(self, dataset):
        """Squared reconstruction error after one round of Gibbs sampling."""
        err = tf.stop_gradient(dataset - self.gibbs_vhv(dataset)[1])
        return tf.reduce_sum(err * err)
rbm_MNIST_test.py

import tensorflow as tf
import numpy as np
import rbm
import input_data

def build_model(X, w1, b1, wo, bo):
    h1 = tf.nn.sigmoid(tf.matmul(X, w1) + b1)
    model = tf.nn.sigmoid(tf.matmul(h1, wo) + bo)
    return model

def init_weight(shape):
    return tf.Variable(tf.random_normal(shape, mean=0.0, stddev=0.01))

def init_bias(dim):
    return tf.Variable(tf.zeros([dim]))

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels

X = tf.placeholder("float", [None, 784])
Y = tf.placeholder("float", [None, 10])

rbm_layer = rbm.RBM("mnist", 784, 500)

for i in range(10):
    print "RBM CD: ", i
    rbm_layer.cd1(trX)

rbm_w, rbm_vb, rbm_hb = rbm_layer.cd1(trX)

wo = init_weight([500, 10])
bo = init_bias(10)
py_x = build_model(X, rbm_w, rbm_hb, wo, bo)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(py_x, 1)

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

for i in range(10):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
    print i, np.mean(np.argmax(teY, axis=1) ==
                     sess.run(predict_op, feed_dict={X: teX, Y: teY}))
but I get the following error:
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1626, in as_graph_def raise ValueError("GraphDef cannot be larger than 2GB.") ValueError: GraphDef cannot be larger than 2GB.
Can someone help me solve this?
TensorFlow does have a limit of 2GB on GraphDef protos, which stems from a limitation of the protocol buffers implementation. You can quickly reach that limit if you have large constant tensors in your graph. In particular, if you use the same numpy array multiple times, TensorFlow will add multiple constant tensors to your graph.
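As an illustrative sketch (the zero-filled array here is hypothetical, just matching the MNIST shape), each use of the same numpy array adds a separate Const node:

import numpy as np
import tensorflow as tf

arr = np.zeros((55000, 784), dtype=np.float32)  # ~164 MB of float32 data
a = tf.reduce_sum(arr)  # implicit conversion adds a ~164 MB Const node
b = tf.reduce_sum(arr)  # adds a second, independent ~164 MB Const node
print len(tf.get_default_graph().get_operations())  # grows with every use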
In your case, mnist.train.images, returned by input_data.read_data_sets, is a numpy floating-point array with shape (55000, 784), so it holds about 164 MB of data (55000 × 784 float32 values × 4 bytes each). You pass that numpy array to rbm_layer.cd1, and inside that function, every time you use visibles, a TensorFlow Const node gets created from the numpy array. You use visibles in 3 locations, so every call to cd1 increases the graph size by approximately 492 MB, and over the 11 calls in your script you easily exceed the limit. The solution is to create a TensorFlow constant once and pass that constant to the cd1 function, like so:
trX_constant = tf.constant(trX)
for i in range(10):
    print "RBM CD: ", i
    rbm_layer.cd1(trX_constant)
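Alternatively (a suggestion of mine, not something your code requires), a tf.placeholder keeps the training data out of the GraphDef entirely: you build the cd1 ops exactly once and then actually run the returned assigns. A minimal sketch:

visibles = tf.placeholder(tf.float32, [None, 784])
updates = rbm_layer.cd1(visibles)  # graph nodes are created only once

sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in range(10):
    print "RBM CD: ", i
    sess.run(updates, feed_dict={visibles: trX})  # the assigns execute here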
BTW, I am not sure what your intention is in the above loop. Note that the cd1 function simply adds the assign_add nodes to the graph, and does NOT actually perform the assigns. If you really want those assigns to happen while you train, you should consider chaining those assigns, via control dependencies, to your final train_op node.
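A minimal sketch of that chaining, assuming the cd1 updates are built on the X placeholder (the rbm_updates name is mine, not from the code above):

rbm_updates = rbm_layer.cd1(X)  # the three assign_add ops returned by cd1
with tf.control_dependencies(rbm_updates):
    # ops created inside this block run only after the CD-1 assigns, so
    # each sess.run(train_op, ...) now performs the RBM updates as well
    train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)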