I am trying to create a multi-label classifier using TensorFlow, but I'm having trouble adding and connecting hidden layers.
I was following this tutorial: http://jrmeyer.github.io/tutorial/2016/02/01/TensorFlow-Tutorial.html
The data I'm using is UCI's Iris data, encoded to one-hot:
Training X [105,4]
5,3.2,1.2,0.2
5.5,3.5,1.3,0.2
4.9,3.1,1.5,0.1
4.4,3,1.3,0.2
5.1,3.4,1.5,0.2
.
.
.
Training Y [105,3]
0,0,1
0,0,1
0,0,1
0,0,1
0,0,1
0,0,1
.
.
.
I'm also using testing data X and Y which are [45,4] and [45,3] respectively.
Here is my python code:
import tensorflow as tf
import numpy as np
import tarfile
import os
import matplotlib.pyplot as plt
import time
## Import data
def csv_to_numpy_array(filePath, delimiter):
    """Load a delimited text file into a NumPy array.

    Args:
        filePath: Path of the file to read.
        delimiter: String that separates the values on each line.

    Returns:
        The array produced by ``np.genfromtxt`` with ``dtype=None``, i.e.
        NumPy picks the column types from the file contents.
    """
    return np.genfromtxt(filePath, delimiter=delimiter, dtype=None)
# Load the feature and one-hot label files and cast every array to float32,
# the dtype used by the placeholders below.
trainX = csv_to_numpy_array("Iris_training_x.csv", delimiter=",").astype(np.float32)
trainY = csv_to_numpy_array("Iris_training_y.csv", delimiter=",").astype(np.float32)
testX = csv_to_numpy_array("Iris_testing_x.csv", delimiter=",").astype(np.float32)
testY = csv_to_numpy_array("Iris_testing_y.csv", delimiter=",").astype(np.float32)
# Data Set Parameters
# Column counts of the training features and of the one-hot training labels.
numFeatures = trainX.shape[1]
numLabels = trainY.shape[1]
# Training Session Parameters
numEpochs = 1000
# Learning rate starting at 0.008, with one decay step per trainX.shape[0] global steps.
# NOTE(review): global_step is the literal 1 rather than a variable that the
# optimizer increments, so the schedule presumably never advances and the rate
# stays at its initial value -- confirm against the exponential_decay docs.
learningRate = tf.train.exponential_decay(learning_rate=0.008,
global_step= 1,
decay_steps=trainX.shape[0],
decay_rate= 0.95,
staircase=True)
# Placeholders
# X receives feature rows and y receives one-hot label rows; the leading
# dimension is None so any number of rows can be fed.
X=tf.placeholder(tf.float32, [None, numFeatures])
y=tf.placeholder(tf.float32, [None, numLabels])
# Initialize our weights and biases
# Both initializers share one standard deviation: sqrt(6 / (fan_in + fan_out + 1)).
# The denominator has to be parenthesized. Without the parentheses the
# expression is evaluated as (6 / numFeatures) + numLabels + 1, which for
# 4 features and 3 labels gives about 2.35 instead of about 0.87.
init_stddev = np.sqrt(6.0 / (numFeatures + numLabels + 1))
# Weights maps numFeatures inputs to numLabels outputs.
# NOTE(review): name= is passed to tf.random_normal, so it names the random op,
# not the variable -- left as in the original.
Weights = tf.Variable(tf.random_normal([numFeatures, numLabels],
                                       mean=0,
                                       stddev=init_stddev,
                                       name="Weights"))
# One bias value per label, stored as a [1, numLabels] row.
bias = tf.Variable(tf.random_normal([1, numLabels],
                                    mean=0,
                                    stddev=init_stddev,
                                    name="bias"))
# Prediction algorithm (feedforward)
# One sigmoid layer: matmul of X with Weights, plus bias, then sigmoid.
apply_weights_OP = tf.matmul(X, Weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias")
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")
# NOTE(review): this rebinds numFeatures from an integer to a tensor; nothing
# later in this listing reads numFeatures again.
numFeatures = activation_OP
# NOTE(review): the next three lines rebuild the same computation from X with
# the same Weights and bias, so they duplicate the layer above instead of
# adding a second one.
apply_weights_OP = tf.matmul(X, Weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias")
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")
# Op that initializes all variables; it is run after the session is created.
init_OP = tf.initialize_all_variables()
# Cost function (squared error)
# NOTE(review): tf.nn.l2_loss is presumably sum(t ** 2) / 2, i.e. a summed
# rather than a mean squared error -- confirm against the TensorFlow docs.
cost_OP = tf.nn.l2_loss(activation_OP-y, name="squared_error_cost")
# Optimization Algorithm (Gradient Descent)
training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(cost_OP)
# Visualize
# Lists that accumulate the values plotted during training.
epoch_values=[]
accuracy_values=[]
cost_values=[]
# Turn on interactive plotting
plt.ion()
# Create the main, super plot
fig = plt.figure()
# Create two subplots on their own axes and give titles
# NOTE(review): the position is passed as a string ("211"); newer matplotlib
# releases may require the integer form 211 -- confirm for the version in use.
ax1 = plt.subplot("211")
ax1.set_title("TRAINING ACCURACY", fontsize=18)
ax2 = plt.subplot("212")
ax2.set_title("TRAINING COST", fontsize=18)
plt.tight_layout()
# Create a tensorflow session
sess = tf.Session()
# Initialize all tensorflow variables
sess.run(init_OP)
## Ops for visualization
# argmax(activation_OP, 1) gives the label our model thought was most likely
# argmax(y, 1) is the correct label
correct_predictions_OP = tf.equal(tf.argmax(activation_OP, 1), tf.argmax(y, 1))
# False is 0 and True is 1, what was our average?
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))
# Summary op for regression output
activation_summary_OP = tf.histogram_summary("output", activation_OP)
# Summary op for accuracy
accuracy_summary_OP = tf.scalar_summary("accuracy", accuracy_OP)
# Summary op for cost
cost_summary_OP = tf.scalar_summary("cost", cost_OP)
# Summary ops to check how variables (W, b) are updating after each iteration.
# The variables themselves are passed in, so each evaluation of the summary
# reads their current values. Passing Weights.eval(...) / bias.eval(...) would
# freeze the histograms at the values the variables had when this line ran.
weightSummary = tf.histogram_summary("Weights", Weights)
biasSummary = tf.histogram_summary("biases", bias)
# Merge all summaries
all_summary_OPS = tf.merge_all_summaries()
# Summary writer
writer = tf.train.SummaryWriter("summary_logs", sess.graph_def)
# Initialize reporting variables
cost = 0
diff = 1
# Training epochs
# Each iteration feeds the whole training set once. Training stops early when
# the cost reported at two consecutive checkpoints differs by less than 1e-4.
for i in range(numEpochs):
    if i > 1 and diff < .0001:
        print("change in cost %g; convergence."%diff)
        break
    else:
        # Run training step
        step = sess.run(training_OP, feed_dict={X: trainX, y: trainY})
        # Report occasional stats (every 10th epoch)
        if i % 10 == 0:
            # Add epoch to epoch_values
            epoch_values.append(i)
            # Generate accuracy and cost stats on the training data
            summary_results, train_accuracy, newCost = sess.run(
                [all_summary_OPS, accuracy_OP, cost_OP],
                feed_dict={X: trainX, y: trainY}
            )
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            # Write summary stats to writer
            #writer.add_summary(summary_results, i)
            # Re-assign values for variables; diff drives the convergence
            # check at the top of the loop
            diff = abs(newCost - cost)
            cost = newCost
            # Generate print statements
            print("step %d, training accuracy %g"%(i, train_accuracy))
            print("step %d, cost %g"%(i, newCost))
            print("step %d, change in cost %g"%(i, diff))
            # Plot progress to our two subplots
            accuracyLine, = ax1.plot(epoch_values, accuracy_values)
            costLine, = ax2.plot(epoch_values, cost_values)
            fig.canvas.draw()
            #time.sleep(1)
# How well do we perform on held-out test data?
# Evaluates accuracy_OP once with the test arrays fed in place of the training arrays.
print("final accuracy on test set: %s" %str(sess.run(accuracy_OP, feed_dict={X: testX, y: testY})))
# Create Saver
# The saver is constructed but never used, because the save call below is commented out.
saver = tf.train.Saver()
# Save variables to .ckpt file
# saver.save(sess, "trained_variables.ckpt")
# Close tensorflow session
sess.close()
The problem is here:
# Prediction algorithm (feedforward)
# First layer: X has numFeatures columns and Weights is [numFeatures, numLabels],
# so activation_OP has numLabels columns.
apply_weights_OP = tf.matmul(X, Weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias")
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")
numFeatures = activation_OP
# This matmul is the line that raises the error: activation_OP has numLabels (3)
# columns while Weights has numFeatures (4) rows, so the inner dimensions differ.
# Rebinding numFeatures above does not change the shape of the existing Weights.
apply_weights_OP = tf.matmul(activation_OP, Weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias")
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")
My understanding is that the output of one layer should connect to the input of the next one. I just don't know how to modify either the output or input of the layers; it keeps giving me this compatibility error:
/usr/bin/python3.5 /home/marco/PycharmProjects/NN_Iris/main
Traceback (most recent call last):
File "/home/marco/PycharmProjects/NN_Iris/main", line 132, in <module>
apply_weights_OP = tf.matmul(activation_OP, Weights, name="apply_weights")
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/math_ops.py", line 1346, in matmul
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1271, in _mat_mul
transpose_b=transpose_b, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2312, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1704, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/common_shapes.py", line 94, in matmul_shape
inner_a.assert_is_compatible_with(inner_b)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_shape.py", line 108, in assert_is_compatible_with
% (self, other))
ValueError: Dimensions 3 and 4 are not compatible
Process finished with exit code 1
Any suggestions on how to properly connect the two hidden layers? Thanks.
In artificial neural networks, hidden layers are required if and only if the data must be separated non-linearly. Looking at figure 2, it seems that the classes must be non-linearly separated. A single line will not work. As a result, we must use hidden layers in order to get the best decision boundary.
There are three built-in RNN layers in Keras: keras.layers.SimpleRNN, keras.layers.GRU, and keras.layers.LSTM.
If you want a fully connected network with one hidden layer and an output layer, here is how their shapes should look:
# hidden layer
# Maps the numFeatures inputs to num_nodes hidden units; each layer gets its
# own weight matrix and bias vector.
weights_hidden = tf.Variable(tf.random_normal([numFeatures, num_nodes]))
bias_hidden = tf.Variable(tf.random_normal([num_nodes]))
preactivations_hidden = tf.add(tf.matmul(X, weights_hidden), bias_hidden)
activations_hidden = tf.nn.sigmoid(preactivations_hidden)
# output layer
# Maps the num_nodes hidden activations to numLabels outputs, so the inner
# dimensions of this matmul (num_nodes and num_nodes) agree.
weights_output = tf.Variable(tf.random_normal([num_nodes, numLabels]))
bias_output = tf.Variable(tf.random_normal([numLabels]))
preactivations_output = tf.add(tf.matmul(activations_hidden, weights_output), bias_output)
Here num_nodes is the number of nodes in the hidden layer, which you choose yourself. X is a [105, numFeatures] matrix and weights_hidden is a [numFeatures, num_nodes] matrix, so the output of the first hidden layer is [105, num_nodes]. In the same way, [105, num_nodes] multiplied by [num_nodes, numLabels] yields a [105, numLabels] output.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With