I have a multi-layer perceptron for a multi-output regression problem that predicts 14 continuous values. Here is the relevant code:
import tensorflow as tf

# Parameters
learning_rate = 0.001
training_epochs = 1000
batch_size = 500

# Network Parameters
n_hidden_1 = 32
n_hidden_2 = 200
n_hidden_3 = 200
n_hidden_4 = 256
n_input = 14    # number of input features
n_classes = 14  # number of regression targets

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, [None, n_classes])

# Store layers' weights & biases, initialized from N(0, 0.1)
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.1)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.1)),
    'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}

# Create model: four hidden layers with ReLU activation,
# linear output layer for regression
def multilayer_perceptron(x):
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
    layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['h4']), biases['b4']))
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
    return out_layer

# Construct model: mean squared error loss, Adam optimizer
pred = multilayer_perceptron(x)
cost = tf.reduce_mean(tf.square(pred - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Run the graph in the session
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    total_len = X_train.shape[0]  # total number of training examples
    total_batch = int(total_len / batch_size)
    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):  # note: the original range(total_batch - 1) silently skipped one batch
            batch_x = X_train[i * batch_size:(i + 1) * batch_size]
            batch_y = Y_train[i * batch_size:(i + 1) * batch_size]
            _, c, p = sess.run([optimizer, cost, pred],
                               feed_dict={x: batch_x, y: batch_y})
            avg_cost += c / total_batch
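To confirm the optimizer makes any progress at all, a per-epoch print of avg_cost inside the loop above helps; this logging line is an addition for illustration, not part of the original post:

        if epoch % 50 == 0:
            print("Epoch %04d, avg cost = %.6f" % (epoch, avg_cost))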
OUTPUT:
x_batch_data:
[ 1.77560000e+04 4.00000000e+00 4.00000000e+00 ..., 1.00000000e+00
5.61000000e+02 1.00000000e+00]
[ 1.34310000e+04 4.00000000e+00 4.00000000e+00 ..., 1.00000000e+00
5.61000000e+02 1.00000000e+00]
[ 2.98800000e+03 1.00000000e+00 0.00000000e+00 ..., 0.00000000e+00
0.00000000e+00 1.00000000e+00]
y_batch_data:
[[ 4.19700000e-01 1.04298450e+02 1.50000000e+02 ..., 2.75250000e-01
1.02000000e-01 7.28565000e+00]
[ 5.59600000e-01 1.39064600e+02 2.00000000e+02 ..., 3.67000000e-01
1.36000000e-01 9.71420000e+00]
[ 2.79800000e-01 6.95323000e+01 1.00000000e+02 ..., 1.83500000e-01
6.80000000e-02 4.85710000e+00]
Prediction:
[[ 0.85085869 90.53585815 130.17015076 ..., 0.62335277
0.26637274 5.52062225]
[ 0.85085869 90.53585815 130.17015076 ..., 0.62335277
0.26637274 5.52062225]
[ 0.85085869 90.53585815 130.17015076 ..., 0.62335277
0.26637274 5.52062225]
The predicted values are always the same despite different input values. Can someone point out what could be the reason behind this?
P.S. Similar question referred to: tensorflow deep neural network for regression always predict same results in one batch
Approaches tried:
1. Gradually reduced the learning rate from 0.1 to 0.0001
2. Tried other optimizer algorithms
3. Changed the network architecture (number of hidden layers and nodes, and activation functions)
Any help is appreciated.
The problem seems to be:
The predictions can only be identical if the activations reaching the output layer are identical for every input. Since the batch inputs clearly differ, the likely culprit is the unscaled features: the raw inputs range from 0 up to roughly 1.8e4, which can push the ReLU units into a dead regime (outputting zero for every sample), so the network collapses to predicting roughly the mean of each target. Standardizing the inputs (and, ideally, the targets) before training usually fixes this.
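A minimal sketch of that fix, assuming X_train and Y_train are NumPy arrays as in the question (the scaler names and the X_test usage are illustrative, not from the original post):

from sklearn.preprocessing import StandardScaler

# Standardize each feature and each target to zero mean, unit variance
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = x_scaler.fit_transform(X_train)
Y_train = y_scaler.fit_transform(Y_train)

# Any held-out data must be transformed with the *training* statistics
X_test = x_scaler.transform(X_test)

# After training, map predictions back to the original target scale:
# predictions = y_scaler.inverse_transform(sess.run(pred, feed_dict={x: X_test}))

With inputs on a comparable scale, Adam at learning rate 0.001 should move the predictions away from a constant within a few epochs. If it does not, have multilayer_perceptron also return layer_1 and print it: all-zero activations confirm dead ReLUs.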