Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Logistic Regression using Tensorflow 2.0?

I'm trying to build a multi-class logistic regression using TensorFlow 2.0 and I've wrote the code which I think is correct but it's not giving out good results. My accuracy is literally 0.1% and even loss is not decreasing. I was hoping someone could help me out here.

This is the code I've written so far. Please points out what am I doing wrong here that I need to improve so the my model works. Thanks you!

from tensorflow.keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train/255., x_test/255.

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15)
x_train = tf.reshape(x_train, shape=(-1, 784))
x_test  = tf.reshape(x_test, shape=(-1, 784))

weights = tf.Variable(tf.random.normal(shape=(784, 10), dtype=tf.float64))
biases  = tf.Variable(tf.random.normal(shape=(10,), dtype=tf.float64))

def logistic_regression(x):
    lr = tf.add(tf.matmul(x, weights), biases)
    return tf.nn.sigmoid(lr)

def cross_entropy(y_true, y_pred):
    y_true = tf.one_hot(y_true, 10)
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(loss)

def accuracy(y_true, y_pred):
    y_true = tf.cast(y_true, dtype=tf.int32)
    preds = tf.cast(tf.argmax(y_pred, axis=1), dtype=tf.int32)
    preds = tf.equal(y_true, preds)
    return tf.reduce_mean(tf.cast(preds, dtype=tf.float32))

def grad(x, y):
    with tf.GradientTape() as tape:
        y_pred = logistic_regression(x)
        loss_val = cross_entropy(y, y_pred)
    return tape.gradient(loss_val, [weights, biases])

epochs = 1000
learning_rate = 0.01
batch_size = 128

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.repeat().shuffle(x_train.shape[0]).batch(batch_size)

optimizer = tf.optimizers.SGD(learning_rate)

for epoch, (batch_xs, batch_ys) in enumerate(dataset.take(epochs), 1):
    gradients = grad(batch_xs, batch_ys)
    optimizer.apply_gradients(zip(gradients, [weights, biases]))

    y_pred = logistic_regression(batch_xs)
    loss = cross_entropy(batch_ys, y_pred)
    acc = accuracy(batch_ys, y_pred)
    print("step: %i, loss: %f, accuracy: %f" % (epoch, loss, acc))

    step: 1000, loss: 2.458979, accuracy: 0.101562
like image 688
user_6396 Avatar asked Jul 05 '19 18:07

user_6396


1 Answers

The model is not converging, and the problem seems to be that you are doing a sigmoid activation directly followed by tf.nn.softmax_cross_entropy_with_logits. In the documentation for the tf.nn.softmax_cross_entropy_with_logits it says:

WARNING: This op expects unscaled logits, since it performs a softmax on logits internally for efficiency. Do not call this op with the output of softmax, as it will produce incorrect results.

Hence no softmax, sigmoid, relu, tanh or any other activations should be done on the output of the previous layer before passed to tf.nn.softmax_cross_entropy_with_logits. For more in depth description of when to use sigmoid or softmax output activation, see here.

Therfore by replacing return tf.nn.sigmoid(lr) with just return lr in the logistic_regression function, the model is converging.

Below is a working example of your code with the above fix. I also changed the variable name epochs to n_batches as your training loop actually goes through 1000 batches not 1000 epochs (i also bumped it up to 10000 as there was sign of more iterations needed).

from tensorflow.keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train/255., x_test/255.

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15)
x_train = tf.reshape(x_train, shape=(-1, 784))
x_test  = tf.reshape(x_test, shape=(-1, 784))

weights = tf.Variable(tf.random.normal(shape=(784, 10), dtype=tf.float64))
biases  = tf.Variable(tf.random.normal(shape=(10,), dtype=tf.float64))

def logistic_regression(x):
    lr = tf.add(tf.matmul(x, weights), biases)
    #return tf.nn.sigmoid(lr)
    return lr


def cross_entropy(y_true, y_pred):
    y_true = tf.one_hot(y_true, 10)
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(loss)

def accuracy(y_true, y_pred):
    y_true = tf.cast(y_true, dtype=tf.int32)
    preds = tf.cast(tf.argmax(y_pred, axis=1), dtype=tf.int32)
    preds = tf.equal(y_true, preds)
    return tf.reduce_mean(tf.cast(preds, dtype=tf.float32))

def grad(x, y):
    with tf.GradientTape() as tape:
        y_pred = logistic_regression(x)
        loss_val = cross_entropy(y, y_pred)
    return tape.gradient(loss_val, [weights, biases])

n_batches = 10000
learning_rate = 0.01
batch_size = 128

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.repeat().shuffle(x_train.shape[0]).batch(batch_size)

optimizer = tf.optimizers.SGD(learning_rate)

for batch_numb, (batch_xs, batch_ys) in enumerate(dataset.take(n_batches), 1):
    gradients = grad(batch_xs, batch_ys)
    optimizer.apply_gradients(zip(gradients, [weights, biases]))

    y_pred = logistic_regression(batch_xs)
    loss = cross_entropy(batch_ys, y_pred)
    acc = accuracy(batch_ys, y_pred)
    print("Batch number: %i, loss: %f, accuracy: %f" % (batch_numb, loss, acc))

(removed printouts)
>> Batch number: 1000, loss: 2.868473, accuracy: 0.546875
(removed printouts)
>> Batch number: 10000, loss: 1.482554, accuracy: 0.718750
like image 173
KrisR89 Avatar answered Sep 21 '22 11:09

KrisR89