
How to specify the correlation coefficient as the loss function in keras

I am using Keras + TensorFlow for the first time. I would like to use the correlation coefficient as the loss function. It makes sense to square it so that it is a number between 0 and 1, where 0 is bad and 1 is good.
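
For reference, the sample Pearson correlation and the quantity I want to minimize are:

r = sum((x - mean(x)) * (y - mean(y))) / sqrt(sum((x - mean(x))**2) * sum((y - mean(y))**2))
loss = 1 - r**2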

My basic code currently looks like:

def baseline_model():
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=0)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

How can I change this so that it optimizes to maximize the squared correlation coefficient (that is, to minimize 1 - r**2) instead?


I tried the following:

def correlation_coefficient(y_true, y_pred):
    pearson_r, _ = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
    return 1-pearson_r**2

def baseline_model():
# create model
        model = Sequential()
        model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
#        model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        # Compile model
        model.compile(loss=correlation_coefficient, optimizer='adam')
        return model

but this crashes with:

Traceback (most recent call last):
  File "deeplearning-det.py", line 67, in <module>
    results = cross_val_score(pipeline, X, Y, cv=kfold)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 321, in cross_val_score
    pre_dispatch=pre_dispatch)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
    for train, test in cv.split(X, y, groups))
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
    while self.dispatch_one_batch(iterator):
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
    self._dispatch(tasks)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
    result = ImmediateResult(func)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
    self.results = batch()
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 437, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/pipeline.py", line 259, in fit
    self._final_estimator.fit(Xt, y, **fit_params)
  File "/home/user/.local/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py", line 147, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/home/user/.local/lib/python3.5/site-packages/keras/models.py", line 867, in fit
    initial_epoch=initial_epoch)
  File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1575, in fit
    self._make_train_function()
  File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 960, in _make_train_function
    loss=self.total_loss)
  File "/home/user/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/user/.local/lib/python3.5/site-packages/keras/optimizers.py", line 432, in get_updates
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 856, in binary_op_wrapper
    y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 611, in convert_to_tensor
    as_ref=False)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 676, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 121, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 364, in make_tensor_proto
    raise ValueError("None values not supported.")
ValueError: None values not supported.

Update 1

Following the answer below, the code now runs. Unfortunately, the correlation_coefficient and correlation_coefficient_loss functions give different values from each other, and I am not sure either of them matches what you would get from 1 - scipy.stats.pearsonr()[0]**2.

Why are the loss functions giving the wrong outputs, and how can they be corrected to give the same values as 1 - scipy.stats.pearsonr()[0]**2?

Here is the completely self-contained code, which should just run:

import numpy as np
import sys
import math
from scipy.stats import ortho_group
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import tensorflow as tf
from keras import backend as K


def permanent(M):
    # Compute the permanent of M via Glynn's formula, iterating over the
    # sign vectors in Gray-code order so each step flips only one sign.
    n = M.shape[0]
    d = np.ones(n)     # current sign vector
    j = 0
    s = 1              # sign of the current term
    f = np.arange(n)   # Gray-code bookkeeping
    v = M.sum(axis=0)  # running signed column sums
    p = np.prod(v)
    while (j < n-1):
        v -= 2*d[j]*M[j]  # flip sign j and update the column sums
        d[j] = -d[j]
        s = -s
        prod = np.prod(v)
        p += s*prod
        f[0] = 0
        f[j] = f[j+1]
        f[j+1] = j+1
        j = f[0]
    return p/2**(n-1)


def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x-mx, y-my
    r_num = K.sum(xm * ym)
    r_den = K.sum(K.sum(K.square(xm)) * K.sum(K.square(ym)))
    r = r_num / r_den
    return 1 - r**2


def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'correlation_coefficient' in i.name.split('/')[1]]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1-pearson_r**2


def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
#    model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient])
    return model


no_rows = 8

print("Making the input data using seed 7", file=sys.stderr)
np.random.seed(7)
U = ortho_group.rvs(no_rows**2)
U = U[:, :no_rows]
# U is a random orthogonal matrix
X = []
Y = []
print(U)
for i in range(40000):
    I = np.random.choice(no_rows**2, size=no_rows)
    A = U[I][np.lexsort(np.rot90(U[I]))]
    X.append(A.ravel())
    Y.append(-math.log(permanent(A)**2, 2))

X = np.array(X)
Y = np.array(Y)

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)))
pipeline = Pipeline(estimators)
X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                    train_size=0.75, test_size=0.25)
pipeline.fit(X_train, y_train)

Update 2

I have given up on the correlation_coefficient function and am now just using the correlation_coefficient_loss one given by Julio Daniel Reyes below. However, either this is still wrong or Keras is dramatically overfitting. Even when I have:

def baseline_model():
    model = Sequential()
    model.add(Dense(40, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient_loss])
    return model

I get a training loss of, for example, 0.6653 after 100 epochs, but 0.857 when I test the trained model.

How can it be overfitting with such a tiny number of nodes in the hidden layer?

asked Oct 07 '17 by graffe



2 Answers

According to the Keras documentation, you should pass the squared correlation coefficient as a function instead of the string 'mean_squared_error'.

The function needs to receive two tensors (y_true, y_pred). You can look at the Keras source code for inspiration.
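
As a minimal sketch of that signature (my_loss is just an illustrative name; the body here is ordinary mean squared error, only to show the required shape):

from keras import backend as K

def my_loss(y_true, y_pred):
    # any symbolic expression of the two tensors can serve as a loss
    return K.mean(K.square(y_pred - y_true), axis=-1)

model.compile(loss=my_loss, optimizer='adam')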

There is also a function, tf.contrib.metrics.streaming_pearson_correlation, implemented in TensorFlow. Just be careful about the order of the parameters; it should be something like this:

Update 1: initialize local variables according to this issue

import tensorflow as tf
def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1-pearson_r**2

...

model.compile(loss=correlation_coefficient, optimizer='adam')

Update 2: even though you cannot use the SciPy function directly, you can look at its implementation and port it to your code using the Keras backend.

Update 3: the TensorFlow function as it stands may not be differentiable; your loss function needs to be something like this (please check the math):

import tensorflow as tf
from keras import backend as K

def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    r_num = K.sum(tf.multiply(xm, ym))
    r_den = K.sqrt(tf.multiply(K.sum(K.square(xm)), K.sum(K.square(ym))))
    r = r_num / r_den

    # clip r to [-1, 1] before squaring
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
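
Clipping r to [-1, 1] guards against floating-point round-off pushing it slightly outside that range, which would otherwise let 1 - K.square(r) go negative.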

Update 4: the results of the two functions differ, but correlation_coefficient_loss does give the same values as 1 - scipy.stats.pearsonr()[0]**2. Here is the code to test it:

import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy.stats

inputa = np.array([[3,1,2,3,4,5],
                   [1,2,3,4,5,6],
                   [1,2,3,4,5,6]])
inputb = np.array([[3,1,2,3,4,5],
                   [3,1,2,3,4,5],
                   [6,5,4,3,2,1]])

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, shape=[None])
    b = tf.placeholder(tf.float32, shape=[None])
    f1 = correlation_coefficient(a, b)
    f2 = correlation_coefficient_loss(a, b)

    sess.run(tf.global_variables_initializer())

    for i in range(inputa.shape[0]):

        f1_result, f2_result = sess.run([f1, f2], feed_dict={a: inputa[i], b: inputb[i]})
        scipy_result = 1 - scipy.stats.pearsonr(inputa[i], inputb[i])[0]**2
        print("a: "+ str(inputa[i]) + " b: " + str(inputb[i]))
        print("correlation_coefficient: " + str(f1_result))
        print("correlation_coefficient_loss: " + str(f2_result))
        print("scipy.stats.pearsonr:" + str(scipy_result))

Results:

a: [3 1 2 3 4 5] b: [3 1 2 3 4 5]
correlation_coefficient: -2.38419e-07
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0
a: [1 2 3 4 5 6] b: [3 1 2 3 4 5]
correlation_coefficient: 0.292036
correlation_coefficient_loss: 0.428571
scipy.stats.pearsonr:0.428571428571
a: [1 2 3 4 5 6] b: [6 5 4 3 2 1]
correlation_coefficient: 0.994918
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0
answered Oct 11 '22 by Julio Daniel Reyes


The following code is an implementation of the correlation coefficient in TensorFlow 2.0:

import tensorflow as tf

def correlation(x, y):
    mx = tf.math.reduce_mean(x)
    my = tf.math.reduce_mean(y)
    xm, ym = x - mx, y - my
    r_num = tf.math.reduce_mean(tf.multiply(xm, ym))
    r_den = tf.math.reduce_std(xm) * tf.math.reduce_std(ym)
    return r_num / r_den

It returns the same result as NumPy's corrcoef function.
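
A quick eager-mode sanity check against NumPy, reusing one of the test pairs from the answer above:

import numpy as np
import tensorflow as tf

a = np.array([1., 2., 3., 4., 5., 6.])
b = np.array([3., 1., 2., 3., 4., 5.])

# both lines should print approximately 0.7559
print(correlation(tf.constant(a), tf.constant(b)).numpy())
print(np.corrcoef(a, b)[0, 1])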

answered Oct 12 '22 by Trifon