I am using keras+tensorflow for the first time. I would like to specify the correlation coefficient as the loss function. It makes sense to square it so that it is a number between 0 and 1 where 0 is bad and 1 is good.
My basic code currently looks like:
def baseline_model():
    """Build and compile the baseline regression network.

    The network has one hidden layer of 4000 ReLU units fed by ``n**2``
    inputs, followed by a single output unit. It is compiled with the
    built-in mean-squared-error loss and the Adam optimizer.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    hidden = Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu')
    output = Dense(1, kernel_initializer='normal')
    net = Sequential([hidden, output])
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net
# Chain input standardization and the Keras regressor into one estimator so
# both steps are fitted together by the cross-validation below.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)),
]
pipeline = Pipeline(estimators)
# random_state only takes effect together with shuffle=True, and recent
# scikit-learn releases raise a ValueError when it is passed without
# shuffling. Without shuffling the folds are the same contiguous splits
# either way, so the argument is simply omitted.
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
How can I change this so that it maximizes the squared correlation coefficient (that is, minimizes 1 - r**2) instead?
I tried the following:
def correlation_coefficient(y_true, y_pred):
    """Return ``1 - r**2`` using TensorFlow's streaming Pearson metric.

    Only the metric's value tensor is used; the update op returned alongside
    it is discarded.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.
    """
    metric_value = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)[0]
    return 1 - metric_value**2
def baseline_model():
    """Build the baseline network and compile it with the custom loss.

    One hidden layer of 4000 ReLU units takes ``n**2`` inputs and feeds a
    single output unit. The loss is the ``correlation_coefficient`` function
    and the optimizer is Adam.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # A second hidden layer (2000 ReLU units) was tried here and left disabled.
    layers = [
        Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    ]
    net = Sequential(layers)
    net.compile(optimizer='adam', loss=correlation_coefficient)
    return net
but this crashes with:
Traceback (most recent call last):
File "deeplearning-det.py", line 67, in <module>
results = cross_val_score(pipeline, X, Y, cv=kfold)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 321, in cross_val_score
pre_dispatch=pre_dispatch)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
for train, test in cv.split(X, y, groups))
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
while self.dispatch_one_batch(iterator):
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
self._dispatch(tasks)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
result = ImmediateResult(func)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
self.results = batch()
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 437, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/pipeline.py", line 259, in fit
self._final_estimator.fit(Xt, y, **fit_params)
File "/home/user/.local/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py", line 147, in fit
history = self.model.fit(x, y, **fit_args)
File "/home/user/.local/lib/python3.5/site-packages/keras/models.py", line 867, in fit
initial_epoch=initial_epoch)
File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1575, in fit
self._make_train_function()
File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 960, in _make_train_function
loss=self.total_loss)
File "/home/user/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "/home/user/.local/lib/python3.5/site-packages/keras/optimizers.py", line 432, in get_updates
m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 856, in binary_op_wrapper
y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 611, in convert_to_tensor
as_ref=False)
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 676, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 121, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 364, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
Update 1
Following the answer below, the code now runs. Unfortunately, the correlation_coefficient and correlation_coefficient_loss functions give different values from each other, and I am not sure either of them is the same as you would get from 1 - scipy.stats.pearsonr()[0]**2.
Why are the loss functions giving the wrong outputs, and how can they be corrected to give the same values as 1 - scipy.stats.pearsonr()[0]**2 would give?
Here is the completely self contained code that should just run:
import numpy as np
import sys
import math
from scipy.stats import ortho_group
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import tensorflow as tf
from keras import backend as K
def permanent(M):
    """Return the permanent of the square matrix ``M``.

    Evaluates Glynn's formula, visiting the sign vectors in Gray-code order
    so that consecutive terms differ in a single sign. Each step therefore
    updates the vector of signed column sums with one row operation instead
    of recomputing it.

    Parameters
    ----------
    M : array_like, shape (n, n)
        Real or complex matrix. Integer and boolean input is promoted to
        floating point; the caller's array is never modified.

    Returns
    -------
    scalar
        The permanent of ``M``. A 0x0 matrix yields 1 (the empty product).
    """
    M = np.asarray(M)
    # The running column sums are updated in place with float-valued terms.
    # On an integer array NumPy refuses that cast, so promote up front
    # (complex input stays complex).
    M = M.astype(np.result_type(M.dtype, np.float64), copy=False)
    n = M.shape[0]
    if n == 0:
        # Without this guard the formula below would return 1 / 2**-1 == 2.
        return M.dtype.type(1)

    signs = np.ones(n)        # current sign attached to every row
    gray = np.arange(n)       # bookkeeping that yields the next row to flip
    col_sums = M.sum(axis=0)  # signed column sums for the current sign vector
    total = np.prod(col_sums)
    term_sign = 1
    j = 0
    while j < n - 1:
        # Flip the sign of row j and fold the change into the column sums.
        col_sums -= 2 * signs[j] * M[j]
        signs[j] = -signs[j]
        term_sign = -term_sign
        total += term_sign * np.prod(col_sums)
        # Advance the Gray-code focus pointers to find the next row to flip.
        gray[0] = 0
        gray[j] = gray[j + 1]
        gray[j + 1] = j + 1
        j = gray[0]
    return total / 2 ** (n - 1)
def correlation_coefficient_loss(y_true, y_pred):
    """Return ``1 - r**2``, with ``r`` the Pearson correlation of the inputs.

    The correlation is computed over all elements of the two tensors, so
    during training it is the correlation within the current batch.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        A scalar tensor: 0 for perfect (anti-)correlation, 1 for none.
    """
    x = y_true
    y = y_pred
    xm = x - K.mean(x)
    ym = y - K.mean(y)
    r_num = K.sum(xm * ym)
    # Pearson's denominator is the square root of the product of the two sums
    # of squares; without the root the ratio is not a correlation coefficient.
    r_den = K.sqrt(K.sum(K.square(xm)) * K.sum(K.square(ym)))
    r = r_num / r_den
    # Rounding can push |r| marginally above 1, which would make the loss negative.
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
def correlation_coefficient(y_true, y_pred):
    """Return ``1 - r**2`` from TensorFlow's streaming Pearson metric.

    The metric's local variables are also registered as global variables so
    that a global initializer covers them, and the returned value is made to
    depend on the metric's update op so each evaluation runs the update first.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.
    """
    metric_value, metric_update = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)

    # Pick the local variables whose second name component mentions this function.
    selected = []
    for var in tf.local_variables():
        scope_part = var.name.split('/')[1]
        if 'correlation_coefficient' in scope_part:
            selected.append(var)

    # Registering them as global variables gets them initialized for a new session.
    for var in selected:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, var)

    # Reading through an identity op under the control dependency forces the update.
    with tf.control_dependencies([metric_update]):
        refreshed = tf.identity(metric_value)
    return 1 - refreshed**2
def baseline_model():
    """Build the network trained on the correlation loss.

    One hidden layer of 4000 ReLU units takes ``no_rows**2`` inputs and feeds
    a single output unit. The model is compiled with
    ``correlation_coefficient_loss`` as the loss, Adam as the optimizer, and
    ``correlation_coefficient`` reported as a metric.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # A second hidden layer (2000 ReLU units) was tried here and left disabled.
    layers = [
        Dense(4000, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    ]
    net = Sequential(layers)
    net.compile(
        optimizer='adam',
        loss=correlation_coefficient_loss,
        metrics=[correlation_coefficient],
    )
    return net
no_rows = 8

print("Making the input data using seed 7", file=sys.stderr)
np.random.seed(7)
# Draw a random orthogonal matrix and keep only its first no_rows columns.
U = ortho_group.rvs(no_rows**2)
U = U[:, :no_rows]
print(U)

# Each sample is no_rows rows of U (drawn with replacement) put into a
# canonical order; the target is -log2 of the squared permanent of that block.
X = []
Y = []
for _ in range(40000):
    picked = np.random.choice(no_rows**2, size=no_rows)
    rows = U[picked]
    block = rows[np.lexsort(np.rot90(rows))]
    X.append(block.ravel())
    Y.append(-math.log(permanent(block)**2, 2))
X = np.array(X)
Y = np.array(Y)

# Standardize the inputs, then fit the Keras regressor, as a single pipeline.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)),
]
pipeline = Pipeline(estimators)

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, train_size=0.75, test_size=0.25)
pipeline.fit(X_train, y_train)
Update 2
I have given up on the correlation_coefficient function and am now just using the correlation_coefficient_loss one as given by JulioDanielReyes below. However, either this is still wrong or keras is dramatically overfitting. Even when I have:
def baseline_model():
    """Build the small (40 hidden units) network trained on the correlation loss.

    One hidden layer of 40 ReLU units takes ``no_rows**2`` inputs and feeds a
    single output unit. ``correlation_coefficient_loss`` is used both as the
    loss and as the reported metric; the optimizer is Adam.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    hidden = Dense(40, input_dim=no_rows**2, kernel_initializer='normal', activation='relu')
    output = Dense(1, kernel_initializer='normal')
    net = Sequential([hidden, output])
    net.compile(
        optimizer='adam',
        loss=correlation_coefficient_loss,
        metrics=[correlation_coefficient_loss],
    )
    return net
I get a loss of, for example, 0.6653 after 100 epochs but 0.857 when I test the trained model.
How can it be overfitting with such a tiny number of nodes in the hidden layer?
Creating custom loss functions in Keras: a custom loss function can be created by defining a function that takes the true values and predicted values as required parameters. The function should return an array of losses. The function can then be passed at the compile stage.
Loss calculation is based on the difference between predicted and actual values. If the predicted values are far from the actual values, the loss function will produce a very large number. Keras is a library for creating neural networks.
The loss is used to calculate the gradients for the neural net, and the gradients are used to update the weights. This is how a neural net is trained. Keras has many built-in loss functions, which I have covered in one of my previous blog posts.
The loss function is used to optimize your model. This is the function that will get minimized by the optimizer. A metric is used to judge the performance of your model. This is only for you to look at and has nothing to do with the optimization process.
According to the keras documentation, you should pass the squared correlation coefficient as a function instead of the string 'mean_squared_error'.
The function needs to receive 2 tensors (y_true, y_pred). You can look at the keras source code for inspiration.
There is also a function tf.contrib.metrics.streaming_pearson_correlation implemented in tensorflow. Just be careful about the order of the parameters; it should be something like this:
Update 1: initialize local variables according to this issue
import tensorflow as tf
def correlation_coefficient(y_true, y_pred):
    """Return ``1 - r**2`` from TensorFlow's streaming Pearson metric.

    The metric is created under the name ``'pearson_r'`` so its local
    variables can be found by name. They are also registered as global
    variables so that a global initializer covers them, and the returned
    value is made to depend on the metric's update op.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.
    """
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(
        y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]
    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)
    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
    return 1 - pearson_r**2
...
model.compile(loss=correlation_coefficient, optimizer='adam')
Update 2: even though you cannot use the scipy function directly, you can look at the implementation and port it to your code using keras backend.
Update 3: The tensorflow function as it is may not be differentiable, your loss function needs to be something like this: (Please check the math)
from keras import backend as K
def correlation_coefficient_loss(y_true, y_pred):
    """Return ``1 - r**2``, with ``r`` the Pearson correlation of the inputs.

    The correlation is taken over all elements of the two tensors and is
    clamped to ``[-1, 1]`` before squaring.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        A scalar tensor: 0 for perfect (anti-)correlation, 1 for none.
    """
    true_mean = K.mean(y_true)
    pred_mean = K.mean(y_pred)
    true_dev = y_true - true_mean
    pred_dev = y_pred - pred_mean

    cross_sum = K.sum(tf.multiply(true_dev, pred_dev))
    true_ss = K.sum(K.square(true_dev))
    pred_ss = K.sum(K.square(pred_dev))
    norm = K.sqrt(tf.multiply(true_ss, pred_ss))

    corr = cross_sum / norm
    # Clamp from above first, then from below.
    corr = K.minimum(corr, 1.0)
    corr = K.maximum(corr, -1.0)
    return 1 - K.square(corr)
Update 4: The results are different on both functions, but correlation_coefficient_loss gives the same results as scipy.stats.pearsonr:
Here is the code to test it:
import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy.stats
# Three pairs of vectors: identical, partially correlated, and reversed.
inputa = np.array([
    [3, 1, 2, 3, 4, 5],
    [1, 2, 3, 4, 5, 6],
    [1, 2, 3, 4, 5, 6],
])
inputb = np.array([
    [3, 1, 2, 3, 4, 5],
    [3, 1, 2, 3, 4, 5],
    [6, 5, 4, 3, 2, 1],
])

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, shape=[None])
    b = tf.placeholder(tf.float32, shape=[None])
    f1 = correlation_coefficient(a, b)
    f2 = correlation_coefficient_loss(a, b)
    sess.run(tf.global_variables_initializer())

    # Evaluate both TensorFlow functions and the SciPy reference on each pair.
    for row_a, row_b in zip(inputa, inputb):
        f1_result, f2_result = sess.run([f1, f2], feed_dict={a: row_a, b: row_b})
        scipy_r = scipy.stats.pearsonr(row_a, row_b)[0]
        scipy_result = 1 - scipy_r**2
        print("a: " + str(row_a) + " b: " + str(row_b))
        print("correlation_coefficient: " + str(f1_result))
        print("correlation_coefficient_loss: " + str(f2_result))
        print("scipy.stats.pearsonr:" + str(scipy_result))
Results:
a: [3 1 2 3 4 5] b: [3 1 2 3 4 5]
correlation_coefficient: -2.38419e-07
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0
a: [1 2 3 4 5 6] b: [3 1 2 3 4 5]
correlation_coefficient: 0.292036
correlation_coefficient_loss: 0.428571
scipy.stats.pearsonr:0.428571428571
a: [1 2 3 4 5 6] b: [6 5 4 3 2 1]
correlation_coefficient: 0.994918
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0
The following code is an implementation of correlation coefficient in tensorflow version 2.0
import tensorflow as tf
def correlation(x, y):
    """Return the Pearson correlation coefficient of tensors ``x`` and ``y``.

    Computed as the mean product of the centered inputs divided by the
    product of their standard deviations, over all elements.
    """
    x_mean = tf.math.reduce_mean(x)
    y_mean = tf.math.reduce_mean(y)
    x_centered = x - x_mean
    y_centered = y - y_mean
    covariance = tf.math.reduce_mean(tf.multiply(x_centered, y_centered))
    spread = tf.math.reduce_std(x_centered) * tf.math.reduce_std(y_centered)
    return covariance / spread
It returns the same result as numpy's corrcoef function.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With