I need to compute the Pearson and Spearman correlations and use them as metrics in TensorFlow.
For Pearson, it's trivial:
# NOTE: tf.contrib is a TensorFlow 1.x namespace; it was removed in TensorFlow 2.0.
tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
But for Spearman, I am clueless!
From this answer:
# First attempt: Spearman via 1 - 6 * sum(d^2) / (n^3 - n), with n = samples.
# samples is used both as k for top_k and as n in the formula.
samples = 1
# NOTE: top_k(...).indices are the positions of the k largest values along the
# last dimension, not the rank of every element.
predictions_rank = tf.nn.top_k(y_pred, k=samples, sorted=True, name='prediction_rank').indices
real_rank = tf.nn.top_k(y_true, k=samples, sorted=True, name='real_rank').indices
rank_diffs = predictions_rank - real_rank
rank_diffs_squared_sum = tf.reduce_sum(rank_diffs * rank_diffs)
six = tf.constant(6)
one = tf.constant(1.0)
numerator = tf.cast(six * rank_diffs_squared_sum, dtype=tf.float32)
# With samples == 1 this evaluates to 1*1*1 - 1 = 0, so the division on the
# next line is a division by zero (0/0 gives NaN).
divider = tf.cast(samples * samples * samples - samples, dtype=tf.float32)
spearman_batch = one - numerator / divider
But this returns NaN.
...
Following the definition on Wikipedia, I tried:
# Second attempt: rank both tensors, then take the Pearson correlation of the ranks.
# tf.size counts every element of y_pred, across all of its dimensions.
size = tf.size(y_pred)
# top_k sorts along the last dimension and needs k <= that dimension's length;
# [1] picks the indices output (positions ordered by descending value).
# NOTE(review): if y_pred is (batch, 1), size equals batch while the last
# dimension is 1 - presumably the source of the "at least k columns" error.
indice_of_ranks_pred = tf.nn.top_k(y_pred, k=size)[1]
indice_of_ranks_label = tf.nn.top_k(y_true, k=size)[1]
# Sorting the negated indices inverts the permutation, which yields the
# 0-based rank position of each original element.
rank_pred = tf.nn.top_k(-indice_of_ranks_pred, k=size)[1]
rank_label = tf.nn.top_k(-indice_of_ranks_label, k=size)[1]
# The Pearson metric is fed float ranks rather than the integer indices.
rank_pred = tf.to_float(rank_pred)
rank_label = tf.to_float(rank_label)
spearman = tf.contrib.metrics.streaming_pearson_correlation(rank_pred, rank_label)
But running this, I got the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: input must have at least k columns. Had 1, needed 32
[[{{node metrics/spearman/TopKV2}} = TopKV2[T=DT_FLOAT, sorted=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](lambda_1/add, metrics/pearson/pearson_r/variance_predictions/Size)]]
One thing you can do is use TensorFlow's tf.py_function
to wrap
scipy.stats.spearmanr
and define the inputs and the output type like this:
from scipy.stats import spearmanr
def get_spearman_rankcor(y_true, y_pred):
    """Compute the Spearman rank correlation of predictions vs. labels via SciPy.

    The work is delegated to ``scipy.stats.spearmanr`` through
    ``tf.py_function``.

    Args:
        y_true: Ground-truth values; cast to float32 before the call.
        y_pred: Predicted values; cast to float32 before the call.

    Returns:
        A float32 tensor holding the correlation coefficient.

    NOTE(review): the SciPy call runs as plain Python, so presumably no
    gradient flows through it - use it as a metric, not a loss; confirm.
    """

    def _coefficient(pred, true):
        # spearmanr returns a (correlation, p-value) pair; only the
        # coefficient matches the single float32 output declared in Tout.
        return spearmanr(pred, true)[0]

    return tf.py_function(
        _coefficient,
        [tf.cast(y_pred, tf.float32), tf.cast(y_true, tf.float32)],
        Tout=tf.float32,
    )
I have been working on implementing the Spearman rank correlation coefficient directly in TensorFlow, following the definition on this website (https://rpubs.com/aaronsc32/spearman-rank-correlation), and I arrived at the following code (I am sharing it in case anyone finds it useful).
@tf.function
def get_rank(y_pred):
    """Return the 1-based ascending rank of each entry along the last axis.

    Applying argsort twice turns values into rank positions: the first call
    gives the sorting order, the second gives each element's place in it.
    """
    sorting_order = tf.argsort(y_pred, axis=-1, direction="ASCENDING")
    zero_based_rank = tf.argsort(sorting_order, axis=-1)
    # Shift by one so ranks start at 1 instead of 0.
    return zero_based_rank + 1
@tf.function
def sp_rank(x, y):
    """Return ``1 - r`` with ``r = covariance(x, y) / (stddev(x) * stddev(y))``.

    All statistics are taken along axis 0 with ``tfp.stats``.

    NOTE(review): a constant input has zero standard deviation, which makes
    the division undefined - confirm callers never pass one.
    """
    covariance_xy = tfp.stats.covariance(x, y, sample_axis=0, event_axis=None)
    spread_x = tfp.stats.stddev(x, sample_axis=0, keepdims=False, name=None)
    spread_y = tfp.stats.stddev(y, sample_axis=0, keepdims=False, name=None)
    correlation = covariance_xy / (spread_x * spread_y)
    # Subtract from 1 so that minimising the result maximises the correlation.
    return 1 - correlation
@tf.function
def spearman_correlation(y_true, y_pred):
    """Return a Spearman-based loss: the batch mean of ``1 - correlation``.

    Despite the name, the value returned is a loss to minimise, not the
    correlation itself.

    Args:
        y_true: Target values, one row per sample. They are passed to
            ``sp_rank`` unchanged, so the result is a true Spearman
            coefficient only if they already hold ranks - TODO confirm.
        y_pred: Predicted scores, one row per sample; ranked row by row.

    Returns:
        A scalar float32 tensor: the mean of the per-sample ``sp_rank`` values.
    """
    # First we obtain the ranking of the predicted values.
    # get_rank yields integer ranks (tf.argsort output); cast them so they
    # match the float32 output declared to map_fn and can be fed to the
    # floating-point statistics in sp_rank.
    y_pred_rank = tf.map_fn(
        lambda row: tf.cast(get_rank(row), tf.float32),
        y_pred,
        dtype=tf.float32,
    )

    # Spearman rank correlation between each pair of samples
    # (shapes as noted by the original author):
    #   Sample dim: (1, 8)
    #   Batch of samples dim: (None, 8), None = batch_size = 64
    #   Output dim: (batch_size,) = (64,)
    per_sample = tf.map_fn(
        lambda pair: sp_rank(pair[0], pair[1]),
        (y_true, y_pred_rank),
        dtype=tf.float32,
    )

    # Reduce to a single value.
    return tf.reduce_mean(per_sample)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With