I want to use some of these metrics when training my neural network:
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.CategoricalAccuracy(name='acc'),
    keras.metrics.AUC(name='auc'),
]

BATCH_SIZE = 1024
SHUFFLE_BUFFER_SIZE = 4000

train_dataset = tf.data.Dataset.from_tensor_slices((sent_vectors, labels))
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embed_dim)))
for units in [256, 256]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=METRICS)

model.fit(
    train_dataset,
    epochs=100)
But I get the error Shapes (None, 4) and (None, 1) are incompatible. I believe this is because I am doing multiclass classification on 4 classes, but the metrics are calculated as if for binary classification. How do I adjust my code for multiclass classification?
Update: I am interested in gathering the metrics during the learning process, as in the TensorFlow Imbalanced Classification tutorial, not just at the end of the fitting process.
Additional info:
My input data are numpy arrays with shapes sent_vectors.shape = (number_examples, 65, 300) and labels.shape = (number_examples, 1). I have 4 labels: 0-3.
Stacktrace:
ValueError    Traceback (most recent call last)
<ipython-input-46-2b73afaf7726> in <module>
1 model.fit(
2 train_dataset,
----> 3 epochs=10)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
501 # This is the first call of __call__, so we have to initialize.
502 initializer_map = object_identity.ObjectIdentityDictionary()
--> 503 self._initialize(args, kwds, add_initializers_to=initializer_map)
504 finally:
505 # At this point we know that the initialization is complete (or less
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
406 self._concrete_stateful_fn = (
407 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 408 *args, **kwds))
409
410 def invalid_creator_scope(*unused_args, **unused_kwds):
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1846 if self.input_signature:
1847 args, kwargs = None, None
-> 1848 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1849 return graph_function
1850
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2148 graph_function = self._function_cache.primary.get(cache_key, None)
2149 if graph_function is None:
-> 2150 graph_function = self._create_graph_function(args, kwargs)
2151 self._function_cache.primary[cache_key] = graph_function
2152 return graph_function, args, kwargs
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2039 arg_names=arg_names,
2040 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041 capture_by_value=self._capture_by_value),
2042 self._function_attributes,
2043 # Tell the ConcreteFunction to clean up its graph once it goes out of
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
356 # __wrapped__ allows AutoGraph to swap in a converted function. We give
357 # the function a weak reference to itself to avoid a reference cycle.
--> 358 return weak_wrapped_fn().__wrapped__(*args, **kwds)
359 weak_wrapped_fn = weakref.ref(wrapped_fn)
360
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
71 strategy = distribution_strategy_context.get_strategy()
72 outputs = strategy.experimental_run_v2(
---> 73 per_replica_function, args=(model, x, y, sample_weights))
74 # Out of PerReplica outputs reduce or pick values to return.
75 all_outputs = dist_utils.unwrap_output_dict(
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
758 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
759 convert_by_default=False)
--> 760 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
761
762 def reduce(self, reduce_op, value, axis):
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1785 kwargs = {}
1786 with self._container_strategy().scope():
-> 1787 return self._call_for_each_replica(fn, args, kwargs)
1788
1789 def _call_for_each_replica(self, fn, args, kwargs):
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2130 self._container_strategy(),
2131 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2132 return fn(*args, **kwargs)
2133
2134 def _reduce_to(self, reduce_op, value, destinations):
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
262 y,
263 sample_weights=sample_weights,
--> 264 output_loss_metrics=model._output_loss_metrics)
265
266 if reset_metrics:
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
313 outs = [outs]
314 metrics_results = _eager_metrics_fn(
--> 315 model, outs, targets, sample_weights=sample_weights, masks=masks)
316 total_loss = nest.flatten(total_loss)
317 return {'total_loss': total_loss,
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _eager_metrics_fn(model, outputs, targets, sample_weights, masks)
72 masks=masks,
73 return_weighted_and_unweighted_metrics=True,
---> 74 skip_target_masks=model._prepare_skip_target_masks())
75
76 # Add metric results from the `add_metric` metrics.
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _handle_metrics(self, outputs, targets, skip_target_masks, sample_weights, masks, return_weighted_metrics, return_weighted_and_unweighted_metrics)
2061 metric_results.extend(
2062 self._handle_per_output_metrics(self._per_output_metrics[i],
-> 2063 target, output, output_mask))
2064 if return_weighted_and_unweighted_metrics or return_weighted_metrics:
2065 metric_results.extend(
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _handle_per_output_metrics(self, metrics_dict, y_true, y_pred, mask, weights)
2012 with K.name_scope(metric_name):
2013 metric_result = training_utils.call_metric_function(
-> 2014 metric_fn, y_true, y_pred, weights=weights, mask=mask)
2015 metric_results.append(metric_result)
2016 return metric_results
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py in call_metric_function(metric_fn, y_true, y_pred, weights, mask)
1065
1066 if y_pred is not None:
-> 1067 return metric_fn(y_true, y_pred, sample_weight=weights)
1068 # `Mean` metric only takes a single value.
1069 return metric_fn(y_true, sample_weight=weights)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/metrics.py in __call__(self, *args, **kwargs)
191 from tensorflow.python.keras.distribute import distributed_training_utils # pylint:disable=g-import-not-at-top
192 return distributed_training_utils.call_replica_local_fn(
--> 193 replica_local_fn, *args, **kwargs)
194
195 @property
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/distribute/distributed_training_utils.py in call_replica_local_fn(fn, *args, **kwargs)
1133 with strategy.scope():
1134 return strategy.extended.call_for_each_replica(fn, args, kwargs)
-> 1135 return fn(*args, **kwargs)
1136
1137
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/metrics.py in replica_local_fn(*args, **kwargs)
174 def replica_local_fn(*args, **kwargs):
175 """Updates the state of the metric in a replica-local context."""
--> 176 update_op = self.update_state(*args, **kwargs) # pylint: disable=not-callable
177 with ops.control_dependencies([update_op]):
178 result_t = self.result() # pylint: disable=not-callable
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/metrics_utils.py in decorated(metric_obj, *args, **kwargs)
73
74 with tf_utils.graph_context_for_symbolic_tensors(*args, **kwargs):
---> 75 update_op = update_state_fn(*args, **kwargs)
76 if update_op is not None: # update_op will be None in eager execution.
77 metric_obj.add_update(update_op)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/metrics.py in update_state(self, y_true, y_pred, sample_weight)
881 y_pred,
882 thresholds=self.thresholds,
--> 883 sample_weight=sample_weight)
884
885 def result(self):
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/metrics_utils.py in update_confusion_matrix_variables(variables_to_update, y_true, y_pred, thresholds, top_k, class_id, sample_weight)
276 y_true], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
277 sample_weight)
--> 278 y_pred.shape.assert_is_compatible_with(y_true.shape)
279
280 if not any(
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/framework/tensor_shape.py in assert_is_compatible_with(self, other)
1113 """
1114 if not self.is_compatible_with(other):
-> 1115 raise ValueError("Shapes %s and %s are incompatible" % (self, other))
1116
1117 def most_specific_compatible_shape(self, other):
ValueError: Shapes (None, 4) and (None, 1) are incompatible
The most commonly used metrics for multi-class classification are F1 score, average accuracy, and log-loss.
Accuracy is one of the most popular metrics in multi-class classification, and it is computed directly from the confusion matrix: the numerator is the sum of true positive and true negative elements, and the denominator is the sum of all entries of the confusion matrix. In the multi-class case this reduces to the diagonal of the confusion matrix divided by its total sum.
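As a quick illustration (a minimal sketch with a made-up 4-class confusion matrix), that is just the trace divided by the total count:

import numpy as np

# Hypothetical 4-class confusion matrix: rows are true classes, columns are predictions
cm = np.array([[10,  2,  1,  0],
               [ 3,  8,  2,  1],
               [ 0,  1, 12,  2],
               [ 1,  0,  2,  9]])

# Accuracy = correctly classified samples (diagonal) / all samples
accuracy = np.trace(cm) / cm.sum()
print(accuracy)  # ~0.722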
Since the OP edited the question, I have edited my solution as well, with the intention of providing a more compact answer:
Import and define everything we need later:
import numpy as np
from numpy import random
import tensorflow as tf
import keras
import keras.backend as K

tf.config.experimental_run_functions_eagerly(False)
VERBOSE = 1

keras.backend.clear_session()
sess = tf.compat.v1.Session()
sess.as_default()

### Just for dummy data
sent_vectors = random.rand(100, 65, 300).astype(np.float32)  # (number_examples, 65, 300)
labels = random.randint(0, 4, (100, 1))                      # integer labels 0-3
labels = np.squeeze(labels, 1)
NUM_CLASSES = np.max(labels) + 1
BATCH_SIZE = 10
SHUFFLE_BUFFER_SIZE = 200
embed_dim = 8
### Just for dummy data
Create a custom metric:
class CategoricalTruePositives(tf.keras.metrics.Metric):
    def __init__(self, num_classes, batch_size,
                 name="categorical_true_positives", **kwargs):
        super(CategoricalTruePositives, self).__init__(name=name, **kwargs)
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.cat_true_positives = self.add_weight(name="ctp", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Count predictions whose argmax matches the one-hot label's argmax
        y_true = K.argmax(y_true, axis=-1)
        y_pred = K.argmax(y_pred, axis=-1)
        y_true = K.flatten(y_true)
        true_poss = K.sum(K.cast((K.equal(y_true, y_pred)), dtype=tf.float32))
        self.cat_true_positives.assign_add(true_poss)

    def result(self):
        return self.cat_true_positives
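As a quick sanity check, the metric can be exercised outside of a model. This is a standalone sketch with dummy one-hot tensors (assuming eager execution, the TF 2.x default):

# Standalone sanity check with dummy one-hot tensors (not part of the training flow)
m = CategoricalTruePositives(num_classes=4, batch_size=3)
m.update_state(tf.one_hot([0, 1, 2], depth=4),   # true labels
               tf.one_hot([0, 1, 3], depth=4))   # predictions
print(m.result().numpy())  # 2.0 -- two of the three predictions match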
First, compile and fit your model using only the metrics suitable for multi-class evaluation, including our custom metric:
Important note:
The OP provided labels with shape (number_examples, 1) and originally used loss='sparse_categorical_crossentropy'. However, the built-in metric keras.metrics.CategoricalAccuracy he wanted to use is not compatible with sparse_categorical_crossentropy, so I used categorical_crossentropy instead, i.e. the one-hot version of the original loss, which is appropriate for keras.metrics.CategoricalAccuracy. Thus I one-hot encoded labels for the loss function.
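For completeness, a minimal sketch of the alternative: if you would rather keep the integer labels, pair sparse_categorical_crossentropy with the sparse metric variant when compiling the model built below, instead of one-hot encoding:

# Alternative sketch: keep integer labels and use the sparse metric variant
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='acc')])

Continuing with the one-hot approach: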
METRICS = [
    tf.keras.metrics.CategoricalAccuracy(name='acc'),
    CategoricalTruePositives(NUM_CLASSES, BATCH_SIZE),
]
# Transform labels to onehot encoding for metric CategoricalAccuracy
labels = tf.compat.v1.one_hot(labels, depth=NUM_CLASSES)
train_dataset = tf.data.Dataset.from_tensor_slices((sent_vectors, labels))
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embed_dim)))
for units in [256, 256]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=METRICS)  # METRICS is already a list; do not wrap it in another list

model.fit(
    train_dataset,
    epochs=10,
    verbose=VERBOSE,
    shuffle=True)  # note: shuffle is ignored when x is a tf.data.Dataset
Predict and postprocess the results:
result = model.predict(train_dataset)
preds = K.argmax(result, axis=-1)
preds = K.one_hot(preds, NUM_CLASSES)

print("\nTrue positives per classes:")
for i in range(4):
    m = tf.keras.metrics.TruePositives(name='tp')
    m.update_state(labels[:, i], preds[:, i])
    print("Class {} true positives: {}".format(i, m.result()))
Out:
Epoch 1/10
10/10 [==============================] - 3s 328ms/step - loss: 1.4226 - acc: 0.2300 - categorical_true_positives: 23.0000
Epoch 2/10
10/10 [==============================] - 0s 21ms/step - loss: 1.3876 - acc: 0.2900 - categorical_true_positives: 29.0000
Epoch 3/10
10/10 [==============================] - 0s 20ms/step - loss: 1.3721 - acc: 0.2800 - categorical_true_positives: 28.0000
Epoch 4/10
10/10 [==============================] - 0s 20ms/step - loss: 1.3628 - acc: 0.2900 - categorical_true_positives: 29.0000
Epoch 5/10
10/10 [==============================] - 0s 22ms/step - loss: 1.3447 - acc: 0.3800 - categorical_true_positives: 38.0000
Epoch 6/10
10/10 [==============================] - 0s 22ms/step - loss: 1.3187 - acc: 0.3800 - categorical_true_positives: 38.0000
Epoch 7/10
10/10 [==============================] - 0s 22ms/step - loss: 1.2653 - acc: 0.4300 - categorical_true_positives: 43.0000
Epoch 8/10
10/10 [==============================] - 0s 21ms/step - loss: 1.1760 - acc: 0.6000 - categorical_true_positives: 60.0000
Epoch 9/10
10/10 [==============================] - 0s 22ms/step - loss: 1.1809 - acc: 0.4600 - categorical_true_positives: 46.0000
Epoch 10/10
10/10 [==============================] - 0s 22ms/step - loss: 1.2739 - acc: 0.3800 - categorical_true_positives: 38.0000
True positives per classes:
Class 0 true positives: 16.0
Class 1 true positives: 0.0
Class 2 true positives: 5.0
Class 3 true positives: 7.0
Note:
The sum of the per-class true positives does not equal the final training result. That is because the training metric and the prediction ran over different orderings of the data: train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE) reshuffles on every iteration, so the predictions returned by model.predict(train_dataset) are no longer aligned with the original order of labels.
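One way to avoid this mismatch (a minimal sketch): predict on a separate, unshuffled dataset so the prediction order stays aligned with labels:

# Sketch: use an unshuffled dataset for prediction so row order matches `labels`
eval_dataset = tf.data.Dataset.from_tensor_slices((sent_vectors, labels)).batch(BATCH_SIZE)
result = model.predict(eval_dataset)
preds = K.one_hot(K.argmax(result, axis=-1), NUM_CLASSES)

With aligned tensors, per-class precision and recall can be gathered the same way. As a hedged sketch, tf.keras.metrics.Precision also accepts a class_id argument (it appears in the update_confusion_matrix_variables signature in the traceback above), which evaluates a single column of the one-hot labels and predictions:

# Sketch: per-class precision via the built-in class_id argument
for i in range(4):
    p = tf.keras.metrics.Precision(class_id=i, name='precision_{}'.format(i))
    p.update_state(labels, preds)
    print("Class {} precision: {}".format(i, p.result()))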