I'm trying to learn how to use TensorBoard, and I followed the example code from the guide with a few modifications.
When I run the following code:
# Train for five epochs, validating on the test split and logging through a
# TensorBoard callback that writes to log_dir.
model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    validation_data=(x_test, y_test),
    callbacks=[
        tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
    ],
)
I got a ProfilerNotRunningError, along with the message "summary_ops_v2.py:1161] Trace already enabled".
Why is the trace already enabled, and how can I solve the problem?
I tried using a new log directory (I thought that would reset the trace), but the error happened again.
import tensorflow as tf
import datetime
# Load the MNIST train/test splits and divide the inputs by 255.0.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
def create_model():
    """Return an uncompiled Sequential model for inputs of shape (28, 28).

    The layer stack is Flatten -> Dense(512, relu) -> Dropout(0.2) ->
    Dense(10, softmax). Compiling is left to the caller.
    """
    layers = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.models.Sequential(layers)
# Build the network and configure the optimizer, loss and reported metric.
model = create_model()
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Every run logs into its own timestamped directory under logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

# NOTE(review): neither of the next two objects is referenced again in the
# visible code; the fit() call below constructs its own TensorBoard callback.
train_summary_writer = tf.summary.create_file_writer(log_dir)
tensorboard_callback = [
    tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
]

# Train for five epochs, validating on the test split after each epoch.
model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    validation_data=(x_test, y_test),
    callbacks=[
        tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
    ],
)
Epoch 1/5
W0619 17:02:10.383985 15544 summary_ops_v2.py:1161] Trace already enabled
32/60000 [..............................] - ETA: 15:05 - loss: 2.3275 - accuracy: 0.0625
---------------------------------------------------------------------------
ProfilerNotRunningError Traceback (most recent call last)
<ipython-input-23-0c608b0df5ad> in <module>
3 epochs=5,
4 validation_data=(x_test, y_test),
----> 5 callbacks=[tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)])
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
641 max_queue_size=max_queue_size,
642 workers=workers,
--> 643 use_multiprocessing=use_multiprocessing)
644
645 def evaluate(self,
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
662 validation_steps=validation_steps,
663 validation_freq=validation_freq,
--> 664 steps_name='steps_per_epoch')
665
666 def evaluate(self,
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
392 # Callbacks batch end.
393 batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
--> 394 callbacks._call_batch_hook(mode, 'end', batch_index, batch_logs)
395 progbar.on_batch_end(batch_index, batch_logs)
396
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in _call_batch_hook(self, mode, hook, batch, logs)
230 for callback in self.callbacks:
231 batch_hook = getattr(callback, hook_name)
--> 232 batch_hook(batch, logs)
233 self._delta_ts[hook_name].append(time.time() - t_before_callbacks)
234
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in on_train_batch_end(self, batch, logs)
513 """
514 # For backwards compatibility.
--> 515 self.on_batch_end(batch, logs=logs)
516
517 def on_test_batch_begin(self, batch, logs=None):
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in on_batch_end(self, batch, logs)
1600 self._total_batches_seen += 1
1601 if self._is_tracing:
-> 1602 self._log_trace()
1603 elif (not self._is_tracing and
1604 self._total_batches_seen == self._profile_batch - 1):
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in _log_trace(self)
1634 name='batch_%d' % self._total_batches_seen,
1635 step=self._total_batches_seen,
-> 1636 profiler_outdir=os.path.join(self.log_dir, 'train'))
1637 self._is_tracing = False
1638
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\summary_ops_v2.py in trace_export(name, step, profiler_outdir)
1216
1217 if profiler:
-> 1218 _profiler.save(profiler_outdir, _profiler.stop())
1219
1220 trace_off()
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\eager\profiler.py in stop()
101 if _profiler is None:
102 raise ProfilerNotRunningError(
--> 103 'Cannot stop profiling. No profiler is running.')
104 with c_api_util.tf_buffer() as buffer_:
105 pywrap_tensorflow.TFE_ProfilerSerializeToString(
ProfilerNotRunningError: Cannot stop profiling. No profiler is running.
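I was facing the same issue, and even customizing the log_dir option using datetime didn't work. Check this page, which helped me: https://github.com/tensorflow/tensorboard/issues/2819 — the callback only starts tracing when the batch counter reaches profile_batch - 1, so a value that training never reaches means the profiler is never started or stopped (the TensorBoard callback documentation also describes profile_batch=0 as the way to disable profiling). I just added 'profile_batch = 100000000' to the callback, as: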
TensorBoard(log_dir=log_dir, .., profile_batch = 100000000)
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With