Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

TensorBoard error "Trace already enabled" - how to solve it?

I'm trying to learn how to use TensorBoard and followed the example code from this guide, with a few modifications.

When I run the code

# Train for 5 epochs, validating on (x_test, y_test), with a TensorBoard callback attached.
model.fit(x=x_train, 
          y=y_train, 
          epochs=5, 
          validation_data=(x_test, y_test), 
          callbacks=[tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)])

, I get a ProfilerNotRunningError ("Cannot stop profiling. No profiler is running."), preceded by the warning "summary_ops_v2.py:1161] Trace already enabled".

Why is the trace already enabled? How can I solve the problem?

I tried to solve it by using a new log directory (I thought that would reset the trace), but the error happened again.

    import tensorflow as tf
    import datetime

    # Fetch the MNIST train/test splits through the Keras dataset helper.
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Divide both image arrays by 255.0; the label arrays are left untouched.
    x_train = x_train / 255.0
    x_test = x_test / 255.0


    def create_model():
      """Build an uncompiled Sequential model for 28x28 inputs.

      Layer stack, in order:
      Flatten -> Dense(512, relu) -> Dropout(0.2) -> Dense(10, softmax).
      """
      layers = tf.keras.layers
      stack = [
        layers.Flatten(input_shape=(28, 28)),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(10, activation='softmax'),
      ]
      return tf.keras.models.Sequential(stack)
    # Build the network and compile it with the Adam optimizer, sparse
    # categorical cross-entropy loss, and accuracy as the reported metric.
    model = create_model()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Timestamped log directory: a new path each time this line is executed.
    log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    # NOTE(review): train_summary_writer is not referenced again in this snippet.
    train_summary_writer = tf.summary.create_file_writer(log_dir)
    # NOTE(review): tensorboard_callback (a one-element list) is never passed to
    # fit(); the fit() call below constructs a second, separate TensorBoard
    # callback with the same arguments.
    tensorboard_callback = [tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)]


    # Train for 5 epochs, validating on (x_test, y_test). No profile_batch is
    # passed to the callback, so its default profiling setting applies; the
    # traceback shown after this snippet fails inside the callback's
    # trace/profiler export on a batch-end hook.
    model.fit(x=x_train, 
              y=y_train, 
              epochs=5, 
              validation_data=(x_test, y_test), 
              callbacks=[tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)])
  • Error output:
Epoch 1/5
W0619 17:02:10.383985 15544 summary_ops_v2.py:1161] Trace already enabled
   32/60000 [..............................] - ETA: 15:05 - loss: 2.3275 - accuracy: 0.0625
---------------------------------------------------------------------------
ProfilerNotRunningError                   Traceback (most recent call last)
<ipython-input-23-0c608b0df5ad> in <module>
      3           epochs=5,
      4           validation_data=(x_test, y_test),
----> 5           callbacks=[tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)])

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    641         max_queue_size=max_queue_size,
    642         workers=workers,
--> 643         use_multiprocessing=use_multiprocessing)
    644 
    645   def evaluate(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    662         validation_steps=validation_steps,
    663         validation_freq=validation_freq,
--> 664         steps_name='steps_per_epoch')
    665 
    666   def evaluate(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
    392         # Callbacks batch end.
    393         batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
--> 394         callbacks._call_batch_hook(mode, 'end', batch_index, batch_logs)
    395         progbar.on_batch_end(batch_index, batch_logs)
    396 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in _call_batch_hook(self, mode, hook, batch, logs)
    230     for callback in self.callbacks:
    231       batch_hook = getattr(callback, hook_name)
--> 232       batch_hook(batch, logs)
    233     self._delta_ts[hook_name].append(time.time() - t_before_callbacks)
    234 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in on_train_batch_end(self, batch, logs)
    513     """
    514     # For backwards compatibility.
--> 515     self.on_batch_end(batch, logs=logs)
    516 
    517   def on_test_batch_begin(self, batch, logs=None):

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in on_batch_end(self, batch, logs)
   1600     self._total_batches_seen += 1
   1601     if self._is_tracing:
-> 1602       self._log_trace()
   1603     elif (not self._is_tracing and
   1604           self._total_batches_seen == self._profile_batch - 1):

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\callbacks.py in _log_trace(self)
   1634             name='batch_%d' % self._total_batches_seen,
   1635             step=self._total_batches_seen,
-> 1636             profiler_outdir=os.path.join(self.log_dir, 'train'))
   1637       self._is_tracing = False
   1638 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\summary_ops_v2.py in trace_export(name, step, profiler_outdir)
   1216 
   1217   if profiler:
-> 1218     _profiler.save(profiler_outdir, _profiler.stop())
   1219 
   1220   trace_off()

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\eager\profiler.py in stop()
    101     if _profiler is None:
    102       raise ProfilerNotRunningError(
--> 103           'Cannot stop profiling. No profiler is running.')
    104     with c_api_util.tf_buffer() as buffer_:
    105       pywrap_tensorflow.TFE_ProfilerSerializeToString(

ProfilerNotRunningError: Cannot stop profiling. No profiler is running.
like image 908
2 1 Avatar asked Dec 14 '22 11:12

2 1


1 Answer

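I was facing the same issue, and customizing the log_dir option using datetime didn't help either. This GitHub issue helped me: https://github.com/tensorflow/tensorboard/issues/2819. I just added profile_batch=100000000 to the callback. The callback only starts its profiler trace when the number of batches seen reaches profile_batch - 1, so with a value this large the trace is never started and the failing trace export never runs (per the Keras documentation, profile_batch=0 disables profiling outright):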

TensorBoard(log_dir=log_dir, .., profile_batch = 100000000)

like image 193
Kanishk Mair Avatar answered Jan 01 '23 08:01

Kanishk Mair