Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Can I export a tensorflow summary to CSV?

Tags:

csv

tensorflow

Is there a way to extract scalar summaries to CSV (preferably from within tensorboard) from tfevents files?

Example code

The following code generates tfevent files in a summary_dir within the same directory. Suppose you let it run and you find something interesting. You want to get the raw data for further investigation. How would you do that?

#!/usr/bin/env python """A very simple MNIST classifier.""" import argparse import sys  from tensorflow.examples.tutorials.mnist import input_data  import tensorflow as tf ce_with_logits = tf.nn.softmax_cross_entropy_with_logits  FLAGS = None   def inference(x):     """     Build the inference graph.      Parameters     ----------     x : placeholder      Returns     -------     Output tensor with the computed logits.     """     W = tf.Variable(tf.zeros([784, 10]))     b = tf.Variable(tf.zeros([10]))     y = tf.matmul(x, W) + b     return y   def loss(logits, labels):     """     Calculate the loss from the logits and the labels.      Parameters     ----------     logits : Logits tensor, float - [batch_size, NUM_CLASSES].     labels : Labels tensor, int32 - [batch_size]     """     cross_entropy = tf.reduce_mean(ce_with_logits(labels=labels,                                                   logits=logits))     return cross_entropy   def training(loss, learning_rate=0.5):     """     Set up the training Ops.      Parameters     ----------     loss : Loss tensor, from loss().     learning_rate : The learning rate to use for gradient descent.      Returns     -------     train_op: The Op for training.     """     optimizer = tf.train.GradientDescentOptimizer(learning_rate)     train_step = optimizer.minimize(loss)     return train_step   def main(_):     # Import data     mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)      # Create the model     x = tf.placeholder(tf.float32, [None, 784])     y = inference(x)      # Define loss and optimizer     y_ = tf.placeholder(tf.float32, [None, 10])     loss_ = loss(logits=y, labels=y_)     train_step = training(loss_)      # Test trained model     correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))      with tf.name_scope('accuracy'):         tf.summary.scalar('accuracy', accuracy)     merged = tf.summary.merge_all()      sess = tf.InteractiveSession()     train_writer = tf.summary.FileWriter('summary_dir/train', sess.graph)     test_writer = tf.summary.FileWriter('summary_dir/test', sess.graph)     tf.global_variables_initializer().run()      for train_step_i in range(100000):         if train_step_i % 100 == 0:             summary, acc = sess.run([merged, accuracy],                                     feed_dict={x: mnist.test.images,                                                y_: mnist.test.labels})             test_writer.add_summary(summary, train_step_i)             summary, acc = sess.run([merged, accuracy],                                     feed_dict={x: mnist.train.images,                                                y_: mnist.train.labels})             train_writer.add_summary(summary, train_step_i)         batch_xs, batch_ys = mnist.train.next_batch(100)         sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})      print(sess.run(accuracy, feed_dict={x: mnist.test.images,                                         y_: mnist.test.labels}))  if __name__ == '__main__':     parser = argparse.ArgumentParser()     parser.add_argument('--data_dir',                         type=str,                         default='/tmp/tensorflow/mnist/input_data',                         help='Directory for storing input data')     FLAGS, unparsed = parser.parse_known_args()     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 
like image 801
Martin Thoma Avatar asked Feb 20 '17 22:02

Martin Thoma


People also ask

How do I export a dataset to CSV?

Go to File > Save As. Click Browse. In the Save As dialog box, under Save as type box, choose the text file format for the worksheet; for example, click Text (Tab delimited) or CSV (Comma delimited).


2 Answers

While the answer here is as requested within tensorboard it only allows to download a csv for a single run of a single tag. If you have for example 10 tags and 20 runs (what is not at all much) you would need to do the above step 200 times (that alone will probably take you more than a hour). If now you for some reason would like to actually do something with the data for all runs for a single tag you would need to write some weird CSV accumulation script or copy everything by hand (what will probably cost you more than a day).

Therefore I would like to add a solution that extracts a CSV file for every tag with all runs contained. Column headers are the run path names and row indices are the run step numbers.

import os import numpy as np import pandas as pd  from collections import defaultdict from tensorboard.backend.event_processing.event_accumulator import EventAccumulator   def tabulate_events(dpath):     summary_iterators = [EventAccumulator(os.path.join(dpath, dname)).Reload() for dname in os.listdir(dpath)]      tags = summary_iterators[0].Tags()['scalars']      for it in summary_iterators:         assert it.Tags()['scalars'] == tags      out = defaultdict(list)     steps = []      for tag in tags:         steps = [e.step for e in summary_iterators[0].Scalars(tag)]          for events in zip(*[acc.Scalars(tag) for acc in summary_iterators]):             assert len(set(e.step for e in events)) == 1              out[tag].append([e.value for e in events])      return out, steps   def to_csv(dpath):     dirs = os.listdir(dpath)      d, steps = tabulate_events(dpath)     tags, values = zip(*d.items())     np_values = np.array(values)      for index, tag in enumerate(tags):         df = pd.DataFrame(np_values[index], index=steps, columns=dirs)         df.to_csv(get_file_path(dpath, tag))   def get_file_path(dpath, tag):     file_name = tag.replace("/", "_") + '.csv'     folder_path = os.path.join(dpath, 'csv')     if not os.path.exists(folder_path):         os.makedirs(folder_path)     return os.path.join(folder_path, file_name)   if __name__ == '__main__':     path = "path_to_your_summaries"     to_csv(path) 

My solution builds upon: https://stackoverflow.com/a/48774926/2230045


EDIT:

I created a more sophisticated version and released it on GitHub: https://github.com/Spenhouet/tensorboard-aggregator

This version aggregates multiple tensorboard runs and is able to save the aggregates to a new tensorboard summary or as a .csv file.

like image 69
Spenhouet Avatar answered Oct 28 '22 19:10

Spenhouet


Just check the "Data download links" option on the upper-left in TensorBoard, and then click on the "CSV" button that will appear under your scalar summary.

enter image description here

like image 24
jabalazs Avatar answered Oct 28 '22 17:10

jabalazs