I generated a pascal voc 2007 tfrecords file using the code in tensorflow object detection API. I use tf.contrib.data.Dataset
API to read data from tfrecords. I tried mehtod without tf.contrib.data.Dataset
API, and the code can run without any error, but when changed to tf.contrib.data.Dataset
API it can not work correctly.
The code without tf.contrib.data.Dataset
:
import tensorflow as tf
if __name__ == '__main__':
slim_example_decoder = tf.contrib.slim.tfexample_decoder
features = {"image/height": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/width": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/filename": tf.FixedLenFeature((), tf.string, default_value=""),
"image/source_id": tf.FixedLenFeature((), tf.string, default_value=""),
"image/key/sha256": tf.FixedLenFeature((), tf.string, default_value=""),
"image/encoded": tf.FixedLenFeature((), tf.string, default_value=""),
"image/format": tf.FixedLenFeature((), tf.string, default_value="jpeg"),
"image/object/bbox/xmin": tf.VarLenFeature(tf.float32),
"image/object/bbox/xmax": tf.VarLenFeature(tf.float32),
"image/object/bbox/ymin": tf.VarLenFeature(tf.float32),
"image/object/bbox/ymax": tf.VarLenFeature(tf.float32),
"image/object/class/text": tf.VarLenFeature(tf.string),
"image/object/class/label": tf.VarLenFeature(tf.int64),
"image/object/difficult": tf.VarLenFeature(tf.int64),
"image/object/truncated": tf.VarLenFeature(tf.int64),
"image/object/view": tf.VarLenFeature(tf.int64)}
items_to_handlers = {
'image': slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
'source_id': (
slim_example_decoder.Tensor('image/source_id')),
'key': (
slim_example_decoder.Tensor('image/key/sha256')),
'filename': (
slim_example_decoder.Tensor('image/filename')),
# Object boxes and classes.
'groundtruth_boxes': (
slim_example_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
'groundtruth_classes': (
slim_example_decoder.Tensor('image/object/class/label')),
'groundtruth_difficult': (
slim_example_decoder.Tensor('image/object/difficult')),
'image/object/truncated': (
slim_example_decoder.Tensor('image/object/truncated')),
'image/object/view': (
slim_example_decoder.Tensor('image/object/view')),
}
decoder = slim_example_decoder.TFExampleDecoder(features, items_to_handlers)
keys = decoder.list_items()
for example in tf.python_io.tf_record_iterator(
"/home/aurora/workspaces/data/tfrecords_data/oxford_pet/pet_train.record"):
serialized_example = tf.reshape(example, shape=[])
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
tensor_dict['image'].set_shape([None, None, 3])
print(tensor_dict)
the output of above code is :
{'image': <tf.Tensor 'case/If_1/Merge:0' shape=(?, ?, 3) dtype=uint8>,
'filename': <tf.Tensor 'Reshape_2:0' shape=() dtype=string>,
'groundtruth_boxes': <tf.Tensor 'transpose:0' shape=(?, 4) dtype=float32>,
'key': <tf.Tensor 'Reshape_5:0' shape=() dtype=string>,
'image/object/truncated': <tf.Tensor 'SparseToDense:0' shape=(?,) dtype=int64>,
'groundtruth_classes': <tf.Tensor 'SparseToDense_2:0' shape=(?,) dtype=int64>,
'image/object/view': <tf.Tensor 'SparseToDense_1:0' shape=(?,) dtype=int64>,
'source_id': <tf.Tensor 'Reshape_6:0' shape=() dtype=string>,
'groundtruth_difficult': <tf.Tensor 'SparseToDense_3:0' shape=(?,) dtype=int64>}
...
Code with tf.contrib.data.Dataset
:
import tensorflow as tf
from tensorflow.contrib.data import Iterator
slim_example_decoder = tf.contrib.slim.tfexample_decoder
flags = tf.app.flags
flags.DEFINE_string('data_dir',
'/home/aurora/workspaces/data/tfrecords_data/voc_dataset/trainval.tfrecords',
'tfrecords file output path')
flags.DEFINE_integer('batch_size', 32, 'training batch size')
flags.DEFINE_integer('capacity', 10000, 'training batch size')
FLAGS = flags.FLAGS
features = {"image/height": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/width": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/filename": tf.FixedLenFeature((), tf.string, default_value=""),
"image/source_id": tf.FixedLenFeature((), tf.string, default_value=""),
"image/key/sha256": tf.FixedLenFeature((), tf.string, default_value=""),
"image/encoded": tf.FixedLenFeature((), tf.string, default_value=""),
"image/format": tf.FixedLenFeature((), tf.string, default_value="jpeg"),
"image/object/bbox/xmin": tf.VarLenFeature(tf.float32),
"image/object/bbox/xmax": tf.VarLenFeature(tf.float32),
"image/object/bbox/ymin": tf.VarLenFeature(tf.float32),
"image/object/bbox/ymax": tf.VarLenFeature(tf.float32),
"image/object/class/text": tf.VarLenFeature(tf.string),
"image/object/class/label": tf.VarLenFeature(tf.int64),
"image/object/difficult": tf.VarLenFeature(tf.int64),
"image/object/truncated": tf.VarLenFeature(tf.int64),
"image/object/view": tf.VarLenFeature(tf.int64)
}
items_to_handlers = {
'image': slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
'source_id': (
slim_example_decoder.Tensor('image/source_id')),
'key': (
slim_example_decoder.Tensor('image/key/sha256')),
'filename': (
slim_example_decoder.Tensor('image/filename')),
# Object boxes and classes.
'groundtruth_boxes': (
slim_example_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
'groundtruth_classes': (
slim_example_decoder.Tensor('image/object/class/label')),
'groundtruth_difficult': (
slim_example_decoder.Tensor('image/object/difficult')),
'image/object/truncated': (
slim_example_decoder.Tensor('image/object/truncated')),
'image/object/view': (
slim_example_decoder.Tensor('image/object/view')),
}
decoder = slim_example_decoder.TFExampleDecoder(features, items_to_handlers)
keys = decoder.list_items()
def _parse_function_train(example):
serialized_example = tf.reshape(example, shape=[])
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
tensor_dict['image'].set_shape([None, None, 3])
print(tensor_dict)
return tensor_dict
if __name__ == '__main__':
train_dataset = tf.contrib.data.TFRecordDataset(FLAGS.data_dir)
train_dataset = train_dataset.map(_parse_function_train)
train_dataset = train_dataset.repeat(1)
train_dataset = train_dataset.batch(FLAGS.batch_size)
train_dataset = train_dataset.shuffle(buffer_size=FLAGS.capacity)
iterator = Iterator.from_structure(train_dataset.output_types,
train_dataset.output_shapes)
next_element = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
sess = tf.Session()
sess.run(training_init_op)
counter = 0
while True:
try:
sess.run(next_element)
counter += 1
except tf.errors.OutOfRangeError:
print('End of training data in step %d' %counter)
break
When run the above code, it report the following errors:
2017-10-09 23:41:43.488439: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Name: <unknown>, Key: image/object/view, Index: 0. Data types don't match. Expected type: int64
2017-10-09 23:41:43.488554: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Name: <unknown>, Key: image/object/view, Index: 0. Data types don't match. Expected type: int64
[[Node: ParseSingleExample/ParseExample/ParseExample = ParseExample[Ndense=7, Nsparse=9, Tdense=[DT_STRING, DT_STRING, DT_STRING, DT_INT64, DT_STRING, DT_STRING, DT_INT64], dense_shapes=[[], [], [], [], [], [], []], sparse_types=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_STRING, DT_INT64, DT_INT64, DT_INT64]](ParseSingleExample/ExpandDims, ParseSingleExample/ParseExample/ParseExample/names, ParseSingleExample/ParseExample/ParseExample/sparse_keys_0, ParseSingleExample/ParseExample/ParseExample/sparse_keys_1, ParseSingleExample/ParseExample/ParseExample/sparse_keys_2, ParseSingleExample/ParseExample/ParseExample/sparse_keys_3, ParseSingleExample/ParseExample/ParseExample/sparse_keys_4, ParseSingleExample/ParseExample/ParseExample/sparse_keys_5, ParseSingleExample/ParseExample/ParseExample/sparse_keys_6, ParseSingleExample/ParseExample/ParseExample/sparse_keys_7, ParseSingleExample/ParseExample/ParseExample/sparse_keys_8, ParseSingleExample/ParseExample/ParseExample/dense_keys_0, ParseSingleExample/ParseExample/ParseExample/dense_keys_1, ParseSingleExample/ParseExample/ParseExample/dense_keys_2, ParseSingleExample/ParseExample/ParseExample/dense_keys_3, ParseSingleExample/ParseExample/ParseExample/dense_keys_4, ParseSingleExample/ParseExample/ParseExample/dense_keys_5, ParseSingleExample/ParseExample/ParseExample/dense_keys_6, ParseSingleExample/ParseExample/Reshape, ParseSingleExample/ParseExample/Reshape_1, ParseSingleExample/ParseExample/Reshape_2, ParseSingleExample/ParseExample/Reshape_3, ParseSingleExample/ParseExample/Reshape_4, ParseSingleExample/ParseExample/Reshape_5, ParseSingleExample/ParseExample/Reshape_6)]]
Traceback (most recent call last):
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
File "/usr/software/anaconda3/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Name: <unknown>, Key: image/object/view, Index: 0. Data types don't match. Expected type: int64
[[Node: ParseSingleExample/ParseExample/ParseExample = ParseExample[Ndense=7, Nsparse=9, Tdense=[DT_STRING, DT_STRING, DT_STRING, DT_INT64, DT_STRING, DT_STRING, DT_INT64], dense_shapes=[[], [], [], [], [], [], []], sparse_types=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_STRING, DT_INT64, DT_INT64, DT_INT64]](ParseSingleExample/ExpandDims, ParseSingleExample/ParseExample/ParseExample/names, ParseSingleExample/ParseExample/ParseExample/sparse_keys_0, ParseSingleExample/ParseExample/ParseExample/sparse_keys_1, ParseSingleExample/ParseExample/ParseExample/sparse_keys_2, ParseSingleExample/ParseExample/ParseExample/sparse_keys_3, ParseSingleExample/ParseExample/ParseExample/sparse_keys_4, ParseSingleExample/ParseExample/ParseExample/sparse_keys_5, ParseSingleExample/ParseExample/ParseExample/sparse_keys_6, ParseSingleExample/ParseExample/ParseExample/sparse_keys_7, ParseSingleExample/ParseExample/ParseExample/sparse_keys_8, ParseSingleExample/ParseExample/ParseExample/dense_keys_0, ParseSingleExample/ParseExample/ParseExample/dense_keys_1, ParseSingleExample/ParseExample/ParseExample/dense_keys_2, ParseSingleExample/ParseExample/ParseExample/dense_keys_3, ParseSingleExample/ParseExample/ParseExample/dense_keys_4, ParseSingleExample/ParseExample/ParseExample/dense_keys_5, ParseSingleExample/ParseExample/ParseExample/dense_keys_6, ParseSingleExample/ParseExample/Reshape, ParseSingleExample/ParseExample/Reshape_1, ParseSingleExample/ParseExample/Reshape_2, ParseSingleExample/ParseExample/Reshape_3, ParseSingleExample/ParseExample/Reshape_4, ParseSingleExample/ParseExample/Reshape_5, ParseSingleExample/ParseExample/Reshape_6)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?,?,?,3], [?,?], [?,?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_INT64, DT_STRING, DT_STRING], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/aurora/workspaces/PycharmProjects/object_detection_models/datasets/voc_dataset/voc_tfrecords_decode_test.py", line 83, in <module>
sess.run(next_element)
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/usr/software/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Name: <unknown>, Key: image/object/view, Index: 0. Data types don't match. Expected type: int64
[[Node: ParseSingleExample/ParseExample/ParseExample = ParseExample[Ndense=7, Nsparse=9, Tdense=[DT_STRING, DT_STRING, DT_STRING, DT_INT64, DT_STRING, DT_STRING, DT_INT64], dense_shapes=[[], [], [], [], [], [], []], sparse_types=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_STRING, DT_INT64, DT_INT64, DT_INT64]](ParseSingleExample/ExpandDims, ParseSingleExample/ParseExample/ParseExample/names, ParseSingleExample/ParseExample/ParseExample/sparse_keys_0, ParseSingleExample/ParseExample/ParseExample/sparse_keys_1, ParseSingleExample/ParseExample/ParseExample/sparse_keys_2, ParseSingleExample/ParseExample/ParseExample/sparse_keys_3, ParseSingleExample/ParseExample/ParseExample/sparse_keys_4, ParseSingleExample/ParseExample/ParseExample/sparse_keys_5, ParseSingleExample/ParseExample/ParseExample/sparse_keys_6, ParseSingleExample/ParseExample/ParseExample/sparse_keys_7, ParseSingleExample/ParseExample/ParseExample/sparse_keys_8, ParseSingleExample/ParseExample/ParseExample/dense_keys_0, ParseSingleExample/ParseExample/ParseExample/dense_keys_1, ParseSingleExample/ParseExample/ParseExample/dense_keys_2, ParseSingleExample/ParseExample/ParseExample/dense_keys_3, ParseSingleExample/ParseExample/ParseExample/dense_keys_4, ParseSingleExample/ParseExample/ParseExample/dense_keys_5, ParseSingleExample/ParseExample/ParseExample/dense_keys_6, ParseSingleExample/ParseExample/Reshape, ParseSingleExample/ParseExample/Reshape_1, ParseSingleExample/ParseExample/Reshape_2, ParseSingleExample/ParseExample/Reshape_3, ParseSingleExample/ParseExample/Reshape_4, ParseSingleExample/ParseExample/Reshape_5, ParseSingleExample/ParseExample/Reshape_6)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?,?,?,3], [?,?], [?,?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_INT64, DT_STRING, DT_STRING], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
The code to generate tfrecords file can reference create_pascal_tf_record.py.
Dataset. prefetch transformation. It can be used to decouple the time when data is produced from the time when data is consumed. In particular, the transformation uses a background thread and an internal buffer to prefetch elements from the input dataset ahead of the time they are requested.
Sparse tensors enable efficient storage and processing of tensors that contain a lot of zero values. Sparse tensors are used extensively in encoding schemes like TF-IDF as part of data pre-processing in NLP applications and for pre-processing images with a lot of dark pixels in computer vision applications.
A sparse tensor is a dataset in which most of the entries are zero, one such example would be a large diagonal matrix. (which has many zero elements). It does not store the whole values of the tensor object but stores the non-zero values and the corresponding coordinates of them.
EDIT (2018/01/25): tf.SparseTensor
support was added to tf.data
in TensorFlow 1.5. The code in the question should work in TensorFlow 1.5 or later.
Up to TF 1.4, the tf.contrib.data
API did not support tf.SparseTensor
objects in dataset elements. There are a couple of workarounds:
(Harder, but probably faster.) If a tf.SparseTensor
object st
represents a variable-length list of features, you may be able to return st.values
instead of st
from the map()
function. Note that you would probably then need to pad the results using Dataset.padded_batch()
instead of Dataset.batch()
.
(Easier, but probably slower.) In the _parse_function_train()
function, iterate over tensor_dict
and produce a new version where any tf.SparseTensor
objects have been converted to a tf.Tensor
using tf.serialize_sparse()
. When you
# NOTE: You could probably infer these from `keys`.
sparse_keys = set()
def _parse_function_train(example):
serialized_example = tf.reshape(example, shape=[])
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
tensor_dict['image'].set_shape([None, None, 3])
rewritten_tensor_dict = {}
for key, value in tensor_dict.items():
if isinstance(value, tf.SparseTensor):
rewritten_tensor_dict[key] = tf.serialize_sparse(value)
sparse_keys.add(key)
else:
rewritten_tensor_dict[key] = value
return rewritten_tensor_dict
Then, after you get the next_element
dictionary from iterator.get_next()
, you can reverse this conversion using tf.deserialize_many_sparse()
:
next_element = iterator.get_next()
for key in sparse_keys:
next_element[key] = tf.deserialize_many_sparse(key)
In addition to tf.SparseTensor
, there is tf.scatter_nd
that seems to achieve the same result except you may have to recover the indices later.
These two code blocks achieves same result
indices = tf.concat([xy_indices, z_indices], axis=-1)
values = tf.concat([bboxes, objectness, one_hot], axis=1)
SparseTensor = tf.SparseTensor(indices= indices,
values = values,
dense_shape =[output_size, output_size,
len(anchors), 4 + 1 + num_classes])
DenseTensor = tf.scatter_nd(indices = indices,
updates = values
shape = [output_size, output_size,
len(anchors),4 + 1 + num_classes])
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With