If you don't specify padding_values, then padded_batch will automatically pad with 0. However, if you want a different value such as -1, you can't just set padding_values=-1. You need to pass in a sequence covering every slot that needs to be padded.
However, I'm working with a dataset which has random values for the array lengths, so I can't really do that, since I don't know by how many numbers I'll need to pad.
Since padded_batch will automatically fill the rest of the values with 0, I hope there's some way to make it do that with a different value such as -1.
Here is a minimal example
import math
import numpy as np
import tensorflow as tf
# Ragged example data: the rows of `cells` have different lengths, so the
# array is built explicitly with dtype=object (recent NumPy versions refuse
# to create ragged arrays implicitly).
cells = np.array([[0, 1, 2, 3], [2, 3, 4], [3, 6, 5, 4, 3], [3, 9]], dtype=object)
mells = np.array([[0], [2], [3], [9]])
print(cells)

# Write one tf.train.Example per row: a single-element 'num_value' feature
# and a variable-length 'list_value' feature.
writer = tf.python_io.TFRecordWriter('test.tfrecords')
for index in range(mells.shape[0]):
    example = tf.train.Example(features=tf.train.Features(feature={
        'num_value': tf.train.Feature(int64_list=tf.train.Int64List(value=mells[index])),
        'list_value': tf.train.Feature(int64_list=tf.train.Int64List(value=cells[index]))
    }))
    writer.write(example.SerializeToString())
writer.close()
# Generate samples with a batch size of 2: start by reading the records
# back from the TFRecord file.
filenames = ["test.tfrecords"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_proto):
    """Parse one serialized tf.train.Example into two dense tensors.

    Both features are declared as variable-length int64 features, so they
    are parsed as sparse tensors and converted to dense ones before being
    returned.

    Args:
        example_proto: a serialized tf.train.Example record.

    Returns:
        A tuple ``(num_value, list_value)`` of dense tensors.
    """
    keys_to_features = {
        'num_value': tf.VarLenFeature(tf.int64),
        'list_value': tf.VarLenFeature(tf.int64),
    }
    parsed_features = tf.parse_single_example(example_proto, keys_to_features)
    return (tf.sparse.to_dense(parsed_features['num_value']),
            tf.sparse.to_dense(parsed_features['list_value']))
# Parse the record into tensors.
dataset = dataset.map(_parse_function)
# Shuffle the dataset
dataset = dataset.shuffle(buffer_size=1)
# Repeat the input indefinitely
dataset = dataset.repeat()
# Generate batches
# NOTE: this is the call that raises the TypeError shown in the traceback:
# padding_values is a bare int, while padded_shapes is a 2-element sequence.
dataset = dataset.padded_batch(2, padded_shapes=([None],[None]), padding_values=-1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
i, data = iterator.get_next()
This is the error message
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-8-65494605bf11> in <module>()
14 dataset = dataset.repeat()
15 # Generate batches
---> 16 dataset = dataset.padded_batch(2, padded_shapes=([None],[None]), padding_values=-1)
17 # Create a one-shot iterator
18 iterator = dataset.make_one_shot_iterator()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/data/ops/dataset_ops.py in padded_batch(self, batch_size, padded_shapes, padding_values, drop_remainder)
943 """
944 return PaddedBatchDataset(self, batch_size, padded_shapes, padding_values,
--> 945 drop_remainder)
946
947 def map(self, map_func, num_parallel_calls=None):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, batch_size, padded_shapes, padding_values, drop_remainder)
2526 self._padding_values = nest.map_structure_up_to(
2527 input_dataset.output_shapes, _padding_value_to_tensor, padding_values,
-> 2528 input_dataset.output_types)
2529 self._drop_remainder = ops.convert_to_tensor(
2530 drop_remainder, dtype=dtypes.bool, name="drop_remainder")
/usr/local/lib/python3.6/dist-packages/tensorflow/python/data/util/nest.py in map_structure_up_to(shallow_tree, func, *inputs)
465 raise ValueError("Cannot map over no sequences")
466 for input_tree in inputs:
--> 467 assert_shallow_structure(shallow_tree, input_tree)
468
469 # Flatten each input separately, apply the function to corresponding elements,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/data/util/nest.py in assert_shallow_structure(shallow_tree, input_tree, check_types)
299 raise TypeError(
300 "If shallow structure is a sequence, input must also be a sequence. "
--> 301 "Input has type: %s." % type(input_tree))
302
303 if check_types and not isinstance(input_tree, type(shallow_tree)):
TypeError: If shallow structure is a sequence, input must also be a sequence. Input has type: <class 'int'>.
The problem line is
# Generate batches
dataset = dataset.padded_batch(2, padded_shapes=([None],[None]), padding_values=-1)
If you remove padding_values, it generates batches padded with zeros without any problem:
# Fetch and print two consecutive batches from the iterator.
with tf.Session() as sess:
    print(sess.run([i, data]))
    print(sess.run([i, data]))
[array([[0],
[2]]), array([[0, 1, 2, 3],
[2, 3, 4, 0]])]
[array([[3],
[9]]), array([[3, 6, 5, 4, 3],
[3, 9, 0, 0, 0]])]
To iterate over the dataset several times, use .repeat(). We can enumerate each batch by using either Python's enumerate or a built-in method. The former produces a tensor, which is recommended.
Creates a Dataset comprising lines from one or more text files.
from_tensor_slices() removes the first dimension and uses it as the dataset dimension.
Prefetching. Prefetching overlaps the preprocessing and model execution of a training step. While the model is executing training step s, the input pipeline is reading the data for step s+1. Doing so reduces the step time to the maximum (as opposed to the sum) of the training and the time it takes to extract the data ...
You should change padding_values so that it is a tuple with one padding value per component:
# Replacement for the padded_batch call in the question's pipeline:
# padding_values is a tuple with one scalar int64 tensor per component.
dataset = dataset.padded_batch(
    2,
    padded_shapes=([None], [None]),
    padding_values=(tf.constant(-1, dtype=tf.int64),
                    tf.constant(-1, dtype=tf.int64)))
# `i` and `data` come from the iterator created on this dataset, as in the
# question's pipeline.
with tf.Session() as sess:
    print(sess.run([i, data]))
    print(sess.run([i, data]))
[array([[0],
[2]]), array([[ 0, 1, 2, 3],
[ 2, 3, 4, -1]])]
[array([[3],
[9]]), array([[ 3, 6, 5, 4, 3],
[ 3, 9, -1, -1, -1]])]
Explanation
Every entry given in padding_values is the padding value to use for the corresponding component. This means that padding_values must have the same length as padded_shapes. Here, the second entry is used to pad the variable-length list_value arrays, while the first entry corresponds to num_value, whose arrays all have the same length and therefore never need padding. For example, with different values for the two components:
# Same call with a different padding value per component: -1 for the first
# component and -2 for the second.
dataset = dataset.padded_batch(
    2,
    padded_shapes=([None], [None]),
    padding_values=(tf.constant(-1, dtype=tf.int64),
                    tf.constant(-2, dtype=tf.int64)))
# `i` and `data` come from the iterator created on this dataset, as in the
# question's pipeline.
with tf.Session() as sess:
    print(sess.run([i, data]))
    print(sess.run([i, data]))
[array([[0],
[2]]), array([[ 0, 1, 2, 3],
[ 2, 3, 4, -2]])]
[array([[3],
[9]]), array([[ 3, 6, 5, 4, 3],
[ 3, 9, -2, -2, -2]])]
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With