Right now I'm using Keras with the TensorFlow backend. The dataset is stored in the tfrecords format. Training without any validation set works, but how do I also integrate my validation tfrecords?
Let's assume this code as a coarse skeleton:
def _ds_parser(proto):
    """Parse one serialized example into an ``(X, Y)`` pair of float32 tensors.

    Args:
        proto: A scalar string tensor holding one serialized example
            (one record of the TFRecord file).

    Returns:
        A tuple ``(X, Y)`` of flat float32 tensors decoded from the raw
        bytes stored under the ``'X'`` and ``'Y'`` features.
    """
    features = {
        'X': tf.FixedLenFeature([], tf.string),
        'Y': tf.FixedLenFeature([], tf.string),
    }
    parsed_features = tf.parse_single_example(proto, features)
    # Get the data back as float32. The feature spec only declares 'X' and
    # 'Y', so the input must be read from 'X' (a lookup of 'I' raises KeyError).
    x = tf.decode_raw(parsed_features['X'], tf.float32)
    y = tf.decode_raw(parsed_features['Y'], tf.float32)
    return x, y
def datasetLoader(dataSetPath, batchSize):
    """Build an endlessly repeating, batched input pipeline from a TFRecord file.

    Args:
        dataSetPath: Path (or paths) handed to ``tf.data.TFRecordDataset``.
        batchSize: Number of examples per batch.

    Returns:
        A tuple ``(X, Y)`` of tensors yielding the next batch each time they
        are evaluated: ``X`` reshaped to ``[-1, 66, 198, 3]`` and ``Y``
        reshaped to ``[-1, 1]``.
    """
    dataset = tf.data.TFRecordDataset(dataSetPath)
    # Apply the parser to every record; num_parallel_calls sets how many
    # records are parsed in parallel.
    dataset = dataset.map(_ds_parser, num_parallel_calls=8)
    # This dataset will go on forever, so the consumer must bound each epoch
    # by a step count.
    dataset = dataset.repeat()
    # Set the batch size
    dataset = dataset.batch(batchSize)
    # Create an iterator
    iterator = dataset.make_one_shot_iterator()
    # Create the tf representation of the iterator's next element
    X, Y = iterator.get_next()
    # Bring the data back in shape. The input tensor is X; the previously
    # referenced name `I` is undefined here and raised NameError.
    X = tf.reshape(X, [-1, 66, 198, 3])
    Y = tf.reshape(Y, [-1, 1])
    return X, Y
# Build the training input tensors from the TFRecord file, in batches of 264.
X, Y = datasetLoader('PATH-TO-DATASET', 264)
# The model input layer is bound directly to the dataset tensor X.
model_X = keras.layers.Input(tensor=X)
model_output = keras.layers.Conv2D(filters=16, kernel_size=3, strides=1, padding='valid', activation='relu',
input_shape=(-1, 66, 198, 3))(model_X)
model_output = keras.layers.Dense(units=1, activation='linear')(model_output)
model = keras.models.Model(inputs=model_X, outputs=model_output)
# The labels are supplied as the tensor Y through target_tensors, which is why
# fit() below receives no x/y arguments.
model.compile(
optimizer=optimizer,
loss='mean_squared_error',
target_tensors=[Y]
)
# NOTE(review): `parallel_model`, `optimizer`, `epochs` and `stepPerEpoch` are not
# defined in this skeleton; presumably they are set up elsewhere. Confirm.
parallel_model.fit(
epochs=epochs,
steps_per_epoch=stepPerEpoch,
shuffle=False,
# Placeholder from the question: what to pass here is exactly what is being asked.
validation_data=????
)
The question is, how to pass the validation set?
I have found something related here: gcloud-ml-engine-with-keras, but I'm not sure how to fit this into my problem.
First, you don't need to use an iterator. A Keras model will accept a dataset object instead of separate data/labels parameters, and will handle the iteration itself. You only need to specify steps_per_epoch, hence you need to know the dataset size. If you have separate tfrecords files for train/validation, then you can just create a dataset object for each and pass the validation one to validation_data. If you have one file and you'd like to split it, you can do:
# Read the single TFRecord file, then split it by element count.
# NOTE(review): `size` is not defined in this snippet; the caller must choose it.
dataset = tf.data.TFRecordDataset('file.tfrecords')
# The first `size` elements become the training split.
dataset_train = dataset.take(size)
# Everything after the first `size` elements becomes the validation split.
dataset_val = dataset.skip(size)
...
OK, I found the answer myself: basically, it's done by simply changing import keras to import tensorflow.keras as keras. tf.keras allows you to pass the validation set as tensors, too:
# One loader call per TFRecord file: training tensors and validation tensors.
X, Y = datasetLoader('PATH-TO-DATASET', 264)
X_val, Y_val = datasetLoader('PATH-TO-VALIDATION-DATASET', 264)
# ... define and compile the model like above
parallel_model.fit(
epochs= epochs,
steps_per_epoch= STEPS_PER_EPOCH,
shuffle= False,
# The validation inputs and targets are passed as a tuple of tensors.
validation_data= (X_val, Y_val),
# The loader repeats its dataset forever, so the validation pass is bounded by a step count.
validation_steps= STEPS_PER_VALIDATION_EPOCH
)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With