Epoch 1/2 103/Unknown - 8s 80ms/step - loss: 0.0175 (model.fit() keeps running forever even after crossing the total number of training images)

I am developing an autoencoder on the CelebA dataset: https://www.kaggle.com/jessicali9530/celeba-dataset.

import tensorflow



from tensorflow.keras.preprocessing import image
# Scale 8-bit pixel values into the [0, 1] range.
data_gen = image.ImageDataGenerator(rescale=1.0/255)

batch_size = 20

# class_mode='input' makes each batch yield (images, images), i.e. the target
# is the input itself, which is what an autoencoder trains against.
# batch_size is passed explicitly: flow_from_directory defaults to 32, which
# would silently disagree with any step count derived from `batch_size`.
train_data_gen = data_gen.flow_from_directory(directory=train_dest_path,
                                              target_size=(256, 256),
                                              batch_size=batch_size,
                                              class_mode='input')
# target_size is spelled out (it equals the library default) so the test
# images visibly match the 256x256 input the model expects.
test_data_gen = data_gen.flow_from_directory(directory=test_dest_path,
                                             target_size=(256, 256),
                                             batch_size=batch_size,
                                             class_mode='input')
# autoencoder 
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam, SGD

# Input geometry: 256x256 images with 3 channels.
x = 256
y = 256
inchannel = 3

# Symbolic input tensor that the autoencoder graph is built on.
input_img = Input(shape=(x, y, inchannel))

def autoencoder_model(input_img):
    """Build the convolutional autoencoder graph on top of ``input_img``.

    The encoder applies two Conv2D + 2x2 max-pooling stages (32 and 64
    filters) followed by a 128-filter convolution. The decoder mirrors it
    with a 128-filter convolution, two 2x2 up-sampling stages, and a final
    3-filter convolution. All convolutions use 3x3 kernels, ReLU and
    'same' padding, so the two poolings are undone by the two up-samplings.

    Args:
        input_img: Keras tensor to build the graph on.

    Returns:
        The output tensor of the final 3-filter convolution.
    """
    kernel = (3, 3)
    window = (2, 2)

    # Encoder: each stage convolves, then halves height and width.
    h = Conv2D(32, kernel_size=kernel, activation='relu', padding='same')(input_img)
    h = MaxPooling2D(pool_size=window)(h)
    h = Conv2D(64, kernel_size=kernel, activation='relu', padding='same')(h)
    h = MaxPooling2D(pool_size=window)(h)
    h = Conv2D(128, kernel_size=kernel, activation='relu', padding='same')(h)

    # Decoder: each up-sampling doubles height and width again.
    h = Conv2D(128, kernel_size=kernel, activation='relu', padding='same')(h)
    h = UpSampling2D(size=window)(h)
    h = Conv2D(64, kernel_size=kernel, activation='relu', padding='same')(h)
    h = UpSampling2D(size=window)(h)

    # Output layer: 3 filters with ReLU, so values are non-negative and
    # unbounded above.
    return Conv2D(3, kernel_size=kernel, activation='relu', padding='same')(h)

# Wrap the autoencoder graph in a Model that maps input_img to its
# reconstruction.
model = Model(
    inputs=input_img,
    outputs=autoencoder_model(input_img),
)
# Mean squared error between output and target, optimised with Adam at its
# default settings.
model.compile(optimizer=Adam(), loss='mean_squared_error')

Model: "model"
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
conv2d (Conv2D)              (None, 256, 256, 32)      896       
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0         
conv2d_1 (Conv2D)            (None, 128, 128, 64)      18496     
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 64)        0         
conv2d_2 (Conv2D)            (None, 64, 64, 128)       73856     
conv2d_3 (Conv2D)            (None, 64, 64, 128)       147584    
up_sampling2d (UpSampling2D) (None, 128, 128, 128)     0         
conv2d_4 (Conv2D)            (None, 128, 128, 64)      73792     
up_sampling2d_1 (UpSampling2 (None, 256, 256, 64)      0         
conv2d_5 (Conv2D)            (None, 256, 256, 3)       1731      
Total params: 316,355
Trainable params: 316,355
Non-trainable params: 0

from tensorflow.keras.callbacks import ModelCheckpoint

epochs = 2
# The directory iterator yields batches indefinitely, so fit() has to be told
# how many batches make up one epoch; otherwise the progress bar reports
# "Unknown" and the first epoch never finishes.
# len() on the iterator is the number of batches in one pass over the data
# (samples / batch size, rounded up), based on the batch size the iterator
# itself was created with.
num_training_steps = len(train_data_gen)
checkpoint_directory = '/gdrive/My Drive/Colab Notebooks'

# Save the full model (not just the weights) at the end of every epoch.
checkpoint = ModelCheckpoint(checkpoint_directory, verbose=1, save_weights_only=False, save_freq='epoch')
model.fit(train_data_gen,
          epochs=epochs,
          steps_per_epoch=num_training_steps,
          verbose=1,
          callbacks=[checkpoint])


Epoch 1/2
    103/Unknown - 8s 80ms/step - loss: 0.0175

After spending a lot of time on this, I still don't understand why "Unknown" appears in the output of model.fit(). Also, model.fit() keeps running forever, even if I give flow_from_directory() only 1000 images from the training dataset: the progress counter goes above 1000, and I don't understand why it behaves like that.

1 Answer

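When calling model.fit with a generator as input, you have to set the steps_per_epoch argument. Keras cannot know how many batches a generator will yield (and in this case the generator loops forever), so set steps_per_epoch to the number of images in your dataset divided by your batch size, rounded up, for example steps_per_epoch=math.ceil(train_data_gen.samples / batch_size). Make sure the same batch size is also passed to flow_from_directory, whose default batch_size is 32.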

