I am building a model with multiple inputs as shown in pyimagesearch, however I can't load all images into RAM and I am trying to create a generator that uses flow_from_directory
and get from a CSV file all the extra attributes for each image being processed.
Question: How do I get the attributes from the CSV to correspond with the images in each batch from the image generator?
def get_combined_generator(images_dir, csv_dir, split, *args):
"""
Creates train/val generators on images and csv data.
Arguments:
images_dir : string
Path to a directory with subdirectories for each class.
csv_dir : string
Path to a directory containing train/val csv files with extra attributes.
split : string
Current split being used (train, val or test)
"""
img_width, img_height, batch_size = args
datagen = ImageDataGenerator(
rescale=1. / 255)
generator = datagen.flow_from_directory(
f'{images_dir}/{split}',
target_size=(img_width, img_height),
batch_size=batch_size,
shuffle=True,
class_mode='categorical')
df = pd.read_csv(f'{csv_dir}/{split}.csv', index_col='image')
def my_generator(image_gen, data):
while True:
i = image_gen.batch_index
batch = image_gen.batch_size
row = data[i * batch:(i + 1) * batch]
images, labels = image_gen.next()
yield [images, row], labels
csv_generator = my_generator(generator, df)
return csv_generator
I found a solution based on Luke's answer using a custom generator
import random
import pandas as pd
import numpy as np
from glob import glob
from keras.preprocessing import image as krs_image
# Create the arguments for image preprocessing
data_gen_args = dict(
horizontal_flip=True,
brightness_range=[0.5, 1.5],
shear_range=10,
channel_shift_range=50,
rescale=1. / 255,
)
# Create an empty data generator
datagen = ImageDataGenerator()
# Read the image list and csv
image_file_list = glob(f'{images_dir}/{split}/**/*.JPG', recursive=True)
df = pd.read_csv(f'{csv_dir}/{split}.csv', index_col=csv_data[0])
random.shuffle(image_file_list)
def custom_generator(images_list, dataframe, batch_size):
i = 0
while True:
batch = {'images': [], 'csv': [], 'labels': []}
for b in range(batch_size):
if i == len(images_list):
i = 0
random.shuffle(images_list)
# Read image from list and convert to array
image_path = images_list[i]
image_name = os.path.basename(image_path).replace('.JPG', '')
image = krs_image.load_img(image_path, target_size=(img_height, img_width))
image = datagen.apply_transform(image, data_gen_args)
image = krs_image.img_to_array(image)
# Read data from csv using the name of current image
csv_row = dataframe.loc[image_name, :]
label = csv_row['class']
csv_features = csv_row.drop(labels='class')
batch['images'].append(image)
batch['csv'].append(csv_features)
batch['labels'].append(label)
i += 1
batch['images'] = np.array(batch['images'])
batch['csv'] = np.array(batch['csv'])
# Convert labels to categorical values
batch['labels'] = np.eye(num_classes)[batch['labels']]
yield [batch['images'], batch['csv']], batch['labels']
I would suggest creating a custom generator given this relatively specific case. Something like the following (modified from a similar answer here) should suffice:
import os
import random
import pandas as pd
def generator(image_dir, csv_dir, batch_size):
i = 0
image_file_list = os.listdir(image_dir)
while True:
batch_x = {'images': list(), 'other_feats': list()} # use a dict for multiple inputs
batch_y = list()
for b in range(batch_size):
if i == len(image_file_list):
i = 0
random.shuffle(image_file_list)
sample = image_file_list[i]
image_file_path = sample[0]
csv_file_path = os.path.join(csv_dir,
os.path.basename(image_file_path).replace('.png', '.csv'))
i += 1
image = preprocess_image(cv2.imread(image_file_path))
csv_file = pd.read_csv(csv_file_path)
other_feat = preprocess_feats(csv_file)
batch_x['images'].append(image)
batch_x['other_feats'].append(other_feat)
batch_y.append(csv_file.loc[image_name, :]['class'])
batch_x['images'] = np.array(batch_x['images']) # convert each list to array
batch_x['other_feats'] = np.array(batch_x['other_feats'])
batch_y = np.eye(num_classes)[batch['labels']]
yield batch_x, batch_y
Then, you can use Keras's fit_generator() function to train your model.
Obviously, this assumes you have csv
files with the same names as your image files, and that you have some custom preprocessing
functions for images and csv
files.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With