I am using tf and Keras to create a cycleGAN following the approach used here and here
The network structure is quite complex: there are many models nested in each other.
I am not able to save and reload the trained model.
After the training is complete, I used
generator_AtoB.save("models/generator_AtoB.h5")
and
pickle.dump(generator_AtoB, saveFile)
to save the model: this causes no errors and a file is created in the path provided.
By checking with h5dump | less
I can see that the .h5 file contains data.
Reloading the model later by using keras:
generator_AtoB = load_model("models/generator_AtoB.h5")
or pickle:
pickle.load(saveFile)
Causes an error:
Traceback (most recent call last):
File "test_model.py", line 14, in <module>
generator_AtoB = pickle.load(saveFile)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/network.py", line 1266, in __setstate__
model = saving.unpickle_model(state)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 435, in unpickle_model
return _deserialize_model(f)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 274, in _deserialize_model
reshape=False)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
weights = convert_nested_model(weights)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 658, in convert_nested_model
original_backend=original_backend))
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
weights = convert_nested_model(weights)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 670, in convert_nested_model
original_backend=original_backend))
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
weights = convert_nested_model(weights)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 658, in convert_nested_model
original_backend=original_backend))
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 800, in preprocess_weights_for_loading
elif layer_weights_shape != weights[0].shape:
IndexError: list index out of range
The error is the same whether I use keras.models.load_model
or pickle.load
Since the same error occurs whether I save with Keras or with pickle, I suspect this is not a save/load problem as such, but rather that something inside the model is being saved incorrectly. However, I cannot find any reference to this anywhere.
Thank you for your help
Full code ahead:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# https://hardikbansal.github.io/CycleGANBlog/
import sys
import time
import numpy as np
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, multiply, add as kadd
from keras.layers import Conv2D, BatchNormalization, Conv2DTranspose
from keras.layers import LeakyReLU, ReLU
from keras.layers import Activation
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
# --- Network width -----------------------------------------------------------
ngf = 32  # filter count of the generator's first convolution
ndf = 64  # filter count of the discriminator's first convolution

# --- Image geometry ----------------------------------------------------------
IMG_WIDTH = 256   # input image width, in pixels
IMG_HEIGHT = 256  # input image height, in pixels
IMG_DEPTH = 3     # colour channels (RGB)
INPUT_SHAPE = (IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH)

# --- Training schedule -------------------------------------------------------
BATCH_SIZE = 1                # images per training batch
pool_size = 50                # pool size (the training loop reads FAKE_POOL_SIZE instead)
FAKE_POOL_SIZE = 25           # how many recent generated images are kept per domain
DISCRIMINATOR_ITERATIONS = 1  # discriminator updates per training iteration
ITERATIONS = 5000             # upper bound of the training iteration counter
SAVE_IMAGES_INTERVAL = 25     # a preview image is written every this many iterations
def resnet_block(num_features, input_shape=None):
    """Build a residual block as a standalone Keras ``Model``.

    The block applies Conv -> BatchNorm -> ReLU twice and adds the result
    back onto its own input (a skip connection).

    Args:
        num_features: Number of filters of both 3x3 convolutions.
        input_shape: Shape of one input sample (without the batch axis).
            Defaults to ``(64, 64, num_features)`` so that the skip
            connection and the convolution output have matching channel
            counts. For ``num_features == 256`` this is the
            ``(64, 64, 256)`` shape that was previously hard-coded.

    Returns:
        A ``Model`` mapping the block input to ``input + conv_stack(input)``.
    """
    if input_shape is None:
        # The add below needs the input and the conv output to have the same
        # shape, so the channel count follows num_features by default.
        input_shape = (64, 64, num_features)

    block = Sequential()
    for _ in range(2):
        block.add(Conv2D(num_features, kernel_size=3, strides=1, padding="SAME"))
        block.add(BatchNormalization())
        block.add(ReLU())

    resblock_input = Input(shape=input_shape)
    conv_model = block(resblock_input)
    # Skip connection: element-wise sum of the input and the transformed input.
    _sum = kadd([resblock_input, conv_model])
    composed = Model(inputs=[resblock_input], outputs=_sum)
    return composed
def discriminator(f=4, name=None):
    """Build the discriminator as a Keras ``Model``.

    Four stride-2 convolutions (each followed by batch normalisation and a
    LeakyReLU with slope 0.2) widen the feature maps from ``ndf`` to
    ``ndf * 8`` filters; a final stride-1 convolution reduces them to a
    single-channel decision map. No activation is applied to the output.

    Args:
        f: Kernel size used by every convolution.
        name: Optional name given to the returned ``Model``. ``None`` lets
            Keras pick an automatic name.

    Returns:
        A ``Model`` taking an ``INPUT_SHAPE`` image and returning the
        decision map.
    """
    d = Sequential()
    # Filter multipliers of the four down-sampling stages: ndf, 2*ndf, 4*ndf, 8*ndf.
    for stage, multiplier in enumerate((1, 2, 4, 8), start=1):
        d.add(Conv2D(ndf * multiplier, kernel_size=f, strides=2, padding="SAME",
                     name="discr_conv2d_%d" % stage))
        d.add(BatchNormalization())
        d.add(LeakyReLU(0.2))
    d.add(Conv2D(1, kernel_size=f, strides=1, padding="SAME", name="discr_conv2d_out"))

    model_input = Input(shape=INPUT_SHAPE)
    decision = d(model_input)
    # Forward ``name`` (previously accepted but ignored) so the model can be
    # told apart by name, the same way generator() does.
    composed = Model(model_input, decision, name=name)
    return composed
def generator(name=None):
    """Assemble the encoder / residual-transform / decoder generator.

    Args:
        name: Optional name for the returned ``Model``.

    Returns:
        A ``Model`` that maps an ``INPUT_SHAPE`` image to a 3-channel image.
    """
    net = Sequential()

    # Encoder: one stride-1 convolution, then two stride-2 convolutions.
    net.add(Conv2D(ngf, kernel_size=7, strides=1, padding='SAME',
                   input_shape=INPUT_SHAPE, name="encoder_0"))
    net.add(Conv2D(64*2, kernel_size=3, strides=2, padding='SAME',
                   name="encoder_1"))
    net.add(Conv2D(64*4, kernel_size=3, padding="SAME", strides=2))

    # Transform: five residual blocks, each with 64*4 features.
    for _ in range(5):
        net.add(resnet_block(64*4))

    # Decoder: two stride-2 transposed convolutions, then a 7x7 convolution
    # producing the 3 output channels.
    for _ in range(2):
        net.add(Conv2DTranspose(ngf*2, kernel_size=3, strides=2, padding="SAME"))
    net.add(Conv2D(3, kernel_size=7, strides=1, padding="SAME"))

    # Wrap the stack in a functional Model so it can carry the given name.
    image_in = Input(shape=INPUT_SHAPE)
    image_out = net(image_in)
    return Model(image_in, image_out, name=name)
def fromMinusOneToOne(x):
    """Rescale ``x`` linearly so that 0 maps to -1 and 255 maps to 1."""
    scaled = x / 127.5
    return scaled - 1
def toRGB(x):
    """Rescale ``x`` linearly so that -1 maps to 0 and 1 maps to 255."""
    shifted = 1 + x
    return shifted * 127.5
def createImageGenerator( subset="train", data_type="A", batch_size=1, pp=None):
    """Create a directory-backed image iterator for one data split.

    Images are read from ``data/vangogh2photo/<subset><data_type>`` with a
    random zoom of up to 10% and a fixed seed of 1.

    Args:
        subset: Split prefix of the directory name (e.g. ``"train"``).
        data_type: Domain suffix of the directory name (e.g. ``"A"``).
        batch_size: Number of images per yielded batch.
        pp: Optional preprocessing function forwarded to
            ``ImageDataGenerator``.

    Returns:
        The iterator returned by ``flow_from_directory`` (no labels, since
        ``class_mode`` is ``None``).
    """
    augmenter = ImageDataGenerator(preprocessing_function=pp, zoom_range=0.1)

    source_dir = 'data/vangogh2photo/' + (subset + data_type)
    print(source_dir)

    return augmenter.flow_from_directory(
        source_dir,
        class_mode=None,
        batch_size=batch_size,
        seed=1)
if __name__ == '__main__':
    # Script entry point: builds the two generators and two discriminators,
    # wires them into a generator trainer and a discriminator trainer, runs
    # the training loop and finally saves both generators.
    import os  # local import: only needed by the script entry point

    # Create the output directories up front so a missing directory cannot
    # make a save fail after the whole training run.
    os.makedirs("data/generated", exist_ok=True)
    os.makedirs("models", exist_ok=True)

    generator_AtoB = generator(name="gen_A")
    generator_BtoA = generator(name="gen_B")
    discriminator_A = discriminator(name="disc_A")
    discriminator_B = discriminator(name="disc_B")

    # A -> B -> A cycle.
    input_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_A")
    generated_B = generator_AtoB(input_A)
    discriminator_generated_B = discriminator_B(generated_B)
    cyc_A = generator_BtoA(generated_B)

    # B -> A -> B cycle.
    input_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_B")
    generated_A = generator_BtoA(input_B)
    discriminator_generated_A = discriminator_A(generated_A)
    cyc_B = generator_AtoB(generated_A)

    ### GENERATOR TRAINING
    optim = keras.optimizers.Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=1e-08)

    # The cyclic (reconstruction) losses are weighted more heavily than the
    # discriminator-based losses.
    cyclic_weight_multipier = 10

    # NOTE(review): the discriminators are not marked non-trainable before
    # this model is compiled, so generator updates presumably also change
    # discriminator weights -- confirm whether they should be frozen here.
    generator_trainer = Model([input_A, input_B],
                              [discriminator_generated_B, discriminator_generated_A,
                               cyc_A, cyc_B])

    losses = ["MSE", "MSE", "MAE", "MAE"]
    losses_weights = [1, 1, cyclic_weight_multipier, cyclic_weight_multipier]

    generator_trainer.compile(optimizer=optim, loss=losses, loss_weights=losses_weights)

    ### DISCRIMINATOR TRAINING
    disc_optim = keras.optimizers.Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=1e-08)

    real_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_real_A")
    real_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_real_B")

    # These rebind generated_A / generated_B to plain inputs: the
    # discriminator trainer is fed already-generated images.
    generated_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_gen_A")
    generated_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_gen_B")

    discriminator_real_A = discriminator_A(real_A)
    discriminator_generated_A = discriminator_A(generated_A)
    discriminator_real_B = discriminator_B(real_B)
    discriminator_generated_B = discriminator_B(generated_B)

    disc_trainer = Model([real_A, generated_A, real_B, generated_B],
                         [discriminator_real_A,
                          discriminator_generated_A,
                          discriminator_real_B,
                          discriminator_generated_B])
    disc_trainer.compile(optimizer=disc_optim, loss='MSE')

    #########
    ##
    ## TRAINING
    ##
    #########

    fake_A_pool = []
    fake_B_pool = []

    # Target decision maps, shaped like one batch of discriminator output.
    ones = np.ones((BATCH_SIZE,) + generator_trainer.output_shape[0][1:])
    zeros = np.zeros((BATCH_SIZE,) + generator_trainer.output_shape[0][1:])

    # NOTE(review): no preprocessing function (pp) is passed, so
    # fromMinusOneToOne is never applied to the images -- confirm intended.
    train_A_image_generator = createImageGenerator("train", "A")
    train_B_image_generator = createImageGenerator("train", "B")

    for it in range(1, ITERATIONS):
        start = time.time()
        print("\nIteration %d " % it)
        sys.stdout.flush()

        # THIS ONLY WORKS IF BATCH SIZE == 1
        real_A = train_A_image_generator.next()
        real_B = train_B_image_generator.next()

        fake_A_pool.extend(generator_BtoA.predict(real_B))
        fake_B_pool.extend(generator_AtoB.predict(real_A))

        # Keep only the most recent FAKE_POOL_SIZE generated images.
        fake_A_pool = fake_A_pool[-FAKE_POOL_SIZE:]
        fake_B_pool = fake_B_pool[-FAKE_POOL_SIZE:]

        # Draw the discriminator's fake batch at random from the pools.
        fake_A = [fake_A_pool[ind] for ind in np.random.choice(len(fake_A_pool), size=(BATCH_SIZE,), replace=False)]
        fake_B = [fake_B_pool[ind] for ind in np.random.choice(len(fake_B_pool), size=(BATCH_SIZE,), replace=False)]
        fake_A = np.array(fake_A)
        fake_B = np.array(fake_B)

        # Discriminator targets: 0 for real images, 0.9 for generated ones.
        for _ in range(0, DISCRIMINATOR_ITERATIONS):
            _, D_loss_real_A, D_loss_fake_A, D_loss_real_B, D_loss_fake_B = \
                disc_trainer.train_on_batch(
                    [real_A, fake_A, real_B, fake_B],
                    [zeros, ones * 0.9, zeros, ones * 0.9])

        print("=====")
        print("Discriminator loss:")
        print("Real A: %s, Fake A: %s || Real B: %s, Fake B: %s " % ( D_loss_real_A, D_loss_fake_A, D_loss_real_B, D_loss_fake_B))

        # Generator targets: discriminator output 0 (the "real" label used
        # above) plus reconstruction of the original images.
        _, G_loss_fake_B, G_loss_fake_A, G_loss_rec_A, G_loss_rec_B = \
            generator_trainer.train_on_batch(
                [real_A, real_B],
                [zeros, zeros, real_A, real_B])

        print("=====")
        print("Generator loss:")
        print("Fake B: %s, Cyclic A: %s || Fake A: %s, Cyclic B: %s " % (G_loss_fake_B, G_loss_rec_A, G_loss_fake_A, G_loss_rec_B))

        end = time.time()
        print("Iteration time: %s s" % (end-start))
        sys.stdout.flush()

        if not (it % SAVE_IMAGES_INTERVAL):
            imgA = real_A
            imga2b = generator_AtoB.predict(imgA)
            imga2b2a = generator_BtoA.predict(imga2b)

            imgB = real_B
            imgb2a = generator_BtoA.predict(imgB)
            imgb2a2b = generator_AtoB.predict(imgb2a)

            # Join the six images along axis 2 into one preview strip.
            strip = np.concatenate([imgA, imga2b, imga2b2a, imgB, imgb2a, imgb2a2b], axis=2)
            # The generator output is unbounded; clip to the valid pixel
            # range first so out-of-range values do not wrap in the uint8 cast.
            c = np.clip(strip, 0, 255).astype(np.uint8)

            preview = Image.fromarray(c[0])
            preview.save("data/generated/iteration_%s.jpg" % str(it).zfill(4))

    generator_AtoB.save("models/generator_AtoB.h5")
    generator_BtoA.save("models/generator_BtoA.h5")
I was having the same issue. Ankish's suggestion to try this with the tf.keras API solved it. I don't know why, but...
tf.keras.models.load_model("./saved_models/our_model.h5", compile=False)
works perfectly, while
keras.models.load_model("./saved_models/our_model.h5")
fails with the error listed in the question. Setting compile=False only skips recompiling the model after loading, which hides a warning.
I would generalise Ankish and Josh's answers, and import everything from tensorflow keras API. First install Tensorflow 2 (pip install tensorflow
or pip install tensorflow-gpu
if using pip, detailed instructions here). Then import tensorflow and update your import statements by switching to tensorflow.keras
in each of the keras
imports:
# ...
import numpy as np
import tensorflow as tf
# ``tf`` is only a local alias, not an importable module name, so the
# import statements must spell out ``tensorflow``.
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Input, multiply, add as kadd
from tensorflow.keras.layers import Conv2D, BatchNormalization, Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU, ReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#...
With these changes, the rest of the code can remain unchanged.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With