Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to store result of an operation (like TOPK) per epoch in keras

I have written a custom layer in keras. in part of this custom layer lets say I have a matrix like this:

c = tf.cast(tf.nn.top_k(tf.nn.top_k(n, tf.shape(n)[1])[1][:, ::-1], tf.shape(n)[1])[1][:, ::-1], dtype=tf.float32)

My question is that How can I track the values of result of this per epoch?

for example, if I have 20 epoch, I need to have 20 of this matrix saved in a csv file.

(I know how to save the weights of the model but this one is the result of a middle layer operation and I need to keep track of this matrix).

what I have done:

This is the structure of my layer:

class my_layer(Layer):
    def __init__(self, topk, ctype, **kwargs):
    self.x_prev = None
    self.topk_mat = None

   def call(self, x):
     'blah blah'

   def get_config(self):
      'blah blah'

   def k_comp_tanh(self,x, f=6):
     'blah blah'
      if self.topk_mat is None:
            self.topk_mat = self.add_weight(shape=(20, 25),
                                          initializer='zeros',
                                          trainable=False,
                                          # dtype=tf.float32,
                                          name='topk_mat')

     c = tf.cast(tf.nn.top_k(tf.nn.top_k(n, tf.shape(n)[1])[1][:, ::-1], tf.shape(n)[1])[1][:, ::-1], dtype=tf.float32)
     self.topk_mat.assign(c)

Code for building the model and fitting on the data:

class AutoEncoder(object):
def __init__(self, input_size, dim, comp_topk=None, ctype=None, save_model='best_model'):
    self.input_size = input_size
    self.dim = dim
    self.comp_topk = comp_topk
    self.ctype = ctype
    self.save_model = save_model
    self.build()

def build(self):
    input_layer = Input(shape=(self.input_size,))
    encoded_layer = Dense(self.dim, activation=act, kernel_initializer="glorot_normal", name="Encoded_Layer")
    encoded = encoded_layer(input_layer)
    encoder_model = Model(outputs=encoded, inputs=input_layer)
    encoder_model.save('pathto/encoder_model')

    self.encoded_instant = my_layer(self.comp_topk, self.ctype)
    encoded = self.encoded_instant(encoded)
    decoded = Dense_tied(self.input_size, activation='sigmoid',tied_to=encoded_layer, name='Decoded_Layer')(encoded)

    # this model maps an input to its reconstruction
    self.autoencoder = Model(outputs=decoded, inputs=input_layer)

    # this model maps an input to its encoded representation
    self.encoder = Model(outputs=encoded, inputs=input_layer)

    # create a placeholder for an encoded input
    encoded_input = Input(shape=(self.dim,))
    # retrieve the last layer of the autoencoder model
    decoder_layer = self.autoencoder.layers[-1]
    # create the decoder model
    self.decoder = Model(outputs=decoder_layer(encoded_input), inputs=encoded_input)

def fit(self, train_X, val_X, nb_epoch=50, batch_size=100, contractive=None):
    import tensorflow as tf
    optimizer = Adam(lr=0.0005)

    self.autoencoder.compile(optimizer=optimizer, loss='binary_crossentropy') # kld, binary_crossentropy, mse

    cbk = tf.keras.callbacks.LambdaCallback(
        on_epoch_begin=lambda epoch, logs: np.savetxt("foo.csv", tf.keras.backend.eval(self.encoded_instant.topk_mat), delimiter=","))
    self.autoencoder.fit(train_X[0], train_X[1],
                    epochs=nb_epoch,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(val_X[0], val_X[1]),
                    callbacks=[
                                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=1, mode='auto'),
                                cbk,
                  save_best_only=True, mode='auto')
                                CustomModelCheckpoint(custom_model=self.encoder, filepath="pathtocheckpoint/{epoch}.hdf5",save_best_only=True,  monitor='val_loss', mode='auto')
                    ]
                    )

    return self


cbk = tf.keras.callbacks.LambdaCallback(
    on_epoch_begin=lambda epoch, logs: np.savetxt("mycsvtopk.csv", tf.keras.backend.eval(my_layer.topk_mat, delimiter=",")))
                                       )
self.autoencoder.fit(train_X[0], train_X[1],
                epochs=nb_epoch,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(val_X[0], val_X[1]),
                callbacks=[cbk,CustomModelCheckpoint(custom_model=self.encoder, filepath="path_to_file/{epoch}.hdf5",save_best_only=True,  monitor='val_loss', mode='auto')
                    ]
                    )
 

and this is where I call the Autoencoder class

ae = AutoEncoder(n_vocab, args.n_dim, comp_topk=args.comp_topk, ctype=args.ctype, save_model=args.save_model)
ae.fit([X_train_noisy, X_train], [X_val_noisy, X_val], nb_epoch=args.n_epoch, \
        batch_size=args.batch_size, contractive=args.contractive)

It raises error:

tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value mylayer_1/topk_mat
     [[{{node _retval_mylayer_1/topk_mat_0_0}} = _Retval[T=DT_FLOAT, index=0, _device="/job:localhost/replica:0/task:0/device:CPU:0"](mylayer_1/topk_mat)]]
Exception TypeError: TypeError("'NoneType' object is not callable",) in <bound method Session.__del__ of <tensorflow.python.client.session.Session object at 0x7f56ae01bc50>> ignored

The examples I see with CustomCallback all are related to metric already model is aware of like loss, accuracy, ... What I have done above based on @Jhadi idea is to save the result of this in one variable initially initialized with None, and then in the fitting part pass this variable to save it in a csv format. This seems has to work though I am getting this error and have tried many ways to fix it but no success. It seems to me like a Keras library issue.

like image 894
sariii Avatar asked Jun 08 '20 04:06

sariii


1 Answers

I think you could save the variable using a list-tracking Checkpoint.

you need to add code in the training so you need to code your training loop and save the variable at the end of each epoch.

def fit_and_save_log(self, train_X, val_X, nb_epoch=50, batch_size=100, contractive=None):
    import tensorflow as tf
    optimizer = Adam(lr=0.0005)

    self.autoencoder.compile(optimizer=optimizer, loss='binary_crossentropy') # kld, binary_crossentropy, mse   
    
    save = tf.train.Checkpoint()
    save.listed = []
    
    # Prepare dataset
    X, y = train_X
    train_ds = tf.data.Dataset.from_tensor_slices((x, y))
    train_ds = train_ds.shuffle(10000)
    train_ds = train_ds.batch(batch_size)
    iterator = train_ds.make_initializable_iterator()
    next_batch = iterator.get_next()

    for epoch in range(nb_epoch):
        sess.run(iterator.initializer)           
        
        while True:
            try:
                self.autoencoder.train_on_batch(next_batch[0], next_batch[1])
            except tf.errors.OutOfRangeError:
                break
        
        save.listed.append(self.encoded_instant.topk_mat)

        # you can compute validation results here 

    save_path = save.save('./topk_mat_log', session=tf.keras.backend.get_session())
    return self

Or you can use the model.fit function if you prefer it. Doing it this way can be easier, as we do not need to care about creating the batches. However, repeatedly calling model.fit may result in memory leak. You can give it a try and check how it behaves. [1]

def fit_and_save_log(self, train_X, val_X, nb_epoch=50, batch_size=100, contractive=None):
    import tensorflow as tf
    optimizer = Adam(lr=0.0005)

    self.autoencoder.compile(optimizer=optimizer, loss='binary_crossentropy') # kld, binary_crossentropy, mse   
    
    save = tf.train.Checkpoint()
    save.listed = []
    
    for epoch in range(nb_epoch):
        self.autoencoder.fit(train_X[0], train_X[1],
                epochs=1,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(val_X[0], val_X[1]))
        
        save.listed.append(self.encoded_instant.topk_mat)

        # you can compute validation results here 

    save_path = save.save('./topk_mat_log', session=tf.keras.backend.get_session())
    return self

Then you can restore the saved variable like this

restore = tf.train.Checkpoint()
restore.restore(save_path)
restore.listed = []
v1 = tf.Variable(0.)
restore.listed.append(v1) # Now v1 corresponds with topk_mat in the first epoch
like image 159
Pedrolarben Avatar answered Nov 15 '22 06:11

Pedrolarben