I'm creating an autoencoder as part of my full model for a Kaggle competition. I'm trying to tie the weights of the encoder, transposed, to the decoder. Before the first epoch the weights are correctly synced; after that, the decoder weights just freeze and don't keep up with the encoder weights being updated by gradient descent.
I searched for 12 hours through almost every post about this problem I could find on Google, and none seems to cover my case. The closest one is "Tying Autoencoder Weights in a Dense Keras Layer", but there the problem was solved by not using a variable tensor as the kernel; I'm already not using that type of tensor as my decoder kernel, so it was no use.
I'm using the DenseTied custom Keras layer class defined in this article: https://towardsdatascience.com/build-the-right-autoencoder-tune-and-optimize-using-pca-principles-part-ii-24b9cca69bd6. It is exactly the same; I only changed the way I reference the Keras backend to suit my import style.
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
This is the custom layer definition:
class DenseTied(tf.keras.layers.Layer):
    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 tied_to=None,
                 **kwargs):
        self.tied_to = tied_to
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)
        self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
        self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self.activity_regularizer = tf.keras.regularizers.get(activity_regularizer)
        self.kernel_constraint = tf.keras.constraints.get(kernel_constraint)
        self.bias_constraint = tf.keras.constraints.get(bias_constraint)
        self.input_spec = tf.keras.layers.InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]
        if self.tied_to is not None:
            self.kernel = tf.keras.backend.transpose(self.tied_to.kernel)
            self.non_trainable_weights.append(self.kernel)
        else:
            self.kernel = self.add_weight(shape=(input_dim, self.units),
                                          initializer=self.kernel_initializer,
                                          name='kernel',
                                          regularizer=self.kernel_regularizer,
                                          constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        self.input_spec = tf.keras.layers.InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def call(self, inputs):
        output = tf.keras.backend.dot(inputs, self.kernel)
        if self.use_bias:
            output = tf.keras.backend.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output
And this is the model training and testing code with a dummy dataset:
rand_samples = np.random.rand(16, 51)
dummy_ds = tf.data.Dataset.from_tensor_slices((rand_samples, rand_samples)).shuffle(16).batch(16)
encoder = tf.keras.layers.Dense(1, activation="linear", input_shape=(51,), use_bias=True)
decoder = DenseTied(51, activation="linear", tied_to=encoder, use_bias=True)
autoencoder = tf.keras.Sequential()
autoencoder.add(encoder)
autoencoder.add(decoder)
autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='sgd')
autoencoder.summary()
print("Encoder Kernel Before 1 Epoch", encoder.kernel[0])
print("Decoder Kernel Before 1 Epoch", decoder.kernel[0][0])
autoencoder.fit(dummy_ds, epochs=1)
print("Encoder Kernel After 1 Epoch", encoder.kernel[0])
print("Decoder Kernel After 1 Epoch", decoder.kernel[0][0])
The expected output is for the two kernels to be exactly the same in their first element (I print just one weight for simplicity). The current output shows that the decoder kernel is not updated the same way as the transposed encoder kernel:
2019-09-06 14:55:42.070003: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library nvcuda.dll
2019-09-06 14:55:42.984580: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties:
name: GeForce GTX 1060 major: 6 minor: 1 memoryClockRate(GHz): 1.733
pciBusID: 0000:01:00.0
2019-09-06 14:55:43.088109: I tensorflow/stream_executor/platform/default/dlopen_checker_stub.cc:25] GPU libraries are statically linked, skip dlopen check.
2019-09-06 14:55:43.166145: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
2019-09-06 14:55:43.203865: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2019-09-06 14:55:43.277988: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties:
name: GeForce GTX 1060 major: 6 minor: 1 memoryClockRate(GHz): 1.733
pciBusID: 0000:01:00.0
2019-09-06 14:55:43.300888: I tensorflow/stream_executor/platform/default/dlopen_checker_stub.cc:25] GPU libraries are statically linked, skip dlopen check.
2019-09-06 14:55:43.309040: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
2019-09-06 14:55:44.077814: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-09-06 14:55:44.094542: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187] 0
2019-09-06 14:55:44.099411: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0: N
2019-09-06 14:55:44.103424: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 4712 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1060, pci bus id: 0000:01:00.0, compute capability: 6.1)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 1) 52
_________________________________________________________________
dense_tied (DenseTied) (None, 51) 103
=================================================================
Total params: 103
Trainable params: 103
Non-trainable params: 0
_________________________________________________________________
Encoder Kernel Before 1 Epoch tf.Tensor([0.20486075], shape=(1,), dtype=float32)
Decoder Kernel Before 1 Epoch tf.Tensor(0.20486075, shape=(), dtype=float32)
1/1 [==============================] - 1s 657ms/step - loss: 0.3396 - accuracy: 0.0000e+00
Encoder Kernel After 1 Epoch tf.Tensor([0.20530733], shape=(1,), dtype=float32)
Decoder Kernel After 1 Epoch tf.Tensor(0.20486075, shape=(), dtype=float32)
I don't see what I'm doing wrong.
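The decoder kernel freezes because, under TensorFlow 2.x eager execution, tf.keras.backend.transpose(self.tied_to.kernel) inside build() is evaluated immediately and stores a constant snapshot of the encoder kernel rather than a live view of the variable (appending that tensor to the non_trainable_weights property also has no effect, since the property returns a fresh list). One way to keep your DenseTied approach working is to recompute the transpose on every forward pass. Below is a minimal sketch of that idea; the class name and the trimmed-down constructor (I dropped the initializer/regularizer/constraint plumbing for brevity) are my own illustration, not tested against your full model:

class DenseTiedLive(tf.keras.layers.Layer):
    """Decoder layer that re-reads the encoder's kernel on every call."""

    def __init__(self, units, activation=None, use_bias=True, tied_to=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.use_bias = use_bias
        self.tied_to = tied_to

    def build(self, input_shape):
        if self.tied_to is None:
            # Untied fallback: own a kernel like a regular Dense layer.
            self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                          initializer='glorot_uniform',
                                          name='kernel')
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer='zeros',
                                        name='bias')
        else:
            self.bias = None
        super().build(input_shape)

    def call(self, inputs):
        # Transposing here (not in build) keeps the tie alive: each forward
        # pass reads the encoder kernel's current value, and gradients flow
        # through tf.transpose back into the shared variable.
        kernel = (tf.transpose(self.tied_to.kernel)
                  if self.tied_to is not None else self.kernel)
        output = tf.matmul(inputs, kernel)
        if self.use_bias:
            output = tf.nn.bias_add(output, self.bias)
        if self.activation is not None:
            output = self.activation(output)
        return output

With this change the decoder owns no kernel variable at all, so the two kernels cannot drift apart, and your print statements should show identical values after every epoch.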
To tie the weights, I would also suggest looking at the Keras functional API, which enables layer sharing. That said, here is an alternative implementation that ties the weights between the encoder and decoder:
class TransposableDense(tf.keras.layers.Dense):

    def __init__(self, units, **kwargs):
        super().__init__(units, **kwargs)

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]
        self.t_output_dim = input_dim

        self.kernel = self.add_weight(shape=(int(input_dim), self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
            self.bias_t = self.add_weight(shape=(input_dim,),
                                          initializer=self.bias_initializer,
                                          name='bias_t',
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint)
        else:
            self.bias = None
            self.bias_t = None
        # self.input_spec = tf.keras.layers.InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True

    def call(self, inputs, transpose=False):
        bs, input_dim = inputs.get_shape()

        kernel = self.kernel
        bias = self.bias
        if transpose:
            assert input_dim == self.units
            kernel = tf.keras.backend.transpose(kernel)
            bias = self.bias_t

        output = tf.keras.backend.dot(inputs, kernel)
        if self.use_bias:
            output = tf.keras.backend.bias_add(output, bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output

    def compute_output_shape(self, input_shape):
        bs, input_dim = input_shape
        output_dim = self.units
        if input_dim == self.units:
            output_dim = self.t_output_dim
        return bs, output_dim
The kernel of this dense layer can be transposed by calling the layer with transpose=True. Note that this might break some basic Keras principles (e.g. the layer has multiple output shapes), but it should work for your case.
Here is an example showing how you can use it to define your model:
a = tf.keras.layers.Input((51,))
dense = TransposableDense(1, activation='linear', use_bias=True)
encoder_out = dense(a)
decoder_out = dense(encoder_out, transpose=True)
encoder = tf.keras.Model(a, encoder_out)
autoencoder = tf.keras.Model(a, decoder_out)
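To sanity-check that the weights stay tied during training, you can compile and fit this autoencoder on dummy data like the question's. The snippet below continues the example above (the print statement is mine):

rand_samples = np.random.rand(16, 51).astype(np.float32)

autoencoder.compile(loss='mean_squared_error', optimizer='sgd')
autoencoder.fit(rand_samples, rand_samples, epochs=1)

# Both directions read the same tf.Variable: the decoder multiplies by
# tf.keras.backend.transpose(dense.kernel), so the tie cannot drift.
print("Shared kernel after 1 epoch:", dense.kernel[0])

Since the encoder and decoder literally share one kernel variable, there is nothing to go out of sync: SGD updates dense.kernel once per step, using gradients accumulated from both the forward and the transposed application.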