I'm trying to implement an unsupervised ANN using Hebbian updating in Keras. I found a custom Hebbian layer made by Dan Saunders here - https://github.com/djsaunde/rinns_python/blob/master/hebbian/hebbian.py (I hope it is not poor form to ask questions about another person's code here)
In the examples I found using this layer in the repo, this layer is used as an intermediate layer between Dense/Conv layers, but I would like to construct a network using only Hebbian layers.
Two critical things are confusing me in this implementation:
It seems as though input dims and output dims must be the same for this layer to work. Why would this be the case and what can I do to make it so they can be different?
Why is the diagonal of the weight matrix set to zero? It says this is to "ensure that no neuron is laterally connected to itself", but I thought the connection weights were between the previous layer and the current layer, not the current layer and itself.
Here is the code for the Hebbian Layer Implementation:
import sys

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.engine.topology import Layer
# Print arrays in full. NumPy rejects NaN as a threshold (ValueError) and
# documents sys.maxsize as the value that disables summarization.
np.set_printoptions(threshold=sys.maxsize)
# Module-level TensorFlow session. Nothing in the code shown here references
# `sess`. NOTE(review): tf.Session is the TF 1.x session API - confirm the
# TensorFlow version this is meant to run against.
sess = tf.Session()
class Hebbian(Layer):
    '''
    Keras layer whose non-trainable weight matrix is modified inside call()
    with a Hebbian-style update computed from the layer's own input.

    The activation is computed as x + lmbda * (kernel @ x), so the kernel is
    applied to the input and the result is added back onto the input.
    '''

    def __init__(self, output_dim, lmbda=1.0, eta=0.0005, connectivity='random', connectivity_prob=0.25, **kwargs):
        '''
        Constructor for the Hebbian learning layer.

        args:
            output_dim - The shape of the output / activations computed by the layer.
            lmbda - A floating-point valued parameter governing the strength of the Hebbian learning activation.
            eta - A floating-point valued parameter governing the Hebbian learning rate.
            connectivity - A string which determines the way in which the neurons in this layer are connected to
                the neurons in the previous layer. build() accepts 'all', 'random' and 'zero'.
            connectivity_prob - Threshold compared against uniform random draws to build the boolean
                mask self.B when connectivity is 'random'.
        '''
        self.output_dim = output_dim
        self.lmbda = lmbda
        self.eta = eta
        self.connectivity = connectivity
        self.connectivity_prob = connectivity_prob

        # The mask self.B is sized from output_dim only; the input size is not
        # known yet at construction time.
        # NOTE(review): no mask is created for connectivity == 'all', yet call()
        # reads self.B unconditionally - confirm 'all' is actually usable.
        if self.connectivity == 'random':
            self.B = np.random.random(self.output_dim) < self.connectivity_prob
        elif self.connectivity == 'zero':
            self.B = np.zeros(self.output_dim)

        super(Hebbian, self).__init__(**kwargs)

    def random_conn_init(self, shape, dtype=None):
        '''
        Initializer for 'random' connectivity: standard-normal weights of the
        requested shape with the entries selected by self.B set to zero.
        '''
        A = np.random.normal(0, 1, shape)
        # Boolean indexing with self.B selects along the first axis of A, so
        # self.B must have as many entries as A has rows. Rows where self.B is
        # True are the ones zeroed. When output_dim differs from the flattened
        # input size this is the line that raises the reported
        # "boolean index did not match indexed array" IndexError.
        A[self.B] = 0
        return tf.constant(A, dtype=tf.float32)

    def zero_init(self, shape, dtype=None):
        '''Initializer for 'zero' connectivity: an all-zero array of the requested shape.'''
        return np.zeros(shape)

    def build(self, input_shape):
        '''
        Create the non-trainable kernel of shape
        (product of non-batch input dims, product of output_dim).

        raises:
            NotImplementedError - if connectivity is not 'all', 'random' or 'zero'.
        '''
        # create weight variable for this layer according to user-specified initialization
        if self.connectivity == 'all':
            self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
                np.prod(self.output_dim)), initializer='uniform', trainable=False)
        elif self.connectivity == 'random':
            self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
                np.prod(self.output_dim)), initializer=self.random_conn_init, trainable=False)
        elif self.connectivity == 'zero':
            self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
                np.prod(self.output_dim)), initializer=self.zero_init, trainable=False)
        else:
            raise NotImplementedError

        # Intended to ensure that no neuron is laterally connected to itself.
        # NOTE(review): tf.diag of an all-zero vector is an all-zero
        # (output_dim x output_dim) matrix, so this elementwise product zeroes
        # every kernel entry, not only the diagonal. It also needs the kernel
        # shape to be compatible with (output_dim, output_dim) - confirm intent.
        self.kernel = self.kernel * tf.diag(tf.zeros(self.output_dim))

        # call superclass "build" function
        super(Hebbian, self).build(input_shape)

    def call(self, x):
        '''
        Compute activations from x, update self.kernel with a Hebbian-style
        term, and return the activations reshaped to the shape of x.
        '''
        x_shape = tf.shape(x)
        batch_size = tf.shape(x)[0]

        # Reshape to (product of non-batch dimensions, batch_size). This is a
        # reshape of the batch-first input, not a transpose.
        x = tf.reshape(x, (tf.reduce_prod(x_shape[1:]), batch_size))

        # compute activations using Hebbian-like update rule
        # matmul(kernel, x) needs the kernel's second dimension
        # (np.prod(output_dim)) to equal x's first dimension (flattened input
        # size), and the sum with x needs the result to have x's shape. This is
        # where equal input and output sizes are implicitly required.
        activations = x + self.lmbda * tf.matmul(self.kernel, x)

        # Product of x with itself after inserting singleton axes at positions
        # 1 and 0. It is computed from the reshaped input x, not from
        # `activations`.
        outer_product = tf.matmul(tf.expand_dims(x, 1), tf.expand_dims(x, 0))

        # update the weight matrix of this layer
        # NOTE(review): these statements rebind the Python attribute
        # self.kernel to new tensor expressions rather than assigning to the
        # weight created in build() - confirm the update persists across calls.
        self.kernel = self.kernel + tf.multiply(self.eta, tf.reduce_mean(outer_product, axis=2))
        # NOTE(review): here entries are kept where self.B is non-zero, while
        # random_conn_init zeroes entries where self.B is True - the mask
        # appears to be used in opposite senses; confirm.
        self.kernel = tf.multiply(self.kernel, self.B)
        # Same all-zero product as in build(); see the note there.
        self.kernel = self.kernel * tf.diag(tf.zeros(self.output_dim))

        return K.reshape(activations, x_shape)
At first inspection I expected this layer to be able to take inputs from a previous layer, perform a simple activation calculation (input * weight), update the weights according to Hebbian updating (something like: if the activation between two nodes is high, increase the weight), then pass the activations to the next layer.
I also expected that it would be able to deal with decreasing/increasing the number of nodes from one layer to the next.
Instead, I cannot seem to figure out why the input and output dims must be the same and why the diagonals of the weight matrix are set to zero.
Where in the code (implicitly or explicitly) is the specification that the layers need to be the same dims?
Where in the code (implicitly or explicitly) is the specification that this layer's weight matrix is connecting the current layer to itself?
Apologies if this question should have been separated into two, but the two parts seem like they may be related to each other, so I kept them as one.
Happy to provide more details if needed.
Edit: Realized I forgot to add the error message I get when I try to create a layer with different output dims than the input dims:
# Input of 256 features per sample with output_dim 256: the sizes match.
# Sequential is not imported in the code shown; presumably keras.models.Sequential.
model = Sequential()
model.add(Hebbian(input_shape = (256,1), output_dim = 256))
The snippet above compiles without error.
# Same input as before, but output_dim (24) differs from the 256 input
# features; this is the case reported to raise the IndexError.
model = Sequential()
model.add(Hebbian(input_shape = (256,1), output_dim = 24))
This ^ throws the error: IndexError: boolean index did not match indexed array along dimension 0; dimension is 256 but corresponding boolean dimension is 24
Okay I think I maybe figured it out, sort of. There were many small problems but the biggest thing was I needed to add the compute_output_shape function which makes the layer able to modify the shape of its input as explained here: https://keras.io/layers/writing-your-own-keras-layers/
So here is the code with all the changes I made. It will compile and modify the input shape just fine. Note that this layer computes weight changes inside the layer itself and there may be some issues with that if you try to actually use the layer (I'm still ironing these out), but this is a separate issue.
class Hebbian(Layer):
    '''
    Revised Hebbian layer: the connectivity mask is created in build() instead
    of the constructor, the zero-diagonal step is removed, and
    compute_output_shape() reports (batch, output_dim).
    '''

    def __init__(self, output_dim, lmbda=1.0, eta=0.0005, connectivity='random', connectivity_prob=0.25, **kwargs):
        '''
        Constructor for the Hebbian learning layer.

        args:
            output_dim - The shape of the output / activations computed by the layer.
            lmbda - A floating-point valued parameter governing the strength of the Hebbian learning activation.
            eta - A floating-point valued parameter governing the Hebbian learning rate.
            connectivity - A string which determines the way in which the neurons in this layer are connected to
                the neurons in the previous layer. build() accepts 'all', 'random' and 'zero'.
            connectivity_prob - Threshold compared against uniform random draws to build the boolean
                mask self.B when connectivity is 'random'.
        '''
        self.output_dim = output_dim
        self.lmbda = lmbda
        self.eta = eta
        self.connectivity = connectivity
        self.connectivity_prob = connectivity_prob
        super(Hebbian, self).__init__(**kwargs)

    def random_conn_init(self, shape, dtype=None):
        '''
        Initializer for 'random' connectivity: standard-normal weights of the
        requested shape with the entries selected by self.B set to zero.
        '''
        A = np.random.normal(0, 1, shape)
        # Boolean indexing with self.B; the entries it selects are zeroed.
        A[self.B] = 0
        return tf.constant(A, dtype=tf.float32)

    def zero_init(self, shape, dtype=None):
        '''Initializer for 'zero' connectivity: an all-zero array of the requested shape.'''
        return np.zeros(shape)

    def build(self, input_shape):
        '''
        Create the connectivity mask self.B and the non-trainable kernel of
        shape (product of non-batch input dims, product of output_dim).

        raises:
            NotImplementedError - if connectivity is not 'all', 'random' or 'zero'.
        '''
        # Build the connectivity mask.
        # NOTE(review): the random mask is sized by input_shape[0]; in Keras
        # that entry is normally the batch dimension (often None) rather than
        # a feature count - confirm this is the intended mask size.
        # NOTE(review): no mask is created for connectivity == 'all', yet
        # call() reads self.B unconditionally.
        if self.connectivity == 'random':
            self.B = np.random.random(input_shape[0]) < self.connectivity_prob
        elif self.connectivity == 'zero':
            self.B = np.zeros(self.output_dim)

        # create weight variable for this layer according to user-specified initialization
        if self.connectivity == 'all':
            self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
                np.prod(self.output_dim)), initializer='uniform', trainable=False)
        elif self.connectivity == 'random':
            self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
                np.prod(self.output_dim)), initializer=self.random_conn_init, trainable=False)
        elif self.connectivity == 'zero':
            self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
                np.prod(self.output_dim)), initializer=self.zero_init, trainable=False)
        else:
            raise NotImplementedError

        # call superclass "build" function
        super(Hebbian, self).build(input_shape)

    def call(self, x):  # x is the input to the network
        '''
        Compute activations from x, update self.kernel with a Hebbian-style
        term, and return the activations reshaped to the shape of x.
        '''
        x_shape = tf.shape(x)
        batch_size = tf.shape(x)[0]

        # Reshape to (product of non-batch dimensions, batch_size). This is a
        # reshape of the batch-first input, not a transpose.
        x = tf.reshape(x, (tf.reduce_prod(x_shape[1:]), batch_size))

        # compute activations using Hebbian-like update rule
        # NOTE(review): matmul(kernel, x) needs the kernel's second dimension
        # (np.prod(output_dim)) to equal x's first dimension (flattened input
        # size), and the sum with x needs matching shapes, so this expression
        # still assumes equal input and output sizes.
        activations = x + self.lmbda * tf.matmul(self.kernel, x)

        # Product of x with itself after inserting singleton axes at positions
        # 1 and 0. It is computed from the reshaped input x, not from
        # `activations`.
        outer_product = tf.matmul(tf.expand_dims(x, 1), tf.expand_dims(x, 0))

        # update the weight matrix of this layer
        # NOTE(review): these statements rebind the Python attribute
        # self.kernel to new tensor expressions rather than assigning to the
        # weight created in build() - confirm the update persists across calls.
        self.kernel = self.kernel + tf.multiply(self.eta, tf.reduce_mean(outer_product, axis=2))
        self.kernel = tf.multiply(self.kernel, self.B)

        # NOTE(review): the result is reshaped to the input's shape, whereas
        # compute_output_shape() reports (input_shape[0], self.output_dim) -
        # confirm the two agree for the shapes in use.
        return K.reshape(activations, x_shape)

    def compute_output_shape(self, input_shape):
        '''Report the output shape as (batch dimension of the input, output_dim).'''
        return (input_shape[0], self.output_dim)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With