I am trying to implement the resilient backpropagation (Rprop) optimizer for Keras (link), but the challenging part is updating each individual parameter based on whether its corresponding gradient is positive, negative, or zero. I wrote the code below as a start towards implementing the Rprop optimizer. However, I can't find a way to access the parameters individually: looping over params (as in the code below) yields p, g, g_old, s, wChangeOld at each iteration, all of which are matrices.
Is there a way to iterate over the individual parameters and update them? It would also work if I could index the parameter vector based on the sign of its gradients.
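(For context: Keras backend comparisons operate element-wise over whole tensors, so per-element branching is usually expressed with K.switch rather than a Python if or a loop. A minimal sketch of that pattern, where g, step_up and step_down are illustrative names, not variables from the code below:

from keras import backend as K

# g is a gradient tensor; step_up and step_down are candidate step tensors
# (illustrative names). K.greater compares element-wise, and K.switch then
# selects step_up wherever g > 0 and step_down everywhere else, so every
# parameter is handled individually without a Python-level loop.
new_step = K.switch(K.greater(g, 0), step_up, step_down)

The answers below use exactly this construct.)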
from keras import backend as K
from keras.optimizers import Optimizer

class Rprop(Optimizer):
    def __init__(self, init_step=0.01, **kwargs):
        super(Rprop, self).__init__(**kwargs)
        self.init_step = K.variable(init_step, name='init_step')
        self.iterations = K.variable(0., name='iterations')
        self.posStep = 1.2
        self.negStep = 0.5
        self.minStep = 1e-6
        self.maxStep = 50.
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        shapes = [K.get_variable_shape(p) for p in params]
        stepList = [K.ones(shape) * self.init_step for shape in shapes]
        wChangeOldList = [K.zeros(shape) for shape in shapes]
        grads_old = [K.zeros(shape) for shape in shapes]
        self.weights = stepList + grads_old + wChangeOldList
        self.updates = []
        for p, g, g_old, s, wChangeOld in zip(params, grads, grads_old,
                                              stepList, wChangeOldList):
            change = K.sign(g * g_old)
            if change > 0:
                s_new = K.minimum(s * self.posStep, self.maxStep)
                wChange = s_new * K.sign(g)
                g_new = g
            elif change < 0:
                s_new = K.maximum(s * self.posStep, self.maxStep)
                wChange = - wChangeOld
                g_new = 0
            else:
                s_new = s
                wChange = s_new * K.sign(g)
                g_new = p
            self.updates.append(K.update(g_old, g_new))
            self.updates.append(K.update(wChangeOld, wChange))
            self.updates.append(K.update(s, s_new))
            new_p = p - wChange
            # Apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
    def get_config(self):
        config = {'init_step': float(K.get_value(self.init_step))}
        base_config = super(Rprop, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
I was looking for an RProp algorithm in Keras as well and found this question. I took the liberty of adapting your code to my purpose, and I'm posting it back here now. So far it seems to work quite well, but I didn't test it extensively.
Disclaimer: I'm very new to Keras but have a lot of experience with Theano (and Blocks). Furthermore, I have only tested this with Theano as a backend, not TensorFlow.
import numpy
from keras import backend as K
from keras.optimizers import Optimizer

class RProp(Optimizer):
    def __init__(self, init_alpha=1e-3, scale_up=1.2, scale_down=0.5, min_alpha=1e-6, max_alpha=50., **kwargs):
        super(RProp, self).__init__(**kwargs)
        self.init_alpha = K.variable(init_alpha, name='init_alpha')
        self.scale_up = K.variable(scale_up, name='scale_up')
        self.scale_down = K.variable(scale_down, name='scale_down')
        self.min_alpha = K.variable(min_alpha, name='min_alpha')
        self.max_alpha = K.variable(max_alpha, name='max_alpha')
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        alphas = [K.variable(numpy.ones(shape) * self.init_alpha) for shape in shapes]
        old_grads = [K.zeros(shape) for shape in shapes]
        self.weights = alphas + old_grads
        self.updates = []
        for param, grad, old_grad, alpha in zip(params, grads, old_grads, alphas):
            new_alpha = K.switch(
                K.greater(grad * old_grad, 0),
                K.minimum(alpha * self.scale_up, self.max_alpha),
                K.maximum(alpha * self.scale_down, self.min_alpha)
            )
            new_param = param - K.sign(grad) * new_alpha
            # Apply constraints
            if param in constraints:
                c = constraints[param]
                new_param = c(new_param)
            self.updates.append(K.update(param, new_param))
            self.updates.append(K.update(alpha, new_alpha))
            self.updates.append(K.update(old_grad, grad))
        return self.updates
    def get_config(self):
        config = {
            'init_alpha': float(K.get_value(self.init_alpha)),
            'scale_up': float(K.get_value(self.scale_up)),
            'scale_down': float(K.get_value(self.scale_down)),
            'min_alpha': float(K.get_value(self.min_alpha)),
            'max_alpha': float(K.get_value(self.max_alpha)),
        }
        base_config = super(RProp, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
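A usage sketch under the assumptions above (model, x_train and y_train are placeholder names; Rprop relies on full-batch gradients, so the batch size is set to the whole training set):

model.compile(optimizer=RProp(init_alpha=1e-3), loss='mse')
# Rprop needs full-batch gradients: use the entire training set per update.
# (On older Keras versions the epochs argument is spelled nb_epoch.)
model.fit(x_train, y_train, batch_size=len(x_train), epochs=100)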
Important notes:
A few comments about your code (referring to your original variable names):

- wChange is never used across iterations, so you don't need to store those in permanent variables.
- change > 0 does not do what you think it does, because change is a tensor variable; what you want here is an element-wise comparison, so use K.switch() instead.
- You used maxStep twice instead of using minStep the other time.
- The case where change is zero is negligible, since that almost never happens in practice.
- g_new = 0 and g_new = p are both completely bogus; both should be g_new = g, as in the first if branch.

I'm new to Keras and Python, but I modified the code above for my purposes a bit.
It is an incredibly fast and simple algorithm, because it uses full-batch learning and only the signs of the partial derivatives. In my tests it outperformed all other backpropagation algorithms, including Adam. I tested it with TensorFlow and CNTK as backends.
Modified Rprop without Weight-Backtracking: https://pdfs.semanticscholar.org/df9c/6a3843d54a28138a596acc85a96367a064c2.pdf
from keras import backend as K
from keras.optimizers import Optimizer

class iRprop_(Optimizer):
    def __init__(self, init_alpha=0.01, scale_up=1.2, scale_down=0.5, min_alpha=0.00001, max_alpha=50., **kwargs):
        super(iRprop_, self).__init__(**kwargs)
        self.init_alpha = K.variable(init_alpha, name='init_alpha')
        self.scale_up = K.variable(scale_up, name='scale_up')
        self.scale_down = K.variable(scale_down, name='scale_down')
        self.min_alpha = K.variable(min_alpha, name='min_alpha')
        self.max_alpha = K.variable(max_alpha, name='max_alpha')

    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        alphas = [K.variable(K.ones(shape) * self.init_alpha) for shape in shapes]
        old_grads = [K.zeros(shape) for shape in shapes]
        self.weights = alphas + old_grads
        self.updates = []
        for p, grad, old_grad, alpha in zip(params, grads, old_grads, alphas):
            # iRprop- uses only the sign of the gradient.
            grad = K.sign(grad)
            # Grow the step size where the sign is unchanged, shrink it where
            # the sign flipped, and keep it where the gradient is zero.
            new_alpha = K.switch(
                K.greater(grad * old_grad, 0),
                K.minimum(alpha * self.scale_up, self.max_alpha),
                K.switch(K.less(grad * old_grad, 0),
                         K.maximum(alpha * self.scale_down, self.min_alpha),
                         alpha)
            )
            # On a sign change, skip this weight's update (no backtracking).
            grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad), grad)
            new_p = p - grad * new_alpha
            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            self.updates.append(K.update(p, new_p))
            self.updates.append(K.update(alpha, new_alpha))
            self.updates.append(K.update(old_grad, grad))
        return self.updates

    def get_config(self):
        config = {
            'init_alpha': float(K.get_value(self.init_alpha)),
            'scale_up': float(K.get_value(self.scale_up)),
            'scale_down': float(K.get_value(self.scale_down)),
            'min_alpha': float(K.get_value(self.min_alpha)),
            'max_alpha': float(K.get_value(self.max_alpha)),
        }
        base_config = super(iRprop_, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
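Because get_config is implemented, the optimizer's hyperparameters serialize with a saved model; reloading then requires passing the class back in via custom_objects. A sketch, with an illustrative filename:

from keras.models import load_model

# Custom classes are not importable by name at load time, so they must be
# supplied explicitly via custom_objects.
model = load_model('my_model.h5', custom_objects={'iRprop_': iRprop_})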