Simple Neural Network with backpropagation in Swift

I'm trying to implement a really simple neural network with backpropagation. I trying to train the network with the AND logical operator. But the prediction it's not working for me fine. :(

    import Foundation // for exp() and arc4random_uniform()

    public class ActivationFunction {

        class func sigmoid(x: Float) -> Float {
            return 1.0 / (1.0 + exp(-x))
        }

        class func dSigmoid(x: Float) -> Float {
            // x is expected to already be sigmoid(x), so this is y * (1 - y)
            return x * (1 - x)
        }
    }
    }

    public class NeuralNetConstants {

        public static let learningRate: Float = 0.3
        public static let momentum: Float = 0.6
        public static let iterations: Int = 100000

    }
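
A note on dSigmoid: it is applied to a value that has already gone through sigmoid, because the logistic derivative can be written in terms of the function's own output:

    \sigma(x) = \frac{1}{1 + e^{-x}}, \qquad \sigma'(x) = \sigma(x)\,\bigl(1 - \sigma(x)\bigr)

So if y = sigmoid(x) is already stored (as output[i] is below), the derivative is just y * (1 - y).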

public class Layer {

    private var output: [Float]
    private var input: [Float]
    private var weights: [Float]
    private var dWeights: [Float]

    init(inputSize: Int, outputSize: Int) {
        self.output = [Float](repeating: 0, count: outputSize)
        self.input = [Float](repeating: 0, count: inputSize + 1)
        self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
        self.dWeights = [Float](repeating: 0, count: weights.count)
    }

    public func run(inputArray: [Float]) -> [Float] {

        input =  inputArray
        input[input.count-1] = 1
        var offset = 0

        for i in 0..<output.count {
            for j in 0..<input.count {
                output[i] += weights[offset + j] * input[j]
            }
            output[i] = ActivationFunction.sigmoid(x: output[i])
            offset += input.count
        }

        return output
    }

    public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {

        var offset = 0
        var nextError = [Float](repeating: 0, count: input.count)

        for i in 0..<output.count {

            let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])

            for j in 0..<input.count {
                let weightIndex = offset + j
                nextError[j] = nextError[j] + weights[weightIndex] * delta
                let dw = input[j] * delta * learningRate
                weights[weightIndex] += dWeights[weightIndex] * momentum + dw
                dWeights[weightIndex] = dw
            }

            offset += input.count
        }

        return nextError
    }

}

public class BackpropNeuralNetwork {

    private var layers: [Layer] = []

    public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
        self.layers.append(Layer(inputSize: inputSize, outputSize: hiddenSize))
        self.layers.append(Layer(inputSize: hiddenSize, outputSize: outputSize))
    }

    public func getLayer(index: Int) -> Layer {
        return layers[index]
    }

    public func run(input: [Float]) -> [Float] {

        var activations = input

        for i in 0..<layers.count {
            activations = layers[i].run(inputArray: activations)
        }

        return activations
    }

    public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {

        let calculatedOutput = run(input: input)
        var error = [Float](repeating: 0, count: calculatedOutput.count)

        for i in 0..<error.count {
            error[i] = targetOutput[i] - calculatedOutput[i]
        }

        for i in (0..<layers.count).reversed() {
            error = layers[i].train(error: error, learningRate: learningRate, momentum: momentum)
        }
    }
}

extension ClosedRange where Bound: FloatingPoint {
    // Uniform random value in the range; arc4random_uniform comes from Darwin (via Foundation).
    public func random() -> Bound {
        let range = self.upperBound - self.lowerBound
        let randomValue = (Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)) * range + self.lowerBound
        return randomValue
    }
}

This is my training data. I just want my network to learn the simple AND logical operator.

My input data:

let trainingData: [[Float]] = [ [0,0], [0,1], [1,0], [1,1] ]

let trainingResults: [[Float]] = [ [0], [0], [0], [1] ]

let backProp = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)

for _ in 0..<NeuralNetConstants.iterations {

    for i in 0..<trainingResults.count {
        backProp.train(input: trainingData[i], targetOutput: trainingResults[i], learningRate: NeuralNetConstants.learningRate, momentum: NeuralNetConstants.momentum)
    }

    for i in 0..<trainingResults.count {
        let t = trainingData[i]
        print("\(t[0]), \(t[1])  -- \(backProp.run(input: t)[0])")
    }

}

This is my whole code for the neural network. The code is not really swifty, but I think it's more important to understand the theory behind neural networks first; then the code can become more swifty.

The problem is that my results are completely wrong. This is what I get:

0.0, 0.0  -- 0.246135
0.0, 1.0  -- 0.251307
1.0, 0.0  -- 0.24325
1.0, 1.0  -- 0.240923

This is what I want to get:

0.0, 0.0  -- 0.000
0.0, 1.0  -- 0.005
1.0, 0.0  -- 0.005
1.0, 1.0  -- 0.992

Well, for comparison, the Java implementation works fine:

public class ActivationFunction {

    public static float sigmoid(float x) {
        return (float) (1 / (1 + Math.exp(-x)));
    }

    public static float dSigmoid(float x) {
        return x*(1-x); // because the output is already sigmoid(x), we don't have to apply it twice
    }
}

public class NeuralNetConstants {

    private NeuralNetConstants() {

    }

    public static final float LEARNING_RATE = 0.3f;
    public static final float MOMENTUM = 0.6f;
    public static final int ITERATIONS = 100000;
}

import java.util.Arrays;
import java.util.Random;

public class Layer {

    private float[] output;
    private float[] input;
    private float[] weights;
    private float[] dWeights;
    private Random random;

    public Layer(int inputSize, int outputSize) {
        output = new float[outputSize];
        input = new float[inputSize + 1];
        weights = new float[(1 + inputSize) * outputSize];
        dWeights = new float[weights.length];
        this.random = new Random();
        initWeights();
    }

    public void initWeights() {
        for (int i = 0; i < weights.length; i++) {
            weights[i] = (random.nextFloat() - 0.5f) * 4f;
        }
    }

    public float[] run(float[] inputArray) {

        System.arraycopy(inputArray, 0, input, 0, inputArray.length);
        input[input.length - 1] = 1; // bias
        int offset = 0;

        for (int i = 0; i < output.length; i++) {
            for (int j = 0; j < input.length; j++) {
                output[i] += weights[offset + j] * input[j];
            }
            output[i] = ActivationFunction.sigmoid(output[i]);
            offset += input.length;
        }

        return Arrays.copyOf(output, output.length);
    }

    public float[] train(float[] error, float learningRate, float momentum) {

        int offset = 0;
        float[] nextError = new float[input.length];

        for (int i = 0; i < output.length; i++) {

            float delta = error[i] * ActivationFunction.dSigmoid(output[i]); 
            for (int j = 0; j < input.length; j++) {
                int previousWeightIndex = offset + j;
                nextError[j] = nextError[j] + weights[previousWeightIndex] * delta;
                float dw = input[j] * delta * learningRate;
                weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw;
                dWeights[previousWeightIndex] = dw;
            }

            offset += input.length;
        }

        return nextError;
    }
}

public class BackpropNeuralNetwork {

    private Layer[] layers;

    public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) {
        layers = new Layer[2];
        layers[0] = new Layer(inputSize, hiddenSize);
        layers[1] = new Layer(hiddenSize, outputSize);
    }

    public Layer getLayer(int index) {
        return layers[index];
    }

    public float[] run(float[] input) {
        float[] inputActivation = input;
        for (int i = 0; i < layers.length; i++) {
            inputActivation = layers[i].run(inputActivation);
        }
        return inputActivation;
    }

    public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {

        float[] calculatedOutput = run(input);
        float[] error = new float[calculatedOutput.length];

        for (int i = 0; i < error.length; i++) {
            error[i] = targetOutput[i] - calculatedOutput[i]; 
        }

        for (int i = layers.length - 1; i >= 0; i--) {
            error = layers[i].train(error, learningRate, momentum);
        }
    }
}

public class NeuralNetwork {

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        float[][] trainingData = new float[][] {
                new float[] { 0, 0 },
                new float[] { 0, 1 },
                new float[] { 1, 0 },
                new float[] { 1, 1 }
        };

        float[][] trainingResults = new float[][] {
                new float[] { 0 }, 
                new float[] { 0 }, 
                new float[] { 0 },
                new float[] { 1 } 
        };

        BackpropNeuralNetwork backpropagationNeuralNetworks = new BackpropNeuralNetwork(2, 3, 1);

        for (int iterations = 0; iterations < NeuralNetConstants.ITERATIONS; iterations++) {

            for (int i = 0; i < trainingResults.length; i++) {
                backpropagationNeuralNetworks.train(trainingData[i], trainingResults[i],
                        NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM);
            }

            System.out.println();
            for (int i = 0; i < trainingResults.length; i++) {
                float[] t = trainingData[i];
                System.out.printf("%d epoch\n", iterations + 1);
                System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], backpropagationNeuralNetworks.run(t)[0]);
            }
        }
    }

}
asked Mar 21 '17 by BilalReffas

1 Answer

You are initializing your weights differently from the Java version: [Float](repeating:count:) evaluates the random value once and repeats that single number for every weight. With all weights identical, every hidden neuron computes the same output and receives the same update, so the network can never break symmetry. What you want is a fresh random value for each weight in the array. Replace

self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)

with

self.weights = (0..<(1 + inputSize) * outputSize).map { _ in
  return (-2.0...2.0).random()
}
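
As a side note: on Swift 4.2 or later you can drop the custom ClosedRange extension entirely and use the standard library instead (a minimal sketch, assuming Swift 4.2+):

(0..<(1 + inputSize) * outputSize).map { _ in
  Float.random(in: -2.0...2.0) // independent draw per weight
}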

Besides that, please only overwrite the leading elements of input in the Layer.run method. Assigning the whole array replaces your (inputSize + 1)-element buffer with a 2-element copy of inputArray, so the bias slot disappears and input[input.count-1] = 1 overwrites your second input instead of setting the bias. So instead of

input =  inputArray

you should do this:

for (i, e) in inputArray.enumerated() {
  self.input[i] = e
}
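
With both changes, the start of Layer.run keeps the bias slot intact; a minimal sketch of just the affected lines (the rest of the method is unchanged):

for (i, e) in inputArray.enumerated() {
  self.input[i] = e
}
self.input[input.count - 1] = 1 // the extra slot is still reserved for the bias

This mirrors what the Java version does with System.arraycopy, and with both fixes applied the Swift network should converge to outputs close to the Java ones.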
answered by Ben