I'm trying to implement a really simple neural network with backpropagation. I trying to train the network with the AND
logical operator. But the prediction it's not working for me fine. :(
public class ActivationFunction {
class func sigmoid(x: Float) -> Float {
return 1.0 / (1.0 + exp(-x))
}
class func dSigmoid(x: Float) -> Float {
return x * (1 - x)
}
}
public class NeuralNetConstants {
public static let learningRate: Float = 0.3
public static let momentum: Float = 0.6
public static let iterations: Int = 100000
}
public class Layer {
private var output: [Float]
private var input: [Float]
private var weights: [Float]
private var dWeights: [Float]
init(inputSize: Int, outputSize: Int) {
self.output = [Float](repeating: 0, count: outputSize)
self.input = [Float](repeating: 0, count: inputSize + 1)
self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
self.dWeights = [Float](repeating: 0, count: weights.count)
}
public func run(inputArray: [Float]) -> [Float] {
input = inputArray
input[input.count-1] = 1
var offSet = 0
for i in 0..<output.count {
for j in 0..<input.count {
output[i] += weights[offSet+j] * input[j]
}
output[i] = ActivationFunction.sigmoid(x: output[i])
offSet += input.count
}
return output
}
public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {
var offset = 0
var nextError = [Float](repeating: 0, count: input.count)
for i in 0..<output.count {
let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])
for j in 0..<input.count {
let weightIndex = offset + j
nextError[j] = nextError[j] + weights[weightIndex] * delta
let dw = input[j] * delta * learningRate
weights[weightIndex] += dWeights[weightIndex] * momentum + dw
dWeights[weightIndex] = dw
}
offset += input.count
}
return nextError
}
}
public class BackpropNeuralNetwork {
private var layers: [Layer] = []
public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
self.layers.append(Layer(inputSize: inputSize, outputSize: hiddenSize))
self.layers.append(Layer(inputSize: hiddenSize, outputSize: outputSize))
}
public func getLayer(index: Int) -> Layer {
return layers[index]
}
public func run(input: [Float]) -> [Float] {
var activations = input
for i in 0..<layers.count {
activations = layers[i].run(inputArray: activations)
}
return activations
}
public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {
let calculatedOutput = run(input: input)
var error = [Float](repeating: 0, count: calculatedOutput.count)
for i in 0..<error.count {
error[i] = targetOutput[i] - calculatedOutput[i]
}
for i in (0...layers.count-1).reversed() {
error = layers[i].train(error: error, learningRate: learningRate, momentum: momentum)
}
}
}
extension ClosedRange where Bound: FloatingPoint {
public func random() -> Bound {
let range = self.upperBound - self.lowerBound
let randomValue = (Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)) * range + self.lowerBound
return randomValue
}
}
This is my training data I just want that my network learn the simple AND
logical operator.
My input data:
let traningData: [[Float]] = [ [0,0], [0,1], [1,0], [1,1] ]
let traningResults: [[Float]] = [ [0], [0], [0], [1] ]
let backProb = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)
for iterations in 0..<NeuralNetConstants.iterations {
for i in 0..<traningResults.count {
backProb.train(input: traningData[i], targetOutput: traningResults[i], learningRate: NeuralNetConstants.learningRate, momentum: NeuralNetConstants.momentum)
}
for i in 0..<traningResults.count {
var t = traningData[i]
print("\(t[0]), \(t[1]) -- \(backProb.run(input: t)[0])")
}
}
This is my whole code for the neural network. The code is not really swifty but I think it's first more important to understand the theory about neural networks then the code will be more swifty.
The problem is that my results are completely wrong. This is what I get
0.0, 0.0 -- 0.246135
0.0, 1.0 -- 0.251307
1.0, 0.0 -- 0.24325
1.0, 1.0 -- 0.240923
This is what I want to get
0,0, 0,0 -- 0,000
0,0, 1,0 -- 0,005
1,0, 0,0 -- 0,005
1,0, 1,0 -- 0,992
Well for comparison the java implementation works fine..
public class ActivationFunction {
public static float sigmoid(float x) {
return (float) (1 / (1 + Math.exp(-x)));
}
public static float dSigmoid(float x) {
return x*(1-x); // because the output is the sigmoid(x) !!! we dont have to apply it twice
}
}
public class NeuralNetConstants {
private NeuralNetConstants() {
}
public static final float LEARNING_RATE = 0.3f;
public static final float MOMENTUM = 0.6f;
public static final int ITERATIONS = 100000;
}
public class Layer {
private float[] output;
private float[] input;
private float[] weights;
private float[] dWeights;
private Random random;
public Layer(int inputSize, int outputSize) {
output = new float[outputSize];
input = new float[inputSize + 1];
weights = new float[(1 + inputSize) * outputSize];
dWeights = new float[weights.length];
this.random = new Random();
initWeights();
}
public void initWeights() {
for (int i = 0; i < weights.length; i++) {
weights[i] = (random.nextFloat() - 0.5f) * 4f;
}
}
public float[] run(float[] inputArray) {
System.arraycopy(inputArray, 0, input, 0, inputArray.length);
input[input.length - 1] = 1; // bias
int offset = 0;
for (int i = 0; i < output.length; i++) {
for (int j = 0; j < input.length; j++) {
output[i] += weights[offset + j] * input[j];
}
output[i] = ActivationFunction.sigmoid(output[i]);
offset += input.length;
}
return Arrays.copyOf(output, output.length);
}
public float[] train(float[] error, float learningRate, float momentum) {
int offset = 0;
float[] nextError = new float[input.length];
for (int i = 0; i < output.length; i++) {
float delta = error[i] * ActivationFunction.dSigmoid(output[i]);
for (int j = 0; j < input.length; j++) {
int previousWeightIndex = offset + j;
nextError[j] = nextError[j] + weights[previousWeightIndex] * delta;
float dw = input[j] * delta * learningRate;
weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw;
dWeights[previousWeightIndex] = dw;
}
offset += input.length;
}
return nextError;
}
}
public class BackpropNeuralNetwork {
private Layer[] layers;
public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) {
layers = new Layer[2];
layers[0] = new Layer(inputSize, hiddenSize);
layers[1] = new Layer(hiddenSize, outputSize);
}
public Layer getLayer(int index) {
return layers[index];
}
public float[] run(float[] input) {
float[] inputActivation = input;
for (int i = 0; i < layers.length; i++) {
inputActivation = layers[i].run(inputActivation);
}
return inputActivation;
}
public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {
float[] calculatedOutput = run(input);
float[] error = new float[calculatedOutput.length];
for (int i = 0; i < error.length; i++) {
error[i] = targetOutput[i] - calculatedOutput[i];
}
for (int i = layers.length - 1; i >= 0; i--) {
error = layers[i].train(error, learningRate, momentum);
}
}
}
public class NeuralNetwork {
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
float[][] trainingData = new float[][] {
new float[] { 0, 0 },
new float[] { 0, 1 },
new float[] { 1, 0 },
new float[] { 1, 1 }
};
float[][] trainingResults = new float[][] {
new float[] { 0 },
new float[] { 0 },
new float[] { 0 },
new float[] { 1 }
};
BackpropNeuralNetwork backpropagationNeuralNetworks = new BackpropNeuralNetwork(2, 3,1);
for (int iterations = 0; iterations < NeuralNetConstants.ITERATIONS; iterations++) {
for (int i = 0; i < trainingResults.length; i++) {
backpropagationNeuralNetworks.train(trainingData[i], trainingResults[i],
NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM);
}
System.out.println();
for (int i = 0; i < trainingResults.length; i++) {
float[] t = trainingData[i];
System.out.printf("%d epoch\n", iterations + 1);
System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], backpropagationNeuralNetworks.run(t)[0]);
}
}
}
}
You are initializing your weights differently. You are creating one random value and use it often. What you want to do is to create a random value for each weight in the array: Replace
self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
with
self.weights = (0..<(1 + inputSize) * outputSize).map { _ in
return (-2.0...2.0).random()
}
Beside that: please consider to only override the first elements of your input in the Layer.run method. So instead of
input = inputArray
you should do this:
for (i, e) in inputArray {
self.input[i] = e
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With