Here is my perceptron implementation in ANSI C:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

float randomFloat()
{
    srand(time(NULL));
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    // X, Y coordinates of the training set.
    float x[208], y[208];

    // Training set outputs.
    int outputs[208];

    int i = 0; // iterator

    FILE *fp;

    if ((fp = fopen("test1.txt", "r")) == NULL) {
        printf("Cannot open file.\n");
    }
    else {
        while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF) {
            if (outputs[i] == 0) {
                outputs[i] = -1;
            }
            printf("%f %f %d\n", x[i], y[i], outputs[i]);
            i++;
        }
    }

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++) {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0) {
                // Update weights.
                for (i = 0; i < 2; i++) {
                    float add = learningRate * localError;
                    if (i == 0) {
                        add *= x[p];
                    }
                    else if (i == 1) {
                        add *= y[p];
                    }
                    weights[i] += add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f %.2f\n", iteration, globalError, localError);

            iteration++;
        }
        system("PAUSE");
    } while (globalError != 0);

    system("PAUSE");
    return 0;
}
The training set I'm using: Data Set
I have removed all irrelevant code. Basically, what it does now is read the test1.txt file and load the values from it into three arrays: x, y, outputs.
Then there is the perceptron learning algorithm, which, for some reason, is not converging (globalError should converge to 0), so I get an infinite do-while loop.
When I use a smaller training set (like 5 points), it works pretty well. Any ideas where the problem could be?
I wrote this algorithm to be very similar to this C# Perceptron algorithm:
EDIT:
Here is an example with a smaller training set:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X coordinates of the training set.
    float x[] = { -3.2, 1.1, 2.7, -1 };

    // Y coordinates of the training set.
    float y[] = { 1.5, 3.3, 5.12, 2.1 };

    // The training set outputs.
    int outputs[] = { 1, -1, -1, 1 };

    int i = 0; // iterator

    FILE *fp;

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++) {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0) {
                // Update weights.
                for (i = 0; i < 2; i++) {
                    float add = learningRate * localError;
                    if (i == 0) {
                        add *= x[p];
                    }
                    else if (i == 1) {
                        add *= y[p];
                    }
                    weights[i] += add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f\n", iteration, globalError);
        }
        iteration++;
    } while (globalError != 0);

    // Display network generalisation.
    printf("X Y Output\n");
    float j, k;
    for (j = -1; j <= 1; j += .5) {
        for (k = -1; k <= 1; k += .5) {
            // Calculate output.
            int output = calculateOutput(weights, j, k);
            printf("%.2f %.2f %s\n", j, k, (output == 1) ? "Blue" : "Red");
        }
    }

    // Display modified weights.
    printf("Modified weights: %.2f %.2f\n", weights[0], weights[1]);

    system("PAUSE");
    return 0;
}
Yes, the perceptron learning algorithm is a linear classifier. If your data is separable by a hyperplane, then the perceptron will always converge. It will never converge if the data is not linearly separable.
The Perceptron Convergence Theorem states that a classifier for two linearly separable classes of patterns can always be trained in a finite number of steps. In summary, training a single discrete-perceptron two-class classifier requires a change of weights if and only if a misclassification occurs.
Perceptron Convergence Theorem: For any finite set of linearly separable labeled examples, the Perceptron Learning Algorithm will halt after a finite number of iterations. In other words, after a finite number of iterations, the algorithm yields a vector w that classifies all the examples perfectly.
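For reference, the standard quantitative form of this result is Novikoff's bound (stated here in general notation, not quoted from the texts above): if every training point satisfies $\|x_i\| \le R$ and some unit weight vector $w^*$ separates the classes with margin $\gamma$, i.e.

$$ y_i \,(w^* \cdot x_i) \ge \gamma > 0 \quad \text{for all } i, $$

then the perceptron makes at most

$$ \left(\frac{R}{\gamma}\right)^2 $$

weight updates before it classifies everything correctly.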
In your current code, the perceptron successfully learns the direction of the decision boundary BUT is unable to translate it.
y                            y
^                            ^
|  -   +  \\  +              |  -      \\  +  +
|  -     +\\  +  +           |  -      \\  +  +  +
|  -  -   \\  +              |  -  -   \\  +
|  -  -  + \\  +             |  -  -   \\  +  +
---------------------> x     --------------------> x
    stuck like this              need to get like this
(as someone pointed out, here is a more accurate version)
The problem lies in the fact that your perceptron has no bias term, i.e. a third weight component connected to an input of fixed value 1. Without it, the decision boundary w0*x + w1*y = 0 is forced to pass through the origin, which is why the line can rotate but never shift.
          w0
        -----
x ----->|   |
        | f |----> output (+1/-1)
y ----->|   |
    w1  -----
           ^ w2
 1(bias) --|
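A quick way to see the effect (a minimal sketch of my own, not part of the original code): without the bias, the origin gets the same label no matter what the weights are, so no amount of training can fix a dataset where (0, 0) should fall on the negative side.

#include <stdio.h>

/* The bias-less activation from the question. */
int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(void)
{
    /* Two arbitrary weight vectors: the origin is classified +1 every
       time, because 0*w0 + 0*w1 == 0 and 0 >= 0. */
    float w1[] = { 0.3f, -1.7f };
    float w2[] = { -5.0f, 42.0f };
    printf("%d %d\n", calculateOutput(w1, 0, 0), calculateOutput(w2, 0, 0));
    return 0; /* prints: 1 1 */
}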
The following is how I corrected the problem:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#define LEARNING_RATE 0.1
#define MAX_ITERATION 100

float randomFloat()
{
    return (float)rand() / (float)RAND_MAX;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1] + weights[2];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    float x[208], y[208], weights[3], localError, globalError;
    int outputs[208], patternCount, i, p, iteration, output;

    FILE *fp;
    if ((fp = fopen("test1.txt", "r")) == NULL) {
        printf("Cannot open file.\n");
        exit(1);
    }

    i = 0;
    while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF) {
        if (outputs[i] == 0) {
            outputs[i] = -1;
        }
        i++;
    }
    patternCount = i;

    weights[0] = randomFloat();
    weights[1] = randomFloat();
    weights[2] = randomFloat();

    iteration = 0;
    do {
        iteration++;
        globalError = 0;
        for (p = 0; p < patternCount; p++) {
            output = calculateOutput(weights, x[p], y[p]);

            localError = outputs[p] - output;
            weights[0] += LEARNING_RATE * localError * x[p];
            weights[1] += LEARNING_RATE * localError * y[p];
            weights[2] += LEARNING_RATE * localError;

            globalError += (localError * localError);
        }

        /* Root Mean Squared Error */
        printf("Iteration %d : RMSE = %.4f\n", iteration,
               sqrt(globalError / patternCount));
    } while (globalError > 0 && iteration <= MAX_ITERATION);

    printf("\nDecision boundary (line) equation: %.2f*x + %.2f*y + %.2f = 0\n",
           weights[0], weights[1], weights[2]);

    return 0;
}
... with the following output:
Iteration 1 : RMSE = 0.7206
Iteration 2 : RMSE = 0.5189
Iteration 3 : RMSE = 0.4804
Iteration 4 : RMSE = 0.4804
Iteration 5 : RMSE = 0.3101
Iteration 6 : RMSE = 0.4160
Iteration 7 : RMSE = 0.4599
Iteration 8 : RMSE = 0.3922
Iteration 9 : RMSE = 0.0000

Decision boundary (line) equation: -2.37*x + -2.51*y + -7.55 = 0
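Once training stops, the three weights define the separating line, and you can classify any new point by checking which side of it the point falls on. A minimal sketch (the weight values are copied from the sample run above; the query point is made up):

#include <stdio.h>

int main(void)
{
    /* Learned weights from the sample run above. */
    float w0 = -2.37f, w1 = -2.51f, w2 = -7.55f;

    /* Hypothetical query point. */
    float x = 1.0f, y = -4.0f;

    /* Same rule the trained perceptron uses: the sign of
       w0*x + w1*y + w2 picks the class. */
    float sum = w0 * x + w1 * y + w2;
    printf("(%.2f, %.2f) -> %d\n", x, y, (sum >= 0) ? 1 : -1);
    return 0;
}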
And here's a short animation of the code above using MATLAB, showing the decision boundary at each iteration:
It might help if you moved the seeding of the random generator to the start of your main instead of reseeding on every call to randomFloat, i.e.
float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

// ...

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X, Y coordinates of the training set.
    float x[208], y[208];
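The reason this matters (my illustration, not part of the original answer): time(NULL) has one-second resolution, so reseeding inside randomFloat makes consecutive calls within the same second restart the sequence from the same seed. Both initial weights then come out identical. A minimal sketch:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Reseeds on every call, like the code in the question. */
float badRandomFloat(void)
{
    srand(time(NULL));
    return (float)rand() / (float)RAND_MAX;
}

int main(void)
{
    /* Both calls land in the same second, so both get the same seed
       and therefore the same first value from rand(). */
    printf("%f\n", badRandomFloat());
    printf("%f\n", badRandomFloat()); /* almost certainly identical */
    return 0;
}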