 

Perceptron learning algorithm not converging to 0

Here is my perceptron implementation in ANSI C:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float randomFloat()
{
    srand(time(NULL));
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    // X, Y coordinates of the training set.
    float x[208], y[208];

    // Training set outputs.
    int outputs[208];

    int i = 0; // iterator

    FILE *fp;

    if ((fp = fopen("test1.txt", "r")) == NULL)
    {
        printf("Cannot open file.\n");
    }
    else
    {
        while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF)
        {
            if (outputs[i] == 0)
            {
                outputs[i] = -1;
            }
            printf("%f   %f   %d\n", x[i], y[i], outputs[i]);
            i++;
        }
    }

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                for (i = 0; i < 2; i++)
                {
                    float add = learningRate * localError;
                    if (i == 0)
                    {
                        add *= x[p];
                    }
                    else if (i == 1)
                    {
                        add *= y[p];
                    }
                    weights[i] += add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f %.2f\n", iteration, globalError, localError);

            iteration++;
        }

        system("PAUSE");

    } while (globalError != 0);

    system("PAUSE");
    return 0;
}

The training set I'm using: Data Set

I have removed all irrelevant code. Basically, it now reads the test1.txt file and loads its values into three arrays: x, y, and outputs.

Then there is the perceptron learning algorithm which, for some reason, is not converging (globalError should converge to 0), so I get an infinite do-while loop.

When I use a smaller training set (like 5 points), it works pretty well. Any ideas where the problem could be?

I wrote this algorithm to closely follow this C# Perceptron algorithm.


EDIT:

Here is an example with a smaller training set:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X coordinates of the training set.
    float x[] = { -3.2, 1.1, 2.7, -1 };

    // Y coordinates of the training set.
    float y[] = { 1.5, 3.3, 5.12, 2.1 };

    // The training set outputs.
    int outputs[] = { 1, -1, -1, 1 };

    int i = 0; // iterator

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                for (i = 0; i < 2; i++)
                {
                    float add = learningRate * localError;
                    if (i == 0)
                    {
                        add *= x[p];
                    }
                    else if (i == 1)
                    {
                        add *= y[p];
                    }
                    weights[i] += add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f\n", iteration, globalError);
        }

        iteration++;

    } while (globalError != 0);

    // Display network generalisation.
    printf("X       Y     Output\n");
    float j, k;
    for (j = -1; j <= 1; j += .5)
    {
        for (k = -1; k <= 1; k += .5)
        {
            // Calculate output.
            int output = calculateOutput(weights, j, k);
            printf("%.2f  %.2f  %s\n", j, k, (output == 1) ? "Blue" : "Red");
        }
    }

    // Display modified weights.
    printf("Modified weights: %.2f %.2f\n", weights[0], weights[1]);

    system("PAUSE");
    return 0;
}
asked Nov 08 '09 by Richard Knop


People also ask

Will the perceptron learning algorithm always converge?

The perceptron is a linear classifier, so the answer depends on the data: if the classes are separable by a hyperplane, the perceptron learning algorithm will always converge. If the data is not linearly separable, it will never converge.

What is the Perceptron convergence rule?

The Perceptron Convergence Theorem states that a classifier for two linearly separable classes of patterns is always trainable in a finite number of training steps. In addition, training a single discrete two-class perceptron changes the weights if and only if a misclassification occurs.

What does it mean when perceptron converges?

Perceptron Convergence Theorem: for any finite set of linearly separable labeled examples, the Perceptron Learning Algorithm will halt after a finite number of iterations. In other words, after finitely many iterations the algorithm yields a vector w that classifies all the examples perfectly.
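To make the "weights change if and only if a misclassification occurs" rule concrete, here is a minimal sketch in C. It is an illustration only, not code from the question or the answers below; the function name, the toy data, and the two-feature layout are assumptions, while the +1/-1 label convention matches the question's code:

#include <stdio.h>

/* One training epoch of the perceptron rule: the weights change
   if and only if a pattern is misclassified. */
void trainEpoch(float w[3], const float x[], const float y[],
                const int labels[], int n, float rate)
{
    int p;
    for (p = 0; p < n; p++) {
        float sum = w[0] * x[p] + w[1] * y[p] + w[2]; /* w[2] is the bias weight */
        int predicted = (sum >= 0) ? 1 : -1;
        int error = labels[p] - predicted;            /* 0 when classified correctly */
        if (error != 0) {
            w[0] += rate * error * x[p];
            w[1] += rate * error * y[p];
            w[2] += rate * error;                     /* bias input is fixed at 1 */
        }
    }
}

int main(void)
{
    /* Hypothetical, linearly separable toy data. */
    float x[] = { -2.0f, -1.0f, 1.0f, 2.0f };
    float y[] = {  1.0f,  2.0f, -1.0f, -2.0f };
    int labels[] = { 1, 1, -1, -1 };
    float w[3] = { 0.0f, 0.0f, 0.0f };
    int epoch;

    for (epoch = 0; epoch < 100; epoch++)
        trainEpoch(w, x, y, labels, 4, 0.1f);

    printf("w0=%.2f w1=%.2f w2=%.2f\n", w[0], w[1], w[2]);
    return 0;
}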


2 Answers

In your current code, the perceptron successfully learns the direction of the decision boundary BUT is unable to translate it.

    y                              y
    ^                              ^
    |  - + \  +                    |  - \ +   +
    | -    +\ +   +                | -   \  + +   +
    | - -    \ +                   | - -  \    +
    | -  -  + \  +                 | -  -  \ +   +
    ---------------------> x       --------------------> x
        stuck like this                need to get like this

(as someone pointed out, here is a more accurate version)

The problem lies in the fact that your perceptron has no bias term, i.e. a third weight component connected to an input fixed at 1. Without it, the decision boundary is w0*x + w1*y = 0, a line forced to pass through the origin; the bias weight w2 turns it into w0*x + w1*y + w2 = 0, which can be translated away from the origin.

        w0   -----
    x ---->|     |
           |  f  |----> output (+1/-1)
    y ---->|     |
        w1   -----
               ^ w2
    1(bias) ---|

The following is how I corrected the problem:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#define LEARNING_RATE    0.1
#define MAX_ITERATION    100

float randomFloat()
{
    return (float)rand() / (float)RAND_MAX;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1] + weights[2];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    float x[208], y[208], weights[3], localError, globalError;
    int outputs[208], patternCount, i, p, iteration, output;

    FILE *fp;
    if ((fp = fopen("test1.txt", "r")) == NULL) {
        printf("Cannot open file.\n");
        exit(1);
    }

    i = 0;
    while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF) {
        if (outputs[i] == 0) {
            outputs[i] = -1;
        }
        i++;
    }
    patternCount = i;

    weights[0] = randomFloat();
    weights[1] = randomFloat();
    weights[2] = randomFloat();

    iteration = 0;
    do {
        iteration++;
        globalError = 0;
        for (p = 0; p < patternCount; p++) {
            output = calculateOutput(weights, x[p], y[p]);

            localError = outputs[p] - output;
            weights[0] += LEARNING_RATE * localError * x[p];
            weights[1] += LEARNING_RATE * localError * y[p];
            weights[2] += LEARNING_RATE * localError;

            globalError += (localError * localError);
        }

        /* Root Mean Squared Error */
        printf("Iteration %d : RMSE = %.4f\n",
            iteration, sqrt(globalError / patternCount));
    } while (globalError > 0 && iteration <= MAX_ITERATION);

    printf("\nDecision boundary (line) equation: %.2f*x + %.2f*y + %.2f = 0\n",
        weights[0], weights[1], weights[2]);

    return 0;
}

... with the following output:

Iteration 1 : RMSE = 0.7206
Iteration 2 : RMSE = 0.5189
Iteration 3 : RMSE = 0.4804
Iteration 4 : RMSE = 0.4804
Iteration 5 : RMSE = 0.3101
Iteration 6 : RMSE = 0.4160
Iteration 7 : RMSE = 0.4599
Iteration 8 : RMSE = 0.3922
Iteration 9 : RMSE = 0.0000

Decision boundary (line) equation: -2.37*x + -2.51*y + -7.55 = 0
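If you want to reproduce this yourself, a typical build line (assuming gcc and the source saved as perceptron.c; -lm links the math library needed for sqrt) would be:

gcc perceptron.c -o perceptron -lm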

And here's a short animation of the code above using MATLAB, showing the decision boundary at each iteration:

[animation]
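As a side note (my addition, not part of the original answer): since w1 is nonzero in this run, the printed boundary equation can be rearranged into a form that is convenient for plotting, e.g. with a small hypothetical helper like this:

/* Rearranging w0*x + w1*y + w2 = 0 gives y = -(w0*x + w2) / w1
   (assumes w1 != 0), which is handy for drawing the learned line. */
float boundaryY(const float weights[3], float x)
{
    return -(weights[0] * x + weights[2]) / weights[1];
}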

answered Oct 18 '22 by Amro


It might help if you put the seeding of the random generator at the start of your main instead of reseeding it on every call to randomFloat. Since time(NULL) only changes once per second, reseeding before each call restarts rand() from the same seed and makes randomFloat return the same value over and over. I.e.

float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

// ...

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X, Y coordinates of the training set.
    float x[208], y[208];
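A quick way to see the effect (my own illustrative snippet, not from the answer above):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
    int i;
    for (i = 0; i < 5; i++) {
        /* time(NULL) is the same for a whole second, so this
           restarts the generator at the same seed each time... */
        srand(time(NULL));
        /* ...and the first value drawn after reseeding is identical. */
        printf("%d\n", rand());
    }
    return 0;
}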
answered Oct 18 '22 by rsp