Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to fix premature convergence in simple GA (Python)?

Yesterday i started exploring the genetic algorithms, and when i ended up with some basic theory, i tried to write simple GA on Python, that solves Diophantine equation. I'm new to Python and GAs, so please, don't judge my code strictly.

Problem

I cant get any result due to premature convergence (there is some no-return point (n-population), population[n] == population[n+i], where i is any integer. even the random mutatuion element cant change this, the generation is degradating very quickly)

GA is using crossover to breed, and weighted choice of parents.

  • Q1: Is there any design mistakes in my code (below)?
  • Q1.2: Do i need to add elitism?
  • Q1.3: Do i need to change breed logic?
  • Q2: Is there realy needed deep copy?

Code:

# -*- coding: utf-8 -*-
from random import randint
from copy import deepcopy
from math import floor
import random

class Organism:
    #initiate
    def __init__(self, alleles, fitness, likelihood):
        self.alleles = alleles
        self.fitness = fitness
        self.likelihood = likelihood
        self.result = 0
    def __unicode__(self):
        return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)

class  CDiophantine:
    def __init__(self, coefficients,  result):
        self.coefficients = coefficients
        self.result = result

    maxPopulation = 40
    organisms = []
    def GetGene (self,i):
        return self.organisms[i]

    def OrganismFitness (self,gene):
        gene.result = 0
        for i in range (0, len(self.coefficients)):
            gene.result += self.coefficients[i]*gene.alleles[i]
        gene.fitness = abs(gene.result - self.result)
        return gene.fitness

    def Fitness (self):
        for organism in self.organisms:
            organism.fitness = self.OrganismFitness(organism)
            if organism.fitness == 0:
                return  organism
        return None


    def MultiplyFitness (self):
        coefficientSum = 0
        for organism in self.organisms:
            coefficientSum += 1/float(organism.fitness)
        return coefficientSum

    def GenerateLikelihoods (self):
        last = 0
        multiplyFitness = self.MultiplyFitness()
        for organism in self.organisms:
            last = ((1/float(organism.fitness)/multiplyFitness)*100)
            #print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last)
            organism.likelihood = last

    def Breed (self, parentOne, parentTwo):
        crossover = randint (1,len(self.coefficients)-1)
        child = deepcopy(parentOne)
        initial = 0
        final = len(parentOne.alleles) - 1
        if randint (1,100) < 50:
            father = parentOne
            mother = parentTwo
        else:
            father = parentTwo
            mother = parentOne
        child.alleles = mother.alleles[:crossover] + father.alleles[crossover:]
        if randint (1,100) < 5:
            for i in range(initial,final):    
                child.alleles[i] = randint (0,self.result)

        return child

    def CreateNewOrganisms (self):
        #generating new population
        tempPopulation = []
        for _ in self.organisms:
            iterations = 0
            father = deepcopy(self.organisms[0])
            mother = deepcopy(self.organisms[1])
            while father.alleles == mother.alleles:
                father = self.WeightedChoice()
                mother = self.WeightedChoice()
                iterations+=1
                if iterations > 35:
                    break
            kid = self.Breed(father,mother)
            tempPopulation.append(kid)
        self.organisms = tempPopulation

    def WeightedChoice (self):
        list = []
        for organism in self.organisms:
            list.append((organism.likelihood,organism))
        list = sorted((random.random() * x[0], x[1]) for x in list)
        return list[-1][1]


    def AverageFitness (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.fitness
        return float(sum)/len(self.organisms)

    def AverageLikelihoods (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.likelihood
        return sum/len(self.organisms)

    def Solve (self):
        solution = None
        for i in range(0,self.maxPopulation):
            alleles = []
            #
            for j in range(0, len(self.coefficients)):
                alleles.append(randint(0, self.result))
            self.organisms.append(Organism(alleles,0,0))
        solution = self.Fitness()
        if solution:
            return solution.alleles
        iterations = 0
        while not solution and  iterations <3000:
            self.GenerateLikelihoods()
            self.CreateNewOrganisms()
            solution = self.Fitness()
            if solution:
                print 'SOLUTION FOUND IN %s ITERATIONS' % iterations
                return solution.alleles
            iterations += 1
        return  -1

if __name__ == "__main__":
    diophantine = CDiophantine ([1,2,3,4],30)
    #cProfile.run('diophantine.Solve()')
    print diophantine.Solve()

I tried to change breed and weighted random choice logic but with no results. This GA supposed to be work, i dont know, what's wrong. I know that there are some GA libraries on Python, i'm trying to understand them at the moment - it seems that they are quite complex to me. Sorry for mistakes, english is not my native language. Thank you for your understanding.

NECROUPDATE: Store chromosomes in Gray Code, not in integer.

like image 809
Ernado Avatar asked Jun 28 '11 19:06

Ernado


People also ask

What is premature convergence to solution?

In genetic algorithms, the term of premature convergence means that a population for an optimization problem converged too early, resulting in being suboptimal.

What causes premature convergence?

The premature convergence is generally due to the loss of diversity within the population. This loss can be caused by the selection pressure, the schemata distribution due to crossover operators, and a poor evolution parameters setting.

How do we check convergence of GA?

By our intuition, we see that if a GA converges to the global optimal set, it should satisfy the following properties: (1) Starting from any initial population (1) = x, it is possible for the GA to visit the global optimal set after finite transitions, that means, the global optimal set is accessible.

What factors affect convergence in genetic algorithm?

The weak point of a genetic algorithm is that it often suffers from so-called premature convergence, which is caused by an early homogenization of genetic material in the population. This means that no valuable exploration can be performed anymore.


1 Answers

Slight logic error: parentTwo is slightly more likely to be the father than the mother. Even odds would be randint (1,100) <= 50, not randint (1,100) < 50. Won't be what's causing the issue here.

  1. Your population size is fairly small. 40 is very few for most problems. That will cause it to converge quickly.
  2. Elitism will cause your population to converge faster, not slower.
  3. Your WeightedChoice function appears to be rather inefficient, if I'm reading it correctly. I haven't used Python recently enough to really understand what's going on there, but looking at it it certainly feels like something inefficient. If you can improve on that, it should speed up the processing so you can increase the population size (and, seeing as I'm figuring your algorithm there is probably at least O(n^2), that'll be really important).

With such a small population size, 200-300 generations is not surprising to solve the problem. If you increase the population, it should reduce the generations required.

Note: I found some old code that I wrote a few years ago for solving a similar problem. It's in C, and uses tournament selection, but perhaps it can give you a few ideas:

/*Diophantine equation solving genetic algorithm
Copyright (C) 2009- by Joel Rein
Licensed under the terms of the MIT License*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define POP 100
//number of variables to solve for
#define VAR 4
//maximum value for a) result and b) variables
#define MAX 100 
#define MAX_GENS 500
//probability of crossover (otherwise just one parent will be used)
#define CROSSOVER 0.7
//probability of mutation (per gene)
#define MUTATION 0.4
//print out debug information each generation (recommended: if used, keep RUNS low)
#define DEBUG
//print result of each run individually
#define PRINT_RESULT
//how many times to run the GA
#define RUNS 1

int pop[POP][VAR], scores[POP], new[POP][VAR];
int coefficients[VAR];
int result=0;

int score(int index){
    int sum=0;
    for(int i=0;i<VAR;i++)
        sum+=coefficients[i]*pop[index][i];
    return abs(sum-result);
}

int tournament(int size){
    int best=rand()%POP;
    for(int i=1;i<size;i++){
        int comp=rand()%POP;
        if(scores[comp]<scores[best])
            best=comp;
    }
    return best;
}

void breed(int target){
    int a=tournament(3), b=tournament(3);
    //copy a
    for(int i=0;i<VAR;i++)
        new[target][i]=pop[a][i];
    //crossover
    if((float)rand()/RAND_MAX<CROSSOVER){
        int x=rand()%VAR;
        for(int i=x;i<VAR;i++)
            new[target][i]=pop[b][i];
    }
    //mutation
    for(int i=0;i<VAR;i++)
        if((float)rand()/RAND_MAX<MUTATION)
            new[target][i]=rand()%(result*2)-result;
}

void debug(int gen, int best){
#ifdef DEBUG
    printf("Gen: %3i Score: %3i --- ", gen, scores[best]);
    int sum=0;
    for(int i=0;i<VAR;i++){
        sum+=coefficients[i]*pop[best][i];
        printf("%3i*%3i+", coefficients[i], pop[best][i]);
    }
    printf("0= %3i (target: %i)\n", sum, result);
#endif
}

int ga(int run){
    srand(time(NULL)+run);
    //calculate a result for the equation. 
    //this mustn't be 0, else we get division-by-zero errors while initialising & mutating.
    while(!result)
        result=rand()%MAX;
    for(int i=0;i<VAR;i++)
        coefficients[i]=rand()%result;
    //initialise population
    for(int i=0;i<POP;i++)
        for(int j=0;j<VAR;j++)
            pop[i][j]=rand()%(result*2)-result;
    //main loop
    int gen, best;
    for(gen=0;gen<MAX_GENS;gen++){
        best=0;
        //evaluate population
        for(int i=0;i<POP;i++){
            scores[i]=score(i);
            if(scores[i]<scores[best])
                best=i;
        }
        debug(gen, best);
        if(scores[best]==0)
            break;
        //breed and replace
        for(int i=0;i<POP;i++)
            breed(i);
        for(int i=0;i<POP;i++)
            for(int j=0;j<VAR;j++)
                pop[i][j]=new[i][j];
    }
#ifdef PRINT_RESULT
    printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]); 
#else
    printf(".");
#endif
    return gen;
}

int main(){
    int total=0;
    for(int i=0;i<RUNS;i++)
        total+=ga(i);
    printf("\nAverage runtime: %i generations\n", total/RUNS);
}
like image 117
Joel Rein Avatar answered Nov 10 '22 14:11

Joel Rein