My own OCR-program in Python

I am still a beginner, but I want to write a character-recognition program. This program isn't ready yet, and I have edited it a lot, so the comments may not match the code exactly. I will use 8-connectivity for the connected-component labeling.

import sys

import numpy as np
from PIL import Image

# Side length of the top-left window that is scanned for now; the plan is to
# replace it with the full image width/height later.
WINDOW = 40

im = Image.open("D:\\Python26\\PYTHON-PROGRAMME\\bild_schrift.jpg")

w, h = im.size
w = int(w)
h = int(h)

# Never scan past the image border, even for images smaller than WINDOW.
rows = min(WINDOW, h)
cols = min(WINDOW, w)

# 2D array for area, indexed [row][col]: number 0 is white, number 1 is black.
# Filling it in [row][col] order directly makes the former
# "build as [x][y], then transpose" step unnecessary.
area = np.zeros((h, w), dtype=int)

# 2D array for letter: 1 marks a black pixel inside the scanned window.
letter = [[0] * 50 for _ in range(50)]

# 2D array for label: temporary label number of each black pixel.
label = [[0] * 50 for _ in range(50)]

# image to number conversion
# Work on an RGB copy so that greyscale or CMYK files also yield three
# channels per pixel; `im` itself is left untouched for the final save.
rgb = im.convert("RGB")
pix = rgb.load()
threshold = 200
for x in range(w):
    for y in range(h):
        red, green, blue = pix[x, y]
        # total value of the three channels decides black (1) or white (0)
        area[y][x] = 1 if red + green + blue <= threshold else 0

# Print arrays in full instead of summarising them. sys.maxsize is the value
# NumPy documents for this; the string 'nan' is rejected by current versions.
np.set_printoptions(threshold=sys.maxsize, linewidth=10)

# find all black pixel and set temporary label numbers
i = 1
for row in range(rows):  # height (later)
    for col in range(cols):  # width (later)
        if area[row][col] == 1:
            letter[row][col] = 1
            label[row][col] = i
            i += 1

# connected components labeling
# The temporary labels assigned above are kept as they are; the merge step
# below is still to be written and must not overwrite them beforehand.
for row in range(rows):  # height (later)
    for col in range(cols):  # width (later)
        if area[row][col] == 1:
            # if pixel has neighbour (bounds-checked so the last row/column
            # of the image does not raise IndexError):
            if col + 1 < w and area[row][col + 1] == 1:
                # pixel and neighbour get the lowest label
                pass  # tomorrows work
            if row + 1 < h and area[row + 1][col] == 1:
                # pixel and neighbour get the lowest label
                pass  # tomorrows work
            # should i also compare pixel and left neighbour?

# find width of the letter
# find height of the letter
# find the middle of the letter
# middle = [width/2][height/2] #?
# divide letter into 30 parts --> 5 x 6 array

# model letter
# letter A-Z, a-z, 0-9 (maybe more)

# compare each of the 30 parts of the letter with all model letters
# make a weighting

# print(letter)

im.save("D:\\Python26\\PYTHON-PROGRAMME\\bild2.jpg")
print('done')
1 Answer

OCR is not an easy task indeed. That's why text CAPTCHAs still work :)

To talk only about the letter extraction and not the pattern recognition, the technique you are using to separate the letters is called Connected Component Labeling. Since you are asking for a more efficient way to do this, try to implement the two-pass algorithm that's described in this article. Another description can be found in the article Blob extraction.

EDIT: Here's the implementation for the algorithm that I have suggested:

import sys

# Pixel value treated as "ink". The comparison is exact, so the image must
# deliver 4-tuples (RGBA) from getpixel() for anything to be detected.
BLACK = (0, 0, 0, 255)


class Region(object):
    """A connected group of pixels together with its bounding box."""

    def __init__(self, x, y):
        """Start the region with its first pixel (x, y)."""
        self._pixels = [(x, y)]
        self._min_x = x
        self._max_x = x
        self._min_y = y
        self._max_y = y

    def add(self, x, y):
        """Add pixel (x, y) and grow the bounding box to contain it."""
        self._pixels.append((x, y))
        self._min_x = min(self._min_x, x)
        self._max_x = max(self._max_x, x)
        self._min_y = min(self._min_y, y)
        self._max_y = max(self._max_y, y)

    def box(self):
        """Return the bounding box as [(min_x, min_y), (max_x, max_y)]."""
        return [(self._min_x, self._min_y), (self._max_x, self._max_y)]


def _find_root(parent, label):
    """Return the representative label of `label`, compressing the path."""
    root = label
    while parent[root] != root:
        root = parent[root]
    # Point every label on the walked path straight at the root so later
    # look-ups are short.
    while parent[label] != root:
        next_label = parent[label]
        parent[label] = root
        label = next_label
    return root


def _union(parent, first, second):
    """Merge the sets containing the two labels; return the surviving root."""
    root_a = _find_root(parent, first)
    root_b = _find_root(parent, second)
    if root_a == root_b:
        return root_a
    low, high = min(root_a, root_b), max(root_a, root_b)
    # The smaller label always survives, so roots are stable and deterministic.
    parent[high] = low
    return low


def find_regions(im):
    """Find the 4-connected regions of BLACK pixels in an image.

    Two-pass connected-component labeling: the first pass hands out
    provisional labels and records which labels touch each other, the second
    pass replaces every label by the representative of its set.

    `im` only needs a `size` attribute holding (width, height) and a
    `getpixel((x, y))` method. Returns a list of Region objects, one per
    connected component.
    """
    width, height = im.size
    pixel_region = [[0] * height for _ in range(width)]
    # Union-find forest over provisional labels. A plain "label -> smaller
    # labels" map is not enough: if label 3 touches both 1 and 2, then 1 and 2
    # belong together even though they never touch each other directly.
    parent = {}
    n_regions = 0

    # First pass: provisional labels, scanning column by column.
    for x in range(width):
        column = pixel_region[x]
        for y in range(height):
            if im.getpixel((x, y)) != BLACK:
                continue
            # Labels of the already visited neighbours (0 means "none").
            west = pixel_region[x - 1][y] if x > 0 else 0
            north = column[y - 1] if y > 0 else 0

            if west and north:
                current = _union(parent, west, north)
            elif west or north:
                current = west or north
            else:
                n_regions += 1
                current = n_regions
                parent[current] = current
            column[y] = current

    # Second pass: collect the pixels of each set under its root label.
    regions = {}
    for x in range(width):
        for y in range(height):
            provisional = pixel_region[x][y]
            if provisional > 0:
                root = _find_root(parent, provisional)
                if root in regions:
                    regions[root].add(x, y)
                else:
                    regions[root] = Region(x, y)

    return list(regions.values())


def main():
    """Draw a red bounding box around every region and save the result."""
    # Imported here rather than at module level so that find_regions() can be
    # imported and used with any image-like object when PIL is not installed.
    from PIL import Image, ImageDraw

    im = Image.open(r"c:\users\personal\py\ocr\test.png")
    regions = find_regions(im)
    draw = ImageDraw.Draw(im)
    for r in regions:
        draw.rectangle(r.box(), outline=(255, 0, 0))
    del draw

    #im.show()
    # `with` closes the file even if saving fails.
    with open("output.png", "wb") as output:
        im.save(output)


if __name__ == "__main__":
    main()

It's not 100% perfect, but since you are doing this only for learning purposes, it may be a good starting point. With the bounding box of each character you can now use a neural network as others have suggested here.

