I'm using pytesseract (0.3.2) with OpenCV (4.1.2) to identify digits in images. While image_to_string works, image_to_data and image_to_boxes do not return anything. I need to draw the bounding boxes on the images, and this has stumped me. I've tried different images, older versions of pytesseract, etc. I'm using Windows and Jupyter Notebook.
import cv2
import numpy as np
import pytesseract
# Erosion
def erode(image, kernel_size=5, iterations=1):
    """Erode *image* with a square, all-ones structuring element.

    Args:
        image: Image array accepted by ``cv2.erode``.
        kernel_size: Side length of the square kernel. Defaults to 5, the
            size that was previously hard-coded.
        iterations: Number of erosion passes. Defaults to 1, the value that
            was previously hard-coded.

    Returns:
        The eroded image as returned by ``cv2.erode``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.erode(image, kernel, iterations=iterations)
# Grayscale
def get_grayscale(image):
    """Return *image* converted with ``cv2.COLOR_BGR2GRAY``."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grayscale
# Thresholding
def thresholding(image):
    """Binarize *image* with Otsu's automatically selected threshold.

    Args:
        image: Image passed straight to ``cv2.threshold``; presumably a
            single-channel 8-bit image, as Otsu's method requires - confirm
            against callers.

    Returns:
        The thresholded image (the second element of the
        ``cv2.threshold`` result), with values 0 or 255.
    """
    # With THRESH_OTSU the threshold is computed from the image and the
    # explicit ``thresh`` argument is not used, so pass 0 instead of a
    # number that looks meaningful but has no effect.
    _, binary = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
# Tell pytesseract where the Tesseract executable lives before any OCR call.
pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'

img = cv2.imread('my_image.jpg')
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read 'my_image.jpg'")

# Pre-processing pipeline: grayscale -> Otsu threshold -> erosion.
gray = get_grayscale(img)
thresh = thresholding(gray)
# The snippet called an undefined ``remove_noise``; ``erode`` is the only
# matching helper defined here (presumably what was intended - confirm).
# The result is stored under a new name so the ``erode`` function is not
# shadowed.
eroded = erode(thresh)

custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
print(pytesseract.image_to_string(eroded, config=custom_config))

# Keep a copy of the pre-processed image on disk for inspection.
cv2.imwrite("test.jpg", eroded)

# The question reported that these two calls returned nothing. They now
# receive the same in-memory image and the same config as image_to_string,
# so all three calls run Tesseract with identical settings, and no
# un-imported ``Image`` name is needed.
print(pytesseract.image_to_boxes(eroded, config=custom_config))
print(pytesseract.image_to_data(eroded, config=custom_config))
Instead of using image_to_boxes, an alternative approach is to find the contours with cv2.findContours, obtain each contour's bounding-rectangle coordinates with cv2.boundingRect, and draw the bounding box with cv2.rectangle.
Using this sample input image

Drawn boxes

Result from OCR
1234567890
Code
import cv2
import pytesseract
import numpy as np
# Location of the Tesseract executable used by pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Read the input, convert it to grayscale, then binarize (inverted) with Otsu
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Outline every external contour with a rectangle.
# findContours may return 2 or more values; the contour list is the second
# item from the end for both the 2- and 3-value forms.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
for contour in cnts:
    x, y, w, h = cv2.boundingRect(contour)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (36, 255, 12), 2)

# OCR on the re-inverted threshold image (undoes THRESH_BINARY_INV)
ocr_input = 255 - thresh
data = pytesseract.image_to_string(ocr_input, lang='eng', config='--psm 6')
print(data)

# Show the intermediate and annotated images until a key is pressed
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
Please try the following code:
from pytesseract import Output
import pytesseract
import cv2
image = cv2.imread("my_image.jpg")
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read 'my_image.jpg'")

# OpenCV stores images in BGR channel order while pytesseract assumes RGB,
# so swap the channel ordering before running OCR.
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'
custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
results = pytesseract.image_to_data(
    rgb, output_type=Output.DICT, lang='eng', config=custom_config
)
boxresults = pytesseract.image_to_boxes(
    rgb, output_type=Output.DICT, lang='eng', config=custom_config
)
print(results)
print(boxresults)

# Word boxes (red): image_to_data reports left/top/width/height with the
# origin at the top-left corner, matching OpenCV's coordinate system.
word_rows = zip(
    results["level"],
    results["left"],
    results["top"],
    results["width"],
    results["height"],
    results["text"],
)
for level, left, top, width, height, text in word_rows:
    # Level 5 is the word level in Tesseract's TSV output; lower levels
    # describe pages, blocks, paragraphs and lines.
    if level != 5:
        continue
    cv2.putText(image, text, (left, top - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.rectangle(image, (left, top), (left + width, top + height),
                  (0, 0, 255), 1)

# Character boxes (blue): image_to_boxes uses Tesseract's box-file
# convention, whose origin is the BOTTOM-left corner, so the y values must
# be flipped against the image height before drawing with OpenCV.
image_height = image.shape[0]
# The dict may lack these keys when nothing was recognised (presumably an
# empty dict - confirm against pytesseract); fall back to empty lists.
char_rows = zip(
    boxresults.get("left", []),
    boxresults.get("bottom", []),
    boxresults.get("right", []),
    boxresults.get("top", []),
)
for left, bottom, right, top in char_rows:
    cv2.rectangle(image,
                  (left, image_height - top),
                  (right, image_height - bottom),
                  (255, 0, 0), 1)

cv2.imshow("image", image)
cv2.waitKey(0)
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With