How to join nearby bounding boxes in OpenCV Python

Tags:

I am doing a college class project on image processing. This is my original image: enter image description here

I want to join nearby/overlapping bounding boxes on individual text line images, but I don't know how. My code looks like this so far (thanks to @HansHirse for the help):

import os
import cv2
import numpy as np
from scipy import stats
# Load the input image. cv2.imread does not raise on a missing or unreadable
# file, it returns None, so fail early with a clear message.
image = cv2.imread('example.png')
if image is None:
    raise FileNotFoundError("Could not read image 'example.png'")

# Binarise: dark text on a light background becomes white on black.
# With THRESH_OTSU the threshold is chosen automatically; the 127 is only a
# placeholder argument.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# dilation: thicken the strokes so that parts of one glyph fuse together
kernel = np.ones((5, 5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)

# find contours
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two items works for all of them.
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours accepted by ``cv2.boundingRect``.
        method: One of "left-to-right" (default), "right-to-left",
            "top-to-bottom" or "bottom-to-top". Any other value is
            treated like "left-to-right".

    Returns:
        A ``(contours, boundingBoxes)`` pair of equally long tuples in the
        requested order. Two empty tuples are returned when ``cnts`` is
        empty.
    """
    # Descending order for the two "reversed" directions.
    reverse = method in ("right-to-left", "bottom-to-top")

    # Index into the (x, y, w, h) bounding box used as sort key:
    # 1 (y) for the vertical methods, 0 (x) otherwise.
    i = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]

    # zip(*sorted(...)) cannot be unpacked into two names when there is
    # nothing to sort, so handle the empty case explicitly.
    if not boundingBoxes:
        return ((), ())

    # Sort contours and boxes together so they stay paired.
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))

    # return the sorted contours and bounding boxes
    return (cnts, boundingBoxes)

# Contours and their bounding boxes, ordered left to right.
sortedctrs, sortedbbs = sort_contours(ctrs)

# Convert every (x, y, w, h) box to corner form [xmin, ymin, xmax, ymax].
# sortedbbs already holds cv2.boundingRect(cnt) for each sorted contour.
xyminmax = [[x, y, x + w, y + h] for (x, y, w, h) in sortedbbs]

# Horizontal gap between each box and its right-hand neighbour
# (right edge of the current box to left edge of the next one).
distances = [abs(nxt[0] - cur[2]) for cur, nxt in zip(xyminmax, xyminmax[1:])]

# The merge threshold is the most frequent gap.
# Depending on the SciPy version, stats.mode(..., axis=None) yields either a
# one-element array or a scalar; np.atleast_1d makes both indexable.
# With fewer than two boxes there are no gaps, so nothing can be merged.
if distances:
    THRESHOLD = int(np.atleast_1d(stats.mode(distances, axis=None)[0])[0])
else:
    THRESHOLD = 0

# Merge each box with its right-hand neighbour when the gap between them is
# below THRESHOLD. Only direct neighbours are compared, so a run of several
# close boxes produces overlapping pair boxes rather than one joined box.
new_rects = []
for first, second in zip(xyminmax, xyminmax[1:]):
    # [xmin, ymin, xmax, ymax]
    first_xmin, first_ymin, first_xmax, first_ymax = first
    second_xmin, second_ymin, second_xmax, second_ymax = second

    # NOTE(review): this is an absolute value, so a neighbour that overlaps
    # the current box by THRESHOLD or more is not merged.
    distance = abs(second_xmin - first_xmax)

    if distance < THRESHOLD:
        # The joined box must enclose both inputs: take the outermost edge
        # on every side instead of the edges of the taller/right-hand box.
        new_rects.append([
            first_xmin,
            min(first_ymin, second_ymin),
            max(first_xmax, second_xmax),
            max(first_ymax, second_ymax),
        ])
    else:
        new_rects.append(first)

# The last box has no right-hand neighbour; keep it so it is still drawn
# when it was not merged into the previous box.
if xyminmax:
    new_rects.append(xyminmax[-1])

for rect in new_rects:
    cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png", image)

which produces this image as a result: Text line images with bounding boxes

I want to join very close or overlapping bounding boxes such as these

enter image description here

into a single bounding box so the formula doesn't get separated into single characters. I have tried using cv2.groupRectangles but the print results were just NULL.

278

asked Mar 27 '19 11:03

Igor Krakowski

2 Answers

So, here is my solution. I partially modified your (initial) code to match my preferred naming, etc. I also commented everything I added.

import cv2
import numpy as np

# Load the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail early with a clear message.
image = cv2.imread('images/example.png')
if image is None:
    raise FileNotFoundError("Could not read image 'images/example.png'")

# Inverted Otsu binarisation: text becomes white on a black background.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Thicken the strokes so that parts of one glyph fuse together.
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)

# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two items works for all of them.
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Initial bounding rects, one (x, y, w, h) entry per contour
rects = [cv2.boundingRect(cnt) for cnt in cnts]

# One flag per initial bounding rect telling whether it has
# already been used; nothing is used yet
rectsUsed = [False] * len(rects)

# Sort bounding rects by x coordinate
def getXFromRect(item):
    """Sort key: return the first element of ``item`` (the x of a rect)."""
    x = item[0]
    return x

# Order the rects left to right; the merge loop below relies on this.
rects.sort(key = getXFromRect)

# Array of accepted rects
acceptedRects = []

# Merge threshold for x coordinate distance
xThr = 5

# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
    # Rects already merged into an earlier one are skipped
    if rectsUsed[supIdx]:
        continue

    # Initialize current rect from (x, y, w, h)
    currxMin = supVal[0]
    currxMax = supVal[0] + supVal[2]
    curryMin = supVal[1]
    curryMax = supVal[1] + supVal[3]

    # This bounding rect is used
    rectsUsed[supIdx] = True

    # Iterate all initial bounding rects
    # starting from the next
    for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):

        # Initialize merge candidate
        candxMin = subVal[0]
        candxMax = subVal[0] + subVal[2]
        candyMin = subVal[1]
        candyMax = subVal[1] + subVal[3]

        # Because the rects are sorted by x, the first candidate that
        # starts too far to the right ends the search: all later ones
        # start even further right.
        if candxMin > currxMax + xThr:
            break

        # Grow the current rect so that it encloses the candidate.
        # max() keeps the right edge from shrinking when the candidate
        # lies completely inside the current rect.
        currxMax = max(currxMax, candxMax)
        curryMin = min(curryMin, candyMin)
        curryMax = max(curryMax, candyMax)

        # Merge candidate (bounding rect) is used
        rectsUsed[subIdx] = True

    # No more merge candidates possible, accept current rect as (x, y, w, h)
    acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])

# cv2.rectangle draws into `image` in place
for rect in acceptedRects:
    cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)

cv2.imwrite("images/result.png", image)

For your example

example

I get the following output

output

Now, you have to find a proper threshold to meet your expectations. Maybe, there is even some more work to do, especially to get the whole formula, since the distances don't vary that much.

Disclaimer: I'm new to Python in general, and especially to the Python API of OpenCV (C++ for the win). Comments, improvements, and pointers to Python no-gos are highly welcome!

182

answered Sep 18 '22 16:09

HansHirse

Here is a slightly different approach, using the OpenCV Wrapper library.

import cv2
import opencv_wrapper as cvw

# Load the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail early with a clear message.
image = cv2.imread("example.png")
if image is None:
    raise FileNotFoundError("Could not read image 'example.png'")

gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)

# dilation
img_dilation = cvw.dilate(thresh, 5)

# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = [c.bounding_rect for c in contours]
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)

# Distance threshold
dt = 5

# List of final, joined rectangles, seeded with the left-most rectangle.
# Slicing instead of indexing keeps this from failing when no contours
# were found; the list is then simply empty.
final_rects = sorted_rects[:1]

for rect in sorted_rects[1:]:
    prev_rect = final_rects[-1]

    # Shift rectangle `dt` back, to find out if they overlap
    shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
    intersection = cvw.rect_intersection(prev_rect, shifted_rect)
    if intersection is not None:
        # Join the two rectangles: keep the left edge of prev_rect and
        # take the outermost top, bottom and right edges of both
        min_y = min(prev_rect.tl.y, rect.tl.y)
        max_y = max(prev_rect.bl.y, rect.bl.y)
        max_x = max(prev_rect.br.x, rect.br.x)
        width = max_x - prev_rect.tl.x
        height = max_y - min_y
        new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
        # Replace the last final rectangle, making the joined one the
        # new prev_rect in the next iteration
        final_rects[-1] = new_rect
    else:
        # If no intersection, add the box
        final_rects.append(rect)

# Original boxes
for rect in sorted_rects:
    cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)

# Joined boxes
for rect in final_rects:
    cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)

cv2.imwrite("result.png", image)

And the result Final result

The green boxes are the final result, while the magenta boxes are the original ones.

I used the same threshold as @HansHirse.

The equals sign still needs some work: either use a larger dilation kernel or apply the same technique vertically.

Disclosure: I am the author of OpenCV Wrapper.

answered Sep 22 '22 16:09

Andreas

Related questions
                            
                                Python 3.5 create .rpm with pyinstaller generated executable
                            
                                What does rtype mean in Python?
                            
                                Python script in Power BI returns date as Microsoft.OleDb.Date
                            
                                Group by and aggregate columns but create NaN if values do not match
                            
                                How to check an object has the type 'dict_items'?
                            
                                How is ternary operator implemented in Python
                            
                                Possible Combination of Parentheses in a Matrix Chain Application
                            
                                Converting a DateTime Index value to an Index Number
                            
                                Implementing ROC Curves for K-NN machine learning algorithm using python and Scikit Learn
                            
                                Pickling dict in Python
                            
                                Sorting pandas dataframe by weekdays
                            
                                numpy find the max value in a row and return back to it's column index
                            
                                How to debug Tensorflow segmentation fault in model.fit()?
                            
                                Difference between multiprocessing.cpu_count and os.cpu_count
                            
                                What does the 'm' in a Python ABI tag mean?
                            
                                What is the difference between MLP implementation from scratch and in PyTorch?
                            
                                How to redirect -progress option output of ffmpeg to stderr?
                            
                                How to add calculated column to Dataframe counting frequency in column in pandas
                            
                                What is a time complexity of move_to_end operation for OrderedDict in Python 3?
                            
                                Multivariate polynomial regression with Python

Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!

Donate Us With

How to join nearby bounding boxes in OpenCV Python

Tags:

python

image-processing

opencv

opencv-python

Igor Krakowski

People also ask

2 Answers

HansHirse

Andreas

Recent Activity

Donate For Us