How can we get the number of rows and columns of a table in an image using OpenCV?
Here is the code I use to find the boxes in the table, which works correctly:
# cv2.findContours returns either two or three values depending on the
# OpenCV version; contours and hierarchy are always the last two, so slice
# them off instead of assuming a two-value return.
contours, hierarchy = cv2.findContours(img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours; each one is passed to
            ``cv2.boundingRect``.
        method: ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value
            behaves like ``"left-to-right"``.

    Returns:
        A ``(contours, bounding_boxes)`` pair of tuples in sorted order,
        where each bounding box belongs to the contour at the same index.
        Both tuples are empty when ``cnts`` is empty.
    """
    # Right-to-left and bottom-to-top are the descending orders.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Sort on the bounding box's y-coordinate (index 1) for the vertical
    # methods, otherwise on its x-coordinate (index 0).
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    paired = sorted(
        ((c, cv2.boundingRect(c)) for c in cnts),
        key=lambda pair: pair[1][axis],
        reverse=reverse,
    )

    # zip(*[]) yields nothing, so unpacking it into two names would raise
    # ValueError; report "nothing to sort" as two empty tuples instead.
    if not paired:
        return (), ()

    sorted_cnts, bounding_boxes = zip(*paired)
    return sorted_cnts, bounding_boxes
# Reorder the detected contours from the top of the image downward and keep
# the matching bounding boxes alongside them.
contours, boundingBoxes = sort_contours(contours, method="top-to-bottom")
Here are two methods: The first uses the structure of the table to determine the number of rows and columns while the second uses cell count.
Method #1: Table structure
The idea is that we can count the number of horizontal and vertical lines of the table to determine the number of rows and columns. For rows, it's `rows = horizontal lines - 1`, and for columns, it's `columns = vertical lines - 1`.
Detected horizontal lines in green
Detected vertical lines in green
Result
Rows: 7
Columns: 4
Code
import cv2
def _count_lines(binary, kernel_size, canvas):
    """Count the lines left after opening ``binary`` with a thin kernel.

    Args:
        binary: Thresholded image in which the table lines are white.
        kernel_size: ``(width, height)`` of the rectangular structuring
            element; ``(25, 1)`` keeps horizontal runs, ``(1, 25)``
            keeps vertical runs.
        canvas: Image on which every detected line is drawn in green.

    Returns:
        The number of external contours found in the opened image.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
    found = cv2.findContours(lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns two or three values depending on the OpenCV
    # version; pick the contour list in either case.
    found = found[0] if len(found) == 2 else found[1]
    for line in found:
        cv2.drawContours(canvas, [line], -1, (36, 255, 12), 2)
    return len(found)


# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # imread signals a missing or unreadable file by returning None; stop
    # here with a clear message rather than failing inside cvtColor.
    raise FileNotFoundError("Could not read image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Number of horizontal lines (rows = horizontal lines - 1)
rows = _count_lines(thresh, (25, 1), image)

# Number of vertical lines (columns = vertical lines - 1)
columns = _count_lines(thresh, (1, 25), image)

# Clamp at zero so an image without detected lines does not report -1.
print('Rows:', max(rows - 1, 0))
print('Columns:', max(columns - 1, 0))

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
Method #2: Cell count
Obtain binary image. Load image, convert to grayscale, Gaussian blur, then Otsu's threshold.
Remove text inside cells. Find contours and filter using `cv2.contourArea()` to remove text by filling in the contours with `cv2.drawContours()`.
Invert image. We invert the image so the cells are in white and the background in black
Sort cells and sum rows/columns. We find contours, then sort them from top to bottom using `imutils.contours.sort_contours`. Next, we iterate through the contours and find the centroid of each to obtain its `(cX, cY)` coordinates. The idea is that we can compare the `cY` value of each cell, using an offset, to determine whether it starts a new row or belongs to the same row. A cell is in the same row if its `cY` value is within +/- the offset; if the difference is greater than the offset, the cell is in a new row. We build a model table where the length of the table gives the number of rows and the length of a row gives the number of columns.
Binary image
Removed text contours + inverted image
Here's a visualization of iterating through each cell to count the number of rows and columns
Result
Rows: 7
Columns: 4
Code
import numpy as np
from imutils import contours
import cv2
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # imread signals a missing or unreadable file by returning None; stop
    # here with a clear message rather than failing inside cvtColor.
    raise FileNotFoundError("Could not read image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and remove text inside cells
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Contours with an area below 4000 are treated as text and filled
    # with black so that only the table grid remains.
    if cv2.contourArea(c) < 4000:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Invert image
invert = 255 - thresh
offset = 10  # maximum cY difference for two cells to share a row
visualize = cv2.cvtColor(invert, cv2.COLOR_GRAY2BGR)

# Find contours, sort from top-to-bottom and then sum up column/rows
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) > 0:
    # Skip sorting when nothing was found: the sorter unpacks zip(*...)
    # and cannot handle an empty contour list.
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")

table = []     # one list per table row, holding a 1 for every cell in it
row_cY = None  # centroid y of the first cell of the row being filled
for c in cnts:
    # Find centroid
    M = cv2.moments(c)
    if M["m00"] == 0:
        # A zero m00 would make the centroid division fail; skip it.
        continue
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])

    # New row: the very first cell, or a cell more than `offset` below
    # the first cell of the current row.
    if row_cY is None or (cY - row_cY) > offset:
        table.append([])
        row_cY = cY

    # Every cell is counted in the row that is currently open.
    table[-1].append(1)

    # Uncomment to visualize
    # cv2.circle(visualize, (cX, cY), 10, (36, 255, 12), -1)
    # cv2.imshow('visualize', visualize)
    # cv2.waitKey(200)

print('Rows: {}'.format(len(table)))
# The column count is taken from the first row; report 0 for an empty table.
print('Columns: {}'.format(len(table[0]) if table else 0))

cv2.imshow('invert', invert)
cv2.imshow('thresh', thresh)
cv2.waitKey()
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With