Get white text on black background

Question

I want to identify text in a set of images. There are some images with both white and black colored text.

enter image description here

I used otsu thresholding to binarize image

enter image description here

After contour identification and removal of non text regions I identified the required text region.

enter image description here

I need all the text in white color. But I don't know how to do it. I thought of using a bitwise operator but couldn't find a method. Can someone help me with this?

Expected output:

enter image description here

import cv2
import numpy as np


def process(img):
 # read image
 img_no = str(img)
 rgb = cv2.imread(img_no + '.jpg')
 # cv2.imshow('original', rgb)

 # convert image to grayscale
 gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

 _, bw_copy = cv2.threshold(gray, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

 # bilateral filter
 blur = cv2.bilateralFilter(gray, 5, 75, 75)
 # cv2.imshow('blur', blur)

 # morphological gradient calculation
 kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
 grad = cv2.morphologyEx(blur, cv2.MORPH_GRADIENT, kernel)
 # cv2.imshow('gradient', grad)

 # binarization
 _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
 # cv2.imshow('otsu', bw)

 # closing
 kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
 closed = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
 # cv2.imshow('closed', closed)

 # finding contours
 contours, hierarchy = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

 mask = np.zeros(closed.shape, dtype=np.uint8)
 mask1 = np.zeros(bw_copy.shape, dtype=np.uint8)

 for idx in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[idx])
    mask[y:y + h, x:x + w] = 0
    area = cv2.contourArea(contours[idx])
    aspect_ratio = float(w) / h
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)

    # identify region of interest
    if r > 0.34 and 0.52 < aspect_ratio < 13 and area > 145.0:
        cv2.drawContours(mask1, [contours[idx]], -1, (255, 255, 255), -1)

 result = cv2.bitwise_and(bw_copy, mask1)
 cv2.imshow('result', result)

 print(img_no + " Done")
 cv2.waitKey()

New Image

Accepted answer doesn't work with this picture.

enter image description here

yapws87 · Accepted Answer

At first glance this looks like a simple question but it is quite tricky to solve. However you already have all the ingredients needed to solve the problem and only require a slight tweak to your algorithm.

Here are the gists:

What you need is a an inverted image(wb_copy) of your thresholded image(bw_copy).

enter image description here

You have done a great job creating a mask

enter image description here

Run bitwise_and operation on both bw_copy and wb_copy with the mask above. You should get the result shown below.

enter image description here

As you can see, your answer is abit from both images. All you need to do is for every font blob, count the non-zero pixel from both images and select the one with the higher count. Doing so will provide the result you wanted.

enter image description here

Here are the modifications I made to the code

  # finding contours
    _,contours,_ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    mask = np.zeros(closed.shape, dtype=np.uint8)
    mask1 = np.zeros(bw_copy.shape, dtype=np.uint8)
    wb_copy = cv2.bitwise_not(bw_copy)
    new_bw = np.zeros(bw_copy.shape, dtype=np.uint8)

    for idx in range(len(contours)):
        x, y, w, h = cv2.boundingRect(contours[idx])
        mask[y:y + h, x:x + w] = 0
        area = cv2.contourArea(contours[idx])
        aspect_ratio = float(w) / h
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)


        # identify region of interest
        if r > 0.34 and 0.52 < aspect_ratio < 13 and area > 145.0:

            cv2.drawContours(mask1, [contours[idx]], -1, (255, 255, 255), -1)

            bw_temp = cv2.bitwise_and(mask1[y:y + h, x:x + w],bw_copy[y:y + h, x:x + w])
            wb_temp = cv2.bitwise_and(mask1[y:y + h, x:x + w],wb_copy[y:y + h, x:x + w])

            bw_count = cv2.countNonZero(bw_temp)
            wb_count = cv2.countNonZero(wb_temp)

            if bw_count > wb_count:
                new_bw[y:y + h, x:x + w]=np.copy(bw_copy[y:y + h, x:x + w])
            else:
                new_bw[y:y + h, x:x + w]=np.copy(wb_copy[y:y + h, x:x + w])

    cv2.imshow('new_bw', new_bw)

Get white text on black background

Tags:

python

image-processing

opencv

Tech Five

1 Answers

yapws87

Recent Activity

Donate For Us

Get white text on black background

Tags:

python

image-processing

opencv

Tech Five

1 Answers

yapws87

Related questions

Recent Activity

Donate For Us