Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Extract words in rectangles from text

I am struggling to quickly and efficiently extract the words that are enclosed in rectangles from a BufferedImage.
For example, I have the following page (edit: the image is scanned, so it can contain noise, skew and distortion):
enter image description here


How can I extract the following images without the rectangle? (Edit: I can use OpenCV or any other library, but I'm absolutely new to advanced image processing techniques.) enter image description here

EDIT

I've used the method suggested by karlphillip here and it works decently.
Here is the code :

    package ro.ubbcluj.detection;

import java.awt.FlowLayout;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;
import javax.swing.ImageIcon;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.WindowConstants;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfByte;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.highgui.Highgui;
import org.opencv.imgproc.Imgproc;

/**
 * Demo pipeline that looks for rectangular boxes on a scanned form with OpenCV
 * and shows the detected outlines in a Swing window.
 */
public class RectangleDetection {

    /** File read by {@link #loadImage()}. */
    private static final String IMAGE_PATH =
            "E:\\Programs\\Eclipse Workspace\\LicentaWorkspace\\OpenCvRectangleDetection\\src\\img\\form3.jpg";

    /**
     * Runs the whole pipeline: load, grayscale, threshold, fill contours,
     * threshold again, erode/dilate, find squares, draw them and display the result.
     *
     * @param args unused
     * @throws IOException if the input image cannot be loaded or the result
     *         cannot be converted to a {@link BufferedImage}
     */
    public static void main(String[] args) throws IOException {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        Mat image = loadImage();
        if (image.empty()) {
            // Fail with a readable message here instead of an opaque error from
            // the first OpenCV call that receives the empty matrix.
            throw new IOException("Could not read image: " + IMAGE_PATH);
        }
        Mat grayscale = convertToGrayscale(image);

        Mat treshold = tresholdImage(grayscale);
        List<MatOfPoint> contours = findContours(treshold);
        Mat contoursImage = fillCountours(contours, grayscale);
        Mat grayscaleWithContours = convertToGrayscale(contoursImage);
        Mat tresholdGrayscaleWithContours = tresholdImage(grayscaleWithContours);
        Mat eroded = erodeAndDilate(tresholdGrayscaleWithContours);
        List<MatOfPoint> squaresFound = findSquares(eroded);
        Mat squaresDrawn = Rectangle.drawSquares(grayscale, squaresFound);
        BufferedImage convertedImage = convertMatToBufferedImage(squaresDrawn);
        displayImage(convertedImage);
    }

    /** Delegates to {@link Rectangle#findSquares(Mat)}. */
    private static List<MatOfPoint> findSquares(Mat eroded) {
        return Rectangle.findSquares(eroded);
    }

    /**
     * Erodes and then dilates {@code input} with the same 11x11 rectangular
     * structuring element.
     *
     * @param input image to process; it is not modified
     * @return a new matrix holding the eroded-then-dilated image
     */
    private static Mat erodeAndDilate(Mat input) {
        int erosion_type = Imgproc.MORPH_RECT;
        int erosion_size = 5;
        Mat result = new Mat();
        Mat element = Imgproc.getStructuringElement(erosion_type, new Size(2 * erosion_size + 1, 2 * erosion_size + 1));
        Imgproc.erode(input, result, element);
        Imgproc.dilate(result, result, element);
        return result;
    }

    /**
     * Converts {@code input} to a single-channel image using
     * {@code COLOR_BGR2GRAY}.
     *
     * @param input colour image; it is not modified
     * @return a new grayscale matrix
     */
    private static Mat convertToGrayscale(Mat input) {
        Mat grayscale = new Mat();
        Imgproc.cvtColor(input, grayscale, Imgproc.COLOR_BGR2GRAY);
        return grayscale;
    }

    /**
     * Paints every contour as a filled shape on a colour copy of {@code image}.
     *
     * @param contours contours to fill
     * @param image    grayscale image used as the background; it is not modified
     * @return a three-channel copy of {@code image} with all contours filled
     *         using {@code Scalar(255, 0, 0)}
     */
    private static Mat fillCountours(List<MatOfPoint> contours, Mat image) {
        Mat result = image.clone();
        Imgproc.cvtColor(result, result, Imgproc.COLOR_GRAY2RGB);
        for (int i = 0; i < contours.size(); i++) {
            // thickness -1 asks drawContours to fill the contour interior
            Imgproc.drawContours(result, contours, i, new Scalar(255, 0, 0), -1, 8, new Mat(), 0, new Point());
        }
        return result;
    }

    /**
     * Collects all contours of {@code image} ({@code RETR_TREE},
     * {@code CHAIN_APPROX_NONE}); the hierarchy is computed but discarded.
     */
    private static List<MatOfPoint> findContours(Mat image) {
        List<MatOfPoint> contours = new ArrayList<>();
        Mat hierarchy = new Mat();
        Imgproc.findContours(image, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_NONE);
        return contours;
    }

    /**
     * Detects line segments with the probabilistic Hough transform and draws
     * them in green on {@code img}.
     *
     * <p>Note that {@code img} is converted to three channels in place, so the
     * caller's matrix is modified and is the same object that is returned.
     *
     * @param img single-channel image to search for segments
     * @return {@code img}, now three-channel, with the segments drawn on it
     */
    private static Mat detectLinesHough(Mat img) {
        Mat lines = new Mat();
        int threshold = 80;
        int minLineLength = 10;
        int maxLineGap = 5;
        double rho = 0.4;
        Imgproc.HoughLinesP(img, lines, rho, Math.PI / 180, threshold, minLineLength, maxLineGap);
        Imgproc.cvtColor(img, img, Imgproc.COLOR_GRAY2RGB);
        System.out.println(lines.cols());
        // Segments are read from row 0, one (x1, y1, x2, y2) entry per column.
        for (int x = 0; x < lines.cols(); x++) {
            double[] vec = lines.get(0, x);
            double x1 = vec[0], y1 = vec[1], x2 = vec[2], y2 = vec[3];
            Point start = new Point(x1, y1);
            Point end = new Point(x2, y2);
            // Draw on the image that is returned, not on the Hough output matrix.
            Core.line(img, start, end, new Scalar(0, 255, 0), 3);
        }
        return img;
    }

    /**
     * Converts an OpenCV matrix to a {@link BufferedImage} by encoding it as
     * JPEG in memory and decoding it with {@link ImageIO}.
     *
     * @param mat image to convert
     * @return the decoded image
     * @throws IOException if decoding the encoded bytes fails
     */
    static BufferedImage convertMatToBufferedImage(Mat mat) throws IOException {
        MatOfByte matOfByte = new MatOfByte();
        Highgui.imencode(".jpg", mat, matOfByte);
        byte[] byteArray = matOfByte.toArray();
        InputStream in = new ByteArrayInputStream(byteArray);
        return ImageIO.read(in);
    }

    /** Shows {@code image} in a new frame; closing the frame exits the JVM. */
    static void displayImage(BufferedImage image) {
        JFrame frame = new JFrame();
        frame.getContentPane().setLayout(new FlowLayout());
        frame.getContentPane().add(new JLabel(new ImageIcon(image)));
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        frame.pack();
        frame.setVisible(true);
    }

    /**
     * Applies an inverted binary threshold at 225 with a maximum value of 255.
     *
     * @param img image to threshold; it is not modified
     * @return a new matrix holding the thresholded image
     */
    private static Mat tresholdImage(Mat img) {
        Mat treshold = new Mat();
        Imgproc.threshold(img, treshold, 225, 255, Imgproc.THRESH_BINARY_INV);
        return treshold;
    }

    /**
     * Variant of {@link #tresholdImage(Mat)} that adds {@code THRESH_OTSU} to
     * the inverted binary threshold type.
     */
    private static Mat tresholdImage2(Mat img) {
        Mat treshold = new Mat();
        Imgproc.threshold(img, treshold, -1, 255, Imgproc.THRESH_BINARY_INV + Imgproc.THRESH_OTSU);
        return treshold;
    }

    /**
     * Reads the form image from {@link #IMAGE_PATH}.
     *
     * @return the loaded matrix; the caller checks {@code empty()} to detect a
     *         failed read
     */
    private static Mat loadImage() {
        return Highgui.imread(IMAGE_PATH);
    }

}


and the Rectangle class

    package ro.ubbcluj.detection;

import java.awt.image.BufferedImage;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;

/**
 * Helpers that find roughly right-angled quadrilaterals in an image and draw
 * them.
 */
public class Rectangle {

    /**
     * Finds convex four-vertex contours whose corners are all close to 90 degrees.
     *
     * @param input image to search; it is not modified
     * @return one {@link MatOfPoint} with four vertices per accepted contour;
     *         empty if none qualify
     */
    static List<MatOfPoint> findSquares(Mat input) {
        Mat pyr = new Mat();
        Mat timg = new Mat();

        // Down-scale and up-scale the image to filter out small noises
        Imgproc.pyrDown(input, pyr, new Size(input.cols() / 2, input.rows() / 2));
        Imgproc.pyrUp(pyr, timg, input.size());
        // Canny with thresholds 0 and 50, aperture size 5 and the L2 gradient flag set
        Imgproc.Canny(timg, timg, 0, 50, 5, true);

        // Dilate canny output to remove potential holes between edge segments
        Imgproc.dilate(timg, timg, new Mat(), new Point(-1, -1), 1);

        // find contours and store them all as a list
        Mat hierarchy = new Mat();
        List<MatOfPoint> contours = new ArrayList<>();
        Imgproc.findContours(timg, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
        List<MatOfPoint> squaresResult = new ArrayList<MatOfPoint>();
        for (int i = 0; i < contours.size(); i++) {

            // Approximate contour with accuracy proportional to the contour
            // perimeter
            MatOfPoint2f contour = new MatOfPoint2f(contours.get(i).toArray());
            MatOfPoint2f approx = new MatOfPoint2f();
            double epsilon = Imgproc.arcLength(contour, true) * 0.02;
            boolean closed = true;
            Imgproc.approxPolyDP(contour, approx, epsilon, closed);
            List<Point> approxCurveList = approx.toList();

            // Square contours should have 4 vertices after approximation
            // relatively large area (to filter out noisy contours)
            // and be convex.
            // Note: absolute value of an area is used because
            // area may be positive or negative - in accordance with the
            // contour orientation
            boolean aproxSize = approx.rows() == 4;
            boolean largeArea = Math.abs(Imgproc.contourArea(approx)) > 200;
            boolean isConvex = Imgproc.isContourConvex(new MatOfPoint(approx.toArray()));
            if (aproxSize && largeArea && isConvex) {
                double maxCosine = 0;
                // j = 2, 3, 4 evaluates the corners at vertices 1, 2 and 3
                for (int j = 2; j < 5; j++) {
                    // Find the maximum cosine of the angle between joint edges
                    double cosine = Math.abs(getAngle(approxCurveList.get(j % 4), approxCurveList.get(j - 2),
                            approxCurveList.get(j - 1)));
                    maxCosine = Math.max(maxCosine, cosine);
                }
                // If cosines of all angles are small
                // (all angles are ~90 degree) then write quadrangle
                // vertices to resultant sequence
                if (maxCosine < 0.3) {
                    Point[] points = approx.toArray();
                    squaresResult.add(new MatOfPoint(points));
                }
            }
        }
        return squaresResult;
    }

    /**
     * Returns the cosine of the angle at {@code point0} between the vectors
     * {@code point0 -> point1} and {@code point0 -> point2}.
     *
     * <p>A small constant (1e-10) is added under the square root so that
     * coincident points do not cause a division by zero.
     */
    private static double getAngle(Point point1, Point point2, Point point0) {
        double dx1 = point1.x - point0.x;
        double dy1 = point1.y - point0.y;
        double dx2 = point2.x - point0.x;
        double dy2 = point2.y - point0.y;
        return (dx1 * dx2 + dy1 * dy2) / Math.sqrt((dx1 * dx1 + dy1 * dy1) * (dx2 * dx2 + dy2 * dy2) + 1e-10);
    }

    /**
     * Draws every polygon in {@code squares} as a closed green outline, two
     * pixels thick, on a colour copy of {@code image}.
     *
     * @param image   grayscale image used as the background; it is not modified
     * @param squares polygons to draw
     * @return a new three-channel image with the outlines drawn
     */
    public static Mat drawSquares(Mat image, List<MatOfPoint> squares) {
        Mat result = new Mat();
        Imgproc.cvtColor(image, result, Imgproc.COLOR_GRAY2RGB);
        int thickness = 2;
        // isClosed must be true, otherwise the edge from the last vertex back to
        // the first is left out and every square shows only three sides.
        Core.polylines(result, squares, true, new Scalar(0, 255, 0), thickness);
        return result;
    }
}

Example of result :

enter image description hereenter image description here

... though, it doesn't work so great for smaller images :
enter image description hereenter image description here

Maybe some enhancements can be suggested? Or how to make the algorithm faster in case I have a batch of images to process?

like image 629
Iulian Rosca Avatar asked Apr 25 '14 09:04

Iulian Rosca


3 Answers

I did the following program in c++ using opencv (I'm not familiar with java+opencv). I've included the output for the two sample images that you have provided. You may have to adjust the thresholds in the contour filtering section for some other images.

#include "stdafx.h"

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>

using namespace cv;
using namespace std;

int _tmain(int argc, _TCHAR* argv[])
{
    // load image as grayscale
    Mat im = imread(INPUT_FILE, CV_LOAD_IMAGE_GRAYSCALE);

    Mat morph;
    // morphological closing with a column filter : retain only large vertical edges
    Mat morphKernelV = getStructuringElement(MORPH_RECT, Size(1, 7));
    morphologyEx(im, morph, MORPH_CLOSE, morphKernelV);

    Mat bwV;
    // binarize: will contain only large vertical edges
    threshold(morph, bwV, 0, 255.0, CV_THRESH_BINARY | CV_THRESH_OTSU);

    // morphological closing with a row filter : retain only large horizontal edges
    Mat morphKernelH = getStructuringElement(MORPH_RECT, Size(7, 1));
    morphologyEx(im, morph, MORPH_CLOSE, morphKernelH);

    Mat bwH;
    // binarize: will contain only large horizontal edges
    threshold(morph, bwH, 0, 255.0, CV_THRESH_BINARY | CV_THRESH_OTSU);

    // combine the virtical and horizontal edges
    Mat bw = bwV & bwH;
    threshold(bw, bw, 128.0, 255.0, CV_THRESH_BINARY_INV);

    // just for illustration
    Mat rgb;
    cvtColor(im, rgb, CV_GRAY2BGR);

    // find contours
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(bw, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
    // filter contours by area to obtain boxes
    double areaThL = bw.rows * .04 * bw.cols * .06;
    double areaThH = bw.rows * .7 * bw.cols * .7;
    double area = 0;
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        area = contourArea(contours[idx]); 
        if (area > areaThL && area < areaThH)
        {
            drawContours(rgb, contours, idx, Scalar(0, 0, 255), 2, 8, hierarchy);
            // take bounding rectangle. better to use filled countour as a mask
            // to extract the rectangle because then you won't get any stray elements
            Rect rect = boundingRect(contours[idx]);
            cout << "rect: (" << rect.x << ", " << rect.y << ") " << rect.width << " x " << rect.height << endl;
            Mat imRect(im, rect);
        }
    }

    return 0;
}

Result for the first image:

enter image description here

Result for the second image:

enter image description here

like image 181
dhanushka Avatar answered Nov 14 '22 23:11

dhanushka


I'm not sure whether "real" image processing skills are necessary.

Once you start tackling this problem with OpenCV, Sobel/Canny filters, edge detections and Hough transforms, it starts becoming rather involved. But maybe all this is not necessary here.

It all depends on how "predictable" the input is. That's why I asked in the comments whether the image can serve as a test case. IF the rectangles are always axis-aligned and don't have noise, distortions and interruptions, this can be solved with some trivial loops and pixel comparisons.

So IF you have potentially noisy or distorted input images, then ... good luck, you may have to acquire quite some image processing skills. If the image is not distorted or noisy, a solution like this one might be sufficient:

import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.GridLayout;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;
import javax.swing.ImageIcon;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.SwingUtilities;


/**
 * Finds axis-aligned rectangles with a one-pixel dark border in a clean
 * (noise-free, undistorted) image using plain pixel comparisons, and shows the
 * image together with the cropped rectangle interiors.
 */
public class RectangleInImageTest
{
    /**
     * Loads {@code gcnc2.jpg}, extracts the rectangle interiors and displays them.
     *
     * @param args unused
     * @throws IOException if the file cannot be read or cannot be decoded
     */
    public static void main(String[] args) throws IOException
    {
        BufferedImage loaded = ImageIO.read(new File("gcnc2.jpg"));
        if (loaded == null)
        {
            // ImageIO.read returns null when no registered reader can decode the file
            throw new IOException("No image reader could decode gcnc2.jpg");
        }
        final BufferedImage image = convertToARGB(loaded);
        final List<BufferedImage> subImages = scan(image);

        SwingUtilities.invokeLater(new Runnable()
        {
            @Override
            public void run()
            {
                createAndShowGUI(image, subImages);
            }
        });
    }

    /**
     * Builds the window: the full image in the centre and a scrollable row of
     * the extracted sub-images at the bottom.
     */
    private static void createAndShowGUI(
        BufferedImage image,
        List<BufferedImage> subImages)
    {
        JFrame f = new JFrame();
        f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        f.getContentPane().setLayout(new BorderLayout());

        f.getContentPane().add(new JLabel(new ImageIcon(image)),
            BorderLayout.CENTER);

        JPanel p = new JPanel(new GridLayout(1,0));
        for (BufferedImage subImage : subImages)
        {
            p.add(new JLabel(new ImageIcon(subImage)));
        }
        JPanel pp = new JPanel(new GridLayout(1,1));
        pp.setPreferredSize(new Dimension(800, 100));
        pp.add(new JScrollPane(p));
        f.getContentPane().add(pp, BorderLayout.SOUTH);
        f.setSize(800,800);
        f.setLocationRelativeTo(null);
        f.setVisible(true);
    }


    /**
     * Returns a copy of {@code image} with type
     * {@link BufferedImage#TYPE_INT_ARGB}.
     *
     * @param image the image to copy; it is not modified
     * @return a new ARGB image of the same size with {@code image} drawn onto it
     */
    public static BufferedImage convertToARGB(BufferedImage image)
    {
        BufferedImage newImage = new BufferedImage(
            image.getWidth(), image.getHeight(),
            BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = newImage.createGraphics();
        g.drawImage(image, 0, 0, null);
        g.dispose();
        return newImage;
    }

    /**
     * Scans the image row by row for upper-left rectangle corners and crops
     * the interior of every valid rectangle found.
     *
     * @param image the image to scan
     * @return the rectangle interiors (border excluded) in scan order
     */
    private static List<BufferedImage> scan(BufferedImage image)
    {
        List<BufferedImage> result = new ArrayList<BufferedImage>();
        int w = image.getWidth();
        int h = image.getHeight();
        for (int y=0; y<h; y++)
        {
            for (int x=0; x<w; x++)
            {
                int rgb = image.getRGB(x, y);
                if (!isBlack(rgb))
                {
                    continue;
                }
                if (!isUpperLeftCorner(image, x, y))
                {
                    continue;
                }
                Rectangle rectangle = extractRectangle(image, x,y);
                if (!isValidRectangle(rectangle))
                {
                    continue;
                }
                System.out.println("Rectangle "+rectangle);

                // Copy the area inside the border: skip the corner pixel on the
                // top/left and stop before the border on the bottom/right.
                BufferedImage part = new BufferedImage(
                    rectangle.width-2, rectangle.height-2,
                    BufferedImage.TYPE_INT_ARGB);
                Graphics2D g = part.createGraphics();
                g.drawImage(image,
                    0, 0, rectangle.width-2, rectangle.height-2,
                    x+1, y+1, x+rectangle.width-1, y+rectangle.height-1, null);
                g.dispose();
                result.add(part);
            }
        }
        return result;
    }

    /** Returns whether the red, green and blue components are all below 128. */
    private static boolean isBlack(int rgb)
    {
        final int threshold = 128;
        int r = (rgb >> 16) & 0xFF;
        int g = (rgb >>  8) & 0xFF;
        int b = (rgb      ) & 0xFF;
        return
            r < threshold &&
            g < threshold &&
            b < threshold;
    }

    /**
     * Returns whether the pixel at (x, y) looks like the upper-left corner of
     * a one-pixel border: the neighbours to the left, above, on both upper
     * diagonals, at the lower left and at the lower right (inside the
     * rectangle) must all lie inside the image and be non-black.
     */
    private static boolean isUpperLeftCorner(BufferedImage image, int x, int y)
    {
        if (!isValidAndWhite(image, x-1, y  )) return false;
        if (!isValidAndWhite(image, x  , y-1)) return false;
        if (!isValidAndWhite(image, x-1, y-1)) return false;
        if (!isValidAndWhite(image, x+1, y-1)) return false;
        if (!isValidAndWhite(image, x-1, y+1)) return false;
        if (!isValidAndWhite(image, x+1, y+1)) return false;
        return true;
    }

    /** Returns whether (x, y) lies inside the image and is not black. */
    private static boolean isValidAndWhite(
        BufferedImage image, int x, int y)
    {
        int w = image.getWidth();
        int h = image.getHeight();
        if (x < 0 || x >= w)
        {
            return false;
        }
        if (y < 0 || y >= h)
        {
            return false;
        }
        int rgb = image.getRGB(x, y);
        return !isBlack(rgb);
    }


    /**
     * Measures the rectangle whose upper-left corner is (x0, y0) by following
     * the black run downwards along column x0 and to the right along row y0.
     *
     * @param image the image to inspect
     * @param x0 x coordinate of the upper-left corner
     * @param y0 y coordinate of the upper-left corner
     * @return a rectangle at (x0, y0) whose width and height are the lengths
     *         of the black runs along the top and left border
     */
    private static Rectangle extractRectangle(
        BufferedImage image, int x0, int y0)
    {
        int w = image.getWidth();
        int h = image.getHeight();

        // Default to the image bounds so that a border running all the way to
        // the last column or row still yields its real extent instead of 0.
        int x1 = w;
        int y1 = h;
        for (int y=y0; y<h; y++)
        {
            int rgb = image.getRGB(x0, y);
            if (!isBlack(rgb))
            {
                y1 = y;
                break;
            }
        }
        for (int x=x0; x<w; x++)
        {
            int rgb = image.getRGB(x, y0);
            if (!isBlack(rgb))
            {
                x1 = x;
                break;
            }
        }
        return new Rectangle(x0, y0, x1-x0, y1-y0);
    }


    /** Returns whether the rectangle is at least 16 pixels wide and 8 pixels high. */
    private static boolean isValidRectangle(Rectangle r)
    {
        final int minWidth = 16;
        final int minHeight = 8;
        return r.width >= minWidth && r.height >= minHeight;
    }

}
like image 3
Marco13 Avatar answered Nov 14 '22 22:11

Marco13


Here's an algorithm that I demonstrated on a similar project using OpenCV:

  • Find the squares on the original image;
  • Extract (crop) the squares found so each become a new image;
  • Perform OCR detection on each image.

Most of these references are not in Java, but I assume you have the skills to convert C/C++ code to Java (btw, cv::Mat is equivalent to IplImage).

like image 3
karlphillip Avatar answered Nov 14 '22 22:11

karlphillip