Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

OpenCV: parallel for loop. What's wrong with my code?

For the purpose of testing the parallel for loop in OpenCV, I created the following code, which just takes a matrix and does some manipulations and outputs another matrix. The parallel code is supposed to give the same results as given by the serial code. However, it is not the case.

Before compiling the code, make sure that you have already had TBB properly enabled and installed in OpenCV. (Otherwise the parallelization will not be taken into account and will be treated as serial code, thus you'll obviously obtain the same results.)

#include <opencv2/core/core.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>    

using namespace cv;
using namespace std;    

class Parallel_process : public cv::ParallelLoopBody
{

private:
    const Mat& im;
    Mat& out;
    int r;

public:
    Parallel_process(const Mat& inputIm, int radius, Mat& outputIm)
        : im(inputIm), out(outputIm), r(radius){}

    virtual void operator()(const cv::Range& range) const
    {
        //cout << "start=" << range.start<< " end=" << range.end<<endl;
        //cout << endl;
        for(int y = range.start; y < range.end; y++)
        {
            //cout << " " << y;
            for(int x=0; x< im.cols - r; x++)
            {
                out(Rect(x,y,r,r)) = im(Rect(x,y,r,r))*im(Rect(x,y,r,r));
                //cout<<im(Rect(x,y,r,r))<<endl;
            } 
        }
    }
};


int main(int , char** )
{

    double start, timeSec;

    int n = 5, r = 2;

    /// Define a matrix
    Mat M = Mat(n, n, CV_32F);
    //randu(M, 0, 1);
    for(int y=0; y< M.rows; y++)
    {
        for(int x=0; x< M.cols; x++)
        {
            M.at<float>(x,y) = abs(x-y);    
        } 
    } 

    //cout<<M<<endl;

    Mat M1 = M.clone();
    Mat M2 = M.clone();    


    /// Serial loop
    start = (double)getTickCount();
    for(int y=0; y< M1.rows - r; y++)
    {
        for(int x=0; x< M1.cols - r; x++)
        {
            M1(Rect(x,y,r,r)) = M(Rect(x,y,r,r))*M(Rect(x,y,r,r));  
        } 
    } 
    timeSec = (getTickCount() - start) / getTickFrequency();
    cout << "Non parallel time: " << timeSec << "s" << endl;
    //cout<<M1<<endl;        


    /// Parallel loop
    start = (double)getTickCount();
    parallel_for_(Range(0,(int)M2.rows-r), Parallel_process(M,r,M2));
    timeSec = (getTickCount() - start) / getTickFrequency();
    cout << "Parallel time: " << timeSec << "s" << endl;    
    //cout<<M2<<endl;    

    /// Check the results
    cout << "Check: norm(M1-M2)=" << norm(M1-M2) << endl;
    return 0;
}

By executing the obtained binary, I got very random results (even with the same binary, i.e. compiled once but executed several times).

Hope somebody can help to figure it out. Thanks in advance.

enter image description here

like image 722
f10w Avatar asked Dec 07 '25 10:12

f10w


1 Answers

I guess there is an overlap in the out matrix when you compute the results: out(Rect(x,y,r,r)) overlaps out(Rect(x+,y+1,r,r)) for r > 1. So in the sequential execution, the last computed results following your sequential scan of the image are always stored in the output matrix, while in the parallel computation, the scan of the image is not sequential anymore, so so threads might overwrite other thread results

like image 164
remi Avatar answered Dec 09 '25 23:12

remi