I am using the StereoBM class for stereo vision as part of my project. I am taking input frames from two webcams and running the stereo block-matching computation on the grayscale input frames without rectification. The output I am getting is far from the ground truth (very patchy). I want to know: is it because I am not rectifying the input frames? Also, I have chosen a baseline of 20 cm. I am using OpenCV 3.2.0 with C++.
The code I am running is given below.
#include <opencv2/core.hpp>
#include <opencv2/opencv.hpp>
#include </home/eswar/softwares/opencv_contrib-3.2.0/modules/contrib_world/include/opencv2/contrib_world.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/imgproc.hpp>
#include <stdio.h>
#include <iostream>
using namespace std;
using namespace cv;
int main()
{
//initialize and allocate memory to load the video stream from camera
VideoCapture camera0(0);
VideoCapture camera1(1);
if( !camera0.isOpened() ) return 1;
if( !camera1.isOpened() ) return 1;
Mat frame0,frame1;
Mat frame0gray,frame1gray;
Mat dispbm,dispsgbm;
Mat dispnorm_bm,dispnorm_sgbm;
Mat falseColorsMap, sfalseColorsMap;
int ndisparities = 16*5; /**< Range of disparity */
int SADWindowSize = 21; /**< Size of the block window. Must be odd */
Ptr<StereoBM> sbm = StereoBM::create( ndisparities, SADWindowSize );
Ptr<StereoSGBM> sgbm = StereoSGBM::create(0, //int minDisparity
96, //int numDisparities
5, //int SADWindowSize
600, //int P1 = 0
2400, //int P2 = 0
10, //int disp12MaxDiff = 0
16, //int preFilterCap = 0
2, //int uniquenessRatio = 0
20, //int speckleWindowSize = 0
30, //int speckleRange = 0
StereoSGBM::MODE_HH); //int mode; MODE_HH is the full-scale two-pass mode (formerly fullDP = true)
//-- Check its extreme values
double minVal; double maxVal;
while(true)
{
//grab and retrieve each frame of the video sequentially
camera0 >> frame0;
camera1 >> frame1;
imshow("Video0", frame0);
imshow("Video1", frame1);
cvtColor(frame0, frame0gray, COLOR_BGR2GRAY);
cvtColor(frame1, frame1gray, COLOR_BGR2GRAY);
sbm->compute( frame0gray, frame1gray, dispbm );
minMaxLoc( dispbm, &minVal, &maxVal );
dispbm.convertTo( dispnorm_bm, CV_8UC1, 255/(maxVal - minVal), -minVal*255/(maxVal - minVal)); //scale and shift so [minVal, maxVal] maps to [0, 255]
sgbm->compute(frame0gray, frame1gray, dispsgbm);
minMaxLoc( dispsgbm, &minVal, &maxVal );
dispsgbm.convertTo( dispnorm_sgbm, CV_8UC1, 255/(maxVal - minVal), -minVal*255/(maxVal - minVal));
imshow( "BM", dispnorm_bm);
imshow( "SGBM",dispnorm_sgbm);
//wait for 40 milliseconds
int c = waitKey(40);
//exit the loop if user press "Esc" key (ASCII value of "Esc" is 27)
if(27 == char(c)) break;
}
return 0;
}
Although you can see block matching also being used in the code, please ignore it, because it gives even worse output. I find the SGBM output closer to the ground truth, so I've decided to improve on that. However, if there is any advice on how the block-matching results can be improved, that would be great and I'd certainly appreciate it.
The output depth image for the SGBM technique looks like this: [screenshot of the SGBM disparity map]
No, StereoBM doesn't do rectification, just block matching and some pre- and post-processing. However, OpenCV provides functions for camera calibration and rectification; check this link.
There is also a ready-made example for this process in the OpenCV samples, so you don't have to write the code from scratch.
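For reference, here is a minimal sketch of the calibrated rectification pipeline, assuming you have already run stereoCalibrate offline and have the camera matrices K1, K2, distortion coefficients D1, D2, and the inter-camera rotation and translation R, T (all of these names are placeholders for your own calibration results):
#include <opencv2/calib3d.hpp>
#include <opencv2/imgproc.hpp>
using namespace cv;
// K1, K2, D1, D2, R, T are assumed to come from a prior stereoCalibrate run
void rectifyPair(const Mat& K1, const Mat& D1, const Mat& K2, const Mat& D2,
                 const Mat& R, const Mat& T, Size imgSize,
                 const Mat& left, const Mat& right,
                 Mat& leftRect, Mat& rightRect)
{
    Mat R1, R2, P1, P2, Q;
    //compute rectification transforms so epipolar lines become horizontal
    stereoRectify(K1, D1, K2, D2, imgSize, R, T, R1, R2, P1, P2, Q);
    Mat map1x, map1y, map2x, map2y;
    initUndistortRectifyMap(K1, D1, R1, P1, imgSize, CV_32FC1, map1x, map1y);
    initUndistortRectifyMap(K2, D2, R2, P2, imgSize, CV_32FC1, map2x, map2y);
    //the maps only need to be computed once; remap each incoming frame
    remap(left, leftRect, map1x, map1y, INTER_LINEAR);
    remap(right, rightRect, map2x, map2y, INTER_LINEAR);
}
You would then feed leftRect/rightRect to StereoBM or StereoSGBM instead of the raw grayscale frames.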
About the results: StereoBM is based on the SAD algorithm (local stereo matching), which is not robust. You can try the WLS filter, which could improve your results significantly.
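A minimal sketch of what that could look like, using the DisparityWLSFilter from the ximgproc module in opencv_contrib (the lambda and sigmaColor values below are just commonly suggested defaults, not values tuned for your setup):
#include <opencv2/core.hpp>
#include <opencv2/ximgproc/disparity_filter.hpp>
using namespace cv;
using namespace cv::ximgproc;
//left/right: the grayscale views; matcher: your existing StereoBM/StereoSGBM
Mat wlsFiltered(Ptr<StereoMatcher> matcher, const Mat& left, const Mat& right)
{
    Ptr<DisparityWLSFilter> wls = createDisparityWLSFilter(matcher);
    Ptr<StereoMatcher> right_matcher = createRightMatcher(matcher);
    Mat left_disp, right_disp, filtered_disp;
    matcher->compute(left, right, left_disp);
    right_matcher->compute(right, left, right_disp);
    wls->setLambda(8000.0); //regularization strength
    wls->setSigmaColor(1.5); //sensitivity to edges in the guidance image
    //filter the left disparity, guided by the left view and right disparity
    wls->filter(left_disp, left, filtered_disp, right_disp);
    return filtered_disp;
}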
StereoSGBM is based on the SGM algorithm (actually it differs a little from the one introduced in the original paper). It is a semi-global algorithm that brings (approximate) global optimization into disparity map generation, which produces a better disparity map but is slower.
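For context, SGM approximately minimizes an energy of the form
E(D) = sum_p ( C(p, D_p) + sum_{q in N_p} P1 * [|D_p - D_q| = 1] + sum_{q in N_p} P2 * [|D_p - D_q| > 1] )
by aggregating matching costs along several 1-D scanline paths. The P1 and P2 arguments you pass to StereoSGBM::create are exactly these smoothness penalties (with P2 > P1), which is why they have such a strong effect on how smooth or patchy the disparity map comes out.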
As suggested above, I tried rectifying the frames. The code is below.
#include <opencv2/core.hpp>
#include <opencv2/opencv.hpp>
#include </home/eswar/softwares/opencv_contrib-3.2.0/modules/contrib_world/include/opencv2/contrib_world.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/imgproc.hpp>
#include <stdio.h>
#include <iostream>
#include <opencv2/xfeatures2d/nonfree.hpp>
using namespace std;
using namespace cv;
using namespace cv::xfeatures2d;
int main()
{
//initialize and allocate memory to load the video stream from camera
VideoCapture camera0(0);
VideoCapture camera1(1);
int count=0;
Mat loRes, hiRes;
if( !camera0.isOpened() ) return 1;
if( !camera1.isOpened() ) return 1;
camera0.set(CAP_PROP_FRAME_WIDTH, 400);
camera0.set(CAP_PROP_FRAME_HEIGHT, 400);
camera1.set(CAP_PROP_FRAME_WIDTH, 400);
camera1.set(CAP_PROP_FRAME_HEIGHT, 400);
Mat frame0,frame1;
Mat frame0gray,frame1gray;
Mat dispbm,dispsgbm,disparity,disparity1;
Mat dispnorm_bm,dispnorm_sgbm;
Mat falseColorsMap, sfalseColorsMap,falsemap;
Mat img_matches;
Mat H1,H2;
int ndisparities = 96; /**< Range of disparity */
int SADWindowSize = 7;
Ptr<StereoBM> sbm = StereoBM::create( ndisparities, SADWindowSize );
Ptr<StereoSGBM> sgbm = StereoSGBM::create(-3, //int minDisparity
96, //int numDisparities
7, //int SADWindowSize
60, //int P1 = 0
2400, //int P2 = 0
90, //int disp12MaxDiff = 0
16, //int preFilterCap = 0
1, //int uniquenessRatio = 0
60, //int speckleWindowSize = 0
20, //int speckleRange = 0
StereoSGBM::MODE_HH); //int mode; MODE_HH is the full-scale two-pass mode (formerly fullDP = true)
//-- Check its extreme values
double minVal; double maxVal;
double max_dist = 0;
double min_dist = 100;
int minHessian = 630;
Ptr<Feature2D> f2d = SIFT::create();
vector<KeyPoint> keypoints_1, keypoints_2;
Ptr<Feature2D> fd = SIFT::create();
Mat descriptors_1, descriptors_2;
BFMatcher matcher(NORM_L2, true); //BFMatcher matcher(NORM_L2);
vector< DMatch > matches;
vector< DMatch > good_matches;
vector<Point2f>imgpts1,imgpts2;
vector<uchar> status;
while(true)
{
//grab and retrieve each frame of the video sequentially
camera0 >> frame0;
camera1 >> frame1;
imshow("Video0", frame0);
imshow("Video1", frame1);
cvtColor(frame0, frame0gray, COLOR_BGR2GRAY);
cvtColor(frame1, frame1gray, COLOR_BGR2GRAY);
sbm->compute( frame0gray, frame1gray, dispbm );
minMaxLoc( dispbm, &minVal, &maxVal );
dispbm.convertTo( dispnorm_bm, CV_8UC1, 255/(maxVal - minVal), -minVal*255/(maxVal - minVal));
sgbm->compute(frame0gray, frame1gray, dispsgbm);
minMaxLoc( dispsgbm, &minVal, &maxVal );
dispsgbm.convertTo( dispnorm_sgbm, CV_8UC1, 255/(maxVal - minVal), -minVal*255/(maxVal - minVal));
applyColorMap(dispnorm_bm, falseColorsMap, cv::COLORMAP_JET);
applyColorMap(dispnorm_sgbm, sfalseColorsMap, cv::COLORMAP_JET);
f2d->detect( frame0gray, keypoints_1 );
f2d->detect( frame1gray, keypoints_2 );
//-- Step 2: Calculate descriptors (feature vectors)
fd->compute( frame0gray, keypoints_1, descriptors_1 );
fd->compute( frame1gray, keypoints_2, descriptors_2 );
//-- Step 3: Matching descriptor vectors with a brute force matcher
matcher.match( descriptors_1, descriptors_2, matches );
drawMatches(frame0gray, keypoints_1, frame1gray, keypoints_2, matches, img_matches);
imshow("matches", img_matches);
//-- Quick calculation of max and min distances between keypoints
//   (reset per-frame state so matches don't accumulate across iterations)
good_matches.clear();
imgpts1.clear();
imgpts2.clear();
min_dist = 100; max_dist = 0;
for( size_t i = 0; i < matches.size(); i++ )
{ double dist = matches[i].distance;
if( dist < min_dist ) min_dist = dist;
if( dist > max_dist ) max_dist = dist;
}
for( size_t i = 0; i < matches.size(); i++ )
{
if( matches[i].distance <= max(4.5*min_dist, 0.02) ){
good_matches.push_back( matches[i]);
imgpts1.push_back(keypoints_1[matches[i].queryIdx].pt);
imgpts2.push_back(keypoints_2[matches[i].trainIdx].pt);
}
}
Mat F = findFundamentalMat(imgpts1, imgpts2, cv::FM_RANSAC, 3., 0.99, status); //FM_RANSAC
stereoRectifyUncalibrated(imgpts1, imgpts2, F, frame0gray.size(), H1, H2); //fix: the second argument must be imgpts2, not imgpts1
Mat rectified1(frame0gray.size(), frame0gray.type());
warpPerspective(frame0gray, rectified1, H1, frame0gray.size());
Mat rectified2(frame1gray.size(), frame1gray.type());
warpPerspective(frame1gray, rectified2, H2, frame1gray.size());
sgbm->compute(rectified1, rectified2, disparity);
minMaxLoc( disparity, &minVal, &maxVal );
disparity.convertTo( disparity1, CV_8UC1, 255/(maxVal - minVal), -minVal*255/(maxVal - minVal));
applyColorMap(disparity1, falsemap, cv::COLORMAP_JET);
imshow("disparity_rectified_color", falsemap);
imshow( "BM", falseColorsMap);
imshow( "CSGBM",sfalseColorsMap);
//wait for 40 milliseconds
int c = waitKey(40);
//exit the loop if user press "Esc" key (ASCII value of "Esc" is 27)
if(27 == char(c)) break;
}
return 0;
}
Now the output still isn't great, but it has improved from last time. However, there is one persistent problem, which is also visible in the image above: the left side of the output image is a completely black region. It shouldn't come out this way, right? How can I solve this problem? Any help appreciated.