I took inspiration from this answer, which is a Python implementation, but I need it in C++. That answer works very well, and the idea is: use detectAndCompute to get the keypoints, use kmeans to segment them into clusters, then for each cluster run matcher->knnMatch with that cluster's descriptors, and finally do the remaining steps of the usual single-object detection method. The main problem is: how do I provide the descriptors for each cluster's matcher->knnMatch call? I thought we could zero out the descriptors that correspond to the other clusters' keypoints (making them useless). Am I right?
And I got some problems in my attempt:
How should I estimate the cluster count for kmeans?
Is this the right way to initialize the per-cluster descriptor matrices: Mat descriptors_scene_clusters[3] = { Mat(descriptors_scene.rows, descriptors_scene.cols, CV_8U, Scalar(0)) }; ?
Any help is much appreciated!
#include <stdio.h>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/xfeatures2d.hpp>
using namespace cv;
using namespace cv::xfeatures2d;
#define MIN_MATCH_COUNT 10
int main()
{
Mat img_object = imread("./2.PNG", IMREAD_GRAYSCALE);
Mat img_scene = imread("./1.PNG", IMREAD_GRAYSCALE);
Ptr<ORB> detector = ORB::create();
std::vector<KeyPoint> keypoints_object, keypoints_scene;
Mat descriptors_object, descriptors_scene;
detector->detectAndCompute(img_object, cv::Mat(), keypoints_object, descriptors_object);
detector->detectAndCompute(img_scene, cv::Mat(), keypoints_scene, descriptors_scene);
std::cout << descriptors_scene.row(0) << "\n";
std::cout << descriptors_scene.cols << "\n";
std::vector<Point2f> keypoints_scene_points_;
for (int i=0; i<keypoints_scene.size(); i++) {
keypoints_scene_points_.push_back(keypoints_scene[i].pt);
}
Mat keypoints_scene_points(keypoints_scene_points_);
Mat labels;
int estimate_cluster_count = 3; // estimated; how should I choose this properly?
kmeans(keypoints_scene_points, estimate_cluster_count, labels, TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 10, 1.0), 3, KMEANS_RANDOM_CENTERS);
std::cout << "==================================111111\n";
Mat descriptors_scene_clusters[3] = { Mat(descriptors_scene.rows, descriptors_scene.cols, CV_8U, Scalar(0)) };
std::cout << "==================================111111------\n";
for (int i=0; i<labels.rows; i++) {
int clusterIndex = labels.at<int>(i);
Point2f pt = keypoints_scene_points.at<Point2f>(i);
descriptors_scene_clusters[clusterIndex].at<uchar>(pt) = descriptors_scene.at<uchar>(pt); // this line raises the error
}
std::cout << descriptors_scene_clusters[0] << "\n";
std::cout << "==================================22222222\n";
// return 0;
Mat img_matches = img_scene;
std::vector<DMatch> all_good_matches;
for (int i=0; i<estimate_cluster_count; i++) {
std::cout << "==================================33333\n";
Ptr<flann::IndexParams> indexParams = makePtr<flann::KDTreeIndexParams>(5);
Ptr<flann::SearchParams> searchParams = makePtr<flann::SearchParams>(50);
Ptr<FlannBasedMatcher> matcher = makePtr<FlannBasedMatcher>(indexParams, searchParams);
// BFMatcher matcher;
std::vector<std::vector<DMatch>> matches;
std::cout << "==================================444444\n";
matcher->knnMatch(descriptors_object, descriptors_scene_clusters[i], matches, 2);
std::cout << "==================================555555\n";
std::vector<DMatch> good_matches;
for (auto &match : matches) {
if (match[0].distance < 0.7 * match[1].distance) {
good_matches.push_back(match[0]);
}
}
all_good_matches.insert(all_good_matches.end(), good_matches.begin(), good_matches.end());
std::cout << "==================================66666\n";
if (good_matches.size() > MIN_MATCH_COUNT) {
//-- Localize the object
std::vector<Point2f> obj;
std::vector<Point2f> scene;
for (auto &match : good_matches) {
//-- Get the keypoints from the good matches
obj.push_back(keypoints_object[match.queryIdx].pt);
scene.push_back(keypoints_scene[match.trainIdx].pt);
}
Mat H = findHomography(obj, scene, RANSAC);
//-- Get the corners from the image_1 ( the object to be "detected" )
std::vector<Point2f> obj_corners(4);
obj_corners[0] = Point2f(0, 0);
obj_corners[1] = Point2f((float)img_object.cols, 0);
obj_corners[2] = Point2f((float)img_object.cols, (float)img_object.rows);
obj_corners[3] = Point2f(0, (float)img_object.rows);
std::vector<Point2f> scene_corners(4);
perspectiveTransform(obj_corners, scene_corners, H);
//-- Draw lines between the corners (the mapped object in the scene - image_2 )
line(img_matches, scene_corners[0] + Point2f(img_object.cols, 0),
scene_corners[1] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[1] + Point2f(img_object.cols, 0),
scene_corners[2] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[2] + Point2f(img_object.cols, 0),
scene_corners[3] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[3] + Point2f(img_object.cols, 0),
scene_corners[0] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
std::cout << scene_corners << "\n";
}
}
drawMatches(img_object, keypoints_object, img_scene, keypoints_scene,
all_good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
//-- Show detected matches
imshow("Good Matches & Object detection", img_matches);
waitKey(0);
return 0;
}
You can use a simple clusterer to either directly compute the clusters, or to find the number of clusters and/or the cluster-center initialization for kmeans. Below is a possible implementation of an agglomerative clusterer, which groups together points closer than a specified distance (see the dist parameter of the constructor). In your case, dist can be the largest distance between keypoints in the small image.
Header file:
#include <map>
#include <vector>
#include <opencv2/core.hpp>

class PointsClusterer {
public:
    PointsClusterer(int dist, std::vector<cv::Point2f> points);
    bool cluster();
    std::map<int, std::vector<cv::Point2f>> getClusters();

private:
    double distance(int i, int j);
    void merge(int i, int j);

private:
    std::vector<cv::Point2f> m_Points;
    std::map<int, std::vector<int>> m_Clusters;
    std::map<int, cv::Point2f> m_Sums;
    int m_dist = 0;
};
Cpp file:
#include "PointsClusterer.h" // hypothetical header name
#include <iterator>          // std::next

PointsClusterer::PointsClusterer(int dist, std::vector<cv::Point2f> points) :
    m_Points(points), m_dist(dist)
{}
bool PointsClusterer::cluster()
{
    // initialization: each point starts in its own cluster
    for (int i = 0; i < (int)m_Points.size(); ++i)
    {
        m_Clusters[i] = std::vector<int>(1, i);
        m_Sums[i] = m_Points[i];
    }
    bool still_merge = true;
    // repeatedly merge the first pair of clusters closer than m_dist
    while (still_merge)
    {
        still_merge = false;
        bool break_i = false;
        for (int i = 0; i < (int)m_Clusters.size() && !break_i; ++i)
            for (int j = i + 1; j < (int)m_Clusters.size(); ++j)
            {
                if (distance(i, j) < m_dist)
                {
                    merge(i, j);
                    break_i = true;
                    still_merge = true;
                    break;
                }
            }
    }
    // final conversion to std::map<int, std::vector<cv::Point2f>> is missing
    return true;
}
void PointsClusterer::merge(int i, int j)
{
    // std::map iterators are bidirectional, not random access,
    // so use std::next rather than begin() + n
    auto iti = std::next(m_Clusters.begin(), i);
    auto itj = std::next(m_Clusters.begin(), j);
    for (auto val : itj->second)
    {
        iti->second.push_back(val);
        m_Sums[iti->first] += m_Points[val];
    }
    m_Clusters.erase(itj);
}
double PointsClusterer::distance(int i, int j)
{
    auto iti = std::next(m_Clusters.begin(), i);
    auto itj = std::next(m_Clusters.begin(), j);
    // compare cluster centroids: running sum divided by member count
    auto vali = m_Sums[iti->first] / (float)iti->second.size();
    auto valj = m_Sums[itj->first] / (float)itj->second.size();
    return cv::norm(vali - valj);
}
The cluster method is deliberately oversimplified so that it is obvious how it works. Its performance can be improved, but I believe that is beyond the scope of your question.
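As a rough usage sketch against your code (assuming cluster() is completed so that getClusters() returns the per-cluster points, as noted in the code; max_obj_dist is just one possible choice of the distance threshold, not part of the original code):
// cluster the scene keypoint locations
std::vector<cv::Point2f> pts;
for (const auto& kp : keypoints_scene)
    pts.push_back(kp.pt);
// assume points of one object are never farther apart than the
// object image's diagonal
int max_obj_dist = (int)cv::norm(cv::Point2f((float)img_object.cols, (float)img_object.rows));
PointsClusterer clusterer(max_obj_dist, pts);
clusterer.cluster();
std::map<int, std::vector<cv::Point2f>> clusters = clusterer.getClusters();
std::cout << "found " << clusters.size() << " clusters\n";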
I don't know a solution to your problem, but the following might help answer the questions you've asked.
In the comments it was suggested that you might need an implementation of meanshift, which OpenCV already has; there is an example and documentation with a tutorial.
The clusterCount for kmeans is the number of clusters you want to create (see the documentation). I don't know how to estimate that number automatically, but I guess you know it for your use case.
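If you cannot know it in advance, one common heuristic (my addition, not from the linked documentation) is to run kmeans for several candidate counts and inspect the compactness value it returns; a sharp drop followed by a plateau (an "elbow") suggests a reasonable cluster count:
// cv::kmeans returns the compactness: the sum of squared distances
// from each point to its assigned cluster center
for (int k = 1; k <= 6; k++) {
    cv::Mat labels, centers;
    double compactness = cv::kmeans(keypoints_scene_points, k, labels,
        cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 10, 1.0),
        3, cv::KMEANS_PP_CENTERS, centers);
    std::cout << "k=" << k << " compactness=" << compactness << "\n";
}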
You initialize descriptors_scene_clusters with only its first element:
Mat descriptors_scene_clusters[3] = { Mat(descriptors_scene.rows, descriptors_scene.cols, CV_8U, Scalar(0)) };
And when you iterate over it:
for (int i=0; i<labels.rows; i++) {
int clusterIndex = labels.at<int>(i);
Point2f pt = keypoints_scene_points.at<Point2f>(i);
descriptors_scene_clusters[clusterIndex].at<uchar>(pt) = descriptors_scene.at<uchar>(pt); // this line raises the error
}
when clusterIndex is 1 or 2, you access an array element that was only default-constructed (an empty Mat), and calling .at<uchar> on it results in the EXC_BAD_ACCESS error.
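A minimal sketch of one possible fix (my suggestion, untested on your images): build one descriptor matrix per cluster by copying descriptor rows, and keep the original keypoint index of every copied row, because knnMatch's trainIdx will then refer to rows inside the cluster rather than to keypoints_scene:
// one descriptor Mat per cluster, plus the original keypoint index of
// each copied row; row i of descriptors_scene is the descriptor of
// keypoints_scene[i]
std::vector<cv::Mat> descriptors_scene_clusters(estimate_cluster_count);
std::vector<std::vector<int>> cluster_to_scene_idx(estimate_cluster_count);
for (int i = 0; i < labels.rows; i++) {
    int clusterIndex = labels.at<int>(i);
    descriptors_scene_clusters[clusterIndex].push_back(descriptors_scene.row(i));
    cluster_to_scene_idx[clusterIndex].push_back(i);
}
When collecting the good matches you would then look up keypoints_scene[cluster_to_scene_idx[i][match.trainIdx]].pt instead of keypoints_scene[match.trainIdx].pt. Also note that flann::KDTreeIndexParams expects float descriptors, while ORB produces binary ones; you would need flann::LshIndexParams or a BFMatcher with NORM_HAMMING instead.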
I hope this helps for further investigation!