I have an N*N matrix (N=2 to 10000) of numbers that may range from 0 to 1000. How can I find the largest (rectangular) submatrix that consists of the same number?
Example:
1 2 3 4 5
-- -- -- -- --
1 | 10 9 9 9 80
2 | 5 9 9 9 10
3 | 85 86 54 45 45
4 | 15 21 5 1 0
5 | 5 6 88 11 10
The output should be the area of the submatrix, followed by 1-based coordinates of its top left element. For the example, it would be (6, 2, 1)
because there are six 9s, and the top-left one is situated at column 2, row 1.
Let the given binary matrix be M[R][C]. The idea of the algorithm is to construct an auxiliary size matrix S[][] in which each entry S[i][j] represents the size of the square sub-matrix with all 1s including M[i][j] where M[i][j] is the rightmost and bottom-most entry in sub-matrix.
The largest square submatrix is formed by cells (0, 2), (3, 2), (0, 5), and (3, 5). The brute-force solution is to consider every square submatrix and check if it is surrounded by all 1's. We keep track of the dimensions of the largest square submatrix seen and finally return it.
For each such matrix, in corresponding rows, there are $n + 1$ submatrices (exactly one of each width $1, 2, 3, \dots, n + 1$). Hence, in total there are $\frac{m(m + 1)(n + 1)}{2}$ submatrices which are newly added. This can be written as a recursive formula: $f(m, n + 1) = f(m, n) + \frac{m(m + 1)(n + 1)}{2}$.
This is a work in progress
I thought about this problem and I think I may have an O(w*h) algorithm.
The idea goes like this:
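- For each cell (i,j), compute the highest number of cells with the same value in column j starting from (i,j). Store these values as heights[i][j].
- Keep a stack (LIFO) of candidate submatrices, and for each row i, walk over every column j:
  - Pop every submatrix whose height > heights[i][j], because a submatrix with height > heights[i][j] cannot continue on this cell.
  - Push a submatrix defined by (i,j,heights[i][j]), where j is the farthest coordinate where we can fit a submatrix of height heights[i][j].
  - Update the current maximum submatrix.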
The tricky part is in the inner loop. I use something similar to the max subwindow algorithm to ensure it is O(1) on average for each cell.
I will try to formulate a proof but in the meantime here is the code.
#include <algorithm>
#include <iterator>
#include <iostream>
#include <ostream>
#include <vector>
// A matrix is stored as a vector of rows; each row is a vector of ints.
using row_t = std::vector<int>;
using matrix_t = std::vector<row_t>;
// Number of rows stored in M.
std::size_t height(matrix_t const& M) {
  return M.size();
}
// Number of columns in M, read from its first row; 0 when M has no rows.
std::size_t width(matrix_t const& M) {
  if (M.empty()) return 0;
  return M.front().size();
}
std::ostream& operator<<(std::ostream& out, matrix_t const& M) {
for(unsigned i=0; i<height(M); ++i) {
std::copy(begin(M[i]), end(M[i]),
std::ostream_iterator<int>(out, ", "));
out << std::endl;
}
return out;
}
// A rectangular region of a matrix.
//   i, j : 0-based row and column where the region starts
//   h, w : number of rows (height) and columns (width) it spans
// Regions are ordered by area (w*h) only; position plays no part in the
// comparison.
struct sub_matrix_t {
  int i, j, h, w;

  // Default region has height 0 and therefore area 0.
  sub_matrix_t() : i(0), j(0), h(0), w(1) {}

  sub_matrix_t(int i_, int j_, int h_, int w_)
      : i(i_), j(j_), h(h_), w(w_) {}

  // True when this region covers strictly less area than rhs.
  // The products are formed in long long so that large dimensions cannot
  // overflow int (signed overflow is undefined behaviour).
  bool operator<(sub_matrix_t const& rhs) const {
    const long long lhs_area = static_cast<long long>(w) * h;
    const long long rhs_area = static_cast<long long>(rhs.w) * rhs.h;
    return lhs_area < rhs_area;
  }
};
// Pop all sub_matrix from the vector keeping only those with an height
// inferior to the passed height.
// Compute the max sub matrix while removing sub matrix with height > h
void pop_sub_m(std::vector<sub_matrix_t>& subs,
int i, int j, int h, sub_matrix_t& max_m) {
sub_matrix_t sub_m(i, j, h, 1);
while(subs.size() && subs.back().h >= h) {
sub_m = subs.back();
subs.pop_back();
sub_m.w = j-sub_m.j;
max_m = std::max(max_m, sub_m);
}
// Now sub_m.{i,j} is updated to the farest coordinates where there is a
// submatrix with heights >= h
// If we don't cut the current height (because we changed value) update
// the current max submatrix
if(h > 0) {
sub_m.h = h;
sub_m.w = j-sub_m.j+1;
max_m = std::max(max_m, sub_m);
subs.push_back(sub_m);
}
}
// Opens a new candidate of height h and width 1 at (i, j), but only when the
// stack is empty or its top candidate is strictly lower than h; otherwise
// the stack is left untouched. `max_m` is accepted for symmetry with
// pop_sub_m and is not used.
void push_sub_m(std::vector<sub_matrix_t>& subs,
                int i, int j, int h, sub_matrix_t& max_m) {
  (void)max_m;
  const bool taller_than_top = subs.empty() || subs.back().h < h;
  if (!taller_than_top) return;
  subs.emplace_back(i, j, h, 1);
}
// Finds a largest-area rectangle of M whose cells all hold the same value
// and stores it in `max_m` (start row/column in i/j, size in h/w).
//
// `max_m` is always reset first; for an empty matrix it keeps the default
// (area 0) value. Among rectangles of equal area the first one encountered
// wins; rows are scanned bottom-to-top and columns left-to-right.
//
// M is expected to be rectangular: every row is indexed up to width(M),
// which is taken from the first row.
void solve(matrix_t const& M, sub_matrix_t& max_m) {
  // Initialize answer suitable for an empty matrix
  max_m = sub_matrix_t();
  if (height(M) == 0 || width(M) == 0) return;

  const int rows = static_cast<int>(height(M));
  const int cols = static_cast<int>(width(M));

  // 1) heights[i][j] = length of the run of cells equal to M[i][j] that
  //    goes down column j starting at row i (always >= 1).
  matrix_t heights(height(M), row_t(width(M), 1));
  for (int i = rows - 1; i > 0; --i)
    for (int j = 0; j < cols; ++j)
      if (M[i - 1][j] == M[i][j])
        heights[i - 1][j] = heights[i][j] + 1;

  // 2) For every row, sweep the columns keeping a stack of open candidates
  //    with increasing heights (largest-rectangle-in-histogram style). The
  //    stack is restarted whenever the value changes along the row.
  std::vector<sub_matrix_t> subs;
  for (int i = rows - 1; i >= 0; --i) {
    push_sub_m(subs, i, 0, heights[i][0], max_m);
    for (int j = 1; j < cols; ++j) {
      const bool same_val = (M[i][j] == M[i][j - 1]);
      // pop_sub_m closes popped candidates with width (j - start), i.e. it
      // treats j as the first column that is NOT part of them. That is
      // column j both when the run continues with a lower height and when
      // the value changes (height 0 closes every open candidate). Passing
      // j-1 on a value change would measure them one column too narrow.
      const int pop_height = same_val ? heights[i][j] : 0;
      pop_sub_m(subs, i, j, pop_height, max_m);
      push_sub_m(subs, i, j, heights[i][j], max_m);
    }
    // End of row: close everything still open. The first excluded column is
    // one past the last valid index, hence cols rather than cols - 1.
    pop_sub_m(subs, i, cols, 0, max_m);
  }
}
// Sample matrix from the question. Its largest single-valued rectangle is
// the 2x3 block of 9s in the first two rows (area 6, top-left at column 2,
// row 1 when counted from 1).
matrix_t M1{
{10, 9, 9, 9, 80},
{ 5, 9, 9, 9, 10},
{85, 86, 54, 45, 45},
{15, 21, 5, 1, 0},
{ 5, 6, 88, 11, 10},
};
// Second sample: the 9s form an irregular shape. The largest single-valued
// rectangle is the 2x2 block of 9s in rows 3-4, columns 1-2 (counted from 1),
// area 4; the other runs of 9s only reach area 3.
matrix_t M2{
{10, 19, 9, 29, 80},
{ 5, 9, 9, 9, 10},
{ 9, 9, 54, 45, 45},
{ 9, 9, 5, 1, 0},
{ 5, 6, 88, 11, 10},
};
int main() {
sub_matrix_t answer;
std::cout << M1 << std::endl;
solve(M1, answer);
std::cout << '(' << (answer.w*answer.h)
<< ',' << (answer.j+1) << ',' << (answer.i+1) << ')'
<< std::endl;
answer = sub_matrix_t();
std::cout << M2 << std::endl;
solve(M2, answer);
std::cout << '(' << (answer.w*answer.h)
<< ',' << (answer.j+1) << ',' << (answer.i+1) << ')'
<< std::endl;
}
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With