Given matrix m
:
# [,1] [,2] [,3] [,4]
# [1,] 2 1 3 4
# [2,] 4 3 2 1
# [3,] 2 3 1 4
# [4,] 1 2 3 4
# [5,] 4 2 3 1
# [6,] 4 3 1 2
# [7,] 2 4 3 1
# [8,] 4 3 2 1
# [9,] 3 2 1 4
# [10,] 1 2 3 4
# [11,] 3 2 4 1
# [12,] 4 3 2 1
# [13,] 2 1 3 4
# [14,] 2 1 3 4
# [15,] 1 2 3 4
# [16,] 4 3 2 1
# [17,] 2 1 3 4
# [18,] 1 4 3 2
# [19,] 3 2 1 4
# [20,] 1 2 3 4
m <- structure(c(2L, 4L, 2L, 1L, 4L, 4L, 2L, 4L, 3L, 1L, 3L, 4L, 2L,
2L, 1L, 4L, 2L, 1L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 3L, 4L, 3L, 2L,
2L, 2L, 3L, 1L, 1L, 2L, 3L, 1L, 4L, 2L, 2L, 3L, 2L, 1L, 3L, 3L,
1L, 3L, 2L, 1L, 3L, 4L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 4L,
1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 1L, 4L,
2L, 4L, 4L), .Dim = c(20L, 4L))
We can extract sorted rows in this way:
apply(m, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))
#[1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
#FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
It's OK if matrix is not large. But I'm talking about matrix with millions rows.
Can we do better? Can we do it in a vectorized way? Matrix m
is given just as a toy data. I'm looking for a general solution.
It's ugly, but you could get there by checking if all the differences in each column are negative or positive.
colSums(sign(diff(t(m)))) %in% c(-3,3)
# [1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
#[13] FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
My quick testing suggests it is a lot faster to execute.
You can generalize it by just checking against the size of the matrix m
:
colSums(sign(diff(t(m)))) %in% c(-(ncol(m)-1), ncol(m)-1)
In the case that you have sorted rows like c(1,1,2,3)
which have repeated values, you can use a slightly more long-winded approach:
sdm <- diff(t(m))
nc <- ncol(m) - 1
colSums(sdm <= 0)==nc | colSums(sdm >= 0)==nc
# [1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
#[13] FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
Some quick benchmarking (keeping in mind these aren't all identical in terms of coping with repeated values):
set.seed(1)
m2 <- m[sample(1:nrow(m),1e6,replace=T),]
## original apply code
system.time({
apply(m2, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))
})
# user system elapsed
# 14.888 0.272 15.153
And the comparison runs:
system.time({
n <- t(m2)
forwards <- colSums(n == sort(m2[1,])) == ncol(m2)
backwards <- colSums(n == rev(sort(m2[1,]))) == ncol(m2)
vec <- forwards | backwards
})
# user system elapsed
# 0.104 0.020 0.123
system.time({
sdm <- diff(t(m2))
nc <- ncol(m) - 1
colSums(sdm <= 0)==nc | colSums(sdm >= 0)==nc
})
# user system elapsed
# 0.248 0.032 0.279
system.time({
apply(m2[,-1] - m2[,-ncol(m2)], 1, function(x) all(x>=0) || all(x <= 0))
})
# user system elapsed
# 3.724 0.004 3.731
library(matrixStats)
system.time(rowVarDiffs(m2) == 0)
# user system elapsed
# 40.176 1.156 42.071
I went for a recycling approach:
n <- t(m)
forwards <- colSums(n == sort(m[1,])) == ncol(m)
backwards <- colSums(n == rev(sort(m[1,]))) == ncol(m)
vec <- forwards | backwards
unvec <- apply(m, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))
identical(vec, unvec)
[1] TRUE
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With