Extract sorted rows from matrix

Question

Given matrix m:

      # [,1] [,2] [,3] [,4]
 # [1,]    2    1    3    4
 # [2,]    4    3    2    1
 # [3,]    2    3    1    4
 # [4,]    1    2    3    4
 # [5,]    4    2    3    1
 # [6,]    4    3    1    2
 # [7,]    2    4    3    1
 # [8,]    4    3    2    1
 # [9,]    3    2    1    4
# [10,]    1    2    3    4
# [11,]    3    2    4    1
# [12,]    4    3    2    1
# [13,]    2    1    3    4
# [14,]    2    1    3    4
# [15,]    1    2    3    4
# [16,]    4    3    2    1
# [17,]    2    1    3    4
# [18,]    1    4    3    2
# [19,]    3    2    1    4
# [20,]    1    2    3    4

m <- structure(c(2L, 4L, 2L, 1L, 4L, 4L, 2L, 4L, 3L, 1L, 3L, 4L, 2L, 
2L, 1L, 4L, 2L, 1L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 3L, 4L, 3L, 2L, 
2L, 2L, 3L, 1L, 1L, 2L, 3L, 1L, 4L, 2L, 2L, 3L, 2L, 1L, 3L, 3L, 
1L, 3L, 2L, 1L, 3L, 4L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 4L, 
1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 
2L, 4L, 4L), .Dim = c(20L, 4L))

We can extract sorted rows in this way:

apply(m, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))

#[1] FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE 
#FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE

It's OK if matrix is not large. But I'm talking about matrix with millions rows. Can we do better? Can we do it in a vectorized way? Matrix m is given just as a toy data. I'm looking for a general solution.

thelatemail · Accepted Answer

It's ugly, but you could get there by checking if all the differences in each column are negative or positive.

colSums(sign(diff(t(m)))) %in% c(-3,3)
# [1] FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE
#[13] FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE

My quick testing suggests it is a lot faster to execute.

You can generalize it by just checking against the size of the matrix m:

colSums(sign(diff(t(m)))) %in% c(-(ncol(m)-1), ncol(m)-1)

In the case that you have sorted rows like c(1,1,2,3) which have repeated values, you can use a slightly more long-winded approach:

sdm <- diff(t(m))
nc <- ncol(m) - 1
colSums(sdm <= 0)==nc | colSums(sdm >= 0)==nc
# [1] FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE
#[13] FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE

Some quick benchmarking (keeping in mind these aren't all identical in terms of coping with repeated values):

set.seed(1)
m2 <- m[sample(1:nrow(m),1e6,replace=T),]

## original apply code
system.time({
  apply(m2, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))
})
#   user  system elapsed 
# 14.888   0.272  15.153

And the comparison runs:

system.time({
  n <- t(m2)
  forwards <- colSums(n == sort(m2[1,])) == ncol(m2)
  backwards  <- colSums(n == rev(sort(m2[1,]))) == ncol(m2)
  vec <- forwards | backwards
})
#   user  system elapsed 
#  0.104   0.020   0.123

system.time({
  sdm <- diff(t(m2))
  nc <- ncol(m) - 1
  colSums(sdm <= 0)==nc | colSums(sdm >= 0)==nc
})
#   user  system elapsed 
#  0.248   0.032   0.279

system.time({
  apply(m2[,-1] - m2[,-ncol(m2)], 1, function(x) all(x>=0) || all(x <= 0))
})
#   user  system elapsed 
#  3.724   0.004   3.731

library(matrixStats)
system.time(rowVarDiffs(m2) == 0)
#   user  system elapsed 
# 40.176   1.156  42.071

sebastian-c · Answer

I went for a recycling approach:

n <- t(m)

forwards <- colSums(n == sort(m[1,])) == ncol(m)
backwards  <- colSums(n == rev(sort(m[1,]))) == ncol(m)

vec <- forwards | backwards
unvec <- apply(m, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))

identical(vec, unvec)
[1] TRUE

Extract sorted rows from matrix

Tags:

sorting

r

matrix

989

2 Answers

thelatemail

sebastian-c

Recent Activity

Donate For Us

Extract sorted rows from matrix

Tags:

sorting

r

matrix

989

2 Answers

thelatemail

sebastian-c

Related questions

Recent Activity

Donate For Us