I have a dataframe as the following :
COL_1 COL_2 COL_3 COL_4 COL_5 COL_6
<int> <int> <int> <int> <int> <int>
1 1 1 1 1 1 1
2 1 1 1 1 1 2
3 1 1 1 1 1 3
4 1 1 1 1 1 4
5 1 2 1 1 1 5
6 1 1 1 1 1 6
7 1 3 4 5 6 7
8 1 1 1 1 1 8
9 1 1 9 1 1 9
10 1 3 5 7 9 10
I'd like to filter this dataset to keep only values from COL_1 to COL_6 strictly increasing, so it would be as the following:
COL_1 COL_2 COL_3 COL_4 COL_5 COL_6
<int> <int> <int> <int> <int> <int>
7 1 3 4 5 6 7
10 1 3 5 7 9 10
EDIT : The code should be used in a function with a dynamic number of columns (which will be named from COL_1 to COL_N). A "basic" code such as
df %>% filter(COL_6 > COL_5 & ... & COL_2 > COL_1)
will not work in my situation. Thank you very much
apply by rowsdf[!colSums(apply(df, 1, diff) <= 0), ]
# COL_1 COL_2 COL_3 COL_4 COL_5 COL_6
# 7 1 3 4 5 6 7
# 10 1 3 5 7 9 10
The trick is that: ! + numeric vector will convert non-zeros to FALSE and zeros to TRUE.
!(-3:3)
# [1] FALSE FALSE FALSE TRUE FALSE FALSE FALSE
Map+Reduce (Much Faster)df[Reduce(`&`, Map(`>`, df[-1], df[-ncol(df)])), ]
library(microbenchmark)
bm <- microbenchmark(
MrFlick = df[Reduce(\(x, y) { list(y, x[[2]] & (x[[1]] < y)) }, df, init = list(df[[1]]-1, TRUE))[[2]], ],
Darren_2 = df[Reduce(`&`, Map(`>`, df[-1], df[-ncol(df)])), ],
LMc = df[lapply(df, increasing())[[length(df)]], ],
Thomas_2 = df[rowSums(df[-1] > df[-ncol(df)]) == ncol(df) - 1, ],
Thomas_1 = df[rowMeans(df[-1] > df[-ncol(df)]) == 1, ],
zx8754_1 = df[ apply(df, 1, \(i) !is.unsorted(i, strictly = TRUE)), ],
Darren_1 = df[!colSums(apply(df, 1, diff) <= 0), ],
zx8754_2 = df[ apply(df, 1, \(i) all(rank(i) == seq.int(ncol(df)))), ],
setup = {
nr <- 1e4
df <- as.data.frame(matrix(runif(1e6, 0, 100), nr, 1e2))
# pick 100 rows to sort
ind <- sample(1:nr, 100)
df[ind, ] <- t(apply(df[ind, ], 1, sort))
},
unit = "relative"
)
# Unit: relative
# expr min lq mean median uq max neval
# MrFlick 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 100
# Darren_2 1.106325 1.094651 1.076697 1.087586 1.005100 1.4959953 100
# LMc 1.343194 1.341192 1.250389 1.331452 1.154049 0.9628282 100
# Thomas_2 1.582195 1.572858 1.521608 1.574595 1.382800 1.3160589 100
# Thomas_1 1.580272 1.563802 1.375415 1.552095 1.301315 1.1092026 100
# zx8754_1 6.116539 6.362767 5.427589 6.896683 4.752581 2.2093543 100
# Darren_1 23.723654 25.009280 20.208686 25.972528 19.187991 4.9937860 100
# zx8754_2 44.612050 46.052300 34.288879 45.622591 29.601381 7.9472758 100

With a bit of work, you could use Reduce for this. For example
keep <- Reduce(function(x, y) {
list(y, x[[2]] & (x[[1]] < y))
}, dd, init=list(dd[[1]]-1, TRUE))[[2]]
which(keep)
# [1] 7 10
dd[keep, ]
# COL_1 COL_2 COL_3 COL_4 COL_5 COL_6
# 7 1 3 4 5 6 7
# 10 1 3 5 7 9 10
Tested with
dd <- read.table(text="
COL_1 COL_2 COL_3 COL_4 COL_5 COL_6
1 1 1 1 1 1 1
2 1 1 1 1 1 2
3 1 1 1 1 1 3
4 1 1 1 1 1 4
5 1 2 1 1 1 5
6 1 1 1 1 1 6
7 1 3 4 5 6 7
8 1 1 1 1 1 8
9 1 1 9 1 1 9
10 1 3 5 7 9 10", header=TRUE)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With