I need to find and count the ID's that appear with a 1 after 3 or more consecutive zeros.
This is a example of what I have:
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# 1 0 0 0 1 0 0 1 1 1 0
# 2 0 0 0 0 0 0 1 0 0 0
# 3 0 0 0 0 0 0 0 0 0 1
# 4 1 0 0 1 0 1 0 1 0 1
# 5 0 0 1 0 0 1 1 0 0 1
c1<- c("ID","Jan","Feb", "Mar","Apr", "May","Jun", "Jul", "Aug", "Sept", "Oct")
c2<- c(1,0,0,0,1,0,0,1,1,1,0)
c3<- c(2,0,0,0,0,0,0,1,0,0,0)
c4<- c(3,0,0,0,0,0,0,0,0,0,1)
c5<- c(4,1,0,0,1,0,1,0,1,0,1)
c6<- c(5,0,0,1,0,0,1,1,0,0,1)
BD<-data.frame(rbind(c2,c3,c4,c5,c6))
colnames(BD)<-c1
The result of what I expect is something like this:
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# 1 0 0 0 1 0 0 1 1 1 0
# 2 0 0 0 0 0 0 1 0 0 1
# 3 0 0 0 0 0 0 0 0 0 1
Anyone know how to do it? Thanks!
You could collapse to string and use grep() to search for pattern.
k <- 3
grep(sprintf(paste0("%0", k + 1, "d"), 1), apply(d[-1], 1, paste, collapse=""))
# [1] 2 4 5 6 8
If no following 1 is needed you could use the rle().
d
# id Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# c1 1 1 1 0 1 1 0 0 1 1 1 0 0
# c2 2 0 0 0 1 1 1 0 1 1 0 1 0
# c3 3 1 0 0 1 1 0 1 1 1 0 1 0
# c4 4 0 0 0 0 0 1 1 0 0 1 1 0
# c5 5 0 0 0 1 1 1 1 0 0 1 0 1
# c6 6 1 0 0 0 1 0 1 0 0 0 0 1
# c7 7 0 1 0 0 1 0 1 1 1 0 0 1
# c8 8 0 1 1 1 1 1 1 1 0 0 0 1
# c9 9 0 1 0 0 1 1 0 0 1 1 1 0
# c10 10 1 1 0 1 0 1 1 0 0 1 0 1
k <- 3
d$id[sapply(as.data.frame(t(d[-1])), function(x) any(rle(x)$lengths[rle(x)$values == 0] >= k))]
# [1] 2 4 5 6 8
Data:
set.seed(0)
d <- data.frame(id=1:10,
`dimnames<-`(matrix(sample(0:1, 120, r=1), 10),
list(paste0("c", 1:10), month.abb)))
If you take the rowid(rleid(x)) of a vector x you get the number of steps into each "run" each element is*. You can check that this is >= 3 and the element is 0. If that is true for the previous element (for the shifted output) and the element is 1, return TRUE. Then check if this is TRUE for any of the elements in the row.
library(data.table)
rows <-
apply(BD, 1, function(r) any(shift(rowid(rleid(r)) >= 3 & r == 0) & r == 1))
BD[rows,]
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# c2 1 0 0 0 1 0 0 1 1 1 0
# c3 2 0 0 0 0 0 0 1 0 0 0
# c4 3 0 0 0 0 0 0 0 0 0 1
* Here's an example for a particular row (the first)
rbind(
rowid_rleid = rowid(rleid(unlist(BD[1,]))),
original = unlist(BD[1,]))
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# rowid_rleid 1 1 2 3 1 1 2 1 2 3 1
# original 1 0 0 0 1 0 0 1 1 1 0
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With