I hava a problem, where I need to fill NA by group, but only one (the first) NA before some non-NA value (direction = "up"). I know the tidyr fill() function but do not know how to use it conditionally (so to fill only one NA above some non-NA value).
df <- data.frame(g=c(1,1,1,1,1,1, 2,2,2,2,2,2,2),v=c(NA,NA,5,NA,8,NA, 1,1,NA,2,NA,NA,3))
Data (with group "g" and values "v"):
   g  v
1  1 NA
2  1 NA
3  1  5
4  1 NA
5  1  8
6  1 NA
7  2  1
8  2  1
9  2 NA
10 2  2
11 2 NA
12 2 NA
13 2  3
should become...
   g  v
1  1 NA
2  1  5
3  1  5
4  1  8
5  1  8
6  1 NA
7  2  1
8  2  1
9  2  2
10 2  2
11 2 NA
12 2  3
13 2  3
You can try coalesce + lead
> df %>%
+     mutate(vv = coalesce(v, lead(v)), .by = g)
   g  v vv
1  1 NA NA
2  1 NA  5
3  1  5  5
4  1 NA  8
5  1  8  8
6  1 NA NA
7  2  1  1
8  2  1  1
9  2 NA  2
10 2  2  2
11 2 NA NA
12 2 NA  3
13 2  3  3
With base R, you can define a customize a function f like below and then use ave to update the values by groups
f <- function(x) {
    idx <- which(is.na(x))
    p <- x[idx + 1]
    for (i in seq_along(idx)) {
        if (!is.na(p[i])) {
            x[idx[i]] <- p[i]
        }
    }
    x
}
and you will obtain
> transform(df, vv = ave(v, g, FUN = f))
   g  v vv
1  1 NA NA
2  1 NA  5
3  1  5  5
4  1 NA  8
5  1  8  8
6  1 NA NA
7  2  1  1
8  2  1  1
9  2 NA  2
10 2  2  2
11 2 NA NA
12 2 NA  3
13 2  3  3
ftic0 <- function() {
    df %>%
        mutate(vv = coalesce(v, lead(v)), .by = g)
}
ftic1 <- function() {
    f <- function(x) {
        idx <- which(is.na(x))
        p <- x[idx + 1]
        for (i in seq_along(idx)) {
            if (!is.na(p[i])) {
                x[idx[i]] <- p[i]
            }
        }
        x
    }
    transform(df, vv = ave(v, g, FUN = f))
}
fsbaldur0 <- function() {
    fill_one_up <- \(x) {
        n <- length(x)
        if (n <= 1L) {
            return(x)
        }
        for (i in 2L:n) {
            if (!is.na(x[i]) && is.na(x[i - 1L])) {
                x[i - 1L] <- x[i]
            }
        }
        return(x)
    }
    df |> mutate(vv = fill_one_up(v), .by = g)
}
fsbaldur1 <- function() {
    setDT(df)[, vv := fcoalesce(v, shift(v, -1)), by = g]
    setDF(df)
}
microbenchmark(
    ftic0 = ftic0(),
    ftic1 = ftic1(),
    fsbaldur0 = fsbaldur0(),
    fsbaldur1 = fsbaldur1(),
    unit = "relative",
    check = "equivalent"
)
shows
Unit: relative
      expr      min       lq     mean   median       uq       max neval
     ftic0 8.920659 6.457389 6.810327 5.939112 6.231513 10.436721   100
     ftic1 1.000000 1.000000 1.000000 1.000000 1.000000  1.000000   100
 fsbaldur0 6.199669 4.508141 4.460354 4.107098 4.139735  3.520242   100
 fsbaldur1 2.614049 2.188928 2.375262 2.137102 2.232717  5.698269   100
You can always create your own solution:
fill_one_up <- \(x) {
  n <- length(x)
  if (n <= 1L) return(x)
  for (i in 2L:n) {
    if (!is.na(x[i]) && is.na(x[i-1L])) {
      x[i-1L] <- x[i]
    }
  }
  return(x)
}
df <- df |> mutate(v2 = fill_one_up(v), .by=g)
#    g  v v2
# 1  1 NA NA
# 2  1 NA  5
# 3  1  5  5
# 4  1 NA  8
# 5  1  8  8
# 6  1 NA NA
# 7  2  1  1
# 8  2  1  1
# 9  2 NA  2
# 10 2  2  2
# 11 2 NA NA
# 12 2 NA  3
# 13 2  3  3
If you need more speed you can translate the logic to Rcpp:
Rcpp::cppFunction("NumericVector fill_one_up_cpp(NumericVector x) {
  int n = x.size();
  if (n <= 1) return x;
  for (int i = 1; i < n; i++) {
    if (ISNA(x[i-1]) && !ISNA(x[i])) {
      x[i-1] = x[i];
    }
  }
  return x;
}")
EDIT
Using ThomasIsCoding's idea to first locate all the NAs fill_one_up() can be further simplified:
fill_one_up2 <- \(x) {
  idx <- which(is.na(x))
  idx <- idx[which(!is.na(x[idx + 1]))]
  x[idx] <- x[idx+1]
  x
} 
Which seems can be made faster with collapse:
library(collapse)
fill_one_up4 <- \(x) {
  idx <- whichNA(x)
  idx <- fsubset(idx, whichNA(x[idx + 1], TRUE))
  copyv(x, idx, R = x[idx+1], vind1 = TRUE) # Could play with setv() with care
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With