Append first row by group to original data

Question

I have data with a grouping variable "ID" and some values:

ID, Value
1, 1
1, 2
1, 3
1, 4
2, 5
2, 6 
2, 7
2, 8

Within each group, I want to append the first row after the last row. Desired result:

ID, Value
1, 1
1, 2
1, 3
1, 4
1, 1 # First row of ID 1 inserted as last row in the group
2, 5
2, 6 
2, 7
2, 8
2, 5 # First row of ID 2 inserted as last row in the group

I have 5000 row of this.

jblood94 · Accepted Answer

And with data.table:

library(data.table)

dt <- data.table(ID = rep(1:2, each = 4), Value = 1:8)
dt[,.(Value = c(Value, first(Value))), ID]
#>     ID Value
#>  1:  1     1
#>  2:  1     2
#>  3:  1     3
#>  4:  1     4
#>  5:  1     1
#>  6:  2     5
#>  7:  2     6
#>  8:  2     7
#>  9:  2     8
#> 10:  2     5

Benchmarking with a 5000-row table:

library(dplyr)

dt <- data.table(ID = rep(1:1250, each = 4), Value = 1:5e3)

f1 <- function(dt) dt[,.(Value = c(Value, first(Value))), ID]
# base R
f2 <- function(dt) do.call(rbind, lapply(split(dt, by = "ID"), function(x) rbind(x, x[1,])))
# tidyverse
f3 <- function(dt) {
  dt %>%
    group_by(ID) %>%
    do(add_row(., ID = unique(.$ID), Value = first(.$Value))) %>%
    ungroup()
}
f4 <- function(dt) {
  dt %>%
    group_by(ID) %>%
    group_modify(~ add_row(., Value = first(.$Value))) %>%
    ungroup()
}

microbenchmark::microbenchmark(data.table = f1(dt),
                               "base R" = f2(dt),
                               tidyverse1 = f3(dt),
                               tidyverse2 = f4(dt),
                               times = 10)
#> Unit: milliseconds
#>        expr      min       lq      mean   median       uq      max neval
#>  data.table   3.4989   3.6844   4.16619   4.3085   4.4623   5.3020    10
#>      base R 245.1397 263.1636 283.61131 284.8053 307.7105 310.3901    10
#>  tidyverse1 761.7097 773.3705 791.05115 787.9463 808.5416 821.5321    10
#>  tidyverse2 711.9593 716.4959 752.20273 728.2170 782.6474 837.1926    10

If speed is really important, this simple Rcpp function provides a very fast solution:

Rcpp::cppFunction(
  "IntegerVector firstLast(const IntegerVector& x) {
    const int n = x.size();
    IntegerVector idxOut(2*n);
    int i0 = 1;
    int idx = 0;
    idxOut(0) = 1;
    
    for (int i = 1; i < n; i++) {
      if (x(i) != x(i - 1)) {
        idxOut(++idx) = i0;
        i0 = i + 1;
      }
      idxOut(++idx) = i + 1;
    }
    idxOut(++idx) = i0;
    return idxOut[Rcpp::Range(0, idx)];
  }"
)

Benchmarking against the fastest solution from this answer (on a much larger dataset):

dt = data.table(ID = rep(1:125e4, each = 4), Value = 1:5e6)

microbenchmark::microbenchmark(
  f_uniq_dt = setorder(rbindlist(list(dt, unique(dt, by = "ID"))), ID),
  f_Rcpp = dt[firstLast(dt$ID)],
  check = "equal"
)
#> Unit: milliseconds
#>      expr     min       lq     mean   median       uq      max neval
#> f_uniq_dt 78.6056 83.71345 95.42876 85.80720 90.03685 175.8867   100
#>    f_Rcpp 49.1485 53.38275 60.96322 55.44925 58.01485 121.3637   100

harre · Answer

Within tidyverse, you could use add_row with do (now deprecated) or group_modify (experimental):

dat |>
  group_by(ID) |>
  do(add_row(., ID = unique(.$ID), Value = first(.$Value))) |>
  ungroup()

dat |>
  group_by(ID) |>
  group_modify(~ add_row(., Value = first(.$Value))) |>
  ungroup()

Or bind_rows with summarize (my variation of @Gregor Thomas, thanks):

dat |> 
  group_by(ID) |>
  summarize(bind_rows(cur_data(), head(cur_data(), 1))) |>
  ungroup()

Or by applying the same logic of @Henrik using bind_rows, filter, and arrange:

dat |>
  bind_rows(dat |> filter(!duplicated(ID))) |>
  arrange(ID)

Output:

# A tibble: 10 × 2
      ID Value
   <int> <dbl>
 1     1     1
 2     1     2
 3     1     3
 4     1     4
 5     1     1
 6     2     5
 7     2     6
 8     2     7
 9     2     8
10     2     5

Thanks to @SamR for the data.

Append first row by group to original data

Tags:

append

r

data-manipulation

ail

2 Answers

jblood94

harre

Recent Activity

Donate For Us

Append first row by group to original data

Tags:

append

r

data-manipulation

ail

2 Answers

jblood94

harre

Related questions

Recent Activity

Donate For Us