i have the following list and I would want to add a new row before each group of ID's preserving the ID and setting the A and B to 1.00.
ID DATEE A B
102984 2016-11-23 2.0 2.0
140349 2016-11-23 1.5 1.5
167109 2017-04-16 2.0 2.0
167109 2017-06-21 1.5 1.5
The end result:
ID DATEE A B
102984 NA 1.0 1.0
102984 2016-11-23 2.0 2.0
140349 NA 1.0 1.0
140349 2016-11-23 1.5 1.5
167109 NA 1.0 1.0
167109 2017-04-16 2.0 2.0
167109 2017-06-21 1.5 1.5
Up until now I used the following code which adds an empty row at the bottom of each group do.call(rbind, by(df,df$ID,rbind,"")) however I couldn't introduce the specific values in their respective columns when I substituted "" by a vector of values.
Here is one option with tidyverse
. We get the distinct
rows of dataset by 'ID', mutate
the variables 'A', 'B' to 1, and 'DATEE' to NA, then with bind_rows
row bind with the original dataset and arrange
by 'ID'
library(tidyverse)
df1 %>%
distinct(ID, .keep_all= TRUE) %>%
mutate_at(vars("A", "B"), funs((1))) %>%
mutate(DATEE = NA) %>%
bind_rows(., df1) %>%
arrange(ID)
# ID DATEE A B
#1 102984 <NA> 1.0 1.0
#2 102984 2016-11-23 2.0 2.0
#3 140349 <NA> 1.0 1.0
#4 140349 2016-11-23 1.5 1.5
#5 167109 <NA> 1.0 1.0
#6 167109 2017-04-16 2.0 2.0
#7 167109 2017-06-21 1.5 1.5
(I'll assume the date formatting has been fixed, e.g., with df1$DATEE = as.Date(df1$DATEE)
.)
Or translated to base R:
new1 = data.frame(ID = unique(df1$ID), DATEE = Sys.Date()[NA_integer_], A = 1, B = 1)
tabs = list(new1, df1)
res = do.call(rbind, tabs)
res <- res[order(res$ID), ]
# ID DATEE A B
# 1 102984 <NA> 1.0 1.0
# 4 102984 2016-11-23 2.0 2.0
# 2 140349 <NA> 1.0 1.0
# 5 140349 2016-11-23 1.5 1.5
# 3 167109 <NA> 1.0 1.0
# 6 167109 2017-04-16 2.0 2.0
# 7 167109 2017-06-21 1.5 1.5
Or with data.table:
library(data.table)
new1 = data.table(ID = unique(df1$ID), DATEE = Sys.Date()[NA_integer_], A = 1, B = 1)
tabs = list(new1, df1)
res = rbindlist(tabs)
setorder(res)
# ID DATEE A B
#1: 102984 <NA> 1.0 1.0
#2: 102984 2016-11-23 2.0 2.0
#3: 140349 <NA> 1.0 1.0
#4: 140349 2016-11-23 1.5 1.5
#5: 167109 <NA> 1.0 1.0
#6: 167109 2017-04-16 2.0 2.0
#7: 167109 2017-06-21 1.5 1.5
There are some other ways, too:
# or let DATEE and other cols be filled as NA
library(data.table)
new1 = data.table(ID = unique(df1$ID), A = 1, B = 1)
tabs = list(df1, new1)
res = rbindlist(tabs, fill = TRUE, idcol = "src")
setorder(res, ID, -src)
res[, src := NULL ]
# or a more compact option (assuming df1$A has no missing values)
library(data.table)
setDT(df1)[, .SD[c(.N+1, seq_len(.N))], ID][is.na(A), c("A", "B") := 1][]
Here are two solutions with base R
Split into sub-groups based on ID
, add a row to the top of each sub-group, and rbind
everything back at the end.
do.call(rbind, lapply(split(df, df$ID), function(a){
rbind(setNames(c(a$ID[1], NA, 1, 1), names(a)), a)
}))
# ID DATEE A B
#102984.1 102984 <NA> 1.0 1.0
#102984.2 102984 2016-11-23 2.0 2.0
#140349.1 140349 <NA> 1.0 1.0
#140349.2 140349 2016-11-23 1.5 1.5
#167109.1 167109 <NA> 1.0 1.0
#167109.3 167109 2017-04-16 2.0 2.0
#167109.4 167109 2017-06-21 1.5 1.5
Or you could initially replicate the first rows (by identifying them with ave) and then substitute appropriate values in each column.
df = df[sort(c(1:NROW(df), which(ave(df$A, df$ID, FUN = seq_along) == 1))),]
df$DATEE = replace(df$DATEE, which(ave(df$A, df$ID, FUN = seq_along) == 1), NA)
df$A = replace(df$A, which(ave(df$A, df$ID, FUN = seq_along) == 1), 1)
df$B = replace(df$B, which(ave(df$A, df$ID, FUN = seq_along) == 1), 1)
df
# ID DATEE A B
#1 102984 <NA> 1.0 1.0
#1.1 102984 2016-11-23 2.0 2.0
#2 140349 <NA> 1.0 1.0
#2.1 140349 2016-11-23 1.5 1.5
#3 167109 <NA> 1.0 1.0
#3.1 167109 2017-04-16 2.0 2.0
#4 167109 2017-06-21 1.5 1.5
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With