i have the following list and I would want to add a new row before each group of ID's preserving the ID and setting the A and B to 1.00.
ID DATEE A B
102984 2016-11-23 2.0 2.0
140349 2016-11-23 1.5 1.5
167109 2017-04-16 2.0 2.0
167109 2017-06-21 1.5 1.5
The end result:
ID DATEE A B
102984 NA 1.0 1.0
102984 2016-11-23 2.0 2.0
140349 NA 1.0 1.0
140349 2016-11-23 1.5 1.5
167109 NA 1.0 1.0
167109 2017-04-16 2.0 2.0
167109 2017-06-21 1.5 1.5
Up until now I used the following code which adds an empty row at the bottom of each group do.call(rbind, by(df,df$ID,rbind,"")) however I couldn't introduce the specific values in their respective columns when I substituted "" by a vector of values.
Here is one option with tidyverse. We get the distinct rows of dataset by 'ID', mutate the variables 'A', 'B' to 1, and 'DATEE' to NA, then with bind_rows row bind with the original dataset and arrange by 'ID'
library(tidyverse)
df1 %>%
distinct(ID, .keep_all= TRUE) %>%
mutate_at(vars("A", "B"), funs((1))) %>%
mutate(DATEE = NA) %>%
bind_rows(., df1) %>%
arrange(ID)
# ID DATEE A B
#1 102984 <NA> 1.0 1.0
#2 102984 2016-11-23 2.0 2.0
#3 140349 <NA> 1.0 1.0
#4 140349 2016-11-23 1.5 1.5
#5 167109 <NA> 1.0 1.0
#6 167109 2017-04-16 2.0 2.0
#7 167109 2017-06-21 1.5 1.5
(I'll assume the date formatting has been fixed, e.g., with df1$DATEE = as.Date(df1$DATEE).)
Or translated to base R:
new1 = data.frame(ID = unique(df1$ID), DATEE = Sys.Date()[NA_integer_], A = 1, B = 1)
tabs = list(new1, df1)
res = do.call(rbind, tabs)
res <- res[order(res$ID), ]
# ID DATEE A B
# 1 102984 <NA> 1.0 1.0
# 4 102984 2016-11-23 2.0 2.0
# 2 140349 <NA> 1.0 1.0
# 5 140349 2016-11-23 1.5 1.5
# 3 167109 <NA> 1.0 1.0
# 6 167109 2017-04-16 2.0 2.0
# 7 167109 2017-06-21 1.5 1.5
Or with data.table:
library(data.table)
new1 = data.table(ID = unique(df1$ID), DATEE = Sys.Date()[NA_integer_], A = 1, B = 1)
tabs = list(new1, df1)
res = rbindlist(tabs)
setorder(res)
# ID DATEE A B
#1: 102984 <NA> 1.0 1.0
#2: 102984 2016-11-23 2.0 2.0
#3: 140349 <NA> 1.0 1.0
#4: 140349 2016-11-23 1.5 1.5
#5: 167109 <NA> 1.0 1.0
#6: 167109 2017-04-16 2.0 2.0
#7: 167109 2017-06-21 1.5 1.5
There are some other ways, too:
# or let DATEE and other cols be filled as NA
library(data.table)
new1 = data.table(ID = unique(df1$ID), A = 1, B = 1)
tabs = list(df1, new1)
res = rbindlist(tabs, fill = TRUE, idcol = "src")
setorder(res, ID, -src)
res[, src := NULL ]
# or a more compact option (assuming df1$A has no missing values)
library(data.table)
setDT(df1)[, .SD[c(.N+1, seq_len(.N))], ID][is.na(A), c("A", "B") := 1][]
Here are two solutions with base R
Split into sub-groups based on ID, add a row to the top of each sub-group, and rbind everything back at the end.
do.call(rbind, lapply(split(df, df$ID), function(a){
rbind(setNames(c(a$ID[1], NA, 1, 1), names(a)), a)
}))
# ID DATEE A B
#102984.1 102984 <NA> 1.0 1.0
#102984.2 102984 2016-11-23 2.0 2.0
#140349.1 140349 <NA> 1.0 1.0
#140349.2 140349 2016-11-23 1.5 1.5
#167109.1 167109 <NA> 1.0 1.0
#167109.3 167109 2017-04-16 2.0 2.0
#167109.4 167109 2017-06-21 1.5 1.5
Or you could initially replicate the first rows (by identifying them with ave) and then substitute appropriate values in each column.
df = df[sort(c(1:NROW(df), which(ave(df$A, df$ID, FUN = seq_along) == 1))),]
df$DATEE = replace(df$DATEE, which(ave(df$A, df$ID, FUN = seq_along) == 1), NA)
df$A = replace(df$A, which(ave(df$A, df$ID, FUN = seq_along) == 1), 1)
df$B = replace(df$B, which(ave(df$A, df$ID, FUN = seq_along) == 1), 1)
df
# ID DATEE A B
#1 102984 <NA> 1.0 1.0
#1.1 102984 2016-11-23 2.0 2.0
#2 140349 <NA> 1.0 1.0
#2.1 140349 2016-11-23 1.5 1.5
#3 167109 <NA> 1.0 1.0
#3.1 167109 2017-04-16 2.0 2.0
#4 167109 2017-06-21 1.5 1.5
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With