Base R, dplyr, and data.table can't rbind data frames containing data frame columns:
# Reproducible example: two one-row data frames, each with a regular column
# `a` and a column `b` that is itself a one-column data frame (`z`).
x <- data.frame(a=1)
x$b <- data.frame(z=2)
y <- data.frame(a=3)
y$b <- data.frame(z=4)
# base and dplyr fail
# base rbind() stops with a duplicate 'row.names' error (recorded below)
rbind(x, y)
#> Warning: non-unique value when setting 'row.names': '1'
#> Error in `.rowNamesDF<-`(x, value = value): duplicate 'row.names' are not allowed
# dplyr::bind_rows() rejects the data frame column outright (recorded below)
dplyr::bind_rows(x,y)
#> Error: Argument 2 can't be a list containing data frames
# data.table gives a result that doesn't make much sense to me
# (per the str() output below, `b` ends up as a 1-row data frame with two
# unnamed columns instead of a 2-row data frame with one column `z`)
str(data.table::rbindlist(list(x,y)))
#> Warning in setDT(ans): Some columns are a multi-column type (such as a matrix
#> column): [2]. setDT will retain these columns as-is but subsequent operations
#> like grouping and joining may fail. Please consider as.data.table() instead
#> which will create a new column for each embedded column.
#> Classes 'data.table' and 'data.frame': 2 obs. of 2 variables:
#> $ a: num 1 3
#> $ b:'data.frame': 1 obs. of 2 variables:
#> ..$ : num 2
#> ..$ : num 4
#> - attr(*, ".internal.selfref")=<externalptr>
Created on 2020-01-03 by the reprex package (v0.3.0)
My expected output would be to rbind the data frame columns as well, so we'd end up with something like `res` below:
# Expected result of binding `x` and `y`: the regular column `a` is stacked,
# and the nested data frame column `b` is stacked too, keeping its single
# column `z` with the values taken from x$b (2) and y$b (4).
res <- data.frame(a = c(1, 3))
res$b <- data.frame(z = c(2, 4))
res
#> a z
#> 1 1 2
#> 2 3 4
str(res)
#> 'data.frame': 2 obs. of 2 variables:
#> $ a: num 1 3
#> $ b:'data.frame': 2 obs. of 1 variable:
#> ..$ z: num 2 4
How can I work around this?
We can bind the data frame columns separately from the regular columns. Here are three similar solutions, each wrapping one of the three functions mentioned in the question:
base R
# Row-bind data frames that may contain data frame columns (base R).
#
# Regular columns are combined with rbind(). Each column that is itself a
# data frame is combined separately through a recursive call and then attached
# to the result, so nested data frame columns of any depth are handled.
#
# Arguments:
#   ...  Data frames to bind.
# Returns:
#   A data frame holding the bound regular columns followed by one data frame
#   column per nested column name.
# Errors:
#   Stops when a column is a data frame in one argument but a non-data-frame
#   column in another.
rbind_fixed <- function(...) {
  dfs <- list(...)

  # Names of all columns that are a data frame in at least one argument.
  # vapply() keeps the mask logical even for a zero-column data frame, where
  # sapply() would return an empty list that cannot be used as a subscript.
  is_df_col <- function(df) vapply(df, is.data.frame, logical(1L))
  df_col_names <- unique(unlist(
    lapply(dfs, function(df) names(df)[is_df_col(df)]),
    use.names = FALSE
  ))

  # Fail if these are not consistently data frames in all arguments
  # (a column that is absent from an argument is tolerated here).
  for (df_col_name in df_col_names) {
    for (df in dfs) {
      col <- df[[df_col_name]]
      if (!is.null(col) && !is.data.frame(col)) {
        stop(
          df_col_name, " is not consistently a data frame column",
          call. = FALSE
        )
      }
    }
  }

  # Bind the data frames without their data frame columns.
  dfs_regular <- lapply(dfs, function(df) df[setdiff(names(df), df_col_names)])
  res <- do.call(rbind, dfs_regular)

  # When every argument consists solely of data frame columns, rbind() gets
  # only zero-column inputs and returns a frame without rows; rebuild an empty
  # frame with the right row count so the nested columns can be attached.
  if (length(df_col_names) > 0L && is.data.frame(res) && ncol(res) == 0L) {
    n_rows <- sum(vapply(dfs, nrow, integer(1L)))
    res <- data.frame(row.names = seq_len(n_rows))
  }

  # Bind the data frame columns separately and add them to the result.
  for (df_col_name in df_col_names) {
    subdfs <- lapply(dfs, function(df) {
      if (df_col_name %in% names(df)) {
        df[[df_col_name]]
      } else {
        # Zero-column placeholder with as many rows as `df`; seq_len() is
        # safe for zero rows, unlike seq.int(0), which yields c(1, 0).
        # NOTE(review): base rbind() is documented to drop zero-column
        # arguments, so a nested column missing from some inputs presumably
        # still fails on a row-count mismatch below - confirm if NA padding
        # is wanted.
        data.frame(row.names = seq_len(nrow(df)))
      }
    })
    # Recursive, to be robust in case of deeply nested data frames.
    res[[df_col_name]] <- do.call(rbind_fixed, subdfs)
  }
  res
}
# Apply the base R wrapper to the example frames `x` and `y`; the recorded
# output shows the nested column `z` bound alongside `a`.
rbind_fixed(x, y)
#> a z
#> 1 1 2
#> 2 3 4
dplyr
# Row-bind data frames that may contain data frame columns (dplyr).
#
# Regular columns are combined with dplyr::bind_rows(). Each column that is
# itself a data frame is combined separately through a recursive call and then
# attached to the result.
#
# Arguments:
#   ...  Data frames to bind; a list of data frames can be spliced with `!!!`.
# Returns:
#   The result of dplyr::bind_rows() on the regular columns, with one data
#   frame column added per nested column name.
# Errors:
#   Stops when a column is a data frame in one argument but a non-data-frame
#   column in another.
bind_rows_fixed <- function(...) {
  # Use list2() so we can use `!!!`, as we lose the "autosplice" feature of
  # bind_rows().
  dfs <- rlang::list2(...)

  # Names of all columns that are a data frame in at least one argument.
  # vapply() keeps the mask logical even for a zero-column data frame (such as
  # the placeholder built below), where sapply() would return an empty list
  # that cannot be used as a subscript.
  is_df_col <- function(df) vapply(df, is.data.frame, logical(1L))
  df_col_names <- unique(unlist(
    lapply(dfs, function(df) names(df)[is_df_col(df)]),
    use.names = FALSE
  ))

  # Fail if these are not consistently data frames in all arguments
  # (a column that is absent from an argument is tolerated here).
  for (df_col_name in df_col_names) {
    for (df in dfs) {
      col <- df[[df_col_name]]
      if (!is.null(col) && !is.data.frame(col)) {
        stop(
          df_col_name, " is not consistently a data frame column",
          call. = FALSE
        )
      }
    }
  }

  # Bind the data frames without their data frame columns.
  dfs_regular <- lapply(dfs, function(df) df[setdiff(names(df), df_col_names)])
  res <- dplyr::bind_rows(dfs_regular)

  # Bind the data frame columns separately and add them to the result.
  for (df_col_name in df_col_names) {
    subdfs <- lapply(dfs, function(df) {
      if (df_col_name %in% names(df)) {
        df[[df_col_name]]
      } else {
        # Zero-column placeholder with as many rows as `df`. Namespaced so the
        # function also works when dplyr/tibble are not attached.
        dplyr::tibble(.rows = nrow(df))
      }
    })
    # Recursive, to be robust in case of deeply nested data frames.
    res[[df_col_name]] <- bind_rows_fixed(!!!subdfs)
  }
  res
}
# Apply the dplyr wrapper to the example frames `x` and `y`; the recorded
# output shows the nested column `z` bound alongside `a`.
bind_rows_fixed(x,y)
#> a z
#> 1 1 2
#> 2 3 4
data.table
# Row-bind a list of data frames that may contain data frame columns
# (data.table).
#
# Regular columns are combined with data.table::rbindlist(). Each column that
# is itself a data frame is combined separately through a recursive call and
# then attached to the result.
#
# Arguments:
#   l  A list of data frames to bind.
#      NOTE(review): elements are subset with `df[<character vector>]`, which
#      selects columns on a plain data frame; presumably inputs are expected
#      to be plain data frames rather than data.tables - confirm.
# Returns:
#   The result of data.table::rbindlist() on the regular columns, with one
#   nested column added per data frame column name.
# Errors:
#   Stops when a column is a data frame in one element but a non-data-frame
#   column in another.
rbindlist_fixed <- function(l) {
  dfs <- l

  # Names of all columns that are a data frame in at least one element.
  # vapply() keeps the mask logical even for a zero-column data frame, where
  # sapply() would return an empty list that cannot be used as a subscript.
  is_df_col <- function(df) vapply(df, is.data.frame, logical(1L))
  df_col_names <- unique(unlist(
    lapply(dfs, function(df) names(df)[is_df_col(df)]),
    use.names = FALSE
  ))

  # Fail if these are not consistently data frames in all elements
  # (a column that is absent from an element is tolerated here).
  for (df_col_name in df_col_names) {
    for (df in dfs) {
      col <- df[[df_col_name]]
      if (!is.null(col) && !is.data.frame(col)) {
        stop(
          df_col_name, " is not consistently a data frame column",
          call. = FALSE
        )
      }
    }
  }

  # Bind the data frames without their data frame columns.
  dfs_regular <- lapply(dfs, function(df) df[setdiff(names(df), df_col_names)])
  res <- data.table::rbindlist(dfs_regular)

  # Bind the data frame columns separately and add them to the result.
  for (df_col_name in df_col_names) {
    subdfs <- lapply(dfs, function(df) {
      if (df_col_name %in% names(df)) {
        df[[df_col_name]]
      } else {
        # Zero-column placeholder with as many rows as `df`; seq_len() is
        # safe for zero rows, unlike seq.int(0), which yields c(1, 0).
        data.frame(row.names = seq_len(nrow(df)))
      }
    })
    # Recursive, to be robust in case of deeply nested data frames.
    res[[df_col_name]] <- rbindlist_fixed(subdfs)
  }
  res
}
# Apply the data.table wrapper to the example frames `x` and `y`.
dt <- rbindlist_fixed(list(x,y))
# Printing shows the nested column only as <multi-column> ...
dt
#> a b
#> 1: 1 <multi-column>
#> 2: 3 <multi-column>
# ... while str() confirms `b` has 2 rows and a single column `z`.
str(dt)
#> Classes 'data.table' and 'data.frame': 2 obs. of 2 variables:
#> $ a: num 1 3
#> $ b:Classes 'data.table' and 'data.frame': 2 obs. of 1 variable:
#> ..$ z: num 2 4
#> ..- attr(*, ".internal.selfref")=<externalptr>
#> - attr(*, ".internal.selfref")=<externalptr>
The problem seems to be that the bind functions have trouble with the row names of the data frame `b` inside `x`/`y`. We can avoid this in base R by renaming the rows (see below).
Important note: dplyr is now able to handle this example. No workarounds are required anymore.
# Setup: two one-row data frames, each with a regular column `a` and a data
# frame column `b` (single column `z`).
x <- data.frame(a = 1)
x$b <- data.frame(z = 2)
y <- data.frame(a = 3)
y$b <- data.frame(z = 4)
# Base rbind() fails because the nested data frames carry the same row name.
rbind(x, y) # still does not work
#> Warning: non-unique value when setting 'row.names': '1'
#> Error in `.rowNamesDF<-`(x, value = value): duplicate 'row.names' are not allowed
# library() errors when dplyr is missing; require() would only return FALSE.
library(dplyr)
dplyr::bind_rows(x, y) # works!!!
#> a z
#> 1 1 2
#> 2 3 4
# Avoid conflicting row names: number the rows of `x` (and of its nested data
# frame) after those of `y`. seq_len() stays empty for a zero-row `x`, whereas
# seq(nrow(y) + 1, nrow(y)) would count backwards.
new_row_names <- nrow(y) + seq_len(nrow(x))
row.names(x) <- new_row_names
row.names(x$b) <- new_row_names
rbind(x, y) # works now, too
#> a z
#> 2 1 2
#> 1 3 4
Created on 2020-06-27 by the reprex package (v0.3.0)
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With