I have a deeply nested list I would like to convert to a data frame. Here's what the structure looks like:
# Example data: a three-level nested list keyed by time ("10", "20"), then
# seed ("123", "456"), then treatment ("0.1", "0.2"). Every leaf is a list of
# two scalar draws (Gmax.val, G2.val) and two length-8 draws (Gmax.vec,
# G2.vec) from rnorm().
ls <- local({
  # One leaf; list() evaluates its arguments left to right, so the draw order
  # is Gmax.val, G2.val, Gmax.vec, G2.vec.
  draw_leaf <- function() {
    list(
      Gmax.val = rnorm(1),
      G2.val = rnorm(1),
      Gmax.vec = rnorm(8),
      G2.vec = rnorm(8)
    )
  }

  # Build one named level by calling `make()` once per key, in key order.
  build_level <- function(keys, make) {
    setNames(lapply(keys, function(key) make()), keys)
  }

  build_level(c("10", "20"), function() {
    build_level(c("123", "456"), function() {
      build_level(c("0.1", "0.2"), draw_leaf)
    })
  })
})
> ls[['10']][['123']][['0.1']]
$Gmax.val
[1] -0.1982298
$G2.val
[1] -0.2761515
$Gmax.vec
[1] -0.4732736 -0.5714809 -0.1600405 -0.7138532 0.3503852 -0.7367241 0.3024992 -0.4931045
$G2.vec
[1] -0.2374231 -0.7927135 -0.9554769 0.8733201 -0.4126742 1.8689940 0.1576750 -0.2184344
Each sublist name is a value for a different variable: in this example, maybe:
ls[[]] = time; 10 or 20
ls[[]][[]] = seed; 123 or 456
ls[[]][[]][[]] = treatment; 0.1 or 0.2
Ideally I'd like to have the names of sublists used as values in their own columns. I'd like the data frame to look like this:
# time seed treatment Gmax.val G2.val Gmax.vec G2.vec
#1 10 123 0.1 0.1972457 -0.1224265 0.06121407 1.5102516
#2 10 123 0.1 0.1972457 -0.1224265 -2.53026477 -0.1320042
#3 10 123 0.1 0.1972457 -0.1224265 0.06648820 -0.2477285
#4 10 123 0.1 0.1972457 -0.1224265 -0.45594701 -0.8577670
#5 10 123 0.1 0.1972457 -0.1224265 0.90828911 -1.0710828
#6 10 123 0.1 0.1972457 -0.1224265 0.56427976 1.5086222
Thanks for the help.
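Another approach is to:
1. Melt the nested list to a data.frame using rrapply() in the rrapply-package (or similarly with reshape2::melt()).
2. Reshape the data.frame to the required format using tidyr's pivot_wider() and unnest().

library(rrapply)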
library(tidyverse)
# Melt the nested list into a long data frame (list names in L1..L4, leaf
# contents in `value`), label the three key columns, spread the leaf names
# into their own columns, and finally expand the resulting list columns.
rrapply(ls, how = "melt") %>%
  rename(time = L1, seed = L2, treatment = L3) %>%       ## name the key columns
  pivot_wider(names_from = L4, values_from = value) %>%  ## reshape to wide df
  unnest(cols = c(Gmax.val, G2.val, Gmax.vec, G2.vec))   ## unnest list columns
#> # A tibble: 64 x 7
#> time seed treatment Gmax.val G2.val Gmax.vec G2.vec
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 10 123 0.1 -0.626 0.184 -0.836 1.51
#> 2 10 123 0.1 -0.626 0.184 1.60 0.390
#> 3 10 123 0.1 -0.626 0.184 0.330 -0.621
#> 4 10 123 0.1 -0.626 0.184 -0.820 -2.21
#> 5 10 123 0.1 -0.626 0.184 0.487 1.12
#> 6 10 123 0.1 -0.626 0.184 0.738 -0.0449
#> 7 10 123 0.1 -0.626 0.184 0.576 -0.0162
#> 8 10 123 0.1 -0.626 0.184 -0.305 0.944
#> 9 10 123 0.2 0.821 0.594 0.919 -0.478
#> 10 10 123 0.2 0.821 0.594 0.782 0.418
#> # … with 54 more rows
Or using data.table's dcast() to reshape the long table into wide format:
library(data.table)
# Long table: one row per leaf, with the list names at each depth in L1..L4
# and the leaf contents in the `value` column.
long_dt <- as.data.table(rrapply(ls, how = "melt"))

# Wide table: one row per (L1, L2, L3) combination and one column per leaf
# name found in L4.
wide_dt <- dcast(long_dt, L1 + L2 + L3 ~ L4, value.var = "value")

# Flatten the list columns group by group, in the requested column order.
wide_dt <- wide_dt[
  ,
  lapply(.SD, unlist),
  by = c("L1", "L2", "L3"),
  .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")
]

# Rename the key columns in place (setnames() modifies by reference).
setnames(wide_dt, c("L1", "L2", "L3"), c("time", "seed", "treatment"))
Some benchmarks
# Time the two reshaping approaches on the same nested list `ls`, running
# each expression 25 times.
microbenchmark::microbenchmark(
# tidyr approach: melt -> pivot_wider -> unnest -> rename.
tidyr = {
rrapply(ls, how = "melt") %>%
pivot_wider(names_from = "L4") %>%
unnest(c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%
rename(time = L1, seed = L2, treatment = L3)
},
# data.table approach: melt -> dcast -> unlist by group -> setnames.
data.table = {
wide_dt <- dcast(as.data.table(rrapply(ls, how = "melt")), L1 + L2 + L3 ~ L4)
wide_dt <- wide_dt[, lapply(.SD, unlist), by = list(L1, L2, L3), .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")]
setnames(wide_dt, old = c("L1", "L2", "L3"), new = c("time", "seed", "treatment"))
# Return the table as the value of the expression.
wide_dt
},
times = 25
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> tidyr 17.959197 20.072647 23.662698 21.278771 25.633581 40.593022 25
#> data.table 2.061861 2.655782 2.966581 2.784425 2.988044 5.032524 25
Here's a solution that uses some of the newer "rectangling" methods in tidyr. I'm posting this mainly as an exercise to gain and share some familiarity with these functions - my sense is that this approach could definitely be, well, tidied up a bit. Still, it's a nice way to play with swinging back and forth between wide/long list unpacking.
library(tidyverse)
# Rectangle the nested list `ls` one level at a time: each unnest_wider()
# spreads one list level into columns, and the following pivot_longer() folds
# those columns back into a key column (seed, then treatment).
#
# The input is the nested list `ls` defined in the question. The snippet
# previously read `data`, which is never defined here and resolves to the
# utils::data function, so the pipeline could not run.
#
# NOTE(review): set.seed() only affects the result if `ls` is (re)generated
# after this call; it is kept as in the original snippet.
set.seed(1L)
tibble(time = names(ls), data = ls) %>%
  # Level 2: one column per seed, then back to long form.
  unnest_wider(data) %>%
  pivot_longer(-time, names_to = "seed", values_to = "treatment") %>%
  # Level 3: one column per treatment, then back to long form.
  unnest_wider(treatment) %>%
  pivot_longer(-c(time, seed), names_to = "treatment", values_to = "g_data") %>%
  # Leaves: Gmax.val, G2.val, Gmax.vec and G2.vec become columns.
  unnest_wider(g_data) %>%
  # Tag each (time, seed, treatment) row so it stays identifiable below.
  mutate(row_n = row_number()) %>%
  # Stack the two vector columns and give every vector element its own row.
  pivot_longer(c(Gmax.vec, G2.vec), names_to = "g", values_to = "g_val") %>%
  unnest_longer(g_val) %>%
  # sub_n is the element position within each vector; it lets pivot_wider()
  # line up Gmax.vec and G2.vec side by side again.
  group_by(row_n, time, seed, treatment, Gmax.val, G2.val, g) %>%
  mutate(sub_n = row_number()) %>%
  pivot_wider(names_from = g, values_from = g_val) %>%
  ungroup() %>%
  # Drop the helper index columns.
  select(-row_n, -sub_n)
# A tibble: 64 x 7
time seed treatment Gmax.val G2.val Gmax.vec G2.vec
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 10 123 0.1 -0.626 0.184 -0.836 1.51
2 10 123 0.1 -0.626 0.184 1.60 0.390
3 10 123 0.1 -0.626 0.184 0.330 -0.621
4 10 123 0.1 -0.626 0.184 -0.820 -2.21
5 10 123 0.1 -0.626 0.184 0.487 1.12
6 10 123 0.1 -0.626 0.184 0.738 -0.0449
7 10 123 0.1 -0.626 0.184 0.576 -0.0162
8 10 123 0.1 -0.626 0.184 -0.305 0.944
9 10 123 0.2 0.821 0.594 0.919 -0.478
10 10 123 0.2 0.821 0.594 0.782 0.418
# … with 54 more rows
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With