Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Use select_helpers with dplyr::coalesce

Tags:

r

dplyr

I have a very wide dataframe (much larger than the data provided here for reprex).

Using the data provided below (assigned to my_wide_data), I would like to utilize dplyr::coalesce along with the select helpers from dplyr (e.g. dplyr::starts_with).

# Reproducible example data (dput() output): a data frame with 204 rows and
# three integer columns (myvar1, myvar2, myvar3), some of which contain NA.
my_wide_data <- structure(
  list(
    myvar1 = c(
      10L, 3L, 11L, 2L, 4L, 5L, 2L, 6L, 1L,
      4L, 12L, 9L, 12L, 2L, 3L, 1L, 2L, 8L, 1L, 2L, 3L, 3L, 8L, 11L,
      10L, 6L, 3L, 10L, 5L, 2L, 8L, 3L, 1L, 6L, 2L, 1L, 8L, 4L, 10L,
      3L, 1L, 4L, 2L, 12L, 3L, 2L, 5L, 1L, 3L, 5L, 3L, 2L, 12L, 3L,
      6L, 11L, 12L, 2L, 6L, 10L, 3L, 10L, 3L, 2L, 2L, 2L, 2L, 3L, 6L,
      3L, 6L, 10L, 1L, 3L, 3L, 6L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 10L,
      3L, 3L, 4L, 1L, 3L, 2L, 3L, 9L, 1L, 1L, NA, 5L, 1L, 8L, 3L, 10L,
      3L, 3L, 4L, 7L, 10L, 2L, 2L, 11L, 6L, 11L, 6L, 4L, 4L, 12L, 6L,
      6L, 1L, 2L, 11L, 2L, 2L, 11L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 9L,
      2L, 1L, 1L, 4L, 2L, 8L, 2L, 10L, 6L, 3L, 1L, 6L, 2L, 10L, 3L,
      5L, 6L, 3L, 4L, 10L, 9L, 3L, 4L, 3L, 2L, 3L, 9L, 3L, 3L, 1L,
      10L, 4L, 4L, 6L, 2L, 7L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 7L, 2L,
      2L, 6L, 2L, 4L, 3L, 3L, 4L, 2L, 4L, 2L, 5L, 5L, 3L, 6L, 5L, 4L,
      5L, 4L, 4L, 10L, 1L, 9L, 4L, 4L, 4L, 4L, 8L, 6L, 5L
    ),
    myvar2 = c(
      24L,
      24L, 27L, 8L, 9L, 15L, 1L, 27L, 3L, 23L, 28L, 10L, 24L, 5L, 14L,
      17L, 16L, 28L, 29L, 16L, 3L, 13L, 7L, 13L, 18L, 25L, 10L, 10L,
      15L, 27L, 21L, 17L, 25L, 25L, 15L, 25L, 21L, 13L, 9L, 28L, 1L,
      13L, 19L, 21L, 23L, 15L, NA, 29L, 12L, 25L, 1L, 5L, 12L, 7L,
      15L, 25L, 4L, 8L, 30L, 25L, 8L, NA, 6L, 16L, 14L, 7L, 20L, 26L,
      19L, 10L, 1L, 15L, 30L, 7L, 16L, 23L, 24L, 21L, 8L, 1L, 1L, 10L,
      26L, 28L, 5L, 7L, 21L, 10L, 13L, 26L, 14L, 5L, 22L, 18L, NA,
      NA, 9L, 20L, 17L, 23L, 3L, 13L, 7L, 5L, 6L, 9L, 8L, 15L, 9L,
      10L, 15L, 13L, NA, 30L, 22L, 14L, 9L, 16L, 6L, 13L, 19L, 15L,
      1L, 7L, 19L, 25L, 10L, NA, 8L, 25L, 5L, 2L, 16L, 8L, 19L, 18L,
      27L, 2L, NA, 16L, 29L, 4L, 7L, 27L, 24L, 5L, 6L, 17L, 16L, 13L,
      11L, NA, 12L, 9L, 8L, 1L, NA, 5L, 12L, 3L, 3L, 10L, 16L, 16L,
      5L, 24L, 10L, 17L, 23L, 19L, 12L, 12L, 18L, 6L, 1L, 3L, 15L,
      26L, 28L, 28L, 27L, 3L, 18L, 22L, 13L, 11L, 30L, 24L, 1L, 25L,
      21L, 7L, 14L, 16L, 9L, 3L, 28L, 11L, 17L, 11L, 25L, 23L, 7L,
      21L
    ),
    myvar3 = c(
      78L, 79L, 78L, 78L, 79L, 78L, 79L, 77L, 79L,
      79L, 76L, 78L, 78L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 79L,
      78L, 78L, 78L, 79L, 79L, 78L, 78L, 79L, 78L, 79L, 79L, 78L, 79L,
      79L, 78L, 78L, 78L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 73L, 79L,
      79L, 79L, 79L, 79L, 72L, 79L, 78L, 78L, 78L, 79L, 78L, 78L, 79L,
      78L, 79L, 79L, 79L, 79L, 79L, 78L, 78L, 79L, 78L, 78L, 79L, 79L,
      79L, 76L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 75L, 79L, 79L, 79L,
      79L, 79L, 79L, 79L, 78L, 79L, 79L, 77L, 78L, 79L, 78L, 79L, 78L,
      79L, 79L, 79L, 78L, 78L, 79L, 79L, 78L, 78L, 78L, 78L, 79L, 79L,
      78L, 78L, 76L, 79L, 76L, 77L, 79L, 79L, 78L, 79L, 79L, 79L, 79L,
      79L, 79L, 79L, 78L, 78L, 79L, 78L, 79L, 79L, 78L, 79L, 78L, 79L,
      79L, 79L, 79L, 79L, 78L, 79L, 79L, 77L, 79L, 79L, 78L, 78L, 79L,
      78L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 78L, 79L, 79L, 78L, 79L,
      78L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 79L, 79L, 78L, 79L,
      78L, 79L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 79L,
      79L, 78L, 79L, 78L, 79L, 78L, 79L, 79L, 79L, 79L, 76L, 78L, 79L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -204L)
)

In other words, instead of

# Coalesce by listing every column explicitly. Inside mutate() the new
# column must be named with `=`; with `<-` the argument is unnamed and the
# column ends up named after the whole expression text.
my_wide_data %>%
  mutate(coalesce_var = coalesce(myvar1, myvar2, myvar3))

I would like to be able to do something like

# Desired (illustrative) syntax: choose the columns to coalesce with a
# select helper instead of naming each one. The new column is named with
# `=`, as mutate() requires.
my_wide_data %>%
  mutate(coalesce_var = coalesce(starts_with("my")))

QUESTION: Is it possible to accomplish something like this within dplyr or elsewhere in the tidyverse?

like image 949
joemienko Avatar asked Apr 29 '18 15:04

joemienko


1 Answer

The following works because coalesce(...) takes its vectors through `...`, so a list of vectors can be spliced into the call with the `!!!` operator:

# Two example vectors whose missing values complement each other.
vecs <- list(c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5))
# `!!!` splices the list so each element is passed as its own argument.
coalesce(!!!vecs)

You can combine this with a select helper: use select() to pick the columns, then splice the resulting data frame (which is itself a list of columns) into coalesce():

# Pick the "my*" columns with select(), splice them into coalesce() as
# separate arguments, and store the result in a new column.
mutate(
  my_wide_data,
  coalesce_var = coalesce(!!!select(my_wide_data, starts_with("my")))
)

#     myvar1 myvar2 myvar3 coalesce_var
# 1       10     24     78           10
# 2        3     24     79            3
# 3       11     27     78           11
# 4        2      8     78            2
# 5        4      9     79            4
# etc

EDIT: Here's an alternative construction, which I prefer:

library(rlang)
library(tidyselect)
# Resolve the matching column names, convert them to symbols, and splice
# the symbols into coalesce() so mutate() evaluates them as columns.
mutate(
  my_wide_data,
  coalesce_var = coalesce(
    !!!syms(vars_select(names(my_wide_data), starts_with("my")))
  )
)
like image 131
CPak Avatar answered Nov 01 '22 12:11

CPak