Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Setting column names when using bind_cols (r, dplyr)

Tags:

r

dplyr

I have a data.frame (df) which contains another data.frame called url_variables.

# Pull the nested data frame out of df. `[[` matches the column name exactly,
# whereas `$` allows partial name matching on a plain data.frame.
url_variables <- df[["url_variables"]]

url_variables contains many other data.frames such as source, campaign, page and many others. Each of these data frames has the 3 columns key, value and type. I'm using bind_cols to combine all the individual data frames into one flat data frame.

# Bind every nested data frame side by side into one flat data frame.
# The inner column names (key, value, type) are not prefixed with the name
# of the data frame they came from.
flat_url_variables <- bind_cols(as.list(url_variables))

The problem is the column names for flat_url_variables are the non-descriptive key, value, type, key, value, type etc. How can I name the columns of flat_url_variables from the name of each individual url variable data.frame (source_key, source_value, source_type, campaign_key etc)?

EDIT Here is a small sample of url_variables as an image: enter image description here

Here is the output of dput(head(url_variables)):

# Reproducible sample: the output of dput(head(url_variables)).
# It is a 6-row data.frame with 16 columns; each column is itself a
# data.frame holding three character columns named key, value and type.
# NOTE(review): the "[email protected]" strings look like e-mail addresses that
# were masked when this page was captured -- the original values cannot be
# recovered from here.
structure(list(`_privatedomain` = structure(list(key = c("_privatedomain", 
"_privatedomain", "_privatedomain", "_privatedomain", "_privatedomain", 
"_privatedomain"), value = c("t", "t", "t", "t", "t", "t"), type = c("url", 
"url", "url", "url", "url", "url")), .Names = c("key", "value", 
"type"), row.names = c(NA, 6L), class = "data.frame"), p = structure(list(
    key = c("p", NA, NA, "p", "p", "p"), value = c("2", NA, NA, 
    "2", "2", "2"), type = c("url", NA, NA, "url", "url", "url"
    )), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), s = structure(list(key = c("s", NA, 
NA, "s", "s", "s"), value = c("incomplete", NA, NA, "incomplete", 
"incomplete", "incomplete"), type = c("url", NA, NA, "url", "url", 
"url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), first_name = structure(list(key = c("first_name", 
NA, NA, "first_name", "first_name", "first_name"), value = c("Allan", 
NA, NA, "james", "Sheryl", "Yara"), type = c("url", NA, NA, 
"url", "url", "url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), last_name = structure(list(key = c("last_name", 
NA, NA, "last_name", "last_name", "last_name"), value = c("Smith", 
NA, NA, "jones", "Smith", "Keating"), type = c("url", NA, 
NA, "url", "url", "url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), email = structure(list(key = c("email", 
NA, NA, "email", "email", "email"), value = c("[email protected]", 
NA, NA, "[email protected]", "sheryl@email", "[email protected]"
), type = c("url", NA, NA, "url", "url", "url")), .Names = c("key", 
"value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    phone_number = structure(list(key = c("phone_number", NA, 
    NA, "phone_number", "phone_number", "phone_number"), value = c("0401234567", 
    NA, NA, "0401234567", "0401234567", "0401234567"), type = c("url", 
    NA, NA, "url", "url", "url")), .Names = c("key", "value", 
    "type"), row.names = c(NA, 6L), class = "data.frame"), from = structure(list(
        key = c("from", NA, NA, "from", "from", "from"), value = c("landing_page", 
        NA, NA, "landing_page", "landing_page", "landing_page"
        ), type = c("url", NA, NA, "url", "url", "url")), .Names = c("key", 
    "value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    snc = structure(list(key = c(NA, NA, "snc", NA, NA, NA), 
        value = c(NA, NA, "1495606827_5925262b571d70.64387871", 
        NA, NA, NA), type = c(NA, NA, "url", NA, NA, NA)), .Names = c("key", 
    "value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    `__sgtarget` = structure(list(key = c(NA, NA, "__sgtarget", 
    NA, NA, NA), value = c(NA, NA, "10", NA, NA, NA), type = c(NA, 
    NA, "url", NA, NA, NA)), .Names = c("key", "value", "type"
    ), row.names = c(NA, 6L), class = "data.frame"), customertime = structure(list(
        key = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), value = c(NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_), type = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_
        )), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), sotime = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), cancelreschedulelink = structure(list(
        key = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), value = c(NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_), type = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_
        )), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), params = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), icslink = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), type = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame")), .Names = c("_privatedomain", 
"p", "s", "first_name", "last_name", "email", "phone_number", 
"from", "snc", "__sgtarget", "customertime", "sotime", "cancelreschedulelink", 
"params", "icslink", "type"), row.names = c(NA, 6L), class = "data.frame")
like image 200
Nick5a1 Avatar asked Oct 17 '22 10:10

Nick5a1


1 Answer

The most straightforward way is probably to just set the names yourself. Here, I am using lapply to move through each column of url_variables, setting the names directly, then binding the results:

# Prefix each nested data frame's columns with the name of that data frame
# (giving e.g. p_key, p_value, p_type), then bind all of them column-wise.
flat <- bind_cols(
  lapply(names(url_variables), function(var_name) {
    nested <- url_variables[[var_name]]
    names(nested) <- paste0(var_name, "_", names(nested))
    nested
  })
)

To see part of the result, flat[ , 1:6] gives:

  _privatedomain_key _privatedomain_value _privatedomain_type p_key p_value p_type
1     _privatedomain                    t                 url     p       2    url
2     _privatedomain                    t                 url  <NA>    <NA>   <NA>
3     _privatedomain                    t                 url  <NA>    <NA>   <NA>
4     _privatedomain                    t                 url     p       2    url
5     _privatedomain                    t                 url     p       2    url
6     _privatedomain                    t                 url     p       2    url
like image 73
Mark Peterson Avatar answered Oct 21 '22 05:10

Mark Peterson