I have a dataframe where I want to change the column names by matching to another dataframe.
Example dataframe with data and column names:
# Example expression data: one row per gene, one numeric column per sample.
df <- data.frame(
  Gene_Symbol = paste0("Gene", 1:7),
  Sample1 = c(85657.97656, 54417.78906, 110949.3281, 53197.45313,
              87156.80469, NA, 23880.2832),
  Sample2 = c(10423.40918, 41660.73047, 40094.54688, 49519.78125,
              129387.1094, NA, 23903.25977),
  Sample3 = c(18778.68359, 43655.79688, NA, 57447.08984,
              113266.1484, 44810.26172, 26316.6543),
  Sample4 = c(23919.53125, 47829.02344, NA, 51478.58203,
              116275.3359, 43110.94922, 25417.45508),
  Sample5 = c(NA, 46677.20313, 63389.45313, 48722.15234,
              NA, 77135.52344, 40265.6875),
  Sample6 = c(NA, 68596.22656, 56802.60938, 44712.64063,
              NA, 47744.17969, 33689.62891),
  Sample7 = c(NA, 80506.14844, 48722.99219, 38629.00781,
              NA, 37885, 36638.02344)
)
The data frame I want to use to replace the Sample names in df above with the corresponding Tumor names:
# Lookup table: each row pairs a current column name (Sample_name) with the
# name it should be replaced by (Tumor_name).
df2 <- data.frame(
  Sample_name = paste0("Sample", 1:7),
  Tumor_name = c(
    "Tumor56", "Tumor17", "Tumor99", "Tumor2", "Tumor34", "Tumor84", "Tumor51"
  )
)
I found a way in dplyr, see below, but it feels very elaborate. Is there an easier way?
library(tidyverse)
# Replace the Sample* column names of df with the Tumor names from df2 by
# transposing, joining on the sample name, and transposing back.
df %>%
  # Move Gene_Symbol into the row names so it is not transposed as data
  column_to_rownames("Gene_Symbol") %>%
  # Transpose so that samples become rows and can be joined
  t() %>%
  # t() returns a matrix - convert back to a data frame
  data.frame() %>%
  # Expose the sample names as a column to join on
  rownames_to_column("Sample_name") %>%
  # Attach the matching Tumor_name to every sample row
  left_join(df2, by = "Sample_name", copy = TRUE) %>%
  # Tumor names become the row names ahead of the second transpose
  column_to_rownames("Tumor_name") %>%
  # The old sample names are no longer needed
  select(-Sample_name) %>%
  # Transpose back: genes in rows, tumors in columns
  t() %>%
  # Matrix again after t() - convert back to a data frame
  data.frame() %>%
  # Restore the gene symbols as a regular column
  rownames_to_column("Gene_Symbol")
It would be nice to rename by matching, since I can foresee that I will need to do this for subsets of the column names. I looked at rename but could not get it to work. Also, when I transpose, I get a matrix; why is that?
Grateful for help, Henrik
Here is a tidyverse-friendly solution using the `!!!` splice operator.
library(tidyverse)
# original data set up from stack overflow -------------------------------------
# Expression values: one row per gene, one numeric column per sample.
df <- data.frame(
  Gene_Symbol = paste0("Gene", 1:7),
  Sample1 = c(85657.97656, 54417.78906, 110949.3281, 53197.45313,
              87156.80469, NA, 23880.2832),
  Sample2 = c(10423.40918, 41660.73047, 40094.54688, 49519.78125,
              129387.1094, NA, 23903.25977),
  Sample3 = c(18778.68359, 43655.79688, NA, 57447.08984,
              113266.1484, 44810.26172, 26316.6543),
  Sample4 = c(23919.53125, 47829.02344, NA, 51478.58203,
              116275.3359, 43110.94922, 25417.45508),
  Sample5 = c(NA, 46677.20313, 63389.45313, 48722.15234,
              NA, 77135.52344, 40265.6875),
  Sample6 = c(NA, 68596.22656, 56802.60938, 44712.64063,
              NA, 47744.17969, 33689.62891),
  Sample7 = c(NA, 80506.14844, 48722.99219, 38629.00781,
              NA, 37885, 36638.02344)
)
# Lookup table pairing each current column name with its replacement.
df2 <- data.frame(
  Sample_name = paste0("Sample", 1:7),
  Tumor_name = c(
    "Tumor56", "Tumor17", "Tumor99", "Tumor2", "Tumor34", "Tumor84", "Tumor51"
  )
)
# build the rename lookup as a named vector -------------------------------------
# names of the vector = new column names, values = current column names.
# Column order matters: deframe() uses the first selected column (Tumor_name)
# as the names and the second (Sample_name) as the values.
var_names <- deframe(select(df2, Tumor_name, Sample_name))
var_names
#> Tumor56 Tumor17 Tumor99 Tumor2 Tumor34 Tumor84 Tumor51
#> "Sample1" "Sample2" "Sample3" "Sample4" "Sample5" "Sample6" "Sample7"
# rename variables --------------------------------------------------------------
# Splice the lookup into rename() as new_name = "old_name" pairs.
# The lookup is first restricted to the old names that are actually columns of
# df: rename() stops with an error if asked to rename a column that does not
# exist, so this keeps the same lookup usable when df holds only a subset of
# the samples. With all samples present the result is unchanged.
df_updated <- df %>%
  rename(!!!var_names[var_names %in% names(df)])
df
#> Gene_Symbol Sample1 Sample2 Sample3 Sample4 Sample5 Sample6
#> 1 Gene1 85657.98 10423.41 18778.68 23919.53 NA NA
#> 2 Gene2 54417.79 41660.73 43655.80 47829.02 46677.20 68596.23
#> 3 Gene3 110949.33 40094.55 NA NA 63389.45 56802.61
#> 4 Gene4 53197.45 49519.78 57447.09 51478.58 48722.15 44712.64
#> 5 Gene5 87156.80 129387.11 113266.15 116275.34 NA NA
#> 6 Gene6 NA NA 44810.26 43110.95 77135.52 47744.18
#> 7 Gene7 23880.28 23903.26 26316.65 25417.46 40265.69 33689.63
#> Sample7
#> 1 NA
#> 2 80506.15
#> 3 48722.99
#> 4 38629.01
#> 5 NA
#> 6 37885.00
#> 7 36638.02
df_updated
#> Gene_Symbol Tumor56 Tumor17 Tumor99 Tumor2 Tumor34 Tumor84
#> 1 Gene1 85657.98 10423.41 18778.68 23919.53 NA NA
#> 2 Gene2 54417.79 41660.73 43655.80 47829.02 46677.20 68596.23
#> 3 Gene3 110949.33 40094.55 NA NA 63389.45 56802.61
#> 4 Gene4 53197.45 49519.78 57447.09 51478.58 48722.15 44712.64
#> 5 Gene5 87156.80 129387.11 113266.15 116275.34 NA NA
#> 6 Gene6 NA NA 44810.26 43110.95 77135.52 47744.18
#> 7 Gene7 23880.28 23903.26 26316.65 25417.46 40265.69 33689.63
#> Tumor51
#> 1 NA
#> 2 80506.15
#> 3 48722.99
#> 4 38629.01
#> 5 NA
#> 6 37885.00
#> 7 36638.02
Created on 2022-02-24 by the reprex package (v2.0.1)
We could use `match` in base R:
# Look up every column name of df in df2$Sample_name and rename only the
# columns that have a match. Unmatched columns (Gene_Symbol, or any sample
# that is missing from df2) keep their current name instead of being set to NA,
# and the position of Gene_Symbol no longer has to be hard-coded.
idx <- match(names(df), df2$Sample_name)
found <- !is.na(idx)
names(df)[found] <- as.character(df2$Tumor_name[idx[found]])
df
# Gene_Symbol Tumor56 Tumor17 Tumor99 Tumor2 Tumor34 Tumor84 Tumor51
#1 Gene1 85657.98 10423.41 18778.68 23919.53 NA NA NA
#2 Gene2 54417.79 41660.73 43655.80 47829.02 46677.20 68596.23 80506.15
#3 Gene3 110949.33 40094.55 NA NA 63389.45 56802.61 48722.99
#4 Gene4 53197.45 49519.78 57447.09 51478.58 48722.15 44712.64 38629.01
#5 Gene5 87156.80 129387.11 113266.15 116275.34 NA NA NA
#6 Gene6 NA NA 44810.26 43110.95 77135.52 47744.18 37885.00
#7 Gene7 23880.28 23903.26 26316.65 25417.46 40265.69 33689.63 36638.02
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With