I have a data frame whose 1st column (`weights`) contains a list (of data frames?):
> head(data$weights)
# dput() representation of the example object: a named list with one element
# per sample (A373R11, A373R13, A373R3, A373R5, A373R9, A512R19). Each element
# is a one-row data.frame (row name = sample ID) with 27 numeric columns,
# Signature.1A ... Signature.U2, holding that sample's score per signature.
> data <- structure(list(A373R11 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.151631702143023, Signature.4 = 0.149799882118262,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0.0634912587993959,
Signature.8 = 0, Signature.9 = 0.173189155080817, Signature.10 = 0,
Signature.11 = 0, Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0, Signature.18 = 0,
Signature.19 = 0, Signature.20 = 0, Signature.21 = 0.0905517653558877,
Signature.R1 = 0, Signature.R2 = 0, Signature.R3 = 0, Signature.U1 = 0.155590748898003,
Signature.U2 = 0.145955461287919), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.R1", "Signature.R2", "Signature.R3", "Signature.U1",
"Signature.U2"), row.names = "A373R11", class = "data.frame"),
A373R13 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.221014874027829, Signature.4 = 0,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0.279252211893692,
Signature.9 = 0, Signature.10 = 0, Signature.11 = 0,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0.115216422668955, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.0636987713225648, Signature.U1 = 0.108875099907467,
Signature.U2 = 0), .Names = c("Signature.1A", "Signature.1B",
"Signature.2", "Signature.3", "Signature.4", "Signature.5",
"Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.R1", "Signature.R2", "Signature.R3", "Signature.U1",
"Signature.U2"), row.names = "A373R13", class = "data.frame"),
A373R3 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.0795605471131758, Signature.4 = 0.0973130562439999,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0.249674548796242,
Signature.9 = 0.0725013504411567, Signature.10 = 0, Signature.11 = 0.064665155855146,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.0703546703126821, Signature.U1 = 0.21753544296676,
Signature.U2 = 0.0739201832004727), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R3", class = "data.frame"),
A373R5 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.113996509522102, Signature.4 = 0.114874220936966,
Signature.5 = 0.142056872670519, Signature.6 = 0, Signature.7 = 0,
Signature.8 = 0.208376707959741, Signature.9 = 0.0744527503782136,
Signature.10 = 0, Signature.11 = 0, Signature.12 = 0,
Signature.13 = 0, Signature.14 = 0, Signature.15 = 0.0771902641012979,
Signature.16 = 0, Signature.17 = 0, Signature.18 = 0,
Signature.19 = 0, Signature.20 = 0, Signature.21 = 0,
Signature.R1 = 0, Signature.R2 = 0, Signature.R3 = 0,
Signature.U1 = 0.0673567355607731, Signature.U2 = 0), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R5", class = "data.frame"),
A373R9 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.116847300193985, Signature.4 = 0,
Signature.5 = 0.21624751052703, Signature.6 = 0, Signature.7 = 0,
Signature.8 = 0.252498230882402, Signature.9 = 0, Signature.10 = 0,
Signature.11 = 0.119495912880994, Signature.12 = 0, Signature.13 = 0,
Signature.14 = 0, Signature.15 = 0, Signature.16 = 0,
Signature.17 = 0, Signature.18 = 0, Signature.19 = 0,
Signature.20 = 0, Signature.21 = 0, Signature.R1 = 0,
Signature.R2 = 0, Signature.R3 = 0.0725549911220892,
Signature.U1 = 0, Signature.U2 = 0), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R9", class = "data.frame"),
A512R19 = structure(list(Signature.1A = 0.109490572493859,
Signature.1B = 0, Signature.2 = 0, Signature.3 = 0, Signature.4 = 0.22010156823306,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0,
Signature.9 = 0, Signature.10 = 0, Signature.11 = 0,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.150943894106973, Signature.U1 = 0.248556502648564,
Signature.U2 = 0.119306892617062), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A512R19", class = "data.frame")), .Names = c("A373R11",
"A373R13", "A373R3", "A373R5", "A373R9", "A512R19"))
Here, each row contains a sample, and each column contains a score for a particular signature:
> data[1]
$A373R11
Signature.1A Signature.1B Signature.2 Signature.3 Signature.4 Signature.5 Signature.6 Signature.7 Signature.8 Signature.9 Signature.10 Signature.11
A373R11 0 0 0 0.1516317 0.1497999 0 0 0.06349126 0 0.1731892 0 0
Signature.12 Signature.13 Signature.14 Signature.15 Signature.16 Signature.17 Signature.18 Signature.19 Signature.20 Signature.21 Signature.R1 Signature.R2
A373R11 0 0 0 0 0 0 0 0 0 0.09055177 0 0
Signature.R3 Signature.U1 Signature.U2
A373R11 0 0.1555907 0.1459555
I would like to transfer this into a dataframe with the following structure:
sample signature score
A373R11 Signature.1A 0
A373R11 Signature.1B 0
[...]
A373R13 Signature.1A 0
A373R13 Signature.1B 0
[...]
Can anyone point me in the right direction?
Convert multiple columns into a single column: to combine several data frame columns into one column, use the `unite()` function from the tidyr package.
Two approaches:
1) with the data.table-package
Using:
library(data.table)
# rbindlist() stacks the list of per-sample data frames into one table and
# stores each list element's name in a new `sample` column (idcol).
# melt() then reshapes wide -> long: every non-id column becomes one row per
# sample, with the column name in `signature` and its value in `score`.
# `id.vars` is spelled out in full rather than relying on partial argument
# matching of `id`.
melt(rbindlist(data, idcol = "sample"),
     id.vars = "sample", variable.name = "signature", value.name = "score")
gives:
      sample    signature      score
  1: A373R11 Signature.1A 0.00000000
  2: A373R13 Signature.1A 0.00000000
  3:  A373R3 Signature.1A 0.00000000
  4:  A373R5 Signature.1A 0.00000000
  5:  A373R9 Signature.1A 0.00000000
 ---
158: A373R13 Signature.U2 0.00000000
159:  A373R3 Signature.U2 0.07392018
160:  A373R5 Signature.U2 0.00000000
161:  A373R9 Signature.U2 0.00000000
162: A512R19 Signature.U2 0.11930689
2) with base R
Using:
# Stack the per-sample one-row data frames held in the list `data` into a
# single wide data frame; the sample IDs end up as its row names.
dat2 <- do.call(rbind, data)
# Reshape wide -> long with base R:
#   - all columns are stacked into a single `score` column (v.names),
#   - the originating column name is recorded in `signature` (timevar/times),
#   - the row names of `dat2` are recorded in `sample` (idvar/ids).
# seq_len() is used instead of 1:ncol() so a zero-column input cannot produce
# the bogus index vector c(1, 0).
reshape(dat2, idvar = "sample", ids = row.names(dat2),
        varying = list(seq_len(ncol(dat2))), times = colnames(dat2),
        timevar = "signature", v.names = "score",
        new.row.names = NULL, direction = "long")
gives:
                        signature      score  sample
A373R11.Signature.1A Signature.1A 0.00000000 A373R11
A373R13.Signature.1A Signature.1A 0.00000000 A373R13
A373R3.Signature.1A  Signature.1A 0.00000000  A373R3
A373R5.Signature.1A  Signature.1A 0.00000000  A373R5
A373R9.Signature.1A  Signature.1A 0.00000000  A373R9
.....
A373R13.Signature.U2 Signature.U2 0.00000000 A373R13
A373R3.Signature.U2  Signature.U2 0.07392018  A373R3
A373R5.Signature.U2  Signature.U2 0.00000000  A373R5
A373R9.Signature.U2  Signature.U2 0.00000000  A373R9
A512R19.Signature.U2 Signature.U2 0.11930689 A512R19
NOTE:
It is better not to give your data the same name as a function. See `?data`.
A `tidyverse` solution, where we first join all the data.frames together, then use `gather` to reshape them as desired:
library(dplyr)
library(tidyr)
# Step 1: bind_rows() stacks the per-sample data frames and writes each list
#         element's name into a new `sample` column (.id).
# Step 2: gather() collapses every column except `sample` into key/value
#         pairs: the column name goes to `signature`, its value to `score`.
# Note: gather() is superseded by pivot_longer() in current tidyr but still
# works as shown here.
data %>%
  bind_rows(.id = "sample") %>%
  gather(key = signature, value = score, -sample)
Gives:
    sample    signature      score
1  A373R11 Signature.1A 0.00000000
2  A373R13 Signature.1A 0.00000000
3   A373R3 Signature.1A 0.00000000
4   A373R5 Signature.1A 0.00000000
5   A373R9 Signature.1A 0.00000000
6  A512R19 Signature.1A 0.10949057
7  A373R11 Signature.1B 0.00000000
8  A373R13 Signature.1B 0.00000000
9   A373R3 Signature.1B 0.00000000
10  A373R5 Signature.1B 0.00000000
....
Can be written as a one-liner without pipes as:
# Same reshape as a single nested call: stack with bind_rows(), then gather().
gather(bind_rows(data, .id = "sample"), key = signature, value = score, -sample)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With