I'm very new to data.table
but would like to solve my problem with it, as I have the feeling it would be 1000 times faster than with "regular" data.frames.
Here is my problem:
What I have:
2 data.tables dt1
and dt2
like so:
dt1 <- data.table(SID=paste0("S", 1:15), Chromo=rep(1:3, e=5), PP=rep(1:5, 3), P1=0, P2=0, P3=0)
set.seed(17)
dt2 <- data.table(PID=rep(paste0("P", 1:3), c(2, 6, 3)), Chr=c(1, 3, 1, 1, 2, 3, 3, 3, 2, 2, 3), start= c(1, 1, 1, 4, 2, 1, 2, 4, 2, 4, 2), end=c(3, 4, 2, 5, 4, 1, 3, 5, 3, 5, 5), val=rnorm(11))
What I want:
Fill dt1
with dt2[, val]
in the right column, based on dt2[, PID]
and the right lines, based on dt1[, Chromo]
=dt2[, Chr]
and dt1[, PP]
in between dt2[, start]
and dt2[, end]
.
What I do right now: (which doesn't make me proud, to say the least...)
# preparing the tables, computing dt1 rows indices
dt2[, numcol:=(1:ncol(dt1))[match(dt2[,PID], colnames(dt1))]]
setkey(dt2, Chr, start, end)
setkey(dt1, Chromo, PP)
ind_start <- dt1[dt2[,.(Chr, start)], which=T]
ind_end <- dt1[dt2[,.(Chr, end)], which=T]
dt2[,c("ind_start", "ind_end"):=list(ind_start, ind_end)]
# and feeling I'm that close but can't conclude with `data.table` so doing this "lame" `for` loop with `data.frames`.......................
df1 <- as.data.frame(dt1)
df2 <- as.data.frame(dt2)
nr_seg <- nrow(df2)
for(i in 1:nr_seg){
df1[df2[i,"ind_start"]:df2[i,"ind_end"], df2[i,"numcol"]] <- df2[i, "val"]
}
The input tables and desired output (except I'd like a data.table
):
dt1
# SID Chromo PP P1 P2 P3
# 1: S1 1 1 0 0 0
# 2: S2 1 2 0 0 0
# 3: S3 1 3 0 0 0
# 4: S4 1 4 0 0 0
# 5: S5 1 5 0 0 0
# 6: S6 2 1 0 0 0
# 7: S7 2 2 0 0 0
# 8: S8 2 3 0 0 0
# 9: S9 2 4 0 0 0
# 10: S10 2 5 0 0 0
# 11: S11 3 1 0 0 0
# 12: S12 3 2 0 0 0
# 13: S13 3 3 0 0 0
# 14: S14 3 4 0 0 0
# 15: S15 3 5 0 0 0
dt2
# PID Chr start end val
# 1: P2 1 1 2 -0.23298702
# 2: P1 1 1 3 -1.01500872
# 3: P2 1 4 5 -0.81726793
# 4: P3 2 2 3 0.25523700
# 5: P2 2 2 4 0.77209084
# 6: P3 2 4 5 0.36658112
# 7: P2 3 1 1 -0.16561194
# 8: P1 3 1 4 -0.07963674
# 9: P2 3 2 3 0.97287443
# 10: P3 3 2 5 1.18078924
# 11: P2 3 4 5 1.71653398
df1
# SID Chromo PP P1 P2 P3
# 1 S1 1 1 -1.01500872 -0.2329870 0.0000000
# 2 S2 1 2 -1.01500872 -0.2329870 0.0000000
# 3 S3 1 3 -1.01500872 0.0000000 0.0000000
# 4 S4 1 4 0.00000000 -0.8172679 0.0000000
# 5 S5 1 5 0.00000000 -0.8172679 0.0000000
# 6 S6 2 1 0.00000000 0.0000000 0.0000000
# 7 S7 2 2 0.00000000 0.7720908 0.2552370
# 8 S8 2 3 0.00000000 0.7720908 0.2552370
# 9 S9 2 4 0.00000000 0.7720908 0.3665811
# 10 S10 2 5 0.00000000 0.0000000 0.3665811
# 11 S11 3 1 -0.07963674 -0.1656119 0.0000000
# 12 S12 3 2 -0.07963674 0.9728744 1.1807892
# 13 S13 3 3 -0.07963674 0.9728744 1.1807892
# 14 S14 3 4 -0.07963674 1.7165340 1.1807892
# 15 S15 3 5 0.00000000 1.7165340 1.1807892
library(data.table)
dt1 <- data.table(SID=paste0("S", 1:15), Chromo=rep(1:3, e=5), PP=rep(1:5, 3), P1=0, P2=0, P3=0)
set.seed(17)
dt2 <- data.table(PID=rep(paste0("P", 1:3), c(2, 6, 3)), Chr=c(1, 3, 1, 1, 2, 3, 3, 3, 2, 2, 3), start= c(1, 1, 1, 4, 2, 1, 2, 4, 2, 4, 2), end=c(3, 4, 2, 5, 4, 1, 3, 5, 3, 5, 5), val=rnorm(11))
dt1[, PP1 := PP]
dt1[, c("P1", "P2", "P3") := NULL]
setkey(dt2, Chr, start, end)
setkey(dt1, Chromo, PP, PP1)
res <- foverlaps(dt1, dt2, type="within")
res[is.na(PID), PID := "P1"] #to ensure that dcast works if there is no match
res <- dcast.data.table(res, SID + Chromo + PP ~ PID, value.var = "val")
setkey(res, Chromo, PP)
# SID Chromo PP P1 P2 P3
# 1: S1 1 1 -1.01500872 -0.2329870 NA
# 2: S2 1 2 -1.01500872 -0.2329870 NA
# 3: S3 1 3 -1.01500872 NA NA
# 4: S4 1 4 NA -0.8172679 NA
# 5: S5 1 5 NA -0.8172679 NA
# 6: S6 2 1 NA NA NA
# 7: S7 2 2 NA 0.7720908 0.2552370
# 8: S8 2 3 NA 0.7720908 0.2552370
# 9: S9 2 4 NA 0.7720908 0.3665811
#10: S10 2 5 NA NA 0.3665811
#11: S11 3 1 -0.07963674 -0.1656119 NA
#12: S12 3 2 -0.07963674 0.9728744 1.1807892
#13: S13 3 3 -0.07963674 0.9728744 1.1807892
#14: S14 3 4 -0.07963674 1.7165340 1.1807892
#15: S15 3 5 NA 1.7165340 1.1807892
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With