The merge data frame inbuilt function contains an option to change the suffix applied when the two data frames you merge have a shared column name:
## S3 method for class 'data.frame'
merge(x, y, by = intersect(names(x), names(y)),
by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all,
sort = TRUE, suffixes = c(".x",".y"),
incomparables = NULL, ...)
Is there an option to instead have a prefix (e.g. = c("x.","y.")
) applied?
Since merge
doesn't have this option (as of 22-Feb 2016), we can just let merge
do its thing and then clean up ex-post by running:
names(df_new) <- gsub("(.*).([xy])$", "\\2.\\1", names(df_new))
I always hate writing these sort-of distracting lines of code in my scripts, but such is life.
Make a custom merge function.
#dummy data
df1 <- head(mtcars[,1:4])
df2 <- tail(mtcars[,1:6])
#base merge
merge(df1,df2, by="cyl")
# cyl mpg.x disp.x hp.x mpg.y disp.y hp.y drat wt
# 1 4 22.8 108 93 26.0 120.3 91 4.43 2.140
# 2 4 22.8 108 93 30.4 95.1 113 3.77 1.513
# 3 4 22.8 108 93 21.4 121.0 109 4.11 2.780
# 4 6 21.0 160 110 19.7 145.0 175 3.62 2.770
# 5 6 21.0 160 110 19.7 145.0 175 3.62 2.770
# 6 6 21.4 258 110 19.7 145.0 175 3.62 2.770
# 7 6 18.1 225 105 19.7 145.0 175 3.62 2.770
# 8 8 18.7 360 175 15.8 351.0 264 4.22 3.170
# 9 8 18.7 360 175 15.0 301.0 335 3.54 3.570
#custom merge
myMerge(df1,df2, by="cyl")
# cyl x.mpg x.disp x.hp y.mpg y.disp y.hp drat wt
# 1 4 22.8 108 93 26.0 120.3 91 4.43 2.140
# 2 4 22.8 108 93 30.4 95.1 113 3.77 1.513
# 3 4 22.8 108 93 21.4 121.0 109 4.11 2.780
# 4 6 21.0 160 110 19.7 145.0 175 3.62 2.770
# 5 6 21.0 160 110 19.7 145.0 175 3.62 2.770
# 6 6 21.4 258 110 19.7 145.0 175 3.62 2.770
# 7 6 18.1 225 105 19.7 145.0 175 3.62 2.770
# 8 8 18.7 360 175 15.8 351.0 264 4.22 3.170
# 9 8 18.7 360 175 15.0 301.0 335 3.54 3.570
Custom myMerge function:
#custom myMerge function - modified from "base::merge.data.frame"
myMerge <- function (x, y, by = intersect(names(x), names(y)), by.x = by,
by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
prefix = c("x.", "y."), incomparables = NULL, ...)
{
fix.by <- function(by, df) {
if (is.null(by))
by <- numeric()
by <- as.vector(by)
nc <- ncol(df)
if (is.character(by)) {
poss <- c("row.names", names(df))
if (any(bad <- !charmatch(by, poss, 0L)))
stop(ngettext(sum(bad), "'by' must specify a uniquely valid column",
"'by' must specify uniquely valid columns"),
domain = NA)
by <- match(by, poss) - 1L
}
else if (is.numeric(by)) {
if (any(by < 0L) || any(by > nc))
stop("'by' must match numbers of columns")
}
else if (is.logical(by)) {
if (length(by) != nc)
stop("'by' must match number of columns")
by <- seq_along(by)[by]
}
else stop("'by' must specify one or more columns as numbers, names or logical")
if (any(bad <- is.na(by)))
stop(ngettext(sum(bad), "'by' must specify a uniquely valid column",
"'by' must specify uniquely valid columns"),
domain = NA)
unique(by)
}
nx <- nrow(x <- as.data.frame(x))
ny <- nrow(y <- as.data.frame(y))
if (nx >= 2^31 || ny >= 2^31)
stop("long vectors are not supported")
by.x <- fix.by(by.x, x)
by.y <- fix.by(by.y, y)
if ((l.b <- length(by.x)) != length(by.y))
stop("'by.x' and 'by.y' specify different numbers of columns")
if (l.b == 0L) {
nm <- nm.x <- names(x)
nm.y <- names(y)
has.common.nms <- any(cnm <- nm.x %in% nm.y)
if (has.common.nms) {
names(x)[cnm] <- paste0(prefix[1L], nm.x[cnm])
cnm <- nm.y %in% nm
names(y)[cnm] <- paste0(prefix[1L], nm.y[cnm])
}
if (nx == 0L || ny == 0L) {
res <- cbind(x[FALSE, ], y[FALSE, ])
}
else {
ij <- expand.grid(seq_len(nx), seq_len(ny))
res <- cbind(x[ij[, 1L], , drop = FALSE], y[ij[,
2L], , drop = FALSE])
}
}
else {
if (any(by.x == 0L)) {
x <- cbind(Row.names = I(row.names(x)), x)
by.x <- by.x + 1L
}
if (any(by.y == 0L)) {
y <- cbind(Row.names = I(row.names(y)), y)
by.y <- by.y + 1L
}
row.names(x) <- NULL
row.names(y) <- NULL
if (l.b == 1L) {
bx <- x[, by.x]
if (is.factor(bx))
bx <- as.character(bx)
by <- y[, by.y]
if (is.factor(by))
by <- as.character(by)
}
else {
if (!is.null(incomparables))
stop("'incomparables' is supported only for merging on a single column")
bx <- x[, by.x, drop = FALSE]
by <- y[, by.y, drop = FALSE]
names(bx) <- names(by) <- paste0("V", seq_len(ncol(bx)))
bz <- do.call("paste", c(rbind(bx, by), sep = "\r"))
bx <- bz[seq_len(nx)]
by <- bz[nx + seq_len(ny)]
}
comm <- match(bx, by, 0L)
bxy <- bx[comm > 0L]
xinds <- match(bx, bxy, 0L, incomparables)
yinds <- match(by, bxy, 0L, incomparables)
if (nx > 0L && ny > 0L)
m <- .Internal(merge(xinds, yinds, all.x, all.y))
else m <- list(xi = integer(), yi = integer(), x.alone = seq_len(nx),
y.alone = seq_len(ny))
nm <- nm.x <- names(x)[-by.x]
nm.by <- names(x)[by.x]
nm.y <- names(y)[-by.y]
ncx <- ncol(x)
if (all.x)
all.x <- (nxx <- length(m$x.alone)) > 0L
if (all.y)
all.y <- (nyy <- length(m$y.alone)) > 0L
lxy <- length(m$xi)
has.common.nms <- any(cnm <- nm.x %in% nm.y)
if (has.common.nms && nzchar(prefix[1L]))
nm.x[cnm] <- paste0(prefix[1L], nm.x[cnm])
x <- x[c(m$xi, if (all.x) m$x.alone), c(by.x, seq_len(ncx)[-by.x]),
drop = FALSE]
names(x) <- c(nm.by, nm.x)
if (all.y) {
ya <- y[m$y.alone, by.y, drop = FALSE]
names(ya) <- nm.by
xa <- x[rep.int(NA_integer_, nyy), nm.x, drop = FALSE]
names(xa) <- nm.x
x <- rbind(x, cbind(ya, xa))
}
if (has.common.nms && nzchar(prefix[2L])) {
cnm <- nm.y %in% nm
nm.y[cnm] <- paste0(prefix[2L], nm.y[cnm])
}
y <- y[c(m$yi, if (all.x) rep.int(1L, nxx), if (all.y) m$y.alone),
-by.y, drop = FALSE]
if (all.x) {
zap <- (lxy + 1L):(lxy + nxx)
for (i in seq_along(y)) {
if (is.matrix(y[[1]]))
y[[1]][zap, ] <- NA
else is.na(y[[i]]) <- zap
}
}
if (has.common.nms)
names(y) <- nm.y
nm <- c(names(x), names(y))
if (any(d <- duplicated(nm)))
if (sum(d) > 1L)
warning("column names ", paste(sQuote(nm[d]),
collapse = ", "), " are duplicated in the result",
domain = NA)
else warning("column name ", sQuote(nm[d]), " is duplicated in the result",
domain = NA)
res <- cbind(x, y)
if (sort)
res <- res[if (all.x || all.y)
do.call("order", x[, seq_len(l.b), drop = FALSE])
else sort.list(bx[m$xi]), , drop = FALSE]
}
attr(res, "row.names") <- .set_row_names(nrow(res))
res
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With