This question is related to How to change boxplot settings when stat_summary is used, where I managed to build nice unicolor boxplots.
However, due to the "unicolor", the median segment's colour cannot be distinguished from the rest of the box. I've managed to add a black point for the median, but I prefer to add a segment instead. Here is the code:
# Data
xdf2 <- data.frame(month = rep(1:6, each = 100),
grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n = nrow(xdf2),10)
# Definition of whiskers
f <- function(x) {
r <- quantile(x, probs = c(0.10, 0.25, 0.5, 0.75, 0.90))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
# Add points outside of whiskers
o <- function(x) {
subset(x, x < quantile(x, probs=0.1) | quantile(x, probs=0.9) < x)
}
# Plot
ggplot(data = xdf2, aes(factor(month), m, colour = grp, fill = grp)) +
stat_summary(fun.data = f, geom="boxplot",
position = position_dodge(width=1), size = 2) +
stat_summary(fun.y = o, geom="point",
position = position_dodge(width = 1)) +
scale_color_manual(values = c("indianred","orange"), labels = c("AAA", "BBB")) +
scale_fill_manual(values = c("indianred", "orange"), labels = c("AAA", "BBB")) +
theme_bw() +
stat_summary(fun.y = "median", geom = "point",
position = position_dodge(width = 1), col = "black", size = 4)
And here is the graph:
I would like to add a segment by building a function which compute the parameters for geom="segment"
:
# Add function to compute segment parameters
s <- function(x,y,z) {
x2 <- x - z
y2 <- median(y)
x2end <- x + z
y2end <- median(y)
# assemble the named output
out <- c(x = x2, y = y2, xend = x2end, yend = y2end)
names(out) <- c("x","y","xend","yend")
out
}
and replace the geom="point" part with:
stat_summary(fun.y = s(month, m, 0.3), geom = "segment",
position = position_dodge(width = 1), col="black")
What I get is:
Error in s(month, m, 0.3) (from #2) : object 'month' not found
If I could better understand the logic of stat_summary
, I could solve this problem. But I find it's not easy. If somebody could help me to solve this problem with stat_summary
and geom = "segment"
, I would be very glad and maybe I will understand better the logic behind.
I would also appreciate alternative solutions for adding a horizontal line to mark the median.
There are so many pieces to a boxplot that it's probably worth the effort to change the underlying ggproto object, rather than recreate outliers / whiskers / boxes / median segments piece by piece, & hope they still stack well together.
Here's the result:
# Data
set.seed(123)
xdf2 <- data.frame(month = rep(1:6,each=100), grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n=nrow(xdf2),10)
p <- ggplot(data = xdf2,
aes(factor(month), m, colour = grp, fill = grp)) +
scale_color_manual(values = c("A" = "indianred", "B" = "orange"),
labels = c("A" = "AAA", "B" = "BBB"),
aesthetics = c("color", "fill")) +
theme_bw() +
theme(legend.position = "bottom")
p +
geom_boxplot2(position = position_dodge(width = 1), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black")
Here are some more variations with different aesthetic options:
library(dplyr)
cowplot::plot_grid(
p +
labs(subtitle = paste("quantiles = c(0.05, 0.3, 0.5, 0.7, 0.95)",
"median segment color = brown",
sep = "\n")) +
geom_boxplot2(position = position_dodge(width = 0.8), size = 2,
qs = c(0.05, 0.3, 0.5, 0.7, 0.95),
median.colour = "brown"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge2, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge2(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
nrow = 1
)
Code:
# define stat_boxplot2() based on stat_boxplot, but with boxplot quantiles (qs)
# added as a parameter (default values are same as original function), &
# stat = StatBoxplot2 instead of StatBoxplot
stat_boxplot2 <- function (
mapping = NULL, data = NULL, geom = "boxplot", position = "dodge2",
..., coef = 1.5, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
layer(data = data, mapping = mapping, stat = StatBoxplot2,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
coef = coef,
qs = qs, ...))
}
# define StatBoxplot2 based on StatBoxplot, with compute_group function tweaked
# to use qs passed from stat_boxplot2(), & outlier definition simplified to
# include all data points beyond the range of qs values
StatBoxplot2 <- ggproto(
"StatBoxplot2", StatBoxplot,
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
if (!is.null(data$weight)) {
mod <- quantreg::rq(y ~ 1, weights = weight, data = data,
tau = qs)
stats <- as.numeric(stats::coef(mod))
}
else {
stats <- as.numeric(stats::quantile(data$y, qs))
}
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
iqr <- diff(stats[c(2, 4)])
outliers <- data$y < stats[1] | data$y > stats[5] # change outlier definition
if (length(unique(data$x)) > 1)
width <- diff(range(data$x)) * 0.9
df <- as.data.frame(as.list(stats))
df$outliers <- list(data$y[outliers])
if (is.null(data$weight)) {
n <- sum(!is.na(data$y))
}
else {
n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
}
df$notchupper <- df$middle + 1.58 * iqr/sqrt(n)
df$notchlower <- df$middle - 1.58 * iqr/sqrt(n)
df$x <- if (is.factor(data$x))
data$x[1]
else mean(range(data$x))
df$width <- width
df$relvarwidth <- sqrt(n)
df
}
)
# define geom_boxplot2() based on geom_boxplot, using stat = "boxplot2" by
# default instead of "boxplot", with a new parameter median.colour, &
# geom = GeomBoxplot2 instead of GeomBoxplot
geom_boxplot2 <- function(mapping = NULL, data = NULL, stat = "boxplot2", position = "dodge2",
..., outlier.colour = NULL, outlier.color = NULL, outlier.fill = NULL,
outlier.shape = 19, outlier.size = 1.5, outlier.stroke = 0.5,
outlier.alpha = NULL, notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
median.colour = NULL, median.color = NULL) {
if (is.character(position)) {
if (varwidth == TRUE)
position <- position_dodge2(preserve = "single")
}
else {
if (identical(position$preserve, "total") & varwidth ==
TRUE) {
warning("Can't preserve total widths when varwidth = TRUE.",
call. = FALSE)
position$preserve <- "single"
}
}
layer(data = data, mapping = mapping, stat = stat, geom = GeomBoxplot2,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(outlier.colour = outlier.color %||% outlier.colour,
outlier.fill = outlier.fill, outlier.shape = outlier.shape,
outlier.size = outlier.size, outlier.stroke = outlier.stroke,
outlier.alpha = outlier.alpha, notch = notch, notchwidth = notchwidth,
varwidth = varwidth, na.rm = na.rm,
median.colour = median.color %||% median.colour, ...))
}
# define GeomBoxplot2 based on GeomBoxplot, with draw_group function & draw_key
# functions tweaked to use median.colour for the median segment's colour, if available
GeomBoxplot2 <- ggproto(
"GeomBoxplot2",
GeomBoxplot,
draw_group = function (data, panel_params, coord, fatten = 2, outlier.colour = NULL,
outlier.fill = NULL, outlier.shape = 19, outlier.size = 1.5,
outlier.stroke = 0.5, outlier.alpha = NULL, notch = FALSE,
notchwidth = 0.5, varwidth = FALSE, median.colour = NULL) {
common <- data.frame(colour = data$colour, size = data$size,
linetype = data$linetype, fill = alpha(data$fill, data$alpha),
group = data$group, stringsAsFactors = FALSE)
whiskers <- data.frame(x = data$x, xend = data$x,
y = c(data$upper, data$lower),
yend = c(data$ymax, data$ymin),
alpha = NA,
common, stringsAsFactors = FALSE)
box <- data.frame(xmin = data$xmin, xmax = data$xmax, ymin = data$lower,
y = data$middle, ymax = data$upper,
ynotchlower = ifelse(notch, data$notchlower, NA),
ynotchupper = ifelse(notch,
data$notchupper, NA),
notchwidth = notchwidth, alpha = data$alpha,
common, stringsAsFactors = FALSE)
if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
outliers <- data.frame(y = data$outliers[[1]], x = data$x[1],
colour = outlier.colour %||% data$colour[1], fill = outlier.fill %||%
data$fill[1], shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1], stroke = outlier.stroke %||%
data$stroke[1], fill = NA, alpha = outlier.alpha %||%
data$alpha[1], stringsAsFactors = FALSE)
outliers_grob <- GeomPoint$draw_panel(outliers, panel_params,
coord)
}
else {
outliers_grob <- NULL
}
if(is.null(median.colour)){
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord)))
} else {
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord),
GeomSegment$draw_panel(transform(box, x = xmin, xend = xmax, yend = y,
size = size, alpha = NA,
colour = median.colour),
panel_params,
coord)))
}
},
draw_key = function (data, params, size) {
if(is.null(params$median.colour)){
draw_key_boxplot(data, params, size)
} else {
grobTree(linesGrob(0.5, c(0.1, 0.25)),
linesGrob(0.5, c(0.75, 0.9)),
rectGrob(height = 0.5, width = 0.75),
linesGrob(c(0.125, 0.875), 0.5,
gp = gpar(col = params$median.colour)),
gp = gpar(col = data$colour,
fill = alpha(data$fill, data$alpha),
lwd = data$size * .pt,
lty = data$linetype))
}
}
)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With