I have the following dataset:
df <- data.frame(dens = rnorm(5000),
split = as.factor(sample(1:2, 5000, replace = T)),
method = as.factor(sample(c("A","B"), 5000, replace = T)),
counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))
I have the following split violin plots for splits 1 and 2 within groups A and B for each count. We have four groups for each setting but there is a nested aspect to it:
library(ggplot2)
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL){
## By @YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
dens <- cumsum(data$density)/sum(data$density)
ecdf <- stats::approxfun(dens, data$y)
ys <- ecdf(draw_quantiles)
violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
violin.xs <- (stats::approxfun(data$y, data$x))(ys)
if (grp %% 2 == 0) {
data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs),
y = rep(ys, each = 2), group = rep(ys, each = 2))
} else {
data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs),
y = rep(ys, each = 2), group = rep(ys, each = 2))
}
}
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
df$key <- factor(paste(df$split, df$method))
levels(df$split) <- factor(0:2)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE, name = "Counts")
And I get the following:
Which is great, except that I would like to only have labels of counts 1, 10, 100, 1000, 10000 on the x-axis and in between the blue and the green violin plots. So label 1 in between the first blue and the green violin plots, 10 in between the second blue and the green violin plots, 100 in between the second blue and the green violin plots and so on.
Thanks for any suggestions on how to do this.
I usually solve these issues with facets, then format the strips as though they are axis labels. This also naturally puts the pairs closer together, without any hacks, and you can change the distance by changing theme(panel.spacing = .....)
, if needed. E.g.:
ggplot(df, aes(x = split, y = dens, fill = key)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) +
xlab('count') +
facet_grid(~counts, scales = 'free_x', switch = 'x') +
theme_light() +
theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
strip.background = element_blank(), strip.text = element_text(color = 'black'))
Or a different theme with less obvious facets:
ggplot(df, aes(x = split, y = dens, fill = key)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) +
xlab('count') +
facet_grid(~counts, scales = 'free_x', switch = 'x') +
theme_minimal() +
theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank())
Instead of changing the break point for a discrete scale, you can try adding a text layer to the plot itself, which is able to accept non-integer values for discrete scale positions:
ggplot(df,
aes(x = x, y = dens, fill = key)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
# annotate layer with non-integer positions
annotate(geom = "text", x = c(1.5, 4.5, 7.5, 10.5, 13.5), y = -3.75,
label = c("1", "10", "100", "1000", "10000")) +
scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired", n=4)) +
scale_x_discrete(name = "Counts", drop = FALSE) +
theme_minimal() +
# hide the actual discrete labels / ticks
theme(legend.position="bottom",
axis.ticks.x = element_blank(),
axis.text.x = element_blank())
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With