I am trying to get something like this but unfortunately, I could not find any package that could enable me to plot stacked bar plot with dendrogram like the one shown below:
Does anyone know how to do it?
A first stab at an answer - but it would require more work to make it really work. Specifically the alignment of the location of elements (as well as their order) needs to be thought of more carefully.
# library
library(ggplot2)
# create a dataset
specie=c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition=rep(c("normal" , "stress" , "Nitrogen") , 4)
value=abs(rnorm(12 , 0 , 15))
data=data.frame(specie,condition,value)
dend <- as.dendrogram(hclust(dist(with(data, tapply(value, specie, mean)))))
data$specie <- factor(data$specie, levels = labels(dend))
# Stacked Percent
library(dendextend)
p1 <- ggplot(dend, horiz = T)
p2 <- ggplot(data, aes(fill=condition, y=value, x=specie)) +
geom_bar( stat="identity", position="fill") + coord_flip()
library(cowplot)
plot_grid(p1, p2, align = "h")
Almost three years later there is still no package capable of combining stacked bar plots with hierarchical clustering in ggplot (at least that I'm aware of). Here my solution based on that post joining a dendrogram and a heatmap:
library(tidyverse)
library(phangorn)
library(vegan)
library(ggdendro)
library(dendextend)
library(ggsci)
library(cowplot)
## generate example data ####
set.seed(500)
combined_matrix <- data.frame(a=runif(14, 0, 33), b=runif(14, 0, 33), c=runif(14, 0, 33))
combined_matrix$d <- 100 - combined_matrix$a - combined_matrix$b - combined_matrix$c
row.names(combined_matrix) <- paste0("s", seq(1,14))
# vegan::vegdist() to calculate Bray-Curtis distance matrix
dm <- vegdist(combined_matrix, method = "bray")
# calculate UPGMA tree with phangorn::upgma() and convert to dendrogram
dendUPGMA <- as.dendrogram(upgma(dm))
plot_dendro_bars_v <- function(df, dend, taxonomy) {
#convert dendrogram to segment data
dend_data <- dendro_data(dend, type="rectangle")
segment_data <- dend_data[["segments"]]
#sample positions df
sample_pos_table <- with(dend_data$labels,
data.frame(x_center = x, sample = as.character(label), width = 0.9))
#prepare input data
ptdf <- rownames_to_column(df, var = "sample") %>%
pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
group_by(sample) %>%
mutate(Frequency = Frequency/100,
ymax = cumsum(Frequency/sum(Frequency)),
ymin = ymax - Frequency/sum(Frequency),
y_center = ymax-(Frequency/2)) %>%
left_join(sample_pos_table) %>%
mutate(xmin = x_center-width/2,
xmax = x_center+width/2)
#plot stacked bars
axis_limits <- with(sample_pos_table,
c(min(x_center - 0.5 * width), max(x_center + 0.5 * width))) +
0.1 * c(-1, 1) # extra spacing: 0.1
plt_hbars <- ggplot(ptdf,
aes_string(x = "x_center", y = "y_center", fill = taxonomy, xmin = "xmin", xmax = "xmax",
height = "Frequency", width = "width")) +
geom_tile() +
geom_rect(ymin = 0, ymax = 1, color = "black", fill = "transparent") +
scale_fill_rickandmorty() +
scale_y_continuous(expand = c(0, 0)) +
# For the y axis, alternatively set the labels as: gene_position_table$gene
scale_x_continuous(breaks = sample_pos_table[, "x_center"],
labels = sample_pos_table$sample,
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "", y = "Frequency") +
theme_bw() +
theme(# margin: top, right, bottom, and left
plot.margin = unit(c(-0.9, 0.2, 1, 0.2), "cm"),
panel.grid.minor = element_blank())
#plot dendrogram
plt_dendr <- ggplot(segment_data) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
scale_y_continuous(expand = c(0, 0.05)) +
scale_x_continuous(breaks = sample_pos_table$x_center,
labels = rep("", nrow(sample_pos_table)),
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "", y = "Distance", colour = "", size = "") +
theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
#combine plots
comb <- plot_grid(plt_dendr, plt_hbars, align = 'v', ncol = 1, axis = "lr", rel_heights = c(0.3, 1))
comb
}
plot_dendro_bars_v(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")
vertical
or horizontal
plot_dendro_bars_h <- function(df, dend, taxonomy) {
#convert dendrogram to segemnt data
dend_data <- dendro_data(dend, type="rectangle")
segment_data <- with(segment(dend_data),
data.frame(x = y, y = x, xend = yend, yend = xend))
#sample positions df
sample_pos_table <- with(dend_data$labels,
data.frame(y_center = x, sample = as.character(label), height = 0.9))
#prepare input data
ptdf <- rownames_to_column(df, var = "sample") %>%
pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
group_by(sample) %>%
mutate(Frequency = Frequency/100,
xmax = cumsum(Frequency/sum(Frequency)),
xmin = xmax - Frequency/sum(Frequency),
x_center = xmax-(Frequency/2)) %>%
left_join(sample_pos_table) %>%
mutate(ymin = y_center-height/2,
ymax = y_center+height/2)
#plot stacked bars
axis_limits <- with(sample_pos_table,
c(min(y_center - 0.5 * height), max(y_center + 0.5 * height))) +
0.1 * c(-1, 1) # extra spacing: 0.1
plt_hbars <- ggplot(ptdf,
aes_string(x = "x_center", y = "y_center", fill = taxonomy, ymin = "ymin", ymax = "ymax",
height = "height", width = "Frequency")) +
geom_tile() +
geom_rect(xmin = 0, xmax = 1, color = "black", fill = "transparent") +
scale_fill_rickandmorty() +
scale_x_continuous(expand = c(0, 0)) +
# For the y axis, alternatively set the labels as: gene_position_table$gene
scale_y_continuous(breaks = sample_pos_table[, "y_center"],
labels = rep("", nrow(sample_pos_table)),
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "Frequency", y = "") +
theme_bw() +
theme(# margin: top, right, bottom, and left
plot.margin = unit(c(1, 0.2, 0.2, -0.9), "cm"),
panel.grid.minor = element_blank())
#plot dendrogram
plt_dendr <- ggplot(segment_data) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
scale_x_reverse(expand = c(0, 0.05)) +
scale_y_continuous(breaks = sample_pos_table$y_center,
labels = sample_pos_table$sample,
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "Distance", y = "", colour = "", size = "") +
theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
#combine plots
comb <- plot_grid(plt_dendr, plt_hbars, align = 'h', rel_widths = c(0.3, 1))
return(comb)
}
plot_dendro_bars_h(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")
horizontal
The data can be combined with any tree which can be coerced to a dendrogram (e.g. also UniFrac trees). Have fun with that, Roman.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With