Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Stacked bar plot with hierarchical clustering (dendrogram)

I am trying to get something like this but unfortunately, I could not find any package that could enable me to plot stacked bar plot with dendrogram like the one shown below:

dendrogram

Does anyone know how to do it?

like image 279
Lu Pan Avatar asked Jun 20 '17 07:06

Lu Pan


2 Answers

A first stab at an answer - but it would require more work to make it really work. Specifically the alignment of the location of elements (as well as their order) needs to be thought of more carefully.

# library
library(ggplot2)

# create a dataset
specie=c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition=rep(c("normal" , "stress" , "Nitrogen") , 4)
value=abs(rnorm(12 , 0 , 15))
data=data.frame(specie,condition,value)


dend <- as.dendrogram(hclust(dist(with(data, tapply(value, specie, mean)))))

data$specie <- factor(data$specie, levels = labels(dend))

# Stacked Percent
library(dendextend)
p1 <- ggplot(dend, horiz = T) 
p2 <- ggplot(data, aes(fill=condition, y=value, x=specie)) + 
    geom_bar( stat="identity", position="fill") + coord_flip()

library(cowplot)
plot_grid(p1, p2, align = "h")

enter image description here

like image 83
Tal Galili Avatar answered Sep 29 '22 16:09

Tal Galili


Almost three years later there is still no package capable of combining stacked bar plots with hierarchical clustering in ggplot (at least that I'm aware of). Here my solution based on that post joining a dendrogram and a heatmap:

library(tidyverse)
library(phangorn)
library(vegan)
library(ggdendro)
library(dendextend)
library(ggsci)
library(cowplot)

## generate example data ####
set.seed(500)
combined_matrix <- data.frame(a=runif(14, 0, 33), b=runif(14, 0, 33), c=runif(14, 0, 33))
combined_matrix$d <- 100 - combined_matrix$a - combined_matrix$b - combined_matrix$c
row.names(combined_matrix) <- paste0("s", seq(1,14))

# vegan::vegdist() to calculate Bray-Curtis distance matrix
dm <- vegdist(combined_matrix, method = "bray")
# calculate UPGMA tree with phangorn::upgma() and convert to dendrogram
dendUPGMA <- as.dendrogram(upgma(dm))
plot_dendro_bars_v <- function(df, dend, taxonomy) {
  #convert dendrogram to segment data
  dend_data <- dendro_data(dend, type="rectangle")
  segment_data <- dend_data[["segments"]]
  #sample positions df
  sample_pos_table <- with(dend_data$labels, 
                           data.frame(x_center = x, sample = as.character(label), width = 0.9))
  #prepare input data
  ptdf <- rownames_to_column(df, var = "sample") %>%
    pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
    group_by(sample) %>%
    mutate(Frequency = Frequency/100,
           ymax = cumsum(Frequency/sum(Frequency)),
           ymin = ymax - Frequency/sum(Frequency),
           y_center = ymax-(Frequency/2)) %>%
    left_join(sample_pos_table) %>%
    mutate(xmin = x_center-width/2,
           xmax = x_center+width/2)
  #plot stacked bars
  axis_limits <- with(sample_pos_table, 
                      c(min(x_center - 0.5 * width), max(x_center + 0.5 * width))) + 
    0.1 * c(-1, 1) # extra spacing: 0.1
  plt_hbars <- ggplot(ptdf, 
                      aes_string(x = "x_center", y = "y_center", fill = taxonomy, xmin = "xmin", xmax = "xmax",
                                 height = "Frequency", width = "width")) + 
    geom_tile() +
    geom_rect(ymin = 0, ymax = 1, color = "black", fill = "transparent") +
    scale_fill_rickandmorty() +
    scale_y_continuous(expand = c(0, 0)) + 
    # For the y axis, alternatively set the labels as: gene_position_table$gene
    scale_x_continuous(breaks = sample_pos_table[, "x_center"], 
                       labels = sample_pos_table$sample,
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "", y = "Frequency") +
    theme_bw() +
    theme(# margin: top, right, bottom, and left
      plot.margin = unit(c(-0.9, 0.2, 1, 0.2), "cm"), 
      panel.grid.minor = element_blank())
  #plot dendrogram
  plt_dendr <- ggplot(segment_data) + 
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
    scale_y_continuous(expand = c(0, 0.05)) + 
    scale_x_continuous(breaks = sample_pos_table$x_center, 
                       labels = rep("", nrow(sample_pos_table)), 
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "", y = "Distance", colour = "", size = "") +
    theme_bw() + 
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank())
  #combine plots
  comb <- plot_grid(plt_dendr, plt_hbars, align = 'v', ncol = 1, axis = "lr", rel_heights = c(0.3, 1))
  comb
}
plot_dendro_bars_v(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")

vertical

or horizontal

  plot_dendro_bars_h <- function(df, dend, taxonomy) {
  #convert dendrogram to segemnt data
  dend_data <- dendro_data(dend, type="rectangle")
  segment_data <- with(segment(dend_data), 
                       data.frame(x = y, y = x, xend = yend, yend = xend))
  #sample positions df
  sample_pos_table <- with(dend_data$labels, 
                           data.frame(y_center = x, sample = as.character(label), height = 0.9))
  #prepare input data
  ptdf <- rownames_to_column(df, var = "sample") %>%
    pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
    group_by(sample) %>%
    mutate(Frequency = Frequency/100,
           xmax = cumsum(Frequency/sum(Frequency)),
           xmin = xmax - Frequency/sum(Frequency),
           x_center = xmax-(Frequency/2)) %>%
    left_join(sample_pos_table) %>%
    mutate(ymin = y_center-height/2,
           ymax = y_center+height/2)
  #plot stacked bars
  axis_limits <- with(sample_pos_table, 
                      c(min(y_center - 0.5 * height), max(y_center + 0.5 * height))) + 
    0.1 * c(-1, 1) # extra spacing: 0.1
  plt_hbars <- ggplot(ptdf, 
                      aes_string(x = "x_center", y = "y_center", fill = taxonomy, ymin = "ymin", ymax = "ymax",
                                 height = "height", width = "Frequency")) + 
    geom_tile() +
    geom_rect(xmin = 0, xmax = 1, color = "black", fill = "transparent") +
    scale_fill_rickandmorty() +
    scale_x_continuous(expand = c(0, 0)) + 
    # For the y axis, alternatively set the labels as: gene_position_table$gene
    scale_y_continuous(breaks = sample_pos_table[, "y_center"], 
                       labels = rep("", nrow(sample_pos_table)),
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "Frequency", y = "") +
    theme_bw() +
    theme(# margin: top, right, bottom, and left
      plot.margin = unit(c(1, 0.2, 0.2, -0.9), "cm"), 
      panel.grid.minor = element_blank())
  #plot dendrogram
  plt_dendr <- ggplot(segment_data) + 
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
    scale_x_reverse(expand = c(0, 0.05)) + 
    scale_y_continuous(breaks = sample_pos_table$y_center, 
                       labels = sample_pos_table$sample, 
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "Distance", y = "", colour = "", size = "") +
    theme_bw() + 
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank())
  #combine plots
  comb <- plot_grid(plt_dendr, plt_hbars, align = 'h', rel_widths = c(0.3, 1))
  return(comb)
}
plot_dendro_bars_h(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")

horizontal

The data can be combined with any tree which can be coerced to a dendrogram (e.g. also UniFrac trees). Have fun with that, Roman.

like image 38
Roman_G Avatar answered Sep 29 '22 15:09

Roman_G