Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

A shared legend for z-scores and corresponding p-values in a heatmap

I have a z-scores matrix:

set.seed(1)
z.score.mat <- matrix(rnorm(1000),nrow=100,ncol=10)

which are the result of some biological experimental data, and a corresponding p-value matrix:

p.val.mat <- pnorm(abs(z.score.mat),lower.tail = F)

Both have identical dimnames:

rownames(z.score.mat) <- paste("p",1:100,sep="")
colnames(z.score.mat) <- paste("c",1:10,sep="")
rownames(p.val.mat) <- paste("p",1:100,sep="")
colnames(p.val.mat) <- paste("c",1:10,sep="")

I'm plotting a hierarchically clustered heatmap of the z-scores like this:

hc.col <- hclust(dist(z.score.mat))
dd.col <- as.dendrogram(hc.col)
col.ord <- order.dendrogram(dd.col)
hc.row <- hclust(dist(t(z.score.mat)))
dd.row <- as.dendrogram(hc.row)
row.ord <- order.dendrogram(dd.row)
clustered.mat <- z.score.mat[col.ord,row.ord]
clustered.mat.names <- attr(clustered.mat,"dimnames")
clustered.mat.df <- as.data.frame(clustered.mat)
colnames(clustered.mat.df) <- clustered.mat.names[[2]]
clustered.mat.df[,"process"] <- clustered.mat.names[[1]]
clustered.mat.df[,"process"] <- with(clustered.mat.df,factor(clustered.mat.df[,"process"],levels=clustered.mat.df[,"process"],ordered=TRUE))

require(reshape2)
clustered.mat.df <- reshape2::melt(clustered.mat.df,id.vars="process")
colnames(clustered.mat.df)[2:3] <- c("condition","z.score")
clustered.mat.df$p.value <- sapply(1:nrow(clustered.mat.df),function(x) p.val.mat[which(rownames(p.val.mat) == clustered.mat.df$process[x]),which(colnames(p.val.mat) == clustered.mat.df$condition[x])])
lab.legend <- colnames(clustered.mat.df)[3]
lab.row <- colnames(clustered.mat.df)[1]
lab.col <- colnames(clustered.mat.df)[2]

require(ggplot2)
ggplot(clustered.mat.df,aes(x=condition,y=process))+
  geom_tile(aes(fill=z.score))+
  scale_fill_gradient2(lab.legend,high="darkred",low="darkblue")+
  theme_bw()+
  theme(legend.key=element_blank(),
        legend.position="right",
        panel.border=element_blank(),
        strip.background=element_blank(),
        axis.text.x=element_text(angle=45,vjust=0.5)
  )

enter image description here

My question is if it is possible, and how, to have on one side of the legend bar the z-score range (which is currently on the right hand) and on the other side the corresponding p-value range?

like image 407
dan Avatar asked Dec 30 '16 20:12

dan


2 Answers

This is quite fiddly when the plot dimensions change, but you do get the required result:

br <- seq(-3, 3, 1)
lab <- round(pnorm(abs(br),lower.tail = F), 3)

p <- ggplot(clustered.mat.df,aes(x=condition,y=process))+
  geom_tile(aes(fill=z.score), show.legend = FALSE)+
  scale_fill_gradient2(lab.legend, high="darkred", low="darkblue", breaks = br)

p1 <- ggplot(clustered.mat.df,aes(x=condition,y=process))+
  geom_tile(aes(fill=z.score))+
  scale_fill_gradient2(lab.legend, high="darkred", low="darkblue", breaks = br) +
  guides(fill = guide_colorbar(title = '', label.position = 'right', barheight = 10))

p2 <- ggplot(clustered.mat.df,aes(x=condition,y=process))+
  geom_tile(aes(fill=z.score))+
  scale_fill_gradient2(lab.legend, high="darkred", low="darkblue", breaks = br, labels = lab) +
  guides(fill = guide_colorbar('', label.position = 'left', barheight = 10))

library(cowplot)
l1 <- get_legend(p1)
l2 <- get_legend(p2)

ggdraw() + 
  draw_plot(p, width = 0.85) +
  draw_grob(l1, 0.89, 0, 0.1, 1) +
  draw_grob(l2, 0.85, 0, 0.1, 1) +
  draw_label('p         z', 0.88, 0.675, hjust = 0)

enter image description here

like image 154
Axeman Avatar answered Oct 04 '22 02:10

Axeman


This approach uses gtable and grid functions. It takes the legend from your plot, edits the legend so that the p values appear on the left side, then puts the edited legend back into the plot.

# Your data
set.seed(1)
z.score.mat <- matrix(rnorm(1000),nrow=100,ncol=10)
# which are the result of some biological experimental data, and a corresponding p-value matrix:

p.val.mat <- pnorm(abs(z.score.mat),lower.tail = F)

rownames(z.score.mat) <- paste("p",1:100,sep="")
colnames(z.score.mat) <- paste("c",1:10,sep="")
rownames(p.val.mat) <- paste("p",1:100,sep="")
colnames(p.val.mat) <- paste("c",1:10,sep="")

hc.col <- hclust(dist(z.score.mat))
dd.col <- as.dendrogram(hc.col)
col.ord <- order.dendrogram(dd.col)
hc.row <- hclust(dist(t(z.score.mat)))
dd.row <- as.dendrogram(hc.row)
row.ord <- order.dendrogram(dd.row)
clustered.mat <- z.score.mat[col.ord,row.ord]
clustered.mat.names <- attr(clustered.mat,"dimnames")
clustered.mat.df <- as.data.frame(clustered.mat)
colnames(clustered.mat.df) <- clustered.mat.names[[2]]
clustered.mat.df[,"process"] <- clustered.mat.names[[1]]
clustered.mat.df[,"process"] <- with(clustered.mat.df,factor(clustered.mat.df[,"process"],levels=clustered.mat.df[,"process"],ordered=TRUE))

require(reshape2)
clustered.mat.df <- reshape2::melt(clustered.mat.df,id.vars="process")
colnames(clustered.mat.df)[2:3] <- c("condition","z.score")
clustered.mat.df$p.value <- sapply(1:nrow(clustered.mat.df),function(x) p.val.mat[which(rownames(p.val.mat) == clustered.mat.df$process[x]),which(colnames(p.val.mat) == clustered.mat.df$condition[x])])
lab.legend <- colnames(clustered.mat.df)[3]
lab.row <- colnames(clustered.mat.df)[1]
lab.col <- colnames(clustered.mat.df)[2]

# Your plot
require(ggplot2)
p = ggplot(clustered.mat.df,aes(x=condition,y=process))+
  geom_tile(aes(fill=z.score))+
  scale_fill_gradient2(lab.legend,high="darkred",low="darkblue") +
  theme_bw()+
  theme(legend.key=element_blank(),
        legend.position="right",
        panel.border=element_blank(),
        strip.background=element_blank(),
        axis.text.x=element_text(angle=45,vjust=0.5))


library(gtable)
library(grid)
# Get the ggplot grob
g = ggplotGrob(p)

# Get the legend
index = which(g$layout$name == "guide-box")
leg = g$grobs[[index]] 

# Get the legend labels 
# and calculate corresponding p values
z.breaks = as.numeric(leg$grobs[[1]]$grobs[[3]]$label)
p.breaks = as.character(round(pnorm(abs(z.breaks), lower.tail = F), 3))

# Get the width of the longest p.break string, taking account of font and font size
w = lapply(na.omit(p.breaks), function(x) grobWidth(textGrob(x, 
             gp = gpar(fontsize = leg$grobs[[1]]$grobs[[3]]$gp$fontsize,
                       fontfamily = leg$grobs[[1]]$grobs[[3]]$gp$fontfamily))))
w = do.call(unit.pmax, w)
w = convertX(w, "mm")

# Add columns to the legend gtable to take p.breaks, 
# setting the width of relevant column to w
leg$grobs[[1]] = gtable_add_cols(leg$grobs[[1]], leg$grobs[[1]]$widths[3], 1)
leg$grobs[[1]] = gtable_add_cols(leg$grobs[[1]], w, 1)

# Construct grob containing p.breaks
# Begin with the z.score grob, then make relevant changes
p.values = leg$grobs[[1]]$grobs[[3]]
p.values[c("label", "x", "hjust")] = list(p.breaks, unit(1, "npc"), 1)

# Put the p.values grob into the legend gtable
leg$grobs[[1]] = gtable_add_grob(leg$grobs[[1]], p.values, t=4, l=2, 
                        name = "p.values", clip = "off")

# Put 'p' and 'z' labels into the legend gtable
leg$grobs[[1]] = gtable_add_grob(leg$grobs[[1]], list(textGrob("p"), textGrob("z")), 
                        t=2, l=c(2,6), clip = "off")

# Drop the current legend title
leg$grobs[[1]]$grobs[[4]] = nullGrob()

# Put the legend back into the plot,
# and make sure the relevant column is wide enough to take the new legend
g$grobs[[index]] = leg
g$widths[8] = g$widths[8] + sum(leg$grobs[[1]]$widths[2:3])

# Draw the plot
grid.newpage()
grid.draw(g)

enter image description here

like image 37
Sandy Muspratt Avatar answered Oct 04 '22 02:10

Sandy Muspratt