Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

order and fill with 2 different variables geom_bar ggplot2 R

I have a question concerning the fill field in geom_bar of the ggplot2 package.

I would like to fill my geom_bar with one variable (called var_fill in the example below) but order the stacked segments of each bar by another variable (called clarity in the example).

How can I do that?

Thank you very much!

The example:

# Build the example data: one row per (cut, clarity) combination of the
# `diamonds` data set, with its frequency and a random fill category.
library(dplyr)
library(ggplot2)  # `diamonds` ships with ggplot2, so it must be attached here

set.seed(1)  # keeps the random `var_fill` assignment reproducible

data_ex <- diamonds %>%
  group_by(cut, clarity) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  # n() draws one letter per row instead of hard-coding the number of rows
  mutate(var_fill = LETTERS[sample.int(3, n(), replace = TRUE)])

head(data_ex)

# A tibble: 6 x 4
   cut  clarity count var_fill
  <ord>   <ord> <int>    <chr>
1  Fair      I1   210        A
2  Fair     SI2   466        B
3  Fair     SI1   408        B
4  Fair     VS2   261        C
5  Fair     VS1   170        A
6  Fair    VVS2    69        C

I would like this order of the boxes [clarity] :

library(ggplot2)

# Proportional stacked bars per cut; segments are split and coloured by clarity.
ggplot(data_ex, aes(x = cut, y = count, fill = clarity)) +
  geom_col(position = "fill", colour = "black")

enter image description here

with this fill (color) of the boxes [var_fill] :

# Same proportional bars, but segments are split and coloured by var_fill.
ggplot(data_ex, aes(x = cut, y = count, fill = var_fill)) +
  geom_col(position = "fill", colour = "black")

enter image description here

EDIT1 : answer found by missuse :

# Left panel: segments are grouped by clarity but coloured by var_fill.
p1 <- ggplot(data_ex, aes(x = cut, y = count)) +
  geom_col(aes(group = clarity, fill = var_fill),
           position = "fill", colour = "black") +
  ggtitle("var fill")

# Right panel: reference plot, grouped and coloured by clarity.
p2 <- ggplot(data_ex, aes(x = cut, y = count)) +
  geom_col(aes(fill = clarity), position = "fill", colour = "black") +
  ggtitle("clarity")

library(cowplot)
cowplot::plot_grid(p1, p2)

enter image description here

EDIT2 : I have now tried to do this with the ggmosaic extension, with the help of missuse:

library(ggplot2)
library(dplyr)
library(ggmosaic)

set.seed(1)  # keeps the simulated residuals reproducible

# One set of cut points drives both the class label and the class colour, so
# the two can never get out of sync. Position i of the labels and of the
# palette describes the same class.
residu_breaks <- c(-4, -2, 2, 4)
residu_labels <- c("< -4 (p<0.001)", "[-4;-2[ (p<0.05)",
                   "[-2;2[ non significatif", "[2;4[ (p<0.05)",
                   ">= 4 (p<0.001)")
residu_palette <- c("#D04864", "#E495A5", "#CCCCCC", "#9DA8E2", "#4A6FE3")

data_ex <- diamonds %>%
  group_by(cut, clarity) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  mutate(
    residu = runif(n(), min = -4.5, max = 5),
    # findInterval() uses left-closed intervals: 0 when residu < -4,
    # 1 for [-4, -2), ..., 4 when residu >= 4; +1L turns that into an index.
    residu_rank = findInterval(residu, residu_breaks) + 1L,
    residu_classe = residu_labels[residu_rank],
    residu_color = residu_palette[residu_rank]
  ) %>%
  select(-residu_rank)


# Mosaic of clarity within cut. The fill is a literal colour vector set
# outside aes(), so ggplot2 builds no fill scale and no legend for it.
ggplot(data_ex) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut)),
              fill = data_ex$residu_color, na.rm = TRUE) +
  scale_y_productlist() +
  theme_classic() +
  theme(axis.ticks = element_blank(), axis.line = element_blank()) +
  labs(x = "cut", y = "clarity")

enter image description here

But I would like to add the legend shown below to the right of the plot, and I don't know how to do it: because fill is set outside aes(), no fill scale is created, so scale_fill_manual has no effect...

enter image description here

like image 664
antuki Avatar asked Oct 25 '17 14:10

antuki


1 Answer

Using group aesthetic:

# `group` controls how each bar is split (by clarity) while `fill` controls
# only the colour (var_fill).
p1 <- ggplot(data_ex, aes(x = cut, y = count, group = clarity, fill = var_fill)) +
  geom_col(position = "fill", colour = "black") +
  labs(title = "var fill")

# Reference plot where clarity drives both the split and the colour.
p2 <- ggplot(data_ex, aes(x = cut, y = count, fill = clarity)) +
  geom_col(position = "fill", colour = "black") +
  labs(title = "clarity")

library(cowplot)
cowplot::plot_grid(p1, p2)

enter image description here

EDIT: with ggmosaic

library(ggmosaic)

# Mosaic of clarity within cut, coloured by var_fill.
p3 <- ggplot(data_ex) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut), fill = var_fill),
              na.rm = TRUE) +
  scale_x_productlist()

# Same mosaic coloured by clarity, for comparison.
# (The stray trailing comma inside aes() has been removed.)
p4 <- ggplot(data_ex) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut), fill = clarity),
              na.rm = TRUE) +
  scale_x_productlist()

cowplot::plot_grid(p3, p4)

enter image description here

It seems to me that with ggmosaic the group aesthetic is not needed at all; both plots are vertically reversed versions of the geom_bar plots.

EDIT3:
Defining fill outside aes() fixes two problems:
1) it makes the x axis readable
2) it removes the very thin colored lines along the borders of each rectangle

# Translate each var_fill category into a literal colour so the fill can be
# passed outside aes(). Any other non-missing value gets the grey fallback;
# a missing var_fill stays NA.
try2 <- data_ex %>%
  mutate(color = case_when(
    var_fill == "A" ~ "#0073C2FF",
    var_fill == "B" ~ "#EFC000FF",
    !is.na(var_fill) ~ "#868686FF"
  ))

ggplot(try2) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut)),
              fill = try2$color, na.rm = TRUE) +
  scale_x_productlist()

enter image description here

To add y axis labels one needs a bit of wrangling. Here is an approach:

# Vertical midpoints of the clarity segments within the "Ideal" column,
# expressed as cumulative proportions in [0, 1]. They are used as the break
# positions of the duplicated (secondary) y axis.
# NOTE(review): presumably "Ideal" is used because it is the column drawn
# next to the secondary axis; confirm against the rendered plot.
clarity_midpoints <- try2 %>%
  filter(cut == "Ideal") %>%
  mutate(
    upper = cumsum(count / sum(count)),  # top edge of each segment
    lower = lag(upper, default = 0),     # bottom edge; the first starts at 0
    post = (upper + lower) / 2
  ) %>%
  pull(post)

ggplot(try2) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut)),
              fill = try2$color, na.rm = TRUE) +
  scale_x_productlist() +
  scale_y_continuous(sec.axis = dup_axis(labels = unique(try2$clarity),
                                         breaks = clarity_midpoints))

enter image description here

EDIT4: adding the legend can be accomplished in two ways.

1 - By adding a fake (fully transparent) layer that only generates the legend. However, this breaks the x axis labels (they become a combination of cut and fill), so I defined the breaks and labels manually.

(This uses data_ex from the OP's EDIT2.)

# One row per residual class with its colour, in order of first appearance.
# A *named* colour vector ties each colour to its class explicitly instead of
# relying on positional matching inside scale_fill_manual().
residu_legend <- unique(data_ex[c("residu_classe", "residu_color")])
residu_values <- setNames(residu_legend$residu_color,
                          residu_legend$residu_classe)

# Horizontal midpoint of each cut column (as a cumulative proportion), used
# to place one clean x-axis label per cut.
cut_midpoints <- data_ex %>%
  group_by(cut) %>%
  summarise(sumer = sum(count)) %>%
  mutate(
    right = cumsum(sumer / sum(sumer)),  # right edge of each column
    left = lag(right, default = 0),      # left edge; the first starts at 0
    post = (right + left) / 2
  ) %>%
  pull(post)

ggplot(data_ex) +
  # Invisible layer (alpha = 0): exists only to create the fill scale/legend.
  geom_mosaic(aes(weight = count, x = product(clarity, cut), fill = residu_classe),
              alpha = 0, na.rm = TRUE) +
  # Visible layer: colours are set literally, outside aes().
  geom_mosaic(aes(weight = count, x = product(clarity, cut)),
              fill = data_ex$residu_color, na.rm = TRUE) +
  scale_y_productlist() +
  theme_classic() +
  theme(axis.ticks = element_blank(), axis.line = element_blank()) +
  labs(x = "cut", y = "clarity") +
  scale_fill_manual(values = residu_values,
                    breaks = residu_legend$residu_classe) +
  # The legend keys would inherit alpha = 0 from the fake layer; force opaque.
  guides(fill = guide_legend(override.aes = list(alpha = 1))) +
  scale_x_productlist(breaks = cut_midpoints, labels = unique(data_ex$cut))

enter image description here

2 - by extracting the legend from one plot and adding it to the other

library(gtable)              
library(gridExtra) 

make fake plot for legend:

# Throwaway plot whose only purpose is to produce the fill legend.
# The colours are passed as a vector *named* by class, so every class is
# paired with its own colour regardless of how the scale orders its values.
residu_legend <- unique(data_ex[c("residu_classe", "residu_color")])

gg_pl <- ggplot(data_ex) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut), fill = residu_classe),
              alpha = 1, na.rm = TRUE) +
  scale_fill_manual(values = setNames(residu_legend$residu_color,
                                      residu_legend$residu_classe),
                    breaks = residu_legend$residu_classe)

make the correct plot

# Main mosaic plot. The fill is a literal colour vector outside aes(), so
# this plot has no legend of its own.
z <- ggplot(data_ex) +
  geom_mosaic(aes(weight = count, x = product(clarity, cut)),
              fill = data_ex$residu_color, na.rm = TRUE) +
  scale_y_productlist() +
  theme_classic() +
  theme(axis.ticks = element_blank(), axis.line = element_blank()) +
  labs(x = "cut", y = "clarity")


# Convert the legend-only plot to a gtable and keep just the grob(s) whose
# name contains "guide-box" (the legend).
legend_source <- ggplotGrob(gg_pl)
legend_only <- gtable::gtable_filter(legend_source, pattern = "guide-box", fixed = TRUE)
# Main plot on the left and extracted legend on the right, in a 4:1 width ratio.
gridExtra::grid.arrange(z, legend_only, nrow = 1, widths = c(4, 1))

enter image description here

like image 172
missuse Avatar answered Nov 03 '22 02:11

missuse