Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

ggplot2: add distribution jitter near the legend bar

A non-trivial ggplot challenge

I compare the spatial distribution of a variable at two moments using maps with a fixed color scale - to show the actual change. It would be very nice to add the distributions of the variables near the legend bars as jitter points.

The desired plot should look something like the picture: the supposed red jitter points are added manually (I just used paint.net) to the plot produced by R.

enter image description here


Reproduce the maps

To reproduce the maps, an R object called fortIT is required. This is a fortified (using ggplot2::fortify) SpatialPolygonsDataFrame of Italian NUTS-2 regions with the data attached. The RData file can be downloaded here [89KB]

And the code for the maps:

require(dplyr)
require(ggplot2)
require(ggthemes)
require(gridExtra)
require(rgeos)
require(maptools)
require(cowplot)
require(viridis)

# load the data
load(url("https://ikashnitsky.github.io/share/1602-so-q-map-jitter/fortIT.RData"))

# produce the first map
gIT1 <- ggplot()+
        geom_polygon(data = fortIT, aes(x=long, y=lat, group=group, fill=tsr03),
                     color='grey30',size=.1)+
        scale_fill_viridis('TSR\n2003',limits=range(fortIT[,9:10]))+ # !!! limits fix the color scale
        
        coord_equal(xlim=c(4000000, 5500000), ylim=c(1500000,3000000))+
        guides(fill = guide_colorbar(barwidth = 1.5, barheight = 15))+
        
        theme_map()+
        theme(panel.border=element_rect(color = 'black',size=.5,fill = NA),
              legend.position = c(1, 1),
              legend.justification = c(1, 1),
              legend.background = element_rect(colour = NA, fill = NA),
              legend.title = element_text(size=15),
              legend.text = element_text(size=15))+
        scale_x_continuous(expand=c(0,0)) +
        scale_y_continuous(expand=c(0,0)) +
        labs(x = NULL, y = NULL)


# produce the second map
gIT2 <- ggplot()+
        geom_polygon(data = fortIT, aes(x=long, y=lat, group=group, fill=tsr43),
                     color='grey30',size=.1)+
        scale_fill_viridis('TSR\n2043',limits=range(fortIT[,9:10]))+
        
        coord_equal(xlim=c(4000000, 5500000), ylim=c(1500000,3000000))+
        guides(fill = guide_colorbar(barwidth = 1.5, barheight = 15))+
        
        theme_map()+
        theme(panel.border=element_rect(color = 'black',size=.5,fill = NA),
              legend.position = c(1, 1),
              legend.justification = c(1, 1),
              legend.background = element_rect(colour = NA, fill = NA),
              legend.title = element_text(size=15),
              legend.text = element_text(size=15))+
        scale_x_continuous(expand=c(0,0)) +
        scale_y_continuous(expand=c(0,0)) +
        labs(x = NULL, y = NULL)

# align both maps side by side
gIT <- plot_grid(gIT1,gIT2,ncol=2,labels=LETTERS[1:2],label_size=20)

ggsave('italy.png',gIT,width=12,height=7,dpi=192)

Additional information

The variables visualized in the maps are Total Support Ratios in 2003 (panel A) and 2043 (panel B, Eurostat regional projection). Total Support Ratio is the ratio of working-age population (15-64) to non-working-age population (younger than 15 and older than 65).

like image 755
ikashnitsky Avatar asked Feb 27 '16 17:02

ikashnitsky


2 Answers

you could replace the legend with one that has a plot panel stuck to it with the density information,

g <- ggplotGrob(p)
leg = gtable_filter(g, "guide-box")

dd <- ddply(fortIT, "group", summarise, fill=unique(tsr03))
dum <- ggplot(dd, aes(0,y=fill)) +
  geom_dotplot(fill="red", binaxis = "y", dotsize=0.5, stackdir = "down")+
  scale_y_continuous(lim=range(fortIT[,c("tsr03", "tsr43")]), expand=c(0,0)) +
  theme_void() 

dummy_panel <- gtable_filter(ggplotGrob(dum), "panel")
dummy_panel$layout$clip <- FALSE

a <- leg[[1]][[1]][[1]][[1]]
a <- gtable_add_cols(a, unit(1,"cm"), 0)
a <- gtable_add_grob(a, dummy_panel, 4, 1)
a$layout$clip <- FALSE
grid.newpage()
grid.draw(a)

leg[[1]][[1]][[1]][[1]] <- a
g$grobs[g$layout$name=="guide-box"] <- list(leg)

library(grid)
grid.newpage()
grid.draw(g)

enter image description here

like image 96
baptiste Avatar answered Oct 07 '22 17:10

baptiste


Whenever a custom legend is required, I find it preferable to draw the legend as a separate plot and then combine.

For example, we can define the following function:

plot_legend <- function(dots, limits, title, bins = 20) {
  n <- 100
  tiles <- data.frame(x = rep(0.5, n),
                      y = seq(limits[1], limits[2], length.out = n))

  ggplot() +
    geom_raster(data=tiles, aes(x = x, y = y, fill = y), interpolate = TRUE) +
    geom_dotplot(data = data.frame(x = dots), aes(x = -.05, y = x, fill = ..y..),
                 stackdir = "down", binaxis = "y", binwidth = diff(limits)/bins, dotsize = .8) +
    scale_x_continuous(limits = c(-5, 1), expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0), position = "right") +
    ggtitle(title) +
    theme_cowplot(12) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.line = element_blank(),
          axis.title = element_blank(),
          plot.title = element_text(face = "plain", hjust = 1),
          legend.position = "none")
}

Which we can use like so:

require(ggplot2)
require(cowplot)
require(viridis)

dots <- 3*runif(100)
range <- c(0, 3)
plot_legend(dots, range, "random numbers") + scale_fill_viridis()

enter image description here

Now we use this together with the maps code. It requires a little bit of fiddling with the final placement of the legends into the figures, but it's not overly complicated.

require(dplyr)
load(url("https://ikashnitsky.github.io/misc/160227-SO-question/fortIT.RData"))

# extract tsr03 and tsr43 data
fortIT %>% group_by(group) %>%
  summarize(tsr03 = tsr03[1], tsr43 = tsr43[1]) -> df_tsr

# get color range limits
limits <- range(fortIT[,9:10])

# make the legends
legIT1 <- plot_legend(df_tsr$tsr03, limits, "TSR 2003") + scale_fill_viridis()
legIT2 <- plot_legend(df_tsr$tsr43, limits, "TSR 2043") + scale_fill_viridis()

# produce the first map
gIT1 <- ggplot()+
  geom_polygon(data = fortIT, aes(x=long, y=lat, group=group, fill=tsr03),
               color='grey30', size=.1) +
  scale_x_continuous(expand=c(0,0)) +
  scale_y_continuous(expand=c(0,0)) +
  scale_fill_viridis('TSR\n2003', limits = limits, guide = "none") +
  coord_equal(xlim=c(4000000, 5500000), ylim=c(1500000, 3000000)) +
  theme_map() +
  theme(panel.border=element_rect(color = 'black',size=.5,fill = NA))

# produce the second map
gIT2 <- ggplot()+
  geom_polygon(data = fortIT, aes(x=long, y=lat, group=group, fill=tsr43),
               color='grey30',size=.1)+
  scale_x_continuous(expand=c(0,0)) +
  scale_y_continuous(expand=c(0,0)) +
  scale_fill_viridis('TSR\n2043', limits = limits, guide = "none") +
  coord_equal(xlim=c(4000000, 5500000), ylim=c(1500000, 3000000)) +
  theme_map() +
  theme(panel.border=element_rect(color = 'black',size=.5,fill = NA))

# put everything together
plot_grid(ggdraw(gIT1) + draw_plot(legIT1, .62, .35, .35, .55),
          ggdraw(gIT2) + draw_plot(legIT2, .62, .35, .35, .55),
          ncol=2, labels="AUTO")

enter image description here

Two comments:

  1. The size of the stacked points can be controlled by the bins argument of the plot_legend() function. The larger bins, the smaller the points.

  2. I would typically remove the frames around each map, but I tried here to reproduce the original figure as closely as possible.

like image 29
Claus Wilke Avatar answered Oct 07 '22 15:10

Claus Wilke