Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Shade geom_density over interval on x-axis if there is no y variable

Tags:

r

ggplot2

I'm having difficulty shading under a geom_density curve for a probability distribution that only has an x variable. I want to shade the plots under the area where x > 0.05. The other threads on R only work if a y variable is included.

Using these randomly generated distribution values:

a <- c(-0.1125, -0.1405, -0.1038, -0.1246, -0.1381, -0.1281, -0.144, 
    -0.1377, -0.1287, -0.1119, -0.1553, -0.1578, -0.154, -0.1379, 
    -0.1506, -0.1166, -0.09943, -0.1689, -0.1794, -0.1632, -0.175, 
    -0.1561, -0.1143, -0.1952, -0.1865, -0.1478, -0.1556, -0.1175, 
    -0.1098, -0.1224, -0.09501, -0.1164, -0.2199, -0.1501, -0.1461, 
    -0.08725, -0.1158, -0.1917, -0.1405, -0.1081, -0.1013, -0.07569, 
    -0.121, -0.1811, -0.1248, -0.1255, -0.09941, -0.1829, -0.212, 
    -0.1053, -0.1311, -0.1057, -0.1344, -0.09613, -0.1535, -0.1362, 
    -0.1477, -0.1196, -0.13, -0.1721, -0.1419, -0.1344, -0.08684, 
    -0.1137, -0.1054, -0.179, -0.1314, -0.122, -0.14, -0.1453, -0.1063, 
    -0.1382, -0.143, -0.1278, -0.1114, -0.1008, -0.1237, -0.08701, 
    -0.08896, -0.1261, -0.1674, -0.1116, -0.1192, -0.156, -0.1738, 
    -0.1137, -0.1405, -0.1663, -0.1393, -0.1259, -0.07659, -0.1176, 
    -0.1325, -0.1432, -0.1373, -0.1153, -0.1173, -0.1683, -0.1485, 
    -0.1222)
b <- c(0.02765, 0.0003655, 0.01315, 0.03996, 0.009496, 0.0006978, 
    0.01546, 0.006651, 0.03626, -0.02307, 0.01906, 0.006012, -0.03311, 
    0.03919, 0.001477, 0.005686, -0.01026, -0.02559, -0.01881, -0.02306, 
    -0.00751, -0.002696, 0.008015, -0.01801, -0.04651, 0.001755, 
    -0.02369, 0.03002, 0.01155, 0.04294, 0.01012, 0.05339, -0.007262, 
    0.0272, 0.02658, -0.04211, -0.01421, 0.008791, -0.0005405, 0.02552, 
    0.004705, 0.03458, 0.02617, 0.007282, -0.007129, 0.004159, 0.01888, 
    0.01341, -0.02492, 0.01837, 0.024, 0.02048, 0.00438, -0.006591, 
    0.02295, 0.008665, 0.02429, 0.006213, -0.04526, -0.01066, -0.003409, 
    -0.007527, 0.008865, 0.03149, 0.03217, -0.004714, 0.009994, -0.009908, 
    -0.01366, -0.0108, -0.003148, 0.006765, -0.04191, 0.04184, 0.01474, 
    -0.0099, 0.001694, 0.00889, 0.01091, 0.001035, -0.01351, 0.00369, 
    -0.05145, 0.01338, 0.004623, -0.007436, -0.007046, 0.01927, 0.0005834, 
    0.01277, 0.02874, -0.01633, 0.006894, 0.02411, 0.0292, 0.05691, 
    0.02347, 0.02901, 0.02329, 0.00198)

And this function to graph them:

library(ggplot2)
library(gridExtra) 

proportion.distribution.fn <- function(a, b) {
    # Generating data frames
    a1 <- as.data.frame(a)
    b1 <- as.data.frame(b)

    # Generating graphs
    a1g <- ggplot(a1, aes(x = a1[,1])) + 
        geom_density(fill = "skyblue1") +
        labs(title = "a distribution", x = "Proportion", y = "Density")
    b1g <- ggplot(b1, aes(x = b1[,1])) + 
        geom_density(fill = "skyblue1") +
        labs(title = "b distribution", x = "Proportion", y = "Density")

    return(grid.arrange(a1g, b1g))
}

proportion.distribution.fn(a, b) 

Two probability distributions. Would like to shade anywhere from the right of x=0.05

like image 559
MBorg Avatar asked Apr 13 '18 01:04

MBorg


3 Answers

This is one of those situations where it's sometimes just easiest to do the calculations ahead of time, outside of ggplot, rather than trying to coerce behind-the-scenes calculations to behave like you desire. In tidyverse grammar,

library(tidyverse)

df <- data_frame(a = c(-0.1125, -0.1405, -0.1038, -0.1246, -0.1381, -0.1281, -0.144, -0.1377, -0.1287, -0.1119, -0.1553, -0.1578, -0.154, -0.1379, -0.1506, -0.1166, -0.09943, -0.1689, -0.1794, -0.1632, -0.175, -0.1561, -0.1143, -0.1952, -0.1865, -0.1478, -0.1556, -0.1175, -0.1098, -0.1224, -0.09501, -0.1164, -0.2199, -0.1501, -0.1461, -0.08725, -0.1158, -0.1917, -0.1405, -0.1081, -0.1013, -0.07569, -0.121, -0.1811, -0.1248, -0.1255, -0.09941, -0.1829, -0.212, -0.1053, -0.1311, -0.1057, -0.1344, -0.09613, -0.1535, -0.1362, -0.1477, -0.1196, -0.13, -0.1721, -0.1419, -0.1344, -0.08684, -0.1137, -0.1054, -0.179, -0.1314, -0.122, -0.14, -0.1453, -0.1063, -0.1382, -0.143, -0.1278, -0.1114, -0.1008, -0.1237, -0.08701, -0.08896, -0.1261, -0.1674, -0.1116, -0.1192, -0.156, -0.1738, -0.1137, -0.1405, -0.1663, -0.1393, -0.1259, -0.07659, -0.1176, -0.1325, -0.1432, -0.1373, -0.1153, -0.1173, -0.1683, -0.1485, -0.1222),
                 b = c(0.02765, 0.0003655, 0.01315, 0.03996, 0.009496, 0.0006978, 0.01546, 0.006651, 0.03626, -0.02307, 0.01906, 0.006012, -0.03311, 0.03919, 0.001477, 0.005686, -0.01026, -0.02559, -0.01881, -0.02306, -0.00751, -0.002696, 0.008015, -0.01801, -0.04651, 0.001755, -0.02369, 0.03002, 0.01155, 0.04294, 0.01012, 0.05339, -0.007262, 0.0272, 0.02658, -0.04211, -0.01421, 0.008791, -0.0005405, 0.02552, 0.004705, 0.03458, 0.02617, 0.007282, -0.007129, 0.004159, 0.01888, 0.01341, -0.02492, 0.01837, 0.024, 0.02048, 0.00438, -0.006591, 0.02295, 0.008665, 0.02429, 0.006213, -0.04526, -0.01066, -0.003409, -0.007527, 0.008865, 0.03149, 0.03217, -0.004714, 0.009994, -0.009908, -0.01366, -0.0108, -0.003148, 0.006765, -0.04191, 0.04184, 0.01474, -0.0099, 0.001694, 0.00889, 0.01091, 0.001035, -0.01351, 0.00369, -0.05145, 0.01338, 0.004623, -0.007436, -0.007046, 0.01927, 0.0005834, 0.01277, 0.02874, -0.01633, 0.006894, 0.02411, 0.0292, 0.05691, 0.02347, 0.02901, 0.02329, 0.00198))

df_density <- df %>% 
    map(density) %>% 
    map_dfr(~data_frame(x = .x$x, y = .x$y), .id = 'variable')

df_density
#> # A tibble: 1,024 x 3
#>    variable      x       y
#>    <chr>     <dbl>   <dbl>
#>  1 a        -0.249 0.00495
#>  2 a        -0.248 0.00560
#>  3 a        -0.248 0.00632
#>  4 a        -0.248 0.00714
#>  5 a        -0.247 0.00804
#>  6 a        -0.247 0.00904
#>  7 a        -0.246 0.0101 
#>  8 a        -0.246 0.0114 
#>  9 a        -0.246 0.0127 
#> 10 a        -0.245 0.0142 
#> # ... with 1,014 more rows

ggplot(df_density, aes(x, y, color = variable, fill = variable)) + 
    geom_line() + 
    geom_area(data = filter(df_density, x > .05))

like image 88
alistaire Avatar answered Oct 18 '22 22:10

alistaire


I was a bit surprised to find that there's (apparently) no easy way to do this with ggplot2. Here's a solution that uses ggplot_build to extract the desired x-values where shading should occur, and then geom_area to manually draw your distribution:

proportion.distribution.fn <- function(a, b) {
  # Posteriors (delta)
  a1 <- as.data.frame(a)
  b1 <- as.data.frame(b)

  # Plotting delta
  a1g <- ggplot(a1, aes(x = a1[,1])) + 
    geom_density() + # Note the lack of fill here
    labs(title = "a distribution", x = "Proportion", y = "Density")

  a1g_df <- ggplot_build(a1g)$data[[1]]

  a1g <- a1g + geom_area(data = subset(a1g_df, x > 0.05),
                         aes(x=x,y=y),
                         fill = "skyblue1",
                         color = "black") # gives a nice border

  b1g <- ggplot(b1, aes(x = b1[,1])) + 
    geom_density() + # Note the lack of fill here
    labs(title = "b distribution", x = "Proportion", y = "Density")

  b1g_df <- ggplot_build(b1g)$data[[1]]

  b1g <- b1g + geom_area(data = subset(b1g_df, x > 0.05),
                         aes(x=x,y=y),
                         fill = "skyblue1", 
                         color = "black") # gives a nice border

  return(grid.arrange(a1g, b1g))
}

proportion.distribution.fn(a, b) 

enter image description here

like image 6
Marcus Campbell Avatar answered Oct 18 '22 20:10

Marcus Campbell


I liked both the answers given (and upvoted both). I based this answer on Marcus' example with minor alterations, as it was easier to edit the code to produce the desired graph I was after. Of note, alistaire's answer is a more efficient coding method, and would likely be better if starting from scratch.

library(ggplot2)
library(gridExtra) 
proportion.distribution.fn <- function(a, b) {
  # Data frames
  a1 <- as.data.frame(a)
  b1 <- as.data.frame(b)

  # Initial graphs - 1st fill
  a1g <- ggplot(a1, aes(x = a1[,1])) + 
    geom_density(fill = "skyblue1") +
    labs(title = "a distribution", x = "Proportion", y = "Density")
  b1g <- ggplot(b1, aes(x = b1[,1])) + 
    geom_density(fill = "skyblue1") +
    labs(title = "b distribution", x = "Proportion", y = "Density")

  # Adding 2nd fill
  a1g_df <- ggplot_build(a1g)$data[[1]]
  b1g_df <- ggplot_build(b1g)$data[[1]]
  a1graph <- a1g + geom_area(data = subset(a1g_df, x > 0.05), aes(x=x,y=y),
                             fill = "darkblue")
  b1graph <- b1g + geom_area(data = subset(b1g_df, x > 0.05), aes(x=x,y=y),
                             fill = "darkblue")
  return(grid.arrange(a1graph, b1graph))
}
proportion.distribution.fn(a, b) 

enter image description here

like image 2
MBorg Avatar answered Oct 18 '22 21:10

MBorg