I'm having difficulty shading under a geom_density
curve for a probability distribution that only has an x variable. I want to shade the plots under the area where x > 0.05. The other threads on R only work if a y variable is included.
Using these randomly generated distribution values:
a <- c(-0.1125, -0.1405, -0.1038, -0.1246, -0.1381, -0.1281, -0.144,
-0.1377, -0.1287, -0.1119, -0.1553, -0.1578, -0.154, -0.1379,
-0.1506, -0.1166, -0.09943, -0.1689, -0.1794, -0.1632, -0.175,
-0.1561, -0.1143, -0.1952, -0.1865, -0.1478, -0.1556, -0.1175,
-0.1098, -0.1224, -0.09501, -0.1164, -0.2199, -0.1501, -0.1461,
-0.08725, -0.1158, -0.1917, -0.1405, -0.1081, -0.1013, -0.07569,
-0.121, -0.1811, -0.1248, -0.1255, -0.09941, -0.1829, -0.212,
-0.1053, -0.1311, -0.1057, -0.1344, -0.09613, -0.1535, -0.1362,
-0.1477, -0.1196, -0.13, -0.1721, -0.1419, -0.1344, -0.08684,
-0.1137, -0.1054, -0.179, -0.1314, -0.122, -0.14, -0.1453, -0.1063,
-0.1382, -0.143, -0.1278, -0.1114, -0.1008, -0.1237, -0.08701,
-0.08896, -0.1261, -0.1674, -0.1116, -0.1192, -0.156, -0.1738,
-0.1137, -0.1405, -0.1663, -0.1393, -0.1259, -0.07659, -0.1176,
-0.1325, -0.1432, -0.1373, -0.1153, -0.1173, -0.1683, -0.1485,
-0.1222)
b <- c(0.02765, 0.0003655, 0.01315, 0.03996, 0.009496, 0.0006978,
0.01546, 0.006651, 0.03626, -0.02307, 0.01906, 0.006012, -0.03311,
0.03919, 0.001477, 0.005686, -0.01026, -0.02559, -0.01881, -0.02306,
-0.00751, -0.002696, 0.008015, -0.01801, -0.04651, 0.001755,
-0.02369, 0.03002, 0.01155, 0.04294, 0.01012, 0.05339, -0.007262,
0.0272, 0.02658, -0.04211, -0.01421, 0.008791, -0.0005405, 0.02552,
0.004705, 0.03458, 0.02617, 0.007282, -0.007129, 0.004159, 0.01888,
0.01341, -0.02492, 0.01837, 0.024, 0.02048, 0.00438, -0.006591,
0.02295, 0.008665, 0.02429, 0.006213, -0.04526, -0.01066, -0.003409,
-0.007527, 0.008865, 0.03149, 0.03217, -0.004714, 0.009994, -0.009908,
-0.01366, -0.0108, -0.003148, 0.006765, -0.04191, 0.04184, 0.01474,
-0.0099, 0.001694, 0.00889, 0.01091, 0.001035, -0.01351, 0.00369,
-0.05145, 0.01338, 0.004623, -0.007436, -0.007046, 0.01927, 0.0005834,
0.01277, 0.02874, -0.01633, 0.006894, 0.02411, 0.0292, 0.05691,
0.02347, 0.02901, 0.02329, 0.00198)
And this function to graph them:
library(ggplot2)
library(gridExtra)
proportion.distribution.fn <- function(a, b) {
# Generating data frames
a1 <- as.data.frame(a)
b1 <- as.data.frame(b)
# Generating graphs
a1g <- ggplot(a1, aes(x = a1[,1])) +
geom_density(fill = "skyblue1") +
labs(title = "a distribution", x = "Proportion", y = "Density")
b1g <- ggplot(b1, aes(x = b1[,1])) +
geom_density(fill = "skyblue1") +
labs(title = "b distribution", x = "Proportion", y = "Density")
return(grid.arrange(a1g, b1g))
}
proportion.distribution.fn(a, b)
This is one of those situations where it's sometimes just easiest to do the calculations ahead of time, outside of ggplot, rather than trying to coerce behind-the-scenes calculations to behave like you desire. In tidyverse grammar,
library(tidyverse)
df <- data_frame(a = c(-0.1125, -0.1405, -0.1038, -0.1246, -0.1381, -0.1281, -0.144, -0.1377, -0.1287, -0.1119, -0.1553, -0.1578, -0.154, -0.1379, -0.1506, -0.1166, -0.09943, -0.1689, -0.1794, -0.1632, -0.175, -0.1561, -0.1143, -0.1952, -0.1865, -0.1478, -0.1556, -0.1175, -0.1098, -0.1224, -0.09501, -0.1164, -0.2199, -0.1501, -0.1461, -0.08725, -0.1158, -0.1917, -0.1405, -0.1081, -0.1013, -0.07569, -0.121, -0.1811, -0.1248, -0.1255, -0.09941, -0.1829, -0.212, -0.1053, -0.1311, -0.1057, -0.1344, -0.09613, -0.1535, -0.1362, -0.1477, -0.1196, -0.13, -0.1721, -0.1419, -0.1344, -0.08684, -0.1137, -0.1054, -0.179, -0.1314, -0.122, -0.14, -0.1453, -0.1063, -0.1382, -0.143, -0.1278, -0.1114, -0.1008, -0.1237, -0.08701, -0.08896, -0.1261, -0.1674, -0.1116, -0.1192, -0.156, -0.1738, -0.1137, -0.1405, -0.1663, -0.1393, -0.1259, -0.07659, -0.1176, -0.1325, -0.1432, -0.1373, -0.1153, -0.1173, -0.1683, -0.1485, -0.1222),
b = c(0.02765, 0.0003655, 0.01315, 0.03996, 0.009496, 0.0006978, 0.01546, 0.006651, 0.03626, -0.02307, 0.01906, 0.006012, -0.03311, 0.03919, 0.001477, 0.005686, -0.01026, -0.02559, -0.01881, -0.02306, -0.00751, -0.002696, 0.008015, -0.01801, -0.04651, 0.001755, -0.02369, 0.03002, 0.01155, 0.04294, 0.01012, 0.05339, -0.007262, 0.0272, 0.02658, -0.04211, -0.01421, 0.008791, -0.0005405, 0.02552, 0.004705, 0.03458, 0.02617, 0.007282, -0.007129, 0.004159, 0.01888, 0.01341, -0.02492, 0.01837, 0.024, 0.02048, 0.00438, -0.006591, 0.02295, 0.008665, 0.02429, 0.006213, -0.04526, -0.01066, -0.003409, -0.007527, 0.008865, 0.03149, 0.03217, -0.004714, 0.009994, -0.009908, -0.01366, -0.0108, -0.003148, 0.006765, -0.04191, 0.04184, 0.01474, -0.0099, 0.001694, 0.00889, 0.01091, 0.001035, -0.01351, 0.00369, -0.05145, 0.01338, 0.004623, -0.007436, -0.007046, 0.01927, 0.0005834, 0.01277, 0.02874, -0.01633, 0.006894, 0.02411, 0.0292, 0.05691, 0.02347, 0.02901, 0.02329, 0.00198))
df_density <- df %>%
map(density) %>%
map_dfr(~data_frame(x = .x$x, y = .x$y), .id = 'variable')
df_density
#> # A tibble: 1,024 x 3
#> variable x y
#> <chr> <dbl> <dbl>
#> 1 a -0.249 0.00495
#> 2 a -0.248 0.00560
#> 3 a -0.248 0.00632
#> 4 a -0.248 0.00714
#> 5 a -0.247 0.00804
#> 6 a -0.247 0.00904
#> 7 a -0.246 0.0101
#> 8 a -0.246 0.0114
#> 9 a -0.246 0.0127
#> 10 a -0.245 0.0142
#> # ... with 1,014 more rows
ggplot(df_density, aes(x, y, color = variable, fill = variable)) +
geom_line() +
geom_area(data = filter(df_density, x > .05))
I was a bit surprised to find that there's (apparently) no easy way to do this with ggplot2
. Here's a solution that uses ggplot_build
to extract the desired x-values where shading should occur, and then geom_area
to manually draw your distribution:
proportion.distribution.fn <- function(a, b) {
# Posteriors (delta)
a1 <- as.data.frame(a)
b1 <- as.data.frame(b)
# Plotting delta
a1g <- ggplot(a1, aes(x = a1[,1])) +
geom_density() + # Note the lack of fill here
labs(title = "a distribution", x = "Proportion", y = "Density")
a1g_df <- ggplot_build(a1g)$data[[1]]
a1g <- a1g + geom_area(data = subset(a1g_df, x > 0.05),
aes(x=x,y=y),
fill = "skyblue1",
color = "black") # gives a nice border
b1g <- ggplot(b1, aes(x = b1[,1])) +
geom_density() + # Note the lack of fill here
labs(title = "b distribution", x = "Proportion", y = "Density")
b1g_df <- ggplot_build(b1g)$data[[1]]
b1g <- b1g + geom_area(data = subset(b1g_df, x > 0.05),
aes(x=x,y=y),
fill = "skyblue1",
color = "black") # gives a nice border
return(grid.arrange(a1g, b1g))
}
proportion.distribution.fn(a, b)
I liked both the answers given (and upvoted both). I based this answer on Marcus' example with minor alterations, as it was easier to edit the code to produce the desired graph I was after. Of note, alistaire's answer is a more efficient coding method, and would likely be better if starting from scratch.
library(ggplot2)
library(gridExtra)
proportion.distribution.fn <- function(a, b) {
# Data frames
a1 <- as.data.frame(a)
b1 <- as.data.frame(b)
# Initial graphs - 1st fill
a1g <- ggplot(a1, aes(x = a1[,1])) +
geom_density(fill = "skyblue1") +
labs(title = "a distribution", x = "Proportion", y = "Density")
b1g <- ggplot(b1, aes(x = b1[,1])) +
geom_density(fill = "skyblue1") +
labs(title = "b distribution", x = "Proportion", y = "Density")
# Adding 2nd fill
a1g_df <- ggplot_build(a1g)$data[[1]]
b1g_df <- ggplot_build(b1g)$data[[1]]
a1graph <- a1g + geom_area(data = subset(a1g_df, x > 0.05), aes(x=x,y=y),
fill = "darkblue")
b1graph <- b1g + geom_area(data = subset(b1g_df, x > 0.05), aes(x=x,y=y),
fill = "darkblue")
return(grid.arrange(a1graph, b1graph))
}
proportion.distribution.fn(a, b)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With