I use the following example data and code:
# Example: build one ggplot per Methodology (faceted by Metric) and show the
# first one.
library(dplyr)    # provides %>%, group_by() and do()
library(ggplot2)  # median_hilow() additionally needs the Hmisc package

x1 <- as.Date("2013-12-31")
adddate1 <- 1:60
dts <- x1 + adddate1

# The columns have different lengths (100, 2700 and 60); data.frame() recycles
# the shorter vectors because 2700 is a whole multiple of both 100 and 60.
df <- data.frame(
  a = runif(100),
  b = runif(100),
  c = runif(100),
  d = rnorm(2700),
  dates = dts
)
df$Metric <- ifelse(df$a > 0.5, "a", "b")
df$Methodology <- factor(ifelse(df$a > 0.5, "One", "Two"))

pl <- df %>%
  group_by(Methodology) %>%
  do(
    plots = ggplot(data = ., aes(x = dates, y = b)) +
      geom_point() +
      stat_smooth(method = "auto", size = 1.5) +
      # fun.args has to be a list of named arguments; the former
      # `(conf.int=1)` was a parenthesised assignment that evaluated to the
      # bare number 1 and left a stray `conf.int` variable behind.
      # conf.int = 1 spans the minimum to the maximum around the median;
      # conf.int = 0.5 would give the interquartile range instead.
      stat_summary(fun.data = median_hilow, fun.args = list(conf.int = 1)) +
      scale_x_date(date_breaks = "1 week", date_labels = "%d-%b-%y") +
      facet_wrap(~Metric, scales = "free") +
      ggtitle(unique(.$Methodology))
  )

# Row 1, column 2 of the result: the plot stored for the first Methodology.
pl[[1, 2]]
The output I see is:
However, I would like to see the IQR, as calculated by `stat_summary` or some such routine, shown as a ribbon, together with a line showing the median value.
I suspect I will have to write a user defined function and play with that.
Appreciate any hints or tips.
You can use `stat_summary` with `geom_smooth`:
library(ggplot2)

set.seed(47)

# Simulated data: three uniform columns, one normal column and 60 consecutive
# dates; data.frame() recycles the shorter columns up to 2700 rows.
df <- data.frame(
  a = runif(100),
  b = runif(100),
  c = runif(100),
  d = rnorm(2700),
  dates = as.Date("2013-12-31") + 1:60
)

# Both grouping columns are derived from the same threshold on `a`.
above_half <- df$a > 0.5
df$Metric <- ifelse(above_half, "a", "b")
df$Methodology <- factor(ifelse(above_half, "One", "Two"))

# Points, the default smoother, and a red median line whose ribbon is drawn
# from the summary statistic (conf.int = 1, i.e. minimum to maximum per date).
ggplot(df, aes(x = dates, y = b)) +
  geom_point() +
  stat_smooth(size = 1.5) +
  geom_smooth(
    stat = "summary",
    fun.data = median_hilow,
    fun.args = list(conf.int = 1),
    color = "red",
    fill = "red",
    alpha = 0.2
  ) +
  scale_x_date(date_breaks = "1 week", date_labels = "%d-%b-%y") +
  facet_wrap(~ Methodology + Metric, ncol = 1)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Since `conf.int = 1`, this plots a ribbon between the minimum and maximum at each x value, with the median as the line. If you actually want to plot the 25th and 75th percentiles, set `conf.int = 0.5`. On this data, there aren't enough observations at each x value for that to look very different, though, so here it is on some new sample data:
library(ggplot2)

set.seed(47)

# 50 evenly spaced x positions on [0, 4 * pi], the whole sequence repeated 50
# times, so every x value carries 50 noisy observations around sin(x).
wave <- tibble::tibble(
  x = rep(seq(0, 4 * pi, length.out = 50), 50),
  y = rnorm(2500) * sin(x) + sin(x)
)

# Faint points, the default smoother in dark blue, and in red the median line
# with a ribbon from the summary statistic (conf.int = 0.5: 25th to 75th
# percentile at each x).
ggplot(wave, aes(x, y)) +
  geom_point(alpha = 0.1) +
  geom_smooth(fill = "darkblue") +
  geom_smooth(
    stat = "summary",
    fun.data = median_hilow,
    fun.args = list(conf.int = 0.5),
    color = "red",
    fill = "red",
    alpha = 0.2
  )
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`median_hilow` (really `Hmisc::smedian.hilow`) doesn't allow you to set the type of quantile, though, so for more precise control, rewrite the function (returning a similarly structured data frame) or pass separate functions for each statistic to the `fun.y`, `fun.ymin` and `fun.ymax` parameters.
Something has changed in either R or ggplot2: `stat_summary()` no longer works with the `geom = 'smooth'` option; it needs to be `geom = 'ribbon'`. The following works as advertised in R 3.6.0 and ggplot2 3.1.1:
library(ggplot2)

set.seed(47)

# Same sinusoidal sample data as before: 50 x positions on [0, 4 * pi], the
# sequence repeated 50 times. tibble::tibble() is used here because
# tibble::data_frame() is deprecated.
ggplot(tibble::tibble(x = rep(seq(0, 4 * pi, length.out = 50), 50),
                      y = rnorm(2500) * sin(x) + sin(x)),
       aes(x, y)) +
  geom_point(alpha = 0.1) +
  geom_smooth(fill = "darkblue") +
  # Draw the summary as a ribbon; conf.int = 0.5 makes median_hilow() return
  # the 25th and 75th percentiles as the ribbon's lower and upper edges.
  stat_summary(fun.data = median_hilow, fun.args = list(conf.int = 0.5),
               geom = "ribbon", color = "red", fill = "red", alpha = 0.2)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With