Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Add vline to geom_density and shade confidence interval of mean R

After reading through different posts, I found out how to add a vline of mean to density plots as shown here. Using the data provided in the above link:

1) How can one add 95% confidence intervals around the mean using geom_ribbon? CIs can be computed as

#computation of the standard error of the mean
sem<-sd(x)/sqrt(length(x))
#95% confidence intervals of the mean
c(mean(x)-2*sem,mean(x)+2*sem)

2) How can one limit the vline to the region under the curve? You will see in the picture below that vline plots outside the curve.

Sample data very close to my real problem can be found at https://www.dropbox.com/s/bvvfdpgekbjyjh0/test.csv?dl=0

sample plot

UPDATE

Using real data in the link above, I have tried the following using @beetroot's answer.

# Find the mean of each group
dat=me
library(dplyr)
library(plyr)
cdat <- ddply(data,.(direction,cond), summarise, rating.mean=mean(rating,na.rm=T))# summarize by season and variable
cdat

#ggplot
p=ggplot(data,aes(x = rating)) + 
  geom_density(aes(colour = cond),size=1.3,adjust=4)+
  facet_grid(.~direction, scales="free")+
  xlab(NULL) + ylab("Density")
p=p+coord_cartesian(xlim = c(0, 130))+scale_color_manual(name="",values=c("blue","#00BA38","#F8766D"))+
  scale_fill_manual(values=c("blue", "#00BA38", "#F8766D"))+
  theme(legend.title = element_text(colour="black", size=15, face="plain"))+
  theme(legend.text = element_text(colour="black", size = 15, face = "plain"))+
  theme(title = red.bold.italic.text, axis.title = red.bold.italic.text)+
  theme(strip.text.x = element_text(size=20, color="black",face="plain"))+ # facet labels
  ggtitle("SAMPLE A") +theme(plot.title = element_text(size = 20, face = "bold"))+
    theme(axis.text = blue.bold.italic.16.text)+ theme(legend.position = "none")+
  geom_vline(data=cdat, aes(xintercept=rating.mean, color=cond),linetype="dotted",size=1)
p

sample plot from data

## implementing @beetroot's code to restrict lines under the curve and shade CIs around the mean
# I will use ddply for mean and CIs
cdat <- ddply(data,.(direction,cond), summarise, rating.mean=mean(rating,na.rm=T),
              sem = sd(rating,na.rm=T)/sqrt(length(rating)),
              ci.low = mean(rating,na.rm=T) - 2*sem,
              ci.upp = mean(rating,na.rm=T) + 2*sem)# summarize by direction and variable


#In order to limit the lines to the outline of the curves you first need to find out which y values
#of the curves correspond to the means, e.g. by accessing the density values with ggplot_build and 
#using approx:

   cdat.dens <- ggplot_build(ggplot(data, aes(x=rating, colour=cond)) +
                              facet_grid(.~direction, scales="free")+
                              geom_density(aes(colour = cond),size=1.3,adjust=4))$data[[1]] %>%
  mutate(cond = ifelse(group==1, "A",
                       ifelse(group==2, "B","C"))) %>%
  left_join(cdat) %>%
  select(y, x, cond, rating.mean, sem, ci.low, ci.upp) %>%
  group_by(cond) %>%
  mutate(dens.mean = approx(x, y, xout = rating.mean)[[2]],
         dens.cilow = approx(x, y, xout = ci.low)[[2]],
         dens.ciupp = approx(x, y, xout = ci.upp)[[2]]) %>%
  select(-y, -x) %>%
  slice(1)

 cdat.dens

#---
 #You can then combine everything with various geom_segments:

   ggplot(data, aes(x=rating, colour=cond)) +
   geom_density(data = data, aes(x = rating, colour = cond),size=1.3,adjust=4) +facet_grid(.~direction, scales="free")+
   geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
                linetype = "dashed", size = 1) +
   geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
                linetype = "dotted", size = 1) +
   geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
                linetype = "dotted", size = 1)

Gives this:

enter image description here

You will notice the mean and CIs are not aligned as in the original plot. What am I not doing right @beetroot?

like image 247
code123 Avatar asked Mar 10 '23 00:03

code123


1 Answers

Using the data from the link, you can calculate the mean, se and ci like so (I suggest using dplyr, the successor of plyr):

set.seed(1234)
dat <- data.frame(cond = factor(rep(c("A","B"), each=200)), 
                  rating = c(rnorm(200),rnorm(200, mean=.8)))

library(ggplot2)
library(dplyr)
cdat <- dat %>%
  group_by(cond) %>%
  summarise(rating.mean = mean(rating),
            sem = sd(rating)/sqrt(length(rating)),
            ci.low = mean(rating) - 2*sem,
            ci.upp = mean(rating) + 2*sem)

In order to limit the lines to the outline of the curves you first need to find out which y values of the curves correspond to the means, e.g. by accessing the density values with ggplot_build and using approx:

cdat.dens <- ggplot_build(ggplot(dat, aes(x=rating, colour=cond)) + geom_density())$data[[1]] %>%
  mutate(cond = ifelse(group == 1, "A", "B")) %>%
  left_join(cdat) %>%
  select(y, x, cond, rating.mean, sem, ci.low, ci.upp) %>%
  group_by(cond) %>%
  mutate(dens.mean = approx(x, y, xout = rating.mean)[[2]],
         dens.cilow = approx(x, y, xout = ci.low)[[2]],
         dens.ciupp = approx(x, y, xout = ci.upp)[[2]]) %>%
  select(-y, -x) %>%
  slice(1)

> cdat.dens
Source: local data frame [2 x 8]
Groups: cond [2]

   cond rating.mean        sem     ci.low     ci.upp dens.mean dens.cilow dens.ciupp
  <chr>       <dbl>      <dbl>      <dbl>      <dbl>     <dbl>      <dbl>      <dbl>
1     A -0.05775928 0.07217200 -0.2021033 0.08658471 0.3865929   0.403623  0.3643583
2     B  0.87324927 0.07120697  0.7308353 1.01566320 0.3979347   0.381683  0.4096153

You can then combine everything with various geom_segments:

ggplot() +
  geom_density(data = dat, aes(x = rating, colour = cond)) +
  geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
             linetype = "dashed", size = 1) +
  geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
             linetype = "dotted", size = 1) +
  geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
               linetype = "dotted", size = 1)

enter image description here

As Axeman pointed out you can create a polygon based on the ribbon area as explained in this answer.

So for your data you can subset and add the additional rows like so:

ribbon <- ggplot_build(ggplot(dat, aes(x=rating, colour=cond)) + geom_density())$data[[1]] %>%
  mutate(cond = ifelse(group == 1, "A", "B")) %>%
  left_join(cdat.dens) %>%
  group_by(cond) %>%
  filter(x >= ci.low & x <= ci.upp) %>%
  select(cond, x, y)

ribbon <- rbind(data.frame(cond = c("A", "B"), x = c(-0.2021033, 0.7308353), y = c(0, 0)), 
                as.data.frame(ribbon), 
                data.frame(cond = c("A", "B"), x = c(0.08658471, 1.01566320), y = c(0, 0)))

And add geom_polygon to the plot:

ggplot() +
  geom_polygon(data = ribbon, aes(x = x, y = y, fill = cond), alpha = .5) +
  geom_density(data = dat, aes(x = rating, colour = cond)) +
  geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
             linetype = "dashed", size = 1) +
  geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
             linetype = "dotted", size = 1) +
  geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
               linetype = "dotted", size = 1)

enter image description here


Here's the adapted code for your real data. It's just a bit tricky to incorporate two groups instead of one:

cdat <- dat %>%
  group_by(direction, cond) %>%
  summarise(rating.mean = mean(rating, na.rm = TRUE),
            sem = sd(rating, na.rm = TRUE)/sqrt(length(rating)),
            ci.low = mean(rating, na.rm = TRUE) - 2*sem,
            ci.upp = mean(rating, na.rm = TRUE) + 2*sem)

cdat.dens <- ggplot_build(ggplot(dat, aes(x=rating, colour=interaction(direction, cond))) + geom_density())$data[[1]] %>%
  mutate(cond = ifelse((group == 1 | group == 2 | group == 3 | group == 4), "A",
                        ifelse((group == 5 | group == 6 | group == 7 | group == 8), "B", "C")),
         direction = ifelse((group == 1 | group == 5 | group == 9), "EAST",
                            ifelse((group == 2 | group == 6 | group == 10), "NORTH",
                                   ifelse((group == 3 | group == 7 | group == 11), "SOUTH", "WEST")))) %>%
  left_join(cdat) %>%
  select(y, x, cond, direction, rating.mean, sem, ci.low, ci.upp) %>%
  group_by(cond, direction) %>%
  mutate(dens.mean = approx(x, y, xout = rating.mean)[[2]],
         dens.cilow = approx(x, y, xout = ci.low)[[2]],
         dens.ciupp = approx(x, y, xout = ci.upp)[[2]]) %>%
  select(-y, -x) %>%
  slice(1)

ggplot() +
  geom_density(data = dat, aes(x = rating, colour = cond)) +
  geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
               linetype = "dashed", size = 1) +
  geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
               linetype = "dotted", size = 1) +
  geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
               linetype = "dotted", size = 1) +
  facet_wrap(~direction)

enter image description here

like image 160
erc Avatar answered Mar 12 '23 12:03

erc