I am trying to visualize continuous data points across two Time points (pre and post intervention) with three parallel Conditions (CET, RES, END), and Response to intervention (High or Low, i.e., CET_Hi, CET_Lo, etc)
I'd like to create a bar graph showing the mean output for each Condition on the X-axis, with separate bars for Time (Pre and Post). Then, I'd like to overlay the individual Subject data points at Pre and Post with lines to connect the Subjects data points and have the Responses grouped by color.
I have successfully created the bar graph using ggplot2 with the geom_bar function. I have also got geom_point to overlay the individual points by condition, but can't get the position aligned with the Time.
# Asker's original attempt; it reproduces the misaligned points described
# below.
# Bars: Leg_Press_Summary is not shown here (presumably the mean per
# Condition/Time); position_dodge() places the Time bars side by side.
ggplot(Leg_Press_Summary, aes(x=Condition, y=Leg_Press, fill=as.factor(Time))) +
geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Blues", name = "Time", labels = c("Pre", "Post")) +
# Points: no `position` argument is given, so every point is drawn at its
# Condition's x position instead of over the dodged Pre/Post bars.
geom_point(data=Phys_Data, aes(x=Condition, y=Leg_Press, colour=Response, fill=as.factor(Time))) +
# Lines: join the rows of each Subject, again at the undodged Condition
# position.
geom_line(data=Phys_Data, aes(x=Condition, y=Leg_Press, group=Subject)) +
labs(title="Leg Press", x="Condition", y ="Leg Press (kg)")
I expected the geom_points to be positioned according to Time; however, the points just stack in a vertical line between the Pre and Post bars for each condition.
My result:

Figure I'm trying to recreate:

How can I fix this?
Data set included below, which I forgot to include in original post.
LegPress
# A tibble: 36 x 5
Subject Time Condition Response Leg_Press
6 1 CET CET_Hi 212.
6 2 CET CET_Hi 300
9 1 CET CET_Lo 350
9 2 CET CET_Lo 370
14 1 CET CET_Hi 330
14 2 CET CET_Hi 450
26 1 CET CET_Hi 180
26 2 CET CET_Hi 250
28 1 CET CET_Lo 230
28 2 CET CET_Lo 275
29 1 CET CET_Lo 330
29 2 CET CET_Lo 325
2 1 RES RES_Hi 142.
2 2 RES RES_Hi 225
16 1 RES RES_Lo 280
16 2 RES RES_Lo 320
19 1 RES RES_Hi 205
19 2 RES RES_Hi 295
27 1 RES RES_Hi 175
27 2 RES RES_Hi 260
31 1 RES RES_Lo 340
31 2 RES RES_Lo 370
32 1 RES RES_Lo 310
32 2 RES RES_Lo 370
8 1 END END_Lo 205
8 2 END END_Lo 250
13 1 END END_Hi 310
13 2 END END_Hi 320
20 1 END END_Hi 200
20 2 END END_Hi 185
24 1 END END_Lo 260
24 2 END END_Lo 270
25 1 END END_Hi 210
25 2 END END_Hi 235
30 1 END END_Lo 250
30 2 END END_Lo 245
I think this is a case where you want to use faceting:
library(tidyverse)
# Raw leg-press data in long format: 18 subjects, each with one row for
# Time 1 and one for Time 2, so every per-subject value is repeated twice.
Phys_Data <- data.frame(
  Subject = rep(
    c(6, 9, 14, 26, 28, 29, 2, 16, 19, 27, 31, 32, 8, 13, 20, 24, 25, 30),
    each = 2
  ),
  Time = rep(c(1, 2), times = 18),
  Condition = rep(c("CET", "RES", "END"), each = 12),
  Response = rep(
    c(
      "CET_Hi", "CET_Lo", "CET_Hi", "CET_Hi", "CET_Lo", "CET_Lo",
      "RES_Hi", "RES_Lo", "RES_Hi", "RES_Hi", "RES_Lo", "RES_Lo",
      "END_Lo", "END_Hi", "END_Hi", "END_Lo", "END_Hi", "END_Lo"
    ),
    each = 2
  ),
  Leg_Press = c(
    212, 300, 350, 370, 330, 450, 180, 250, 230, 275, 330, 325,
    142, 225, 280, 320, 205, 295, 175, 260, 340, 370, 310, 370,
    205, 250, 310, 320, 200, 185, 260, 270, 210, 235, 250, 245
  ),
  stringsAsFactors = FALSE
)
# Facet by Condition so that Time can sit on the x-axis inside each panel;
# bars, points and subject lines then all share the same x positions.
Phys_Data %>%
  mutate(
    Time = as.factor(Time),
    # Keep only the part after the underscore for the colour legend.
    Response = str_split_fixed(Response, "_", 2)[, 2]
  ) %>%
  ggplot(aes(x = Time, y = Leg_Press, fill = Time)) +
  # Bars: mean Leg_Press per Time and Condition, computed from the plot data
  # that the layer inherits.
  geom_col(
    data = function(plot_data) {
      plot_data %>%
        group_by(Time, Condition) %>%
        summarize(Leg_Press = mean(Leg_Press)) %>%
        ungroup()
    }
  ) +
  # Individual observations, with one line per Subject joining its rows.
  geom_point(aes(color = Response)) +
  geom_line(aes(color = Response, group = Subject)) +
  facet_wrap(~Condition, strip.position = "bottom") +
  scale_fill_brewer(
    palette = "Blues",
    name = "Time",
    labels = c("Pre", "Post")
  ) +
  labs(title = "Leg Press", x = "Condition", y = "Leg Press (kg)") +
  # The fill legend already identifies Time, so hide the x-axis text/ticks.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

Created on 2019-09-04 by the reprex package (v0.3.0)
Loading packages:
library(dplyr); library(tidyr); library(ggplot2)
Setting up example data based loosely on your graphics:
# Fix the RNG seed so the simulated values are reproducible.
set.seed(4)
# Simulated example data. Time expands to 3 x 2 x 20 = 120 values and
# Condition to 3 x 40 = 120 values.
# NOTE(review): the rnorm() calls draw 20 + 20 + 20 + 320 + 20 + 320 = 720
# values, so data.frame() recycles Time and Condition six times to fill 720
# rows. The two `320` counts look like typos for `20` -- confirm before
# reusing; changing them will alter the printed summaries further down.
df <- data.frame(Time = rep(rep(c("pre", "post"), each=20),3),
Condition = rep(c("CET", "END", "RES"), each=40),
Leg_Press = c(rnorm(20, 275, 20), rnorm(20, 325, 20), rnorm(20, 245, 20), rnorm(320, 251, 20), rnorm(20, 247, 10), rnorm(320, 305, 10)))
Generate a summary table of mean, min and max value for each condition and time period:
# Mean, maximum and minimum Leg_Press for every Time x Condition combination.
dat <- df %>%
  group_by(Time, Condition) %>%
  summarise(
    mean = mean(Leg_Press),
    max = max(Leg_Press),
    min = min(Leg_Press)
  )
# Put the Time levels in chronological order ("pre" before "post"). The
# argument is spelled out as `levels` instead of relying on partial argument
# matching of `level`.
dat$Time <- factor(dat$Time, levels = c("pre", "post"))
# # A tibble: 6 x 5
# # Groups: Time [2]
# Time Condition mean max min
# <fct> <fct> <dbl> <dbl> <dbl>
# 1 post CET 283. 373. 209.
# 2 post END 277. 329. 200.
# 3 post RES 278. 328. 215.
# 4 pre CET 273. 326. 191.
# 5 pre END 276. 323. 197.
# 6 pre RES 276. 329. 204.
Simple bar-plot for Leg Press by Condition, separated into pre and post time periods:
# Dodged bars: the Time bars sit side by side within each Condition.
ggplot(data = dat, mapping = aes(x = Condition, y = mean, fill = Time)) +
  geom_col(position = "dodge")

Calculate the new x-value for each point for maximum and minimum value:
# Numeric x position for each point: the Condition's factor code, shifted
# left by 0.25 for "pre" rows and right by 0.25 for all other rows.
dat <- dat %>%
  mutate(
    new.x = ifelse(Time == "pre", -0.25, 0.25) +
      as.numeric(as.factor(Condition))
  )
# Bars as before, plus one point at the maximum and one at the minimum of
# every bar's group.
ggplot(data = dat) +
  geom_col(
    mapping = aes(x = Condition, y = mean, fill = Time),
    position = "dodge"
  ) +
  geom_point(mapping = aes(x = new.x, y = max)) +
  geom_point(mapping = aes(x = new.x, y = min))

To draw the line for each group, you need a data-frame for each set of maximum and minimum values.
# One row per Condition with the endpoints of the segment that joins the
# "pre" maximum (x1, y1) to the "post" maximum (x2, y2).
max.frame <- dat %>%
  group_by(Condition) %>%
  mutate(t2 = Time) %>%
  # Spread `max` into separate `pre` and `post` columns; cells with no value
  # for that Time become NA, hence na.rm = TRUE in the means below.
  spread(Time, max) %>%
  summarise(
    x1 = min(new.x),
    x2 = max(new.x),
    # TRUE is written out because `T` is an ordinary, reassignable variable.
    y1 = mean(pre, na.rm = TRUE),
    y2 = mean(post, na.rm = TRUE)
  )
# # A tibble: 3 x 5
# Condition x1 x2 y1 y2
# <fct> <dbl> <dbl> <dbl> <dbl>
# 1 CET 0.75 1.25 326. 373.
# 2 END 1.75 2.25 323. 329.
# 3 RES 2.75 3.25 329. 328.
# One row per Condition with the endpoints of the segment that joins the
# "pre" minimum (x1, y1) to the "post" minimum (x2, y2).
min.frame <- dat %>%
  group_by(Condition) %>%
  mutate(t2 = Time) %>%
  # Spread `min` into separate `pre` and `post` columns; cells with no value
  # for that Time become NA, hence na.rm = TRUE in the means below.
  spread(Time, min) %>%
  summarise(
    x1 = min(new.x),
    x2 = max(new.x),
    # TRUE is written out because `T` is an ordinary, reassignable variable.
    y1 = mean(pre, na.rm = TRUE),
    y2 = mean(post, na.rm = TRUE)
  )
# # A tibble: 3 x 5
# Condition x1 x2 y1 y2
# <fct> <dbl> <dbl> <dbl> <dbl>
# 1 CET 0.75 1.25 191. 209.
# 2 END 1.75 2.25 197. 200.
# 3 RES 2.75 3.25 204. 215.
Plot based on the three frames:
# Final figure: dodged mean bars, a segment joining the pre and post extremes
# of each Condition (maxima and minima), and the extreme points themselves.
# Every layer names its own data frame because the base plot has none.
ggplot() +
  geom_col(
    data = dat,
    mapping = aes(x = Condition, y = mean, fill = Time),
    position = "dodge"
  ) +
  geom_segment(
    data = max.frame,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2)
  ) +
  geom_segment(
    data = min.frame,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2)
  ) +
  geom_point(data = dat, mapping = aes(x = new.x, y = max)) +
  geom_point(data = dat, mapping = aes(x = new.x, y = min))

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With