I am trying to add means using geom_segment
to a geom_density_ridges
plot made in ggplot2.
library(dplyr)
library(ggplot2)
library(ggridges)
Fig1 <- ggplot(Figure3Data, aes(x = `hairchange`, y = `EffortGroup`)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1)
ingredients <- ggplot_build(Fig1) %>% purrr::pluck("data", 1)
density_lines <- ingredients %>%
group_by(group) %>% filter(density == mean(density)) %>% ungroup()
p <- ggplot(Figure3Data, aes(x = `hairchange`, y = `EffortGroup`)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn( colours = c("#0000FF", "#FFFFFF", "#FF0000"),name =
NULL, limits=c(-2,2))+ coord_flip() +
theme_ridges(font_size = 20, grid=TRUE, line_size=1,
center_axis_labels=TRUE) +
scale_x_continuous(name='Average Self-Perceived Hair Change', limits=c(-2,2))+
ylab('Total SSM Effort (hours)')+
geom_segment(data =density_lines,
aes(x = x, y = ymin, xend = x, yend = ymin+density*scale*iscale))
print(p)
However, I am getting the following error: "Error: data
must be uniquely named but has duplicate elements". Below is a plot without the means for the dataset I have. Any suggestions on how to fix the code?
The first 35 rows of data are below:
structure(list(MonthsMassage = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 1, 1), MinutesPerDayMassage = c("0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "11-20 minutes daily",
"11-20 minutes daily", "11-20 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "11-20 minutes daily", "11-20 minutes daily"
), Minutes = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15),
hairchange = c(-1, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1,
-1, 0, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, -1,
0, 1, -1, 0, 0, -1, 0), HairType1 = c("Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "other", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "other",
"other", "other", "Templefrontal", "Templefrontal", "other",
"Templefrontal", "other", "Templefrontal", "Templefrontal"
), HairType2 = c("other", "other", "other", "other", "other",
"other", "other", "other", "other", "Vertexthinning", "Vertexthinning",
"other", "Vertexthinning", "other", "other", "Vertexthinning",
"other", "Vertexthinning", "Vertexthinning", "other", "other",
"other", "Vertexthinning", "other", "Vertexthinning", "other",
"other", "other", "other", "other", "other", "Vertexthinning",
"other", "other", "other"), HairType3 = c("other", "Diffusethinning",
"other", "Diffusethinning", "other", "other", "Diffusethinning",
"Diffusethinning", "Diffusethinning", "other", "Diffusethinning",
"Diffusethinning", "other", "other", "Diffusethinning", "Diffusethinning",
"other", "Diffusethinning", "Diffusethinning", "Diffusethinning",
"other", "other", "other", "other", "other", "other", "other",
"other", "other", "Diffusethinning", "other", "other", "other",
"other", "other"), Effort = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 5, 5, 5, 5, 5, 7.5, 7.5), EffortGroup = c("<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "12.5", "12.5",
"12.5", "12.5", "12.5", "12.5", "12.5")), row.names = c(NA,
-35L), class = c("tbl_df", "tbl", "data.frame"))
geom_density_ridges (): first estimates data densities and then draws those using ridgelines. It arranges multiple density plots in a staggered fashion. geom_density_ridges (): Estimates data densities and then draws those using ridgelines. It arranges multiple density plots in a staggered fashion.
The density ridgeline plot [ggridges package] is an alternative to the standard geom_density ()
The ridgeline plot with vertical mean line for each group, in this case “year”, would help easily understand the mean trend over years. To add mean line to ridgeline plot with ggridges, we need to use quantile_lines and quantile_fun arguments inside geom_density_ridges () function from ggridges package.
It appears that the geom_density_ridges () geom cannot take weights in the calculation of the densities. However, it is quite common that... This topic was automatically closed 21 days after the last reply.
If I understand correctly, the OP wants to plot a horizontal line at a position where the density equals the mean density for each of the ridgelines.
The expression
density_lines <- ingredients %>%
group_by(group) %>% filter(density == mean(density)) %>% ungroup()
returns an empty dataset as there is no record where the density
value exactly matches mean(density)
.
However, it does work for the overall maximum (but not for all of the local maxima)
density_lines <- ingredients %>%
group_by(group) %>% filter(density == max(density)) %>% ungroup()
which gives
As there is no exactly match, the closest value can be picked by
density_lines <- ingredients %>%
group_by(group) %>%
top_n(1, -abs(density - mean(density)))
which plots as
This plots one segment per ridgeline but we expect to see 4 segments in each of the curve branches (those where the maximum of the adjacent peak is greater than the mean). With
density_lines <- ingredients %>%
group_by(group) %>%
top_n(4, -abs(density - mean(density)))
we get
You can play around with the n
parameter to top_n()
but IMHO the correct way would be to group each ridgeline from peak to valley and from valley to peak to get one segment for each of the curve branches.
Alternatively, we can filter using the near()
function. This function requires to specify a tolerance tol
which we need to compute from the dataset:
density_lines <- ingredients %>%
group_by(group) %>%
filter(near(
density, mean(density),
tol = ingredients %>% summarise(0.25 * max(abs(diff(density)))) %>% pull()
))
For the carefully selected factor 0.25
(try and error) we do get
It seems I had misinterpreted OP's intentions. Now, we will try to plot a vertical line at mean(density)
using geom_hline
(with coord_flip()
, geom_hline()
creates a vertical line).
Again, we follow OP's clever approach to extract densities and scale factors from the created plot.
# create plot object
Fig1 <- ggplot(Figure3Data, aes(x = hairchange, y = EffortGroup)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn(
colours = c("#0000FF", "#FFFFFF", "#FF0000"),
name =
NULL,
limits = c(-2, 2)
) + coord_flip() +
theme_ridges(
font_size = 20,
grid = TRUE,
line_size = 1,
center_axis_labels = TRUE
) +
scale_x_continuous(name = 'Average Self-Perceived Hair Change', limits =
c(-2, 2)) +
ylab('Total SSM Effort (hours)')
# extract plot data and summarise
mean_density <-
ggplot_build(Fig1) %>%
purrr::pluck("data", 1) %>%
group_by(group) %>%
summarise(density = mean(density), scale = first(scale), iscale = first(iscale))
# add hline and plot
Fig1 +
geom_hline(aes(yintercept = group + density * scale * iscale),
data = mean_density)
The OP has clarified that
I want was the mean self perceived hair change (y axis data) for each of the 10 ridgelines.
This can be achieved in the following steps:
EffortGroup
.approx()
The mean self perceived hair change for each EffortGroup
is computed by
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange))
which yields (for the posted subset of OP's data):
EffortGroup x_mean <chr> <dbl> 1 <5 -0.643 2 12.5 -0.143
All steps together:
# create plot object
Fig1 <- ggplot(Figure3Data, aes(x = hairchange, y = EffortGroup)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn(
colours = c("#0000FF", "#FFFFFF", "#FF0000"),
name = NULL,
limits = c(-2, 2)) +
coord_flip() +
theme_ridges(
font_size = 20,
grid = TRUE,
line_size = 1,
center_axis_labels = TRUE) +
scale_x_continuous(name = 'Average Self-Perceived Hair Change',
limits = c(-2, 2)) +
ylab('Total SSM Effort (hours)')
density_lines <-
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange)) %>%
mutate(group = as.integer(factor(EffortGroup))) %>%
left_join(ggplot_build(Fig1) %>% purrr::pluck("data", 1),
on = "group") %>%
group_by(group) %>%
summarise(x_mean = first(x_mean),
density = approx(x, density, first(x_mean))$y,
scale = first(scale),
iscale = first(iscale))
# add segments and plot
Fig1 +
geom_segment(aes(x = x_mean,
y = group,
xend = x_mean,
yend = group + density * scale * iscale),
data = density_lines)
The OP has asked to reorder the horizontal axis appropriately. This can be done by coercing EffortGroup
from type character
to factor
beforehand where the factor levels are explicitly specified in the expected order:
# turn EffortGroup into factor with levels in desired order
lvls <- c("<5", "12.5", "22.5", "35", "50", "75", "105", "152", "210", "210+")
Figure3Data <-
Figure3Data %>%
mutate(EffortGroup = factor(EffortGroup, levels = lvls))
Alternatively, EffortGroup
can be derived directly from the given Effort
values by
# create Effort Group from scratch
lvls <- c("<5", "12.5", "22.5", "35", "50", "75", "105", "152", "210", "210+")
brks <- c(-Inf, 5, 12.5, 22.5, 35, 50, 75, 105, 152, 210, Inf)
Figure3Data <-
Figure3Data %>%
mutate(EffortGroup = cut(Effort, brks, lvls, right = FALSE))
In any case, the computation of density_lines
has to be amended as EffortGroup
is a factor already:
density_lines <-
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange)) %>%
mutate(group = as.integer(EffortGroup)) %>% # remove call to factor() here
left_join( ...
With the full dataset supplied by the OP (link) the plot finally becomes
The locations of the mean self perceived hair change for each EffortGroup
are given by
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange))
# A tibble: 10 x 2 EffortGroup x_mean <fct> <dbl> 1 <5 -0.643 2 12.5 -0.393 3 22.5 -0.118 4 35 -0.0606 5 50 0.286 6 75 0 7 105 0.152 8 152 0.167 9 210 0.379 10 210+ 0.343
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With