I'm struggling to create a series of high-quality ggboxplots, like the one I attach as follows:
Statistics for post-hocs comparisons have been obtained for the example above in the way you could find following this link page and I've run the following code
#Compute the post-hocs
postHocs <- df_join %>%
tidyr::pivot_longer(., -c(ID, GR, SES, COND),'signals')%>%
mutate(signals = fct_relevel(signals,
c("P3FCz", "P3Cz", "P3Pz",
"LPPearlyFCz", "LPPearlyCz", "LPPearlyPz",
"LPP1FCz", "LPP1Cz", "LPP1Pz",
"LPP2FCz", "LPP2Cz", "LPP2Pz"))) %>%
arrange(signals) %>%
group_by(signals) %>%
pairwise_t_test(
value ~ COND, paired = TRUE,
p.adjust.method = "bonferroni"
) %>%
#dplyr::select(., -'signals')%>%
print()
while the Anova statistics have been obtained:
res.aov <- df_join %>%
tidyr::pivot_longer(., -c(ID, GR, SES, COND),'signals')%>%
mutate(signals = fct_relevel(signals,
c("P3FCz", "P3Cz", "P3Pz",
"LPPearlyFCz", "LPPearlyCz", "LPPearlyPz",
"LPP1FCz", "LPP1Cz", "LPP1Pz",
"LPP2FCz", "LPP2Cz", "LPP2Pz"))) %>%
arrange(signals) %>%
group_by(signals) %>%
anova_test(., dv = value, wid = ID, within = COND) %>%
print()
from which I've tried to obtain the interested statistics with this code:
unnested_aov <- lapply(res.aov$anova, `[[`, 1)
I scripted this for loop to adding the statistics to graphs
comparisons <- postHocs %>% add_xy_position(x = "COND") comparisons
for (i in 5:ncol(df_join)) {
bxp <- ggboxplot(df_join,
x = 'COND', y = colnames(df_join[i]))
print(
bxp + stat_pvalue_manual(comparisons[,i]) +
labs(
subtitle = get_test_label(unnested_aov[[i]], detailed = TRUE),
caption = get_pwc_label(comparisons[,i])))
Sys.sleep(1)
}
Since I'm getting back some error, I thing that the problem is that 'comparison' contains 36 rows and length does not fit well the number of the graph I should obtain (12).
I'll let you here the code below and I'll be open to your best suggestion in this concerns.
Thank yuo
> dput(head(df_join))
structure(list(ID = c("01", "01", "01", "04", "04", "04"), GR = c("RP",
"RP", "RP", "RP", "RP", "RP"), SES = c("V", "V", "V", "V", "V",
"V"), COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), P3FCz = c(-11.6312151716924,
-11.1438413285935, -3.99591470944713, -0.314155675382471, 0.238885648959708,
5.03749946898385), P3Cz = c(-5.16524399006139, -5.53112490175437,
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441
), P3Pz = c(11.8802266972569, 12.1053426662461, 12.955441582096,
15.0981004360619, 15.4046229884164, 16.671036999147), LPPearlyFCz = c(-11.7785042972793,
-9.14927207125904, -7.58190508537766, -4.01515836011381, -6.60165385653499,
-2.02861964460179), LPPearlyCz = c(-5.96429031525769, -5.10918437158799,
-2.81732229625975, -1.43557366487622, -3.14872157912645, 0.160393685024631
), LPPearlyPz = c(8.23981597718437, 9.51261484648731, 9.42367409925817,
5.06332653216481, 5.02619159395405, 9.07903916629231), LPP1FCz = c(-5.67295796971287,
-4.3918290080777, -2.96652960658775, 0.159183652691071, -1.78361184935376,
1.97377908783621), LPP1Cz = c(-0.774461731301161, -0.650009462761383,
1.14010250644923, 1.51403741206392, 0.25571835554024, 3.76051565494304
), LPP1Pz = c(9.99385579756163, 11.1212652173052, 10.6989716871958,
3.7899021820967, 4.59413830322224, 8.52123662617732), LPP2FCz = c(-0.198736254963744,
-3.16101041766438, 0.895992279831378, 3.11042068112836, 2.27800090558473,
3.83846437952292), LPP2Cz = c(2.96437294922766, -2.12913230708907,
2.94619035115619, 3.44844607014521, 3.02403433835637, 4.7045767546583
), LPP2Pz = c(6.28027312932027, 5.24535230966772, 7.68162285335806,
1.08242973465635, 2.99896314000211, 5.36085942954182)), row.names = c(NA,
6L), class = "data.frame")
This takes a bit more work.
Let's start with data preparation
#Loading libraries
library(tidyverse)
library(rstatix)
library(ggpubr)
library(readxl)
#Upload data
df_join <- read_excel("df_join.xlsx")
df = df_join %>%
mutate_at(vars(ID:COND), factor) %>%
pivot_longer(P3FCz:LPP2Pz, names_to = "signals") %>%
group_by(signals) %>%
nest()
#Let's see what we have here
df
df$data[[1]]
output
# A tibble: 12 x 2
# Groups: signals [12]
signals data
<chr> <list>
1 P3FCz <tibble [75 x 5]>
2 P3Cz <tibble [75 x 5]>
3 P3Pz <tibble [75 x 5]>
4 LPPearlyFCz <tibble [75 x 5]>
5 LPPearlyCz <tibble [75 x 5]>
6 LPPearlyPz <tibble [75 x 5]>
7 LPP1FCz <tibble [75 x 5]>
8 LPP1Cz <tibble [75 x 5]>
9 LPP1Pz <tibble [75 x 5]>
10 LPP2FCz <tibble [75 x 5]>
11 LPP2Cz <tibble [75 x 5]>
12 LPP2Pz <tibble [75 x 5]>
> df$data[[1]]
# A tibble: 75 x 5
ID GR SES COND value
<fct> <fct> <fct> <fct> <dbl>
1 01 RP V NEG-CTR -11.6
2 01 RP V NEG-NOC -11.1
3 01 RP V NEU-NOC -4.00
4 04 RP V NEG-CTR -0.314
5 04 RP V NEG-NOC 0.239
6 04 RP V NEU-NOC 5.04
7 06 RP V NEG-CTR -0.214
8 06 RP V NEG-NOC -2.96
9 06 RP V NEU-NOC -1.97
10 07 RP V NEG-CTR -2.83
# ... with 65 more rows
Now we have to prepare functions that will return us some statistics
#Preparation of functions for statistics
ShapiroTest = function(d, alpha=0.05){
st = list(statistic = NA, p.value = NA, test = FALSE)
if(length(d)>5000) d=sample(d, 5000)
if(length(d)>=3 & length(d)<=5000){
sw = shapiro.test(d)
st$statistic = sw$statistic
st$p.value = sw$p.value
st$test = sw$p.value>alpha
}
return(st)
}
sum_stats = function(data, group, value, alpha=0.05)data %>%
group_by(!!enquo(group)) %>%
summarise(
n = n(),
q1 = quantile(!!enquo(value),1/4,8),
min = min(!!enquo(value)),
mean = mean(!!enquo(value)),
median = median(!!enquo(value)),
q3 = quantile(!!enquo(value),3/4,8),
max = max(!!enquo(value)),
sd = sd(!!enquo(value)),
kurtosis = e1071::kurtosis(!!enquo(value)),
skewness = e1071::skewness(!!enquo(value)),
SW.stat = ShapiroTest(!!enquo(value), alpha)$statistic,
SW.p = ShapiroTest(!!enquo(value), alpha)$p.value,
SW.test = ShapiroTest(!!enquo(value), alpha)$test,
nout = length(boxplot.stats(!!enquo(value))$out)
)
#Using the sum stats function
df$data[[1]] %>% sum_stats(COND, value)
df %>% group_by(signals) %>%
mutate(stats = map(data, ~sum_stats(.x, COND, value))) %>%
unnest(stats)
output
# A tibble: 36 x 17
# Groups: signals [12]
signals data COND n q1 min mean median q3 max sd kurtosis skewness SW.stat SW.p SW.test nout
<chr> <list> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <int>
1 P3FCz <tibble [7~ NEG-~ 25 -5.73 -11.6 -1.59 -1.83 1.86 9.57 4.93 -0.612 0.128 0.984 0.951 TRUE 0
2 P3FCz <tibble [7~ NEG-~ 25 -4.14 -11.1 -1.39 -0.522 0.659 8.16 4.44 -0.169 -0.211 0.978 0.842 TRUE 1
3 P3FCz <tibble [7~ NEU-~ 25 -5.35 -6.97 -1.48 -1.97 1.89 6.24 4.00 -1.28 0.189 0.943 0.174 TRUE 0
4 P3Cz <tibble [7~ NEG-~ 25 -2.76 -8.16 0.313 0.906 2.45 13.7 4.58 0.867 0.622 0.952 0.277 TRUE 1
5 P3Cz <tibble [7~ NEG-~ 25 -2.22 -9.23 0.739 0.545 3.10 14.5 4.78 1.05 0.527 0.963 0.477 TRUE 1
6 P3Cz <tibble [7~ NEU-~ 25 -4.14 -6.07 -0.107 0.622 2.16 7.76 3.88 -1.09 0.0112 0.957 0.359 TRUE 0
7 P3Pz <tibble [7~ NEG-~ 25 5.66 -0.856 8.79 7.48 11.9 23.4 5.22 0.502 0.673 0.961 0.436 TRUE 1
8 P3Pz <tibble [7~ NEG-~ 25 3.86 -0.888 8.45 7.88 11.2 20.7 5.23 -0.530 0.206 0.982 0.924 TRUE 0
9 P3Pz <tibble [7~ NEU-~ 25 2.30 -0.932 6.43 6.82 8.46 16.7 4.60 -0.740 0.373 0.968 0.588 TRUE 0
10 LPPearlyFCz <tibble [7~ NEG-~ 25 -4.02 -11.8 -0.666 0.149 1.90 13.3 5.02 0.916 0.202 0.947 0.215 TRUE 1
# ... with 26 more rows
Next we need to analyze the QQ charts. To do this, let's prepare the appropriate functions.
#function that creates a QQ plot
SignalQQPlot = function(data, signal, autor = "G. Anonim") data %>%
group_by(COND) %>%
mutate(label = paste("\nSW p-value:" ,
signif(shapiro.test(value)$p.value, 3))) %>%
ggqqplot("value", facet.by = "COND") %>%
ggpar(title = paste("QQ plots for", signal, "signal"),
caption = autor)+
geom_label(aes(x=0, y=+Inf, label=label))
#QQ plot for the P3FCz signal
df$data[[1]] %>% SignalQQPlot("P3FCz")
#A function that creates a QQ plot and adds it to a data frame
AddSignalQQPlot = function(df, signal, printPlot=TRUE) {
plot = SignalQQPlot(df$data[[1]], signal)
if(printPlot) print(plot)
df %>% mutate(qqplot = list(plot))
}
#Added QQ charts
df %>% group_by(signals) %>%
group_modify(~AddSignalQQPlot(.x, .y))
Correct the author's name.
Now we will add statistics from ANOVA and tTest
#Add ANOVA test value
fAddANOVA = function(data) data %>%
anova_test(value~COND) %>% as_tibble()
#Method 1
df %>% group_by(signals) %>%
group_modify(~fAddANOVA(.x$data[[1]]))
#Method 2
df %>% group_by(signals) %>%
mutate(ANOVA = map(data, ~fAddANOVA(.x))) %>%
unnest(ANOVA)
output
# A tibble: 12 x 9
# Groups: signals [12]
signals data Effect DFn DFd F p `p<.05` ges
<chr> <list> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
1 P3FCz <tibble [75 x 5]> COND 2 72 0.012 0.988 "" 0.000338
2 P3Cz <tibble [75 x 5]> COND 2 72 0.228 0.797 "" 0.006
3 P3Pz <tibble [75 x 5]> COND 2 72 1.61 0.207 "" 0.043
4 LPPearlyFCz <tibble [75 x 5]> COND 2 72 0.885 0.417 "" 0.024
5 LPPearlyCz <tibble [75 x 5]> COND 2 72 2.65 0.077 "" 0.069
6 LPPearlyPz <tibble [75 x 5]> COND 2 72 4.62 0.013 "*" 0.114
7 LPP1FCz <tibble [75 x 5]> COND 2 72 1.08 0.344 "" 0.029
8 LPP1Cz <tibble [75 x 5]> COND 2 72 2.45 0.094 "" 0.064
9 LPP1Pz <tibble [75 x 5]> COND 2 72 3.97 0.023 "*" 0.099
10 LPP2FCz <tibble [75 x 5]> COND 2 72 0.103 0.903 "" 0.003
11 LPP2Cz <tibble [75 x 5]> COND 2 72 0.446 0.642 "" 0.012
12 LPP2Pz <tibble [75 x 5]> COND 2 72 1.17 0.316 "" 0.031
And for tTest
#Add pairwise_t_test
fAddtTest = function(data) data %>%
pairwise_t_test(value~COND, paired = TRUE,
p.adjust.method = "bonferroni")
#Method 1
df %>% group_by(signals) %>%
group_modify(~fAddtTest(.x$data[[1]]))
#Method 2
df %>% group_by(signals) %>%
mutate(tTest = map(data, ~fAddtTest(.x))) %>%
unnest(tTest)
output
# A tibble: 36 x 12
# Groups: signals [12]
signals data .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
<chr> <list> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
1 P3FCz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 -0.322 24 0.75 1 ns
2 P3FCz <tibble [75 x 5]> value NEG-CTR NEU-NOC 25 25 -0.178 24 0.86 1 ns
3 P3FCz <tibble [75 x 5]> value NEG-NOC NEU-NOC 25 25 0.112 24 0.911 1 ns
4 P3Cz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 -0.624 24 0.539 1 ns
5 P3Cz <tibble [75 x 5]> value NEG-CTR NEU-NOC 25 25 0.592 24 0.559 1 ns
6 P3Cz <tibble [75 x 5]> value NEG-NOC NEU-NOC 25 25 0.925 24 0.364 1 ns
7 P3Pz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 0.440 24 0.664 1 ns
8 P3Pz <tibble [75 x 5]> value NEG-CTR NEU-NOC 25 25 3.11 24 0.005 0.014 *
9 P3Pz <tibble [75 x 5]> value NEG-NOC NEU-NOC 25 25 2.43 24 0.023 0.069 ns
10 LPPearlyFCz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 -0.0919 24 0.928 1 ns
# ... with 26 more rows
Now is the time to create a boxplot
#Function to special boxplot
SpecBoxplot = function(data, autor = "G. Anonim"){
box = data %>% ggboxplot(x = "COND", y = "value", add = "point")
pwc = data %>%
pairwise_t_test(value~COND, paired = TRUE,
p.adjust.method = "bonferroni") %>%
add_xy_position(x = "COND")
res.aov = data %>% anova_test(value~COND)
data %>%
ggboxplot(x = "COND", y = "value", add = "point") +
stat_pvalue_manual(pwc) +
labs(
title = get_test_label(res.aov, detailed = TRUE),
subtitle = get_pwc_label(pwc),
caption = autor
)
}
#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot()
#A function that creates a special boxplot and adds it to a data frame
AddSignalBoxplot = function(df, signal, printPlot=TRUE) {
plot = SpecBoxplot(df$data[[1]], signal)
if(printPlot) print(plot)
df %>% mutate(boxplot = list(plot))
}
#Added special boxplot
df %>% group_by(signals) %>%
group_modify(~AddSignalBoxplot(.x, .y))
Due to the inconsistency with the normal distribution in the modified function, I used the Wilcoxon test for comparison. I also improved the boxplots a bit.
#Function to special boxplot2
SpecBoxplot2 = function(data, signal, autor = "G. Anonim"){
pwc = data %>% pairwise_wilcox_test(value~COND) %>%
add_xy_position(x = "COND") %>%
mutate(COND="NEG-CTR",
lab = paste(p, " - ", p.adj.signif))
data %>% ggplot(aes(COND, value, fill=COND))+
geom_violin(alpha=0.2)+
geom_boxplot(outlier.shape = 23,
outlier.size = 3,
alpha=0.6)+
geom_jitter(shape=21, width =0.1)+
stat_pvalue_manual(pwc, step.increase=0.05, label = "lab")+
labs(title = signal,
subtitle = get_pwc_label(pwc),
caption = autor)
}
#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot2("P3FCz")
#A function that creates a special boxplot2 and adds it to a data frame
AddSignalBoxplot2 = function(df, signal, printPlot=TRUE) {
plot = SpecBoxplot2(df$data[[1]], signal)
if(printPlot) print(plot)
df %>% mutate(boxplot = list(plot))
}
#Added special boxplot2
df %>% group_by(signals) %>%
group_modify(~AddSignalBoxplot2(.x, .y))
Special update fom my frend
Hey @mały_statystyczny! You will be great!
Here is a special update for you. Below is a function that prepares graphs with both parametric and non-parametric statistics.
#Function to special boxplot3
SpecBoxplot3 = function(data, signal, parametric = FALSE, autor = "G. Anonim"){
if(parametric) {
pwc = data %>%
pairwise_t_test(value~COND, paired = TRUE,
p.adjust.method = "bonferroni") %>%
add_xy_position(x = "COND") %>%
mutate(COND="NEG-CTR",
lab = paste(p, " - ", p.adj.signif))
res.test = data %>% anova_test(value~COND)
} else {
pwc = data %>% pairwise_wilcox_test(value~COND) %>%
add_xy_position(x = "COND") %>%
mutate(COND="NEG-CTR",
lab = paste(p, " - ", p.adj.signif))
res.test = data %>% kruskal_test(value~COND)
}
data %>% ggplot(aes(COND, value, fill=COND))+
geom_violin(alpha=0.2)+
geom_boxplot(outlier.shape = 23,
outlier.size = 3,
alpha=0.6)+
geom_jitter(shape=21, width =0.1)+
stat_pvalue_manual(pwc, step.increase=0.05, label = "lab")+
ylab(signal)+
labs(title = get_test_label(res.test, detailed = TRUE),
subtitle = get_pwc_label(pwc),
caption = autor)
}
#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot3("P3FCz", TRUE)
df$data[[1]] %>% SpecBoxplot3("P3FCz", FALSE)
#A function that creates a special boxplot3 and adds it to a data frame
AddSignalBoxplot3 = function(df, signal, printPlot=TRUE) {
plot1 = SpecBoxplot3(df$data[[1]], signal, TRUE)
plot2 = SpecBoxplot3(df$data[[1]], signal, FALSE)
if(printPlot) print(plot1)
if(printPlot) print(plot2)
df %>% mutate(boxplot1 = list(plot1),
boxplot2 = list(plot2),
)
}
#Added special boxplot3
df %>% group_by(signals) %>%
group_modify(~AddSignalBoxplot3(.x, .y))
Here are the results of its operation
Another possible solution:
library(tidyverse)
library(rstatix)
library(ggpubr)
library(readxl)
data <- read_excel("df_join.xlsx")
n <- ncol(data)-4
lAnovas <- map(1:n, ~ anova_test(
data = data.frame(data[,c(1,4)], data[,.x+4]) %>%
setNames(c("ID", "COND", names(data)[.x+4])),
dv = !!names(data)[.x+4], wid = ID, within = COND) %>%
get_anova_table())
lPwc <- map(1:n, ~ data %>%
pairwise_t_test(
formula(
paste0(names(data)[.x+4]," ~ COND")),
paired = TRUE,
p.adjust.method = "bonferroni"
)
)
lPwc <- map(lPwc, ~ .x %>% add_xy_position(x = "COND"))
drawbox <- function(x)
{
ggboxplot(data = data,
x = "COND", y = names(data)[x+4], add = "point") +
stat_pvalue_manual(lPwc[[x]]) +
labs(
subtitle = get_test_label(lAnovas[[x]], detailed = TRUE),
caption = get_pwc_label(lPwc[[x]])
)
}
walk(1:n, ~ drawbox(.x) %>% print())
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With