Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to print on a serie sof graphs pairwise comparisons bars and effect size value?

I'm struggling to create a series of high-quality ggboxplots, like the one I attach as follows:

enter image description here

  1. With labels for ANOVA with F(df) test value, p.value and effects size
  2. With multi-pairwose comparisons bars with bars and stars with significant difference.

Statistics for post-hocs comparisons have been obtained for the example above in the way you could find following this link page and I've run the following code

#Compute the post-hocs
postHocs <- df_join %>%
  tidyr::pivot_longer(., -c(ID, GR, SES, COND),'signals')%>%
  mutate(signals = fct_relevel(signals, 
                               c("P3FCz", "P3Cz", "P3Pz", 
                                 "LPPearlyFCz", "LPPearlyCz", "LPPearlyPz",
                                 "LPP1FCz", "LPP1Cz", "LPP1Pz", 
                                 "LPP2FCz", "LPP2Cz", "LPP2Pz"))) %>% 
  arrange(signals) %>%
  group_by(signals) %>% 
  pairwise_t_test(
    value ~ COND, paired = TRUE,
    p.adjust.method = "bonferroni"
  ) %>% 
  #dplyr::select(., -'signals')%>% 
  print()

while the Anova statistics have been obtained:

res.aov <- df_join %>% 
  tidyr::pivot_longer(., -c(ID, GR, SES, COND),'signals')%>%
  mutate(signals = fct_relevel(signals, 
                               c("P3FCz", "P3Cz", "P3Pz", 
                                 "LPPearlyFCz", "LPPearlyCz", "LPPearlyPz",
                                 "LPP1FCz", "LPP1Cz", "LPP1Pz", 
                                 "LPP2FCz", "LPP2Cz", "LPP2Pz"))) %>% 
  arrange(signals) %>%
  group_by(signals) %>% 
  anova_test(., dv = value, wid = ID, within = COND) %>% 
  print()

from which I've tried to obtain the interested statistics with this code:

unnested_aov <- lapply(res.aov$anova, `[[`, 1)

I scripted this for loop to adding the statistics to graphs

comparisons <- postHocs %>% add_xy_position(x = "COND") comparisons

for (i in 5:ncol(df_join)) {
  bxp <- ggboxplot(df_join,
                   x = 'COND', y = colnames(df_join[i]))
  print(
    bxp +   stat_pvalue_manual(comparisons[,i]) +
      labs(
        subtitle = get_test_label(unnested_aov[[i]], detailed = TRUE),
        caption = get_pwc_label(comparisons[,i])))
  Sys.sleep(1)
}

Since I'm getting back some error, I thing that the problem is that 'comparison' contains 36 rows and length does not fit well the number of the graph I should obtain (12).

I'll let you here the code below and I'll be open to your best suggestion in this concerns.

Thank yuo

> dput(head(df_join))
structure(list(ID = c("01", "01", "01", "04", "04", "04"), GR = c("RP", 
"RP", "RP", "RP", "RP", "RP"), SES = c("V", "V", "V", "V", "V", 
"V"), COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", 
"NEG-NOC", "NEU-NOC"), class = "factor"), P3FCz = c(-11.6312151716924, 
-11.1438413285935, -3.99591470944713, -0.314155675382471, 0.238885648959708, 
5.03749946898385), P3Cz = c(-5.16524399006139, -5.53112490175437, 
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441
), P3Pz = c(11.8802266972569, 12.1053426662461, 12.955441582096, 
15.0981004360619, 15.4046229884164, 16.671036999147), LPPearlyFCz = c(-11.7785042972793, 
-9.14927207125904, -7.58190508537766, -4.01515836011381, -6.60165385653499, 
-2.02861964460179), LPPearlyCz = c(-5.96429031525769, -5.10918437158799, 
-2.81732229625975, -1.43557366487622, -3.14872157912645, 0.160393685024631
), LPPearlyPz = c(8.23981597718437, 9.51261484648731, 9.42367409925817, 
5.06332653216481, 5.02619159395405, 9.07903916629231), LPP1FCz = c(-5.67295796971287, 
-4.3918290080777, -2.96652960658775, 0.159183652691071, -1.78361184935376, 
1.97377908783621), LPP1Cz = c(-0.774461731301161, -0.650009462761383, 
1.14010250644923, 1.51403741206392, 0.25571835554024, 3.76051565494304
), LPP1Pz = c(9.99385579756163, 11.1212652173052, 10.6989716871958, 
3.7899021820967, 4.59413830322224, 8.52123662617732), LPP2FCz = c(-0.198736254963744, 
-3.16101041766438, 0.895992279831378, 3.11042068112836, 2.27800090558473, 
3.83846437952292), LPP2Cz = c(2.96437294922766, -2.12913230708907, 
2.94619035115619, 3.44844607014521, 3.02403433835637, 4.7045767546583
), LPP2Pz = c(6.28027312932027, 5.24535230966772, 7.68162285335806, 
1.08242973465635, 2.99896314000211, 5.36085942954182)), row.names = c(NA, 
6L), class = "data.frame")
like image 760
creativity Avatar asked Oct 11 '21 15:10

creativity


2 Answers

This takes a bit more work.
Let's start with data preparation

#Loading libraries
library(tidyverse)
library(rstatix)
library(ggpubr)
library(readxl)

#Upload data
df_join <- read_excel("df_join.xlsx")

df = df_join  %>%
  mutate_at(vars(ID:COND), factor) %>%
  pivot_longer(P3FCz:LPP2Pz, names_to = "signals") %>%
  group_by(signals) %>%
  nest()

#Let's see what we have here
df
df$data[[1]]

output

# A tibble: 12 x 2
# Groups:   signals [12]
   signals     data             
   <chr>       <list>           
 1 P3FCz       <tibble [75 x 5]>
 2 P3Cz        <tibble [75 x 5]>
 3 P3Pz        <tibble [75 x 5]>
 4 LPPearlyFCz <tibble [75 x 5]>
 5 LPPearlyCz  <tibble [75 x 5]>
 6 LPPearlyPz  <tibble [75 x 5]>
 7 LPP1FCz     <tibble [75 x 5]>
 8 LPP1Cz      <tibble [75 x 5]>
 9 LPP1Pz      <tibble [75 x 5]>
10 LPP2FCz     <tibble [75 x 5]>
11 LPP2Cz      <tibble [75 x 5]>
12 LPP2Pz      <tibble [75 x 5]>
> df$data[[1]]
# A tibble: 75 x 5
   ID    GR    SES   COND      value
   <fct> <fct> <fct> <fct>     <dbl>
 1 01    RP    V     NEG-CTR -11.6  
 2 01    RP    V     NEG-NOC -11.1  
 3 01    RP    V     NEU-NOC  -4.00 
 4 04    RP    V     NEG-CTR  -0.314
 5 04    RP    V     NEG-NOC   0.239
 6 04    RP    V     NEU-NOC   5.04 
 7 06    RP    V     NEG-CTR  -0.214
 8 06    RP    V     NEG-NOC  -2.96 
 9 06    RP    V     NEU-NOC  -1.97 
10 07    RP    V     NEG-CTR  -2.83 
# ... with 65 more rows

Now we have to prepare functions that will return us some statistics

#Preparation of functions for statistics
ShapiroTest = function(d, alpha=0.05){
  st = list(statistic = NA, p.value = NA, test = FALSE)
  if(length(d)>5000) d=sample(d, 5000)
  if(length(d)>=3 & length(d)<=5000){
    sw = shapiro.test(d)
    st$statistic = sw$statistic
    st$p.value = sw$p.value
    st$test = sw$p.value>alpha
  }
  return(st)
}

sum_stats = function(data, group, value, alpha=0.05)data %>%
  group_by(!!enquo(group)) %>%
  summarise(
    n = n(),
    q1 = quantile(!!enquo(value),1/4,8),
    min = min(!!enquo(value)),
    mean = mean(!!enquo(value)),
    median = median(!!enquo(value)),
    q3 = quantile(!!enquo(value),3/4,8),
    max = max(!!enquo(value)),
    sd = sd(!!enquo(value)),
    kurtosis = e1071::kurtosis(!!enquo(value)),
    skewness = e1071::skewness(!!enquo(value)),
    SW.stat = ShapiroTest(!!enquo(value), alpha)$statistic,
    SW.p = ShapiroTest(!!enquo(value), alpha)$p.value,
    SW.test = ShapiroTest(!!enquo(value), alpha)$test,
    nout = length(boxplot.stats(!!enquo(value))$out)
  )

#Using the sum stats function
df$data[[1]] %>% sum_stats(COND, value)

df %>% group_by(signals) %>%
  mutate(stats = map(data, ~sum_stats(.x, COND, value))) %>%
  unnest(stats)

output

# A tibble: 36 x 17
# Groups:   signals [12]
   signals     data        COND      n    q1     min   mean median     q3   max    sd kurtosis skewness SW.stat  SW.p SW.test  nout
   <chr>       <list>      <fct> <int> <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl>    <dbl>    <dbl>   <dbl> <dbl> <lgl>   <int>
 1 P3FCz       <tibble [7~ NEG-~    25 -5.73 -11.6   -1.59  -1.83   1.86   9.57  4.93   -0.612   0.128    0.984 0.951 TRUE        0
 2 P3FCz       <tibble [7~ NEG-~    25 -4.14 -11.1   -1.39  -0.522  0.659  8.16  4.44   -0.169  -0.211    0.978 0.842 TRUE        1
 3 P3FCz       <tibble [7~ NEU-~    25 -5.35  -6.97  -1.48  -1.97   1.89   6.24  4.00   -1.28    0.189    0.943 0.174 TRUE        0
 4 P3Cz        <tibble [7~ NEG-~    25 -2.76  -8.16   0.313  0.906  2.45  13.7   4.58    0.867   0.622    0.952 0.277 TRUE        1
 5 P3Cz        <tibble [7~ NEG-~    25 -2.22  -9.23   0.739  0.545  3.10  14.5   4.78    1.05    0.527    0.963 0.477 TRUE        1
 6 P3Cz        <tibble [7~ NEU-~    25 -4.14  -6.07  -0.107  0.622  2.16   7.76  3.88   -1.09    0.0112   0.957 0.359 TRUE        0
 7 P3Pz        <tibble [7~ NEG-~    25  5.66  -0.856  8.79   7.48  11.9   23.4   5.22    0.502   0.673    0.961 0.436 TRUE        1
 8 P3Pz        <tibble [7~ NEG-~    25  3.86  -0.888  8.45   7.88  11.2   20.7   5.23   -0.530   0.206    0.982 0.924 TRUE        0
 9 P3Pz        <tibble [7~ NEU-~    25  2.30  -0.932  6.43   6.82   8.46  16.7   4.60   -0.740   0.373    0.968 0.588 TRUE        0
10 LPPearlyFCz <tibble [7~ NEG-~    25 -4.02 -11.8   -0.666  0.149  1.90  13.3   5.02    0.916   0.202    0.947 0.215 TRUE        1
# ... with 26 more rows

Next we need to analyze the QQ charts. To do this, let's prepare the appropriate functions.

#function that creates a QQ plot
SignalQQPlot = function(data, signal, autor = "G. Anonim") data %>%
  group_by(COND) %>%
  mutate(label = paste("\nSW p-value:" ,
                       signif(shapiro.test(value)$p.value, 3))) %>%
  ggqqplot("value", facet.by = "COND") %>%
  ggpar(title = paste("QQ plots for", signal, "signal"),
        caption = autor)+
  geom_label(aes(x=0, y=+Inf, label=label))

#QQ plot for the P3FCz signal
df$data[[1]] %>% SignalQQPlot("P3FCz")

#A function that creates a QQ plot and adds it to a data frame
AddSignalQQPlot = function(df, signal, printPlot=TRUE) {
  plot = SignalQQPlot(df$data[[1]], signal)
  if(printPlot) print(plot)
  df %>% mutate(qqplot = list(plot))
}

#Added QQ charts
df %>% group_by(signals) %>%
  group_modify(~AddSignalQQPlot(.x, .y))

Correct the author's name. enter image description here

Now we will add statistics from ANOVA and tTest

#Add ANOVA test value
fAddANOVA = function(data) data %>% 
  anova_test(value~COND) %>% as_tibble()

#Method 1
df %>% group_by(signals) %>% 
  group_modify(~fAddANOVA(.x$data[[1]]))

#Method 2
df %>% group_by(signals) %>% 
  mutate(ANOVA = map(data, ~fAddANOVA(.x))) %>% 
  unnest(ANOVA)

output

# A tibble: 12 x 9
# Groups:   signals [12]
   signals     data              Effect   DFn   DFd     F     p `p<.05`      ges
   <chr>       <list>            <chr>  <dbl> <dbl> <dbl> <dbl> <chr>      <dbl>
 1 P3FCz       <tibble [75 x 5]> COND       2    72 0.012 0.988 ""      0.000338
 2 P3Cz        <tibble [75 x 5]> COND       2    72 0.228 0.797 ""      0.006   
 3 P3Pz        <tibble [75 x 5]> COND       2    72 1.61  0.207 ""      0.043   
 4 LPPearlyFCz <tibble [75 x 5]> COND       2    72 0.885 0.417 ""      0.024   
 5 LPPearlyCz  <tibble [75 x 5]> COND       2    72 2.65  0.077 ""      0.069   
 6 LPPearlyPz  <tibble [75 x 5]> COND       2    72 4.62  0.013 "*"     0.114   
 7 LPP1FCz     <tibble [75 x 5]> COND       2    72 1.08  0.344 ""      0.029   
 8 LPP1Cz      <tibble [75 x 5]> COND       2    72 2.45  0.094 ""      0.064   
 9 LPP1Pz      <tibble [75 x 5]> COND       2    72 3.97  0.023 "*"     0.099   
10 LPP2FCz     <tibble [75 x 5]> COND       2    72 0.103 0.903 ""      0.003   
11 LPP2Cz      <tibble [75 x 5]> COND       2    72 0.446 0.642 ""      0.012   
12 LPP2Pz      <tibble [75 x 5]> COND       2    72 1.17  0.316 ""      0.031   

And for tTest

#Add pairwise_t_test
fAddtTest = function(data) data %>% 
  pairwise_t_test(value~COND, paired = TRUE,
                  p.adjust.method = "bonferroni")

#Method 1
df %>% group_by(signals) %>% 
  group_modify(~fAddtTest(.x$data[[1]]))

#Method 2
df %>% group_by(signals) %>% 
  mutate(tTest = map(data, ~fAddtTest(.x))) %>% 
  unnest(tTest)

output

# A tibble: 36 x 12
# Groups:   signals [12]
   signals     data              .y.   group1  group2     n1    n2 statistic    df     p p.adj p.adj.signif
   <chr>       <list>            <chr> <chr>   <chr>   <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
 1 P3FCz       <tibble [75 x 5]> value NEG-CTR NEG-NOC    25    25   -0.322     24 0.75  1     ns          
 2 P3FCz       <tibble [75 x 5]> value NEG-CTR NEU-NOC    25    25   -0.178     24 0.86  1     ns          
 3 P3FCz       <tibble [75 x 5]> value NEG-NOC NEU-NOC    25    25    0.112     24 0.911 1     ns          
 4 P3Cz        <tibble [75 x 5]> value NEG-CTR NEG-NOC    25    25   -0.624     24 0.539 1     ns          
 5 P3Cz        <tibble [75 x 5]> value NEG-CTR NEU-NOC    25    25    0.592     24 0.559 1     ns          
 6 P3Cz        <tibble [75 x 5]> value NEG-NOC NEU-NOC    25    25    0.925     24 0.364 1     ns          
 7 P3Pz        <tibble [75 x 5]> value NEG-CTR NEG-NOC    25    25    0.440     24 0.664 1     ns          
 8 P3Pz        <tibble [75 x 5]> value NEG-CTR NEU-NOC    25    25    3.11      24 0.005 0.014 *           
 9 P3Pz        <tibble [75 x 5]> value NEG-NOC NEU-NOC    25    25    2.43      24 0.023 0.069 ns          
10 LPPearlyFCz <tibble [75 x 5]> value NEG-CTR NEG-NOC    25    25   -0.0919    24 0.928 1     ns          
# ... with 26 more rows

Now is the time to create a boxplot

#Function to special boxplot
SpecBoxplot = function(data, autor = "G. Anonim"){
box = data %>% ggboxplot(x = "COND", y = "value", add = "point")
pwc = data %>% 
  pairwise_t_test(value~COND, paired = TRUE,
                  p.adjust.method = "bonferroni") %>% 
  add_xy_position(x = "COND")
res.aov = data %>% anova_test(value~COND)
data %>% 
  ggboxplot(x = "COND", y = "value", add = "point") + 
  stat_pvalue_manual(pwc) +
  labs(
    title = get_test_label(res.aov, detailed = TRUE),
    subtitle = get_pwc_label(pwc),
    caption = autor
  )
}  

#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot()
 
#A function that creates a special boxplot and adds it to a data frame
AddSignalBoxplot = function(df, signal, printPlot=TRUE) {
  plot = SpecBoxplot(df$data[[1]], signal)
  if(printPlot) print(plot)
  df %>% mutate(boxplot = list(plot))
}

#Added special boxplot
df %>% group_by(signals) %>%
  group_modify(~AddSignalBoxplot(.x, .y))

enter image description here

Due to the inconsistency with the normal distribution in the modified function, I used the Wilcoxon test for comparison. I also improved the boxplots a bit.

#Function to special boxplot2
SpecBoxplot2 = function(data, signal, autor = "G. Anonim"){
  pwc = data %>% pairwise_wilcox_test(value~COND) %>%
    add_xy_position(x = "COND") %>% 
    mutate(COND="NEG-CTR",
           lab = paste(p, " - ", p.adj.signif))
  
  data %>% ggplot(aes(COND, value, fill=COND))+
    geom_violin(alpha=0.2)+
    geom_boxplot(outlier.shape = 23,
                 outlier.size = 3,
                 alpha=0.6)+
    geom_jitter(shape=21, width =0.1)+
    stat_pvalue_manual(pwc, step.increase=0.05, label = "lab")+
    labs(title = signal,
         subtitle = get_pwc_label(pwc),
         caption = autor)
}

#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot2("P3FCz")

#A function that creates a special boxplot2 and adds it to a data frame
AddSignalBoxplot2 = function(df, signal, printPlot=TRUE) {
  plot = SpecBoxplot2(df$data[[1]], signal)
  if(printPlot) print(plot)
  df %>% mutate(boxplot = list(plot))
}

#Added special boxplot2
df %>% group_by(signals) %>%
  group_modify(~AddSignalBoxplot2(.x, .y))

enter image description here

Special update fom my frend

Hey @mały_statystyczny! You will be great!

Here is a special update for you. Below is a function that prepares graphs with both parametric and non-parametric statistics.

#Function to special boxplot3
SpecBoxplot3 = function(data, signal, parametric = FALSE, autor = "G. Anonim"){
  if(parametric) {
    pwc = data %>%
      pairwise_t_test(value~COND, paired = TRUE,
                      p.adjust.method = "bonferroni") %>%
      add_xy_position(x = "COND") %>%
      mutate(COND="NEG-CTR",
             lab = paste(p, " - ", p.adj.signif))
    res.test = data %>% anova_test(value~COND)
  } else {
    pwc = data %>% pairwise_wilcox_test(value~COND) %>%
      add_xy_position(x = "COND") %>%
      mutate(COND="NEG-CTR",
             lab = paste(p, " - ", p.adj.signif))
    res.test = data %>% kruskal_test(value~COND)
  }
  
  data %>% ggplot(aes(COND, value, fill=COND))+
    geom_violin(alpha=0.2)+
    geom_boxplot(outlier.shape = 23,
                 outlier.size = 3,
                 alpha=0.6)+
    geom_jitter(shape=21, width =0.1)+
    stat_pvalue_manual(pwc, step.increase=0.05, label = "lab")+
    ylab(signal)+
    labs(title = get_test_label(res.test, detailed = TRUE),
         subtitle = get_pwc_label(pwc),
         caption = autor)
}


#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot3("P3FCz", TRUE)
df$data[[1]] %>% SpecBoxplot3("P3FCz", FALSE)

#A function that creates a special boxplot3 and adds it to a data frame
AddSignalBoxplot3 = function(df, signal, printPlot=TRUE) {
  plot1 = SpecBoxplot3(df$data[[1]], signal, TRUE)
  plot2 = SpecBoxplot3(df$data[[1]], signal, FALSE)
  if(printPlot) print(plot1)
  if(printPlot) print(plot2)
  df %>% mutate(boxplot1 = list(plot1),
                boxplot2 = list(plot2),
  )
}

#Added special boxplot3
df %>% group_by(signals) %>%
  group_modify(~AddSignalBoxplot3(.x, .y))

Here are the results of its operation enter image description here

enter image description here

like image 156
Marek Fiołka Avatar answered Nov 15 '22 04:11

Marek Fiołka


Another possible solution:

library(tidyverse)
library(rstatix)
library(ggpubr)
library(readxl)

data <- read_excel("df_join.xlsx")

n <- ncol(data)-4

lAnovas <- map(1:n, ~ anova_test(
  data = data.frame(data[,c(1,4)], data[,.x+4]) %>% 
    setNames(c("ID", "COND", names(data)[.x+4])), 
  dv = !!names(data)[.x+4], wid = ID, within = COND) %>% 
  get_anova_table())

lPwc <- map(1:n, ~ data %>%
      pairwise_t_test(
        formula(
          paste0(names(data)[.x+4]," ~ COND")),
        paired = TRUE,
        p.adjust.method = "bonferroni"
      )
)

lPwc <- map(lPwc, ~ .x %>% add_xy_position(x = "COND"))

drawbox <-  function(x)
{
  ggboxplot(data = data, 
            x = "COND", y = names(data)[x+4], add = "point") + 
    stat_pvalue_manual(lPwc[[x]]) +
    labs(
      subtitle = get_test_label(lAnovas[[x]], detailed = TRUE),
      caption = get_pwc_label(lPwc[[x]]) 
    )
}

walk(1:n, ~ drawbox(.x) %>% print())
like image 28
PaulS Avatar answered Nov 15 '22 05:11

PaulS