I'm trying to make a plot similar to Fig. 2d-f in an article published in Nature this year. It's basically a half boxplot with the points drawn on the other half.
Can anyone give me some hints? Thank you very much!
Here are my data and the code I have so far, which produces full boxes with the points drawn inside them:
# Attach the packages with library() rather than require(): library() stops
# with an error if a package is missing, whereas require() only warns and
# returns FALSE, letting the script fail later with a less helpful message.
library(magrittr)
library(tidyverse)
# Example data set, as produced by dput(): a tibble with 40 rows and 8 columns.
#   p1..p6 : numeric measurement columns (all values lie between 0 and 1)
#   type   : character group label, "small" (rows 1-20) or "big" (rows 21-40)
#   loc    : character group label, "abro" or "dome", in alternating runs of 10
dat <- structure(list(p1 = c(0.0854261831077604, 0.408418657218253,
0.577793646477315, 0.578028229977424, 0.48933166218204, 0.53117814324334,
0.526653494462464, 0.00687616283435221, 0.444300425796509, 0.00287319455358522,
0.949821402532831, 0.96832469523368, 0.953281969982759, 0.360125244759434,
0.407921095422844, 0.885776732104954, 0.159882184516691, 0.911094990767761,
0.0444367172734037, 0.144888951725151, 0.508858686640707, 0.694913731085945,
0.117270366119258, 0.78227546070467, 0.980457304886186, 0.711464034564424,
0.753944466390685, 0.0474210438747038, 0.00344183466223558, 0.0290017465534545,
0.75092385236303, 0.868873921257987, 0.744396990487425, 0.0140007244233847,
0.0332266395043963, 0.482897084793009, 0.0535516646483004, 0.452926358923891,
0.0144057727301603, 0.171918034525543), p2 = c(0.101262675229211,
0.196913109208586, 0.37814311161382, 0.0677625689405156, 0.12517090579686,
0.409083554335168, 0.158886941347288, 0.847394861862651, 0.180560031076741,
0.967122694294885, 0.000901627067665116, 0.00039495110143705,
9.70707318411806e-05, 0.546200038486894, 0.435475454787648, 5.95555269800323e-06,
0.0178837768834925, 8.42690065415846e-06, 0.00777059697751842,
0.0020397073541544, 0.486699073016371, 0.283679673247571, 0.857183359146641,
0.200712003853458, 0.0164911141652784, 0.0542250670734297, 0.232340206984506,
0.948523714169708, 0.169881661474024, 0.968983592882272, 0.00250367590158291,
0.000792323746977033, 0.000185068166140097, 0.0193600071757997,
0.114775271592724, 4.65931778380389e-05, 0.000754760900847164,
2.07521623816406e-05, 0.00782764273312856, 0.00276993826117348
), p3 = c(0.0118642223785376, 0.0267362912322735, 6.60753171741111e-08,
0.053576051466652, 0.00375873110094442, 9.85095078844696e-08,
0.0525436528683484, 0.0193735809639814, 8.44717454802822e-07,
0.00608007737576027, 0.0205563904131287, 0.0104638062130591,
0.0249997053664864, 0.0587924727726031, 0.0443600964770995, 0.067125687916273,
0.758612877724648, 0.0618158334848203, 0.0251025592849138, 0.790905778949543,
0.00126904829915329, 0.00760772364901772, 0.00119821088328392,
0.0115117347754715, 0.000863676435448072, 0.000996891439583434,
0.0115279148630096, 0.00249122388568909, 5.21508620418823e-05,
0.00144050407848742, 0.120373444447631, 0.0534773096149069, 0.110284261289338,
0.571243879053544, 0.438152084363961, 0.364887514202121, 0.696293189762153,
0.414870716968937, 0.0557358576822093, 0.783929426716999), p4 = c(0.000107231042599948,
0.000379648762557529, 8.25102162601208e-06, 0.000343829024899591,
0.000140680688077216, 1.90076798696051e-06, 0.000214507212681323,
1.38587688080716e-05, 3.48104084092359e-06, 6.50782599216903e-07,
0.0114584884733498, 0.00652170746426181, 0.0143309604192116,
0.0275718029789144, 0.0352327288308957, 0.022950800779703, 0.0569939247302654,
0.0190248244391564, 0.0305921420687752, 0.00589871320676732,
0.000805515847378872, 1.97674357551495e-05, 8.30853708305541e-06,
1.32462751169762e-06, 4.8731965929686e-05, 0.0057411315642433,
4.82406700397824e-05, 0.000204633566379066, 0.0552263911781015,
0.000181994007177494, 0.0585729576787707, 0.0273685460128338,
0.0568746134466117, 0.299309335625926, 0.278980446497419, 0.105600715225359,
0.176549247514501, 0.101420411455169, 0.01003894550707, 0.0010803018725911
), p5 = c(0.786823338804824, 0.151956168584644, 0.0433468890359269,
0.19556481029922, 0.380808150243027, 0.0389798680141623, 0.260481184897901,
0.101147673996922, 0.0184624278061585, 0.0222416874775066, 0.000113517761014704,
0.00329593083795693, 0.000476682365422989, 0.00571997662739322,
0.0697473913851358, 0.0216803412883361, 0.00631472476841249,
0.00628215584877364, 0.540944692186543, 0.0135127011440213, 0.00235752761214414,
3.10282042735927e-06, 0.0239147204208516, 4.97334784773176e-05,
0.00213837866453402, 0.000212207014031345, 0.00180443364400107,
8.15954685083038e-05, 0.00445169398173509, 0.000391265642772285,
0.0676128522356959, 0.0494864355994384, 0.0882575475549674, 0.0960799089263987,
0.134853114895623, 0.0465661014986807, 0.0728456746626632, 0.0307607877988244,
0.476388236185883, 0.00831263646470973), p6 = c(0.0145163494370677,
0.215596124993685, 0.00070803577599434, 0.104724510291289, 0.000789869989050939,
0.0207564351298348, 0.00122021921131791, 0.0251938615732845,
0.356672789562296, 0.00168169551566413, 0.0171485737520108, 0.0109989091496048,
0.00681361113427885, 0.00159046437476052, 0.00726323309637717,
0.00246048235803604, 0.000312511376490686, 0.00177376855883463,
0.351153292208846, 0.0427541476203625, 1.01485842454486e-05,
0.0137760017612841, 0.000425034892882118, 0.0054497425604112,
7.93882623673471e-07, 0.227360668344289, 0.000334737447758259,
0.0012777890350116, 0.766946267841861, 8.96835836820999e-07,
1.32173732897771e-05, 1.46376785664669e-06, 1.51905551715105e-06,
6.14479494697213e-06, 1.24431458762028e-05, 1.99110299298599e-06,
5.46251153509928e-06, 9.72690797485877e-07, 0.435603545161549,
0.0319896621589845), type = c("small", "small", "small", "small",
"small", "small", "small", "small", "small", "small", "small",
"small", "small", "small", "small", "small", "small", "small",
"small", "small", "big", "big", "big", "big", "big", "big", "big",
"big", "big", "big", "big", "big", "big", "big", "big", "big",
"big", "big", "big", "big"), loc = c("abro", "abro", "abro",
"abro", "abro", "abro", "abro", "abro", "abro", "abro", "dome",
"dome", "dome", "dome", "dome", "dome", "dome", "dome", "dome",
"dome", "abro", "abro", "abro", "abro", "abro", "abro", "abro",
"abro", "abro", "abro", "dome", "dome", "dome", "dome", "dome",
"dome", "dome", "dome", "dome", "dome")), .Names = c("p1", "p2",
"p3", "p4", "p5", "p6", "type", "loc"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -40L))
# Print a compact, one-line-per-column overview of `dat`; the `#>` lines below
# are the output recorded when the example was originally run.
dat %>% glimpse()
#> Observations: 40
#> Variables: 8
#> $ p1 <dbl> 0.085426183, 0.408418657, 0.577793646, 0.578028230, 0.489...
#> $ p2 <dbl> 1.012627e-01, 1.969131e-01, 3.781431e-01, 6.776257e-02, 1...
#> $ p3 <dbl> 1.186422e-02, 2.673629e-02, 6.607532e-08, 5.357605e-02, 3...
#> $ p4 <dbl> 1.072310e-04, 3.796488e-04, 8.251022e-06, 3.438290e-04, 1...
#> $ p5 <dbl> 7.868233e-01, 1.519562e-01, 4.334689e-02, 1.955648e-01, 3...
#> $ p6 <dbl> 1.451635e-02, 2.155961e-01, 7.080358e-04, 1.047245e-01, 7...
#> $ type <chr> "small", "small", "small", "small", "small", "small", "sm...
#> $ loc <chr> "abro", "abro", "abro", "abro", "abro", "abro", "abro", "...
Convert data to long format
# Reshape `dat` from wide to long format: the six measurement columns p1..p6
# are stacked into a `key` column (the former column name) and a `value`
# column, while `type` and `loc` are kept as identifier columns. All three
# grouping columns are then converted to factors for plotting.
#
# pivot_longer() replaces the superseded gather(), and the columns are selected
# by name (p1:p6) instead of by position (1:6), so the reshape no longer
# depends on where the measurement columns sit in `dat`.
dat_long <- dat %>%
  pivot_longer(cols = p1:p6, names_to = "key", values_to = "value") %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key)
  ) %>%
  # pivot_longer() interleaves the keys row by row; sorting by `key` restores
  # the key-by-key row order that gather() used to produce.
  arrange(key)
Plot boxplot with points
# Box plots of `value` by `type`, faceted with one row per `loc` and one column
# per `key`, coloured by `key`. The jittered raw observations are added first
# and the box plots are layered on top of them; the box plots' own outlier
# colour is set to NA because every observation is already drawn by the jitter
# layer. The legend is placed at the bottom in a single row.
ggplot(dat_long, aes(x = type, y = value, colour = key)) +
  geom_jitter(width = 0.3, alpha = 0.3, size = 2) +
  geom_boxplot(outlier.colour = NA) +
  facet_grid(loc ~ key) +
  guides(colour = guide_legend(nrow = 1)) +
  theme_light() +
  theme(legend.position = "bottom")
Adding jittered points (a stripchart) to a box plot in ggplot is useful to see the underlying distribution of the data. You will need to use geom_jitter. A good practice is removing the outliers of the box plot with outlier.shape = NA, as the jitter will add them again.
Raincloud plots or Half Violin plots are basically used to visualize the distribution and the overall summary of the data at the same time. This plot is a combination of half violin plots with jittered points on top, boxplots and can be further enhanced by adding central measures of tendency, quartile ranges, etc. Here let’s use the “iris” dataset.
How to make a grouped boxplot with data points using geom_jitter()? Naively, we might try to add jittered data points to the grouped boxplot by calling geom_jitter() after geom_boxplot(). The geom_jitter() function is a handy shortcut for geom_point(position = "jitter").
A boxplot hides the distribution behind each group. This post shows how to tackle this issue in base R by adding the individual observations as jittered dots. Boxplots can be dangerous: the exact distribution of each group is hidden behind the boxes, as explained in data-to-viz.
I find this hybrid boxplot very, very lovely so I wanted to recreate it too.
I wrote a `geom_boxjitter` that inherits from `geom_boxplot` and only adds minor changes: it draws a `geom_rect` only on the left half, and error bars are added when `errorbar.draw` is set to `TRUE`. Their length can also be adjusted. You can check the code here. I think it is great how easy it has become to alter existing `geom`s with slight changes. Using part of your data:
library(tidyverse)
library(cowplot)
library(ggparl)
# Hybrid box/jitter plot for the keys "p1" and "p2" only, built with
# geom_boxjitter() (presumably provided by the ggparl package loaded above —
# confirm). Boxes and points are filled by `key` using two manual colours, the
# legend is hidden, and the y axis is limited to [-0.05, 1.05].
P <- dat_long %>%
  filter(key %in% c("p1", "p2")) %>%
  ggplot(aes(x = type, y = value, fill = key)) +
  geom_boxjitter(
    outlier.color = NA,
    jitter.shape = 21,
    jitter.color = NA,
    jitter.height = 0.05,
    jitter.width = 0.075,
    errorbar.draw = TRUE
  ) +
  scale_fill_manual(values = c("#ecb21e", "#812e91")) +
  ylim(-0.05, 1.05) +
  theme(legend.position = "none")
P
If you love us, you can donate via PayPal or buy us a coffee so that we can maintain and grow the site. Thank you!
Donate Us With