I am trying to recreate this graph I saw in this book, but instead make it as a histogram.

This is my code so far, but it is not looking as good:
set.seed(123)
library(dplyr)
library(ggplot2)
# Create random sample data
example_data <- data.frame(
pooled_robustness = rnorm(172, mean = 0, sd = 1),
ISO_3 = replicate(172, paste0(sample(LETTERS, 3, replace = FALSE), collapse = ""))
)
example_data |>
select(ISO_3, pooled_robustness) |>
mutate(round_pooled_robustness = round(pooled_robustness,1)) |>
group_by(round_pooled_robustness) |>
mutate(stack_position = row_number() - 1) |>
ungroup() |>
ggplot(aes(x = round_pooled_robustness)) +
geom_text(aes(label = ISO_3, y = stack_position), size = 3)
The difficulty lies in creating the right binwidth and setting a text size that fits the binwidth. Now I do it by rounding, but it doesn't work either.

Here's a textual approach :-)
First, we determine screen width with getOption("width") and reduce it by 5 (buffer and the Count column). I further buffer each column by 2 to give us some space between columns, you can change that as needed.
library(dplyr)
blank <- strrep(" ", max(nchar(example_data$ISO_3)))
wid <- getOption("width") - 5L
wid
# [1] 133
nbins <- floor(wid / (2L + nchar(blank))) - 1L # allow for the "y-axis"
bins <- seq(min(example_data$pooled_robustness)-1e-6, max(example_data$pooled_robustness)+1e-6, length.out = nbins + 1)
dat <- example_data |>
mutate(bin = findInterval(pooled_robustness, bins)) |>
mutate(.by = bin, y = row_number())
dat <- expand.grid(bin = 1:nbins, y = 1:max(dat$y)) |>
full_join(dat, join_by(bin, y)) |>
mutate(ISO_3 = coalesce(ISO_3, blank)) |>
arrange(bin, desc(y)) |>
mutate(
Count = if_else(y %% 5 == 0, sprintf(paste0("%", ceiling(max(log10(y))), " s"), as.character(y)),
strrep(" ", ceiling(max(log10(y)))))
)
There are two ways to show this. We can pivot it and show it as a frame,
tidyr::pivot_wider(dat, id_cols = c(y, Count), names_from = bin, values_from = ISO_3) |>
select(-y) |>
print.data.frame()
Output:
# Count 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
# 1 XKD
# 2 IUW JYR
# 3 OHC CAG
# 4 JMZ KFU BAT KVA
# 5 15 GDX GFQ IAT GXR
# 6 LUN EWI LJG QKG TAE
# 7 TJI FGD OZX MLZ VDS
# 8 IHZ VBH JQP IWG PIB MTN XIK
# 9 WDM AFM MCG UFJ MXC BZH OSJ
# 10 10 OSI PVX VAZ XWE KOT ENF ZWK YKJ
# 11 RJH YMG SJZ LDU IKY PGD BPW XFD BOW
# 12 HUE YOS SWH TNY TAE SDI HOD TPY KEZ NLH
# 13 YDC QYK UYG OEN WFH SXW ZTN TVU RDJ PCA BFN
# 14 TLI PSH ZOS QXG NHF WFL WSB PQL TAC XHK UEX
# 15 5 JOY WYO VOY NSE QFJ IUK CNA ERF QCO SJW QGF SJW XSR
# 16 HAX WEX JXG SWC SYR RQT LOS UBR ZEG BVL TJF JPE XDV DKE
# 17 LBV TFO HGJ LHG WCI UFP KQL PBO YDO CWZ LHR MSV JEQ HCV UNH HUC CKJ
# 18 YXQ HAP HBA XAP OVC MFI ELM ASL LCR RQA JIK PCD DRA BSM LMJ DXC DWJ GID OWV ALR
# 19 FRB IKG HJM PXZ CRB BUQ ZQW FGI JFZ PQU UGT TIP MXQ MEK TFZ JFP QRH MUH BQZ BYF KDX VPH
or we can cat it to the console, removing the column and row names in lieu of clearer (?) axis labels.
lbls <- pretty(bins, n = 5)
lbls <- lbls[between(lbls, min(example_data$pooled_robustness), max(example_data$pooled_robustness))]
lbls_x <- round(scales::rescale(lbls, from = range(bins), to = c(1, wid)), 0)
lbls_x <- paste(sprintf(paste0("%", c(lbls_x[1], diff(lbls_x)), " s"), lbls), collapse = "")
reframe(dat, .by = c(y, Count), ISO_3 = paste(ISO_3, collapse = " ")) |>
select(-y) |>
bind_rows(tibble(Count = "", ISO_3 = lbls_x)) |>
with(cat(paste(Count, ISO_3, sep = " ", collapse = "\n"), "\n"))
which yields
XKD
IUW JYR
OHC CAG
JMZ KFU BAT KVA
15 GDX GFQ IAT GXR
LUN EWI LJG QKG TAE
TJI FGD OZX MLZ VDS
IHZ VBH JQP IWG PIB MTN XIK
WDM AFM MCG UFJ MXC BZH OSJ
10 OSI PVX VAZ XWE KOT ENF ZWK YKJ
RJH YMG SJZ LDU IKY PGD BPW XFD BOW
HUE YOS SWH TNY TAE SDI HOD TPY KEZ NLH
YDC QYK UYG OEN WFH SXW ZTN TVU RDJ PCA BFN
TLI PSH ZOS QXG NHF WFL WSB PQL TAC XHK UEX
5 JOY WYO VOY NSE QFJ IUK CNA ERF QCO SJW QGF SJW XSR
HAX WEX JXG SWC SYR RQT LOS UBR ZEG BVL TJF JPE XDV DKE
LBV TFO HGJ LHG WCI UFP KQL PBO YDO CWZ LHR MSV JEQ HCV UNH HUC CKJ
YXQ HAP HBA XAP OVC MFI ELM ASL LCR RQA JIK PCD DRA BSM LMJ DXC DWJ GID OWV ALR
FRB IKG HJM PXZ CRB BUQ ZQW FGI JFZ PQU UGT TIP MXQ MEK TFZ JFP QRH MUH BQZ BYF KDX VPH
-2 -1 0 1 2 3
In contrast, setting wid <- 76, changes the view a bit:
XKD
30 IUW
OHC KVA
JMZ JYR PIB
GDX KFU MXC
EWI GFQ VDS
25 LUN LJG IAT
FGD OZX ENF
TJI CAG QKG
IHZ BAT MLZ
VBH JQP PGD
20 AFM IWG HOD GXR
WDM UFJ ZTN XIK
PVX MCG ZWK TAE
YMG XWE WSB OSJ
SJZ VAZ BPW YKJ
15 SWH LDU QCO XFD BFN
OSI UYG KOT TPY KEZ BOW
RJH TNY TAE ZEG RDJ UEX
NSE OEN WFH TVU MTN NLH
WYO SWC QXG IKY YDO TAC PCA
10 WEX YOS IUK NHF SDI BZH XSR
HUE LHG RQT SXW JIK QGF XDV
TLI MFI ZOS WFL SJW TJF JEQ
HAX ZQW UFP ERF TIP LHR XHK
YDC LBV QYK QFJ CNA PBO PQL SJW DKE
5 JOY TFO PSH ASL LOS CWZ DRA LMJ UNH OWV
HBA JXG VOY WCI UBR PCD JPE MSV HCV HUC
YXQ HAP XAP SYR JFZ KQL MXQ TFZ BSM DWJ BYF CKJ
FRB PXZ HGJ OVC ELM LCR RQA MEK QRH DXC GID ALR
IKG HJM CRB BUQ FGI PQU UGT BVL JFP MUH BQZ KDX VPH
-2 -1 0 1 2 3
Yes, it's still not ggplot2.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With