Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How can I create a 'listogram' in ggplot?

I am trying to recreate this graph I saw in this book, but instead make it as a histogram.

Example listogram

This is my code so far, but it is not looking as good:

set.seed(123)
library(dplyr)
library(ggplot2)

# Create random sample data
example_data <- data.frame(
  pooled_robustness = rnorm(172, mean = 0, sd = 1),
  ISO_3 = replicate(172, paste0(sample(LETTERS, 3, replace = FALSE), collapse = ""))
)

example_data |>
  select(ISO_3, pooled_robustness) |>
  mutate(round_pooled_robustness = round(pooled_robustness,1)) |>
  group_by(round_pooled_robustness) |>
  mutate(stack_position = row_number() - 1) |>
  ungroup() |>
  ggplot(aes(x = round_pooled_robustness)) +
  geom_text(aes(label = ISO_3, y = stack_position), size = 3)

The difficulty lies in creating the right binwidth and setting a text size that fits the binwidth. Now I do it by rounding, but it doesn't work either.

Example ggplot listogram

like image 449
Victor Hartman Avatar asked Mar 26 '26 18:03

Victor Hartman


1 Answers

Here's a textual approach :-)

First, we determine screen width with getOption("width") and reduce it by 5 (buffer and the Count column). I further buffer each column by 2 to give us some space between columns, you can change that as needed.

library(dplyr)
blank <- strrep(" ", max(nchar(example_data$ISO_3)))
wid <- getOption("width") - 5L
wid
# [1] 133
nbins <- floor(wid / (2L + nchar(blank))) - 1L # allow for the "y-axis"
bins <- seq(min(example_data$pooled_robustness)-1e-6, max(example_data$pooled_robustness)+1e-6, length.out = nbins + 1)
dat <- example_data |>
  mutate(bin = findInterval(pooled_robustness, bins)) |>
  mutate(.by = bin, y = row_number())
dat <- expand.grid(bin = 1:nbins, y = 1:max(dat$y)) |>
  full_join(dat, join_by(bin, y)) |>
  mutate(ISO_3 = coalesce(ISO_3, blank)) |>
  arrange(bin, desc(y)) |>
  mutate(
    Count = if_else(y %% 5 == 0, sprintf(paste0("%", ceiling(max(log10(y))), " s"), as.character(y)),
                    strrep(" ", ceiling(max(log10(y)))))
  )

There are two ways to show this. We can pivot it and show it as a frame,

tidyr::pivot_wider(dat, id_cols = c(y, Count), names_from = bin, values_from = ISO_3) |>
  select(-y) |>
  print.data.frame()

Output:

#    Count   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25
# 1                                        XKD
# 2                                        IUW     JYR
# 3                                        OHC     CAG
# 4                                        JMZ KFU BAT     KVA
# 5     15                                 GDX GFQ IAT     GXR
# 6                                    LUN EWI LJG QKG     TAE
# 7                                    TJI FGD OZX MLZ     VDS
# 8                                    IHZ VBH JQP IWG PIB MTN XIK
# 9                                    WDM AFM MCG UFJ MXC BZH OSJ
# 10    10                     OSI     PVX VAZ XWE KOT ENF ZWK YKJ
# 11                           RJH     YMG SJZ LDU IKY PGD BPW XFD BOW
# 12                   HUE     YOS     SWH TNY TAE SDI HOD TPY KEZ NLH
# 13                   YDC     QYK     UYG OEN WFH SXW ZTN TVU RDJ PCA BFN
# 14                   TLI     PSH     ZOS QXG NHF WFL WSB PQL TAC XHK UEX
# 15     5             JOY WYO VOY NSE QFJ IUK CNA ERF QCO SJW QGF SJW XSR
# 16                   HAX WEX JXG SWC SYR RQT LOS UBR ZEG BVL TJF JPE XDV DKE
# 17                   LBV TFO HGJ LHG WCI UFP KQL PBO YDO CWZ LHR MSV JEQ HCV UNH HUC CKJ
# 18           YXQ HAP HBA XAP OVC MFI ELM ASL LCR RQA JIK PCD DRA BSM LMJ DXC DWJ GID OWV ALR
# 19       FRB IKG HJM PXZ CRB BUQ ZQW FGI JFZ PQU UGT TIP MXQ MEK TFZ JFP QRH MUH BQZ BYF KDX             VPH

or we can cat it to the console, removing the column and row names in lieu of clearer (?) axis labels.

lbls <- pretty(bins, n = 5)
lbls <- lbls[between(lbls, min(example_data$pooled_robustness), max(example_data$pooled_robustness))]
lbls_x <- round(scales::rescale(lbls, from = range(bins), to = c(1, wid)), 0)
lbls_x <- paste(sprintf(paste0("%", c(lbls_x[1], diff(lbls_x)), " s"), lbls), collapse = "")
reframe(dat, .by = c(y, Count), ISO_3 = paste(ISO_3, collapse = "  ")) |>
  select(-y) |>
  bind_rows(tibble(Count = "", ISO_3 = lbls_x)) |>
  with(cat(paste(Count, ISO_3, sep = "  ", collapse = "\n"), "\n"))

which yields

                                            XKD
                                            IUW       JYR
                                            OHC       CAG
                                            JMZ  KFU  BAT       KVA
15                                          GDX  GFQ  IAT       GXR
                                       LUN  EWI  LJG  QKG       TAE
                                       TJI  FGD  OZX  MLZ       VDS
                                       IHZ  VBH  JQP  IWG  PIB  MTN  XIK
                                       WDM  AFM  MCG  UFJ  MXC  BZH  OSJ
10                           OSI       PVX  VAZ  XWE  KOT  ENF  ZWK  YKJ
                             RJH       YMG  SJZ  LDU  IKY  PGD  BPW  XFD  BOW
                   HUE       YOS       SWH  TNY  TAE  SDI  HOD  TPY  KEZ  NLH
                   YDC       QYK       UYG  OEN  WFH  SXW  ZTN  TVU  RDJ  PCA  BFN
                   TLI       PSH       ZOS  QXG  NHF  WFL  WSB  PQL  TAC  XHK  UEX
 5                 JOY  WYO  VOY  NSE  QFJ  IUK  CNA  ERF  QCO  SJW  QGF  SJW  XSR
                   HAX  WEX  JXG  SWC  SYR  RQT  LOS  UBR  ZEG  BVL  TJF  JPE  XDV  DKE
                   LBV  TFO  HGJ  LHG  WCI  UFP  KQL  PBO  YDO  CWZ  LHR  MSV  JEQ  HCV  UNH  HUC  CKJ
         YXQ  HAP  HBA  XAP  OVC  MFI  ELM  ASL  LCR  RQA  JIK  PCD  DRA  BSM  LMJ  DXC  DWJ  GID  OWV  ALR
    FRB  IKG  HJM  PXZ  CRB  BUQ  ZQW  FGI  JFZ  PQU  UGT  TIP  MXQ  MEK  TFZ  JFP  QRH  MUH  BQZ  BYF  KDX                 VPH
        -2                      -1                       0                       1                      2                       3

In contrast, setting wid <- 76, changes the view a bit:

                        XKD
30                      IUW
                        OHC       KVA
                        JMZ  JYR  PIB
                        GDX  KFU  MXC
                        EWI  GFQ  VDS
25                      LUN  LJG  IAT
                        FGD  OZX  ENF
                        TJI  CAG  QKG
                        IHZ  BAT  MLZ
                        VBH  JQP  PGD
20                      AFM  IWG  HOD  GXR
                        WDM  UFJ  ZTN  XIK
                        PVX  MCG  ZWK  TAE
                        YMG  XWE  WSB  OSJ
                        SJZ  VAZ  BPW  YKJ
15                      SWH  LDU  QCO  XFD  BFN
                   OSI  UYG  KOT  TPY  KEZ  BOW
                   RJH  TNY  TAE  ZEG  RDJ  UEX
                   NSE  OEN  WFH  TVU  MTN  NLH
              WYO  SWC  QXG  IKY  YDO  TAC  PCA
10            WEX  YOS  IUK  NHF  SDI  BZH  XSR
              HUE  LHG  RQT  SXW  JIK  QGF  XDV
              TLI  MFI  ZOS  WFL  SJW  TJF  JEQ
              HAX  ZQW  UFP  ERF  TIP  LHR  XHK
         YDC  LBV  QYK  QFJ  CNA  PBO  PQL  SJW  DKE
 5       JOY  TFO  PSH  ASL  LOS  CWZ  DRA  LMJ  UNH  OWV
         HBA  JXG  VOY  WCI  UBR  PCD  JPE  MSV  HCV  HUC
    YXQ  HAP  XAP  SYR  JFZ  KQL  MXQ  TFZ  BSM  DWJ  BYF  CKJ
    FRB  PXZ  HGJ  OVC  ELM  LCR  RQA  MEK  QRH  DXC  GID  ALR
    IKG  HJM  CRB  BUQ  FGI  PQU  UGT  BVL  JFP  MUH  BQZ  KDX       VPH
     -2            -1            0             1            2             3

Yes, it's still not ggplot2.

like image 138
r2evans Avatar answered Mar 29 '26 07:03

r2evans



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!