Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Avoiding overlapping of labels with direct labels and ggplot2

I'm having problems avoiding overlapping labels in my graphic. I've looked at similar questions, but none of them helped me solve my problem. Below I provide a reproducible example. Also, I don't understand why directlabels isn't selecting the appropriate color for each label.

# Example data in long format: 7 fields x 13 years = 91 rows, with columns
# `ano` (year), `field` (factor) and `value` (count for that year and field).
# Built inside local() so the helper vectors do not leak into the workspace.
mydf <- local({
  years <- c(1970, 1975, 1980, 1985, 1990, 1995, 2000,
             2004, 2005, 2006, 2007, 2008, 2009)
  field_levels <- c("Humanites", "Scial Sciences", "Natural Sciences",
                    "Computer science and engineering", "education",
                    "business", "Other fields")
  # Counts are listed field by field, in the same order as `field_levels`,
  # one value per entry of `years`.
  counts <- c(
    143549, 150736, 134139, 132891, 172485, 193404, 214107,
    254847, 261696, 265808, 274535, 278387, 280993,
    193511, 176674, 141581, 134468, 183762, 199895, 201681,
    242506, 249619, 254222, 259950, 262771, 269996,
    81916, 91596, 78092, 76228, 70209, 93443, 89772,
    99370, 105899, 113077, 117200, 121009, 125809,
    52570, 52328, 90476, 139459, 104910, 102503, 117011,
    133655, 128886, 124024, 122084, 122408, 128318,
    176307, 154437, 108074, 87147, 110807, 105384, 105458,
    105451, 107238, 105641, 102582, 101708, 101265,
    115396, 143171, 200521, 236700, 249165, 226623, 263515,
    311574, 318042, 327531, 335254, 347985, 358293,
    76481, 156804, 182257, 180930, 203200, 243540, 252627,
    291861, 313862, 333789, 351464, 367100, 385340
  )
  data.frame(
    ano = rep(years, times = length(field_levels)),
    field = factor(rep(field_levels, each = length(years)),
                   levels = field_levels),
    value = counts
  )
})

# Attach the plotting packages. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script fail
# later with a less helpful message.
library(ggplot2)
library(directlabels)
library(scales)

# One line per field. `group` is mapped once in ggplot() and inherited by the
# layer; `show.legend` replaces the deprecated `show_guide` argument.
# theme_hyper() is the custom theme defined further down in this post.
p <- ggplot(mydf, aes(x = ano, y = value, group = field)) +
  geom_line(aes(colour = field), show.legend = FALSE) +
  ylab("Number of B.A. degrees awarded") +
  xlab("year") +
  theme_hyper() +
  theme(legend.key = element_rect(colour = "white")) +
  scale_y_continuous(labels = comma)

# Label every line at its last point. The positioning method is passed by
# name: in current directlabels releases `method` follows `...` in geom_dl()'s
# signature, so a positional list would be matched to `data` instead.
p1 <- p +
  geom_dl(
    aes(label = field),
    method = list("last.points", cex = 1.3, hjust = 1)
  )
p1

The function theme_hyper is as follows:

#' Bare-panel ggplot2 theme with bold text
#'
#' Blanks the panel background and both the major and minor grid lines, and
#' sets the legend, plot-title, axis-tick and axis-title text to bold in a
#' single font family and size.
#'
#' @param angle Rotation angle applied to the x-axis tick labels.
#' @param fonte Font family used for every text element set here.
#' @param size Font size used for every text element set here.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
theme_hyper <- function(angle = 0, fonte = "arial", size = 14) {
  # All text elements share family, size and face; extras vary per element.
  bold_text <- function(...) {
    element_text(family = fonte, size = size, face = "bold", ...)
  }

  theme(
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = bold_text(),
    legend.text = bold_text(),
    plot.title = bold_text(),
    axis.text.x = bold_text(angle = angle, vjust = .5),
    axis.text.y = bold_text(hjust = .5),
    axis.title.x = bold_text(),
    axis.title.y = bold_text()
  )
}
like image 701
Manoel Galdino Avatar asked Jan 08 '14 19:01

Manoel Galdino


3 Answers

Use "last.bumpup"

Add colour=field to add color to your labels.

# Colour each label like its line and use "last.bumpup" to push overlapping
# labels apart. `method` is passed by name because in current directlabels
# releases it follows `...` in geom_dl()'s signature, so a positional list
# would be matched to `data` instead.
geom_dl(
  aes(label = field, colour = field),
  method = list("last.bumpup", cex = 1.3, hjust = 1)
)

To remove the color legend/guide, add:

# Discrete colour scale with its legend (guide) switched off.
scale_colour_discrete(guide="none")

Final call:

# Line chart of degrees awarded per field, with direct labels in place of a
# legend.
# - `show.legend` replaces the deprecated `show_guide` argument.
# - `method` is passed to geom_dl() by name: in current directlabels releases
#   it follows `...` in the signature, so a positional list would be matched
#   to `data` instead.
# - "last.bumpup" labels each line at its last point and moves overlapping
#   labels apart.
p <- ggplot(mydf, aes(x = ano, y = value, group = field)) +
  geom_line(aes(colour = field), show.legend = FALSE) +
  ylab("Number of B.A. degrees awarded") +
  xlab("year") +
  theme_hyper() +
  theme(legend.key = element_rect(colour = "white")) +
  scale_y_continuous(labels = comma) +
  geom_dl(
    aes(label = field, colour = field),
    method = list("last.bumpup", cex = 1.3, hjust = 1)
  ) +
  scale_colour_discrete(guide = "none")
like image 146
SethB Avatar answered Nov 11 '22 06:11

SethB


Have you considered using ggrepel to position text labels without overlaps?

BA degrees awarded per year

Read the data:

library(ggrepel)
#> Loading required package: ggplot2
library(scales)
library(readr)
#> 
#> Attaching package: 'readr'
#> The following object is masked from 'package:scales':
#> 
#>     col_factor
library(sitools)
library(tools)

# Inline tab-separated data: year (`ano`), field name and number of degrees.
# Columns are separated with explicit \t escapes so the literal stays
# tab-delimited even when an editor or a copy/paste turns tab characters into
# spaces; read_tsv() only splits on tabs, and the field names themselves
# contain spaces.
mydf <- read_tsv("ano\tfield\tvalue
1970\tHumanities\t143549
1975\tHumanities\t150736
1980\tHumanities\t134139
1985\tHumanities\t132891
1990\tHumanities\t172485
1995\tHumanities\t193404
2000\tHumanities\t214107
2004\tHumanities\t254847
2005\tHumanities\t261696
2006\tHumanities\t265808
2007\tHumanities\t274535
2008\tHumanities\t278387
2009\tHumanities\t280993
1970\tSocial Sciences\t193511
1975\tSocial Sciences\t176674
1980\tSocial Sciences\t141581
1985\tSocial Sciences\t134468
1990\tSocial Sciences\t183762
1995\tSocial Sciences\t199895
2000\tSocial Sciences\t201681
2004\tSocial Sciences\t242506
2005\tSocial Sciences\t249619
2006\tSocial Sciences\t254222
2007\tSocial Sciences\t259950
2008\tSocial Sciences\t262771
2009\tSocial Sciences\t269996
1970\tNatural Sciences\t81916
1975\tNatural Sciences\t91596
1980\tNatural Sciences\t78092
1985\tNatural Sciences\t76228
1990\tNatural Sciences\t70209
1995\tNatural Sciences\t93443
2000\tNatural Sciences\t89772
2004\tNatural Sciences\t99370
2005\tNatural Sciences\t105899
2006\tNatural Sciences\t113077
2007\tNatural Sciences\t117200
2008\tNatural Sciences\t121009
2009\tNatural Sciences\t125809
1970\tComputer science and engineering\t52570
1975\tComputer science and engineering\t52328
1980\tComputer science and engineering\t90476
1985\tComputer science and engineering\t139459
1990\tComputer science and engineering\t104910
1995\tComputer science and engineering\t102503
2000\tComputer science and engineering\t117011
2004\tComputer science and engineering\t133655
2005\tComputer science and engineering\t128886
2006\tComputer science and engineering\t124024
2007\tComputer science and engineering\t122084
2008\tComputer science and engineering\t122408
2009\tComputer science and engineering\t128318
1970\teducation\t176307
1975\teducation\t154437
1980\teducation\t108074
1985\teducation\t87147
1990\teducation\t110807
1995\teducation\t105384
2000\teducation\t105458
2004\teducation\t105451
2005\teducation\t107238
2006\teducation\t105641
2007\teducation\t102582
2008\teducation\t101708
2009\teducation\t101265
1970\tbusiness\t115396
1975\tbusiness\t143171
1980\tbusiness\t200521
1985\tbusiness\t236700
1990\tbusiness\t249165
1995\tbusiness\t226623
2000\tbusiness\t263515
2004\tbusiness\t311574
2005\tbusiness\t318042
2006\tbusiness\t327531
2007\tbusiness\t335254
2008\tbusiness\t347985
2009\tbusiness\t358293
1970\tOther fields\t76481
1975\tOther fields\t156804
1980\tOther fields\t182257
1985\tOther fields\t180930
1990\tOther fields\t203200
1995\tOther fields\t243540
2000\tOther fields\t252627
2004\tOther fields\t291861
2005\tOther fields\t313862
2006\tOther fields\t333789
2007\tOther fields\t351464
2008\tOther fields\t367100
2009\tOther fields\t385340")

Create the plot:

# Line chart with one coloured line per field and, at the final year, a point
# plus a repelled text label ("<count> <Field>") in place of a legend.
p <- ggplot(data = mydf, mapping = aes(x = ano, y = value, group = field)) +
  geom_line(
    mapping = aes(group = field, colour = field),
    show.legend = FALSE,
    size = 1.5
  ) +
  # The x range runs 50 years past the last observation while the breaks stop
  # at 2010; presumably the extra width is room for the labels.
  scale_x_continuous(
    breaks = seq(from = 1970, to = 2010, by = 10),
    limits = range(mydf$ano) + c(0, 50),
    expand = c(0, 1)
  ) +
  scale_y_continuous(labels = sitools::f2si) +
  # White rectangle drawn over everything right of x = 2010.5, before the
  # points and labels are added on top of it.
  annotate(
    geom = "rect",
    fill = "white",
    xmin = 2010.5, xmax = Inf,
    ymin = -Inf, ymax = Inf
  ) +
  # Marker on the last observation of each field.
  geom_point(
    mapping = aes(colour = field),
    data = subset(mydf, ano == max(ano)),
    show.legend = FALSE,
    size = 3
  ) +
  # Labels are left-aligned, nudged 2 units along x, repelled along y only,
  # and drawn without connector segments.
  geom_text_repel(
    mapping = aes(
      label = sprintf("%s %s", comma(value), toTitleCase(field)),
      colour = field
    ),
    data = subset(mydf, ano == max(ano)),
    direction = "y",
    nudge_x = 2,
    hjust = 0,
    size = 7,
    segment.color = NA,
    show.legend = FALSE
  ) +
  theme_minimal(base_size = 24) +
  theme(panel.grid.minor.x = element_blank()) +
  labs(
    title = "B.A. degrees awarded per year",
    x = NULL,
    y = NULL
  )

# Write plot `p` to a PNG file, 12.5 x 6 in ggsave()'s default units.
ggsave(
  filename = "stackoverflow-21004491.png",
  plot = p,
  height = 6,
  width = 12.5
)

Created on 2018-12-29 by the reprex package (v0.2.1)

like image 43
Kamil Slowikowski Avatar answered Nov 11 '22 04:11

Kamil Slowikowski


Maybe try

# Apply direct labels to the existing plot `p`, using the "last.points"
# method with smaller text (cex = 0.7) and hjust = 1.
direct.label(
  p,
  method = list("last.points", cex = 0.7, hjust = 1)
)

or one of the many other possibilities here.

like image 2
lukeA Avatar answered Nov 11 '22 05:11

lukeA