Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

A single bar is misplaced in a barplot (ggplot2)

I'm making a 2-panel barplot, but a single bar is out of place: the bar in the upper part of the right panel. It has the TR fill, but it occupies the position where the TL bar should be.

The data is:

# Example data for the question: a 240-row data frame with three factor columns.
#   A: factor with levels "For", "Par" (used for the facets in the plot below)
#   B: ordered factor with levels "TL" < "TR" (used for the bar fill)
#   C: ordered factor with levels "0-2" < "2-4" < "4-6" < "6-8" < "8-10"
#      (used for the x axis)
DATA2=structure(list(A = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("For", "Par"), class = "factor"), 
B = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("TL", "TR"), class = c("ordered", 
"factor")), C = structure(c(1L, 4L, 4L, 1L, 1L, 1L, 4L, 1L, 
1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 4L, 1L, 5L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 
5L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 3L, 4L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 5L, 4L, 1L, 4L, 5L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 
3L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 
3L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 3L, 1L, 1L, 1L, 
5L, 4L, 1L, 4L, 5L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 4L, 1L, 1L, 
1L, 1L, 2L, 4L, 5L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 
1L, 4L, 1L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
4L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 5L, 3L, 1L, 1L, 
4L, 1L, 1L, 1L, 5L, 1L, 4L), .Label = c("0-2", "2-4", "4-6", 
"6-8", "8-10"), class = c("ordered", "factor"))), row.names = c(NA, 
-240L), class = "data.frame")

The plot code is as follows:

# Horizontal dodged bar chart: row counts per level of C, filled by B,
# with one panel per level of A.
ggplot(data = DATA2, mapping = aes(x = C, fill = B)) +
  geom_bar(
    stat = "count",
    position = position_dodge(width = -0.9, preserve = "single")
  ) +
  facet_grid(. ~ A) +
  scale_fill_manual(values = c("grey", "grey40")) +
  theme_bw() +
  coord_flip()

enter image description here

The small bar (count = 1) in the upper part of the right panel is misplaced, although the fill is correct. Any tips? Thanks!

like image 372
Paulo Barradas Avatar asked Jun 13 '26 06:06

Paulo Barradas


1 Answer

Some combinations of variable values never occur in your data, namely c("Par","TL","8-10") and c("Par", "TR", "2-4"). When you use geom_bar(stat = "count") (which is the default), ggplot2 apparently does not create a bar for a combination whose count would be 0. Within each pairing, bars are placed from the top downward, so when one of the two bars does not exist, the remaining bar is drawn in the top slot of the pairing, whichever fill group it belongs to.

We can see the same thing in this simpler example:

library(dplyr)
library(tidyr)
library(ggplot2)

# Toy data: Group 1 contains both animals, Group 2 contains only possums,
# so the ("Group 2", "dog") combination never occurs.
df <- data.frame(
  A = rep(c("Group 1", "Group 2"), times = c(3, 3)),
  B = rep(c("possum", "dog", "possum"), times = c(1, 2, 3))
)

df
#>         A      B
#> 1 Group 1 possum
#> 2 Group 1    dog
#> 3 Group 1    dog
#> 4 Group 2 possum
#> 5 Group 2 possum
#> 6 Group 2 possum

# Dodged bar chart of the toy data; geom_bar() does the counting itself
# (stat = "count" is its default).
ggplot(data = df, mapping = aes(x = A, fill = B)) +
  geom_bar(
    stat = "count",
    position = position_dodge(width = -0.9, preserve = "single")
  )

The way around this is to do the counting manually:

# Count the rows for each A/B combination that actually occurs in df;
# tally() stores the count in a column named n.
df_tally <- df %>% 
  group_by(A,B) %>% 
  tally() %>% 
  # Remove any remaining grouping so later steps work on a plain table.
  ungroup()

df_tally
#>   A       B          n
#> 1 Group 1 dog        2
#> 2 Group 1 possum     1
#> 3 Group 2 possum     3

We then need to add the missing combination:

# Add a row for every A/B combination that is absent from the tally;
# the count n for those added rows is NA.
df_tally <- df_tally %>%
  complete(A, B)

df_tally
#>   A       B          n
#> 1 Group 1 dog        2
#> 2 Group 1 possum     1
#> 3 Group 2 dog       NA
#> 4 Group 2 possum     3

Now, since we've done the counting already, we use stat = "identity":

# Dodged bar chart drawn from the pre-computed counts: bar height is taken
# directly from column n instead of being counted by geom_bar().
ggplot(data = df_tally, mapping = aes(x = A, y = n, fill = B)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = -0.9, preserve = "single")
  )
#> Warning: Removed 1 rows containing missing values (geom_bar).

In your case, this looks like:

# Pre-compute the counts for the question's data: one row per A/B/C
# combination, with the row count in column n.
DATA3 <- DATA2 %>% 
  group_by(A,B,C) %>% 
  tally() %>% 
  ungroup() %>% 
  # Add the A/B/C combinations that never occur in DATA2 (their n is NA).
  complete(A,B,C)


# Horizontal dodged bar chart of the pre-computed counts in DATA3:
# bar length is n, filled by B, with one panel per level of A.
ggplot(data = DATA3, mapping = aes(x = C, y = n, fill = B)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = -0.9, preserve = "single")
  ) +
  facet_grid(. ~ A) +
  scale_fill_manual(values = c("grey", "grey40")) +
  theme_bw() +
  coord_flip()
#> Warning: Removed 2 rows containing missing values (geom_bar).

Update: dplyr 0.8.0

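As of dplyr 0.8.0, group_by() has a .drop argument; setting .drop = FALSE keeps the groups for factor levels that have no rows in the data, so those combinations get a count of 0 instead of being left out. This slightly simplifies the necessary code. Example: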

# Count rows per A/B/C combination. `.drop = FALSE` (dplyr >= 0.8.0) keeps
# combinations of factor levels that have no rows, so they appear with n = 0
# instead of being omitted.
DATA3 <- DATA2 %>%
  group_by(A, B, C, .drop = FALSE) %>%
  summarise(n = n()) %>%
  # summarise() removes only the last grouping level, so the result would
  # still be grouped by A and B; ungroup to return a plain table, matching
  # the tally()/complete() version of DATA3.
  ungroup()
like image 165
DanTan Avatar answered Jun 15 '26 10:06

DanTan