Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

ggplot2 and first data point in a line

Tags:

r

ggplot2

facet

I am creating two plots using ggplot2 and then using grid.arrange to merge them together. I should say that both of the plots are also using facet_grid for a visual tweaking.

enter image description here

My problem is that the bottom plot, which is really a data table, ends up being "cut off" on BOTH the left and right sides because of the starting and ending positions of the facets. Is there a way for me to tweak this? I would like to adjust it so that the points are not cut off.

Here is the data to reproduce it:

   df <- structure(list(SurveyID = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 
26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 
26L, 26L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 
47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 
56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 
56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 76L, 76L, 
76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 
76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 83L, 83L, 83L, 83L
), MEPSID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), ServiceID = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L), .Label = c("Army", "Navy", "Marines", "Air Force"
), class = "factor"), SurveyReturnedYear = c(2012L, 2012L, 2012L, 
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
2015L, 2015L, 2015L, 2015L), SurveyReturnedMonth = c(10L, 10L, 
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 
9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 
12L, 12L, 12L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L), CompletedSurvey = c(23L, 19L, 38L, 16L, 11L, 
16L, 38L, 19L, 6L, 14L, 41L, 10L, 6L, 32L, 46L, 18L, 12L, 30L, 
35L, 18L, 11L, 32L, 23L, 19L, 8L, 24L, 46L, 19L, 18L, 28L, 30L, 
19L, 12L, 27L, 32L, 15L, 20L, 31L, 34L, 26L, 30L, 25L, 26L, 17L, 
41L, 16L, 24L, 12L, 43L, 23L, 22L, 15L, 29L, 21L, 22L, 18L, 38L, 
10L, 20L, 13L, 46L, 19L, 19L, 9L, 32L, 10L, 17L, 27L, 31L, 21L, 
17L, 18L, 30L, 18L, 19L, 20L, 22L, 23L, 17L, 17L, 34L, 21L, 16L, 
4L, 34L, 29L, 20L, 18L, 25L, 21L, 24L, 19L, 15L, 16L, 18L, 13L, 
28L, 19L, 24L, 0L, 23L, 13L, 13L, 2L, 34L, 13L, 22L, 4L, 17L, 
26L, 5L, 17L, 27L, 18L, 30L, 0L, 30L, 11L, 34L, 0L, 27L, 9L, 
34L, 0L), TotalSurvey = c(41L, 19L, 47L, 22L, 43L, 21L, 49L, 
23L, 39L, 16L, 44L, 11L, 49L, 34L, 56L, 33L, 39L, 33L, 42L, 21L, 
50L, 37L, 56L, 23L, 34L, 26L, 53L, 19L, 36L, 32L, 44L, 21L, 38L, 
27L, 49L, 18L, 41L, 34L, 58L, 26L, 37L, 25L, 40L, 21L, 44L, 17L, 
51L, 16L, 51L, 24L, 32L, 22L, 34L, 21L, 37L, 20L, 44L, 10L, 36L, 
18L, 59L, 21L, 35L, 13L, 46L, 12L, 44L, 29L, 49L, 21L, 36L, 18L, 
47L, 19L, 41L, 21L, 29L, 23L, 40L, 20L, 39L, 21L, 38L, 4L, 41L, 
30L, 54L, 21L, 30L, 22L, 56L, 24L, 19L, 16L, 49L, 25L, 34L, 22L, 
54L, 20L, 33L, 14L, 40L, 10L, 37L, 14L, 43L, 23L, 27L, 30L, 40L, 
22L, 34L, 19L, 37L, 23L, 32L, 19L, 37L, 26L, 35L, 11L, 37L, 31L
), meps_labels = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("Albany", "Albuquerque", "Amarillo", 
"Anchorage", "Atlanta", "Baltimore", "Beckley", "Boise", "Boston", 
"Buffalo", "Butte", "Charlotte", "Chicago", "Cleveland", "Columbus", 
"Dallas", "Denver", "Des Moines", "Detroit", "El Paso", "Fargo", 
"Fort Dix", "Fort Jackson", "Fort Lee", "Harrisburg", "Honolulu", 
"Houston", "Indianapolis", "Jackson", "Jacksonville", "Kansas City", 
"Knoxville", "Lansing", "Little Rock", "Los Angeles", "Louisville", 
"Memphis", "Miami", "Milwaukee", "Minneapolis", "Montgomery", 
"Nashville", "New Orleans", "New York", "Oklahoma City", "Omaha", 
"Phoenix", "Pittsburgh", "Portland, ME", "Portland, OR", "Raleigh", 
"Sacramento", "Salt Lake City", "San Antonio", "San Diego", "San Jose", 
"San Juan", "Seattle", "Shreveport", "Sioux Falls", "Spokane", 
"Springfield", "St. Louis", "Syracuse", "Tampa"), class = "factor"), 
    RR = c(56, 100, 81, 73, 26, 76, 78, 83, 15, 88, 93, 91, 12, 
    94, 82, 55, 31, 91, 83, 86, 22, 86, 41, 83, 24, 92, 87, 100, 
    50, 88, 68, 90, 32, 100, 65, 83, 49, 91, 59, 100, 81, 100, 
    65, 81, 93, 94, 47, 75, 84, 96, 69, 68, 85, 100, 59, 90, 
    86, 100, 56, 72, 78, 90, 54, 69, 70, 83, 39, 93, 63, 100, 
    47, 100, 64, 95, 46, 95, 76, 100, 42, 85, 87, 100, 42, 100, 
    83, 97, 37, 86, 83, 95, 43, 79, 79, 100, 37, 52, 82, 86, 
    44, 0, 70, 93, 32, 20, 92, 93, 51, 17, 63, 87, 12, 77, 79, 
    95, 81, 0, 94, 58, 92, 0, 77, 82, 92, 0), Time = structure(c(15614, 
    15614, 15614, 15614, 15645, 15645, 15645, 15645, 15675, 15675, 
    15675, 15675, 15706, 15706, 15706, 15706, 15737, 15737, 15737, 
    15737, 15765, 15765, 15765, 15765, 15796, 15796, 15796, 15796, 
    15826, 15826, 15826, 15826, 15857, 15857, 15857, 15857, 15887, 
    15887, 15887, 15887, 15918, 15918, 15918, 15918, 15949, 15949, 
    15949, 15949, 15979, 15979, 15979, 15979, 16010, 16010, 16010, 
    16010, 16040, 16040, 16040, 16040, 16071, 16071, 16071, 16071, 
    16102, 16102, 16102, 16102, 16130, 16130, 16130, 16130, 16161, 
    16161, 16161, 16161, 16191, 16191, 16191, 16191, 16222, 16222, 
    16222, 16222, 16252, 16252, 16252, 16252, 16283, 16283, 16283, 
    16283, 16314, 16314, 16314, 16314, 16344, 16344, 16344, 16344, 
    16375, 16375, 16375, 16375, 16405, 16405, 16405, 16405, 16436, 
    16436, 16436, 16436, 16467, 16467, 16467, 16467, 16495, 16495, 
    16495, 16495, 16526, 16526, 16526, 16526), class = "Date"), 
    Year = c("2012", "2012", "2012", "2012", "2012", "2012", 
    "2012", "2012", "2012", "2012", "2012", "2012", "2013", "2013", 
    "2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013", 
    "2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013", 
    "2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013", 
    "2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013", 
    "2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013", 
    "2013", "2013", "2013", "2013", "2013", "2013", "2014", "2014", 
    "2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014", 
    "2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014", 
    "2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014", 
    "2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014", 
    "2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014", 
    "2014", "2014", "2014", "2014", "2014", "2014", "2015", "2015", 
    "2015", "2015", "2015", "2015", "2015", "2015", "2015", "2015", 
    "2015", "2015", "2015", "2015", "2015", "2015")), .Names = c("SurveyID", 
"MEPSID", "ServiceID", "SurveyReturnedYear", "SurveyReturnedMonth", 
"CompletedSurvey", "TotalSurvey", "meps_labels", "RR", "Time", 
"Year"), row.names = c(1L, 2L, 3L, 4L, 261L, 262L, 263L, 264L, 
521L, 522L, 523L, 524L, 781L, 782L, 783L, 784L, 1041L, 1042L, 
1043L, 1044L, 1301L, 1302L, 1303L, 1304L, 1561L, 1562L, 1563L, 
1564L, 1821L, 1822L, 1823L, 1824L, 2081L, 2082L, 2083L, 2084L, 
2341L, 2342L, 2343L, 2344L, 2601L, 2602L, 2603L, 2604L, 2861L, 
2862L, 2863L, 2864L, 3121L, 3122L, 3123L, 3124L, 3381L, 3382L, 
3383L, 3384L, 3641L, 3642L, 3643L, 3644L, 3901L, 3902L, 3903L, 
3904L, 4161L, 4162L, 4163L, 4164L, 4421L, 4422L, 4423L, 4424L, 
4681L, 4682L, 4683L, 4684L, 4941L, 4942L, 4943L, 4944L, 5201L, 
5202L, 5203L, 5204L, 5461L, 5462L, 5463L, 5464L, 5721L, 5722L, 
5723L, 5724L, 5981L, 5982L, 5983L, 5984L, 6241L, 6242L, 6243L, 
6244L, 6501L, 6502L, 6503L, 6504L, 6761L, 6762L, 6763L, 6764L, 
7021L, 7022L, 7023L, 7024L, 7281L, 7282L, 7283L, 7284L, 7541L, 
7542L, 7543L, 7544L, 7801L, 7802L, 7803L, 7804L), class = "data.frame")

And the code:

library(ggplot2)
library(grid)
library(scales)
library(gridExtra)


# Line chart of response rate (RR) over time, one line per service branch,
# with one facet per Year. Only rows with MEPSID == 1 are plotted.
p <- ggplot(
  data = df[df$MEPSID == 1, ],
  aes(x = Time, y = RR, colour = ServiceID, group = ServiceID,
      label = round(RR))
) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(y = "Response Rate") +
  # Zoom the y axis without dropping data; the upper limit leaves headroom
  # above 100.
  coord_cartesian(ylim = c(0, 110)) +
  geom_line(size = .5) +
  geom_point() +
  scale_color_manual(values = c("green4", "blue4", "red4", "dodgerblue")) +
  ggtitle("Counts") +
  theme(
    plot.title = element_text(size = 18, face = "bold", vjust = 1),
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 10, angle = 90),
    axis.line = element_line(colour = "black", size = .2),
    legend.background = element_rect(fill = "transparent"),
    # legend.position used to be supplied twice ("top" and "none"). A repeated
    # argument is an error in current ggplot2, so only one is kept. "top" is
    # retained because the legend text/margin settings below only matter when
    # the legend is drawn.
    # NOTE(review): confirm that showing the legend is the intended result.
    legend.position = "top",
    legend.title = element_blank(),
    # NOTE(review): recent ggplot2 documents legend.margin as a margin()
    # object rather than a unit; left as written -- confirm against the
    # installed version.
    legend.margin = unit(-0.6, "cm"),
    legend.text = element_text(size = 14),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.background = element_blank(),
    panel.grid.major.y = element_line(colour = "gray", linetype = "solid",
                                      size = .2)
  ) +
  # One tick per month, labelled with the abbreviated month name.
  scale_x_date(labels = date_format("%b"), breaks = date_breaks("month")) +
  # One panel per Year; free scales/space let each panel span only its own
  # months, with width proportional to that span.
  facet_grid(~Year, scales = "free", space = "free")


# Text "table" drawn underneath the line chart: the rounded RR value is printed
# at each (Time, ServiceID) position, coloured like the matching line and
# faceted by Year in the same way as the chart above it.
p2 <- ggplot(
  df[df$MEPSID == 1, ],
  aes(
    x = Time,
    y = ServiceID,
    label = format(round(RR), nsmall = 0),
    colour = ServiceID
  )
) +
  geom_text(size = 3.5) +
  # Same colours as the line chart.
  scale_colour_manual(values = c("green4", "blue4", "red4", "dodgerblue")) +
  # Reverse the factor levels so the first service is listed at the top.
  scale_y_discrete(limits = rev(levels(df$ServiceID))) +
  # Monthly breaks with the labels suppressed (labels = NULL).
  scale_x_date(
    labels = NULL,
    breaks = date_breaks("month"),
    expand = c(0.05, 0.05)
  ) +
  facet_grid(~Year, scales = "free", space = "free_x") +
  # No axis titles.
  labs(x = NULL, y = NULL) +
  # Strip the plot decorations so that only the text remains.
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_text(),
    axis.ticks = element_blank(),
    plot.margin = unit(c(-0.5, 1, 0, 0.5), "lines")
  )

# Stack the chart above the text table, giving the chart five times the
# table's height, then draw the combined layout.
stacked_plots <- arrangeGrob(p, p2, nrow = 2, heights = c(5, 1))
grid.arrange(stacked_plots)
like image 731
vashts85 Avatar asked Jun 17 '15 13:06

vashts85


2 Answers

You can use geom_blank to fine-tune facetted scales.

# Invisible anchor rows: geom_blank() draws nothing but still trains the
# scales, so these dates widen the x-range of the first (2012) and last (2015)
# facets and keep the outermost points/labels away from the panel edges.
pad_time <- as.Date(c("2012-09-20", "2012-12-15", "2014-12-20", "2015-04-10"))
# df$Year is a character column, so the padding rows use character values too.
# A numeric Year would give the facet variable a different type in this layer
# than in the plot data, which newer ggplot2 releases may refuse to combine.
pad_year <- c("2012", "2012", "2015", "2015")

grid.arrange(
  p + geom_blank(
    data = data.frame(Time = pad_time, RR = 1:4, Year = pad_year,
                      stringsAsFactors = FALSE),
    # Drop the aesthetics inherited from the plot that this data cannot supply.
    aes(colour = NULL, group = NULL, label = NULL)
  ),
  p2 + geom_blank(
    data = data.frame(Time = pad_time, ServiceID = 1:4, Year = pad_year,
                      stringsAsFactors = FALSE),
    aes(colour = NULL, group = NULL, label = NULL)
  ),
  nrow = 2, heights = c(5, 1)
)

resulting plot

like image 66
Roland Avatar answered Oct 25 '22 19:10

Roland


Another option is to adjust text using hjust argument as an aes. But first you should add it to the data as its own column that you will pass into the ggplot command :

library(data.table)
# data.table version of the MEPSID == 1 rows.
DX <- setDT(df[df$MEPSID == 1, ])
# Add an hjust column, computed separately within each Year:
# 0.1 for the earliest Time, 0.8 for the latest Time, 0.4 for everything else.
DX[
  ,
  hjust := ifelse(
    Time == min(Time),
    0.1,
    ifelse(Time == max(Time), 0.8, 0.4)
  ),
  by = Year
]

# Same aesthetics as the question's p2, but built from DX so that the per-row
# hjust column can be mapped as an aesthetic for the text labels.
p2<-ggplot(DX, 
           aes(x = Time, y = ServiceID, label=format(round(RR), nsmall=0), 
               colour = ServiceID,hjust=hjust)) +
    ## ...followed by the remaining layers of the question's p2 (elided here)

enter image description here

add some explanation:

Here you are plotting text (Time versus ServiceID) by year. Since we want to shift the text horizontally, we do it according to the value of Time (the x-coordinate). More precisely, we just shift the left-most points to the right and the right-most points to the left. This is done by setting a different hjust value for each group of values (left vs. right).

So for each year (each facet), I horizontally adjust the points corresponding to the min of Time (the extreme left points of the facet) and the max of Time (the extreme right points of the facet). There is no need to adjust the other points, even though I do it here.

 DX[,hjust:=ifelse(Time==min(Time),0.1,    ## earliest Time in the group (left edge of the facet): 0.1
            ifelse(Time==max(Time),0.8,    ## latest Time in the group (right edge of the facet): 0.8
             0.4)),                        ## every other Time: 0.4
             Year]                         ## grouped by Year, i.e. computed once per facet

You can do the transformation in base R using ave:

# Base R version of the per-facet hjust column.
# `xx` was not defined in the original snippet; it is taken here to be the
# same MEPSID == 1 subset that the plots use.
xx <- df[df$MEPSID == 1, ]
# ave() applies FUN to the Time values of each Year group and returns a vector
# aligned with the rows of xx. Time is converted with as.numeric() first, so
# the comparisons and the returned hjust values are plain numbers.
# The result is stored as a column so that it can be mapped with
# aes(hjust = hjust).
xx$hjust <- ave(
  as.numeric(xx$Time), xx$Year,
  FUN = function(x) {
    ifelse(x == min(x), 0.1, ifelse(x == max(x), 0.8, 0.4))
  }
)
like image 34
agstudy Avatar answered Oct 25 '22 18:10

agstudy