I am trying to manually plot model estimates on top of data. My real problem is far more complicated than this, so I want to avoid using predict
if I can, and would prefer to understand how these predictions would be calculated rather than relying on some package.
(data for a reproducible example at the bottom.)
So I first run a model, and grab the model estimates and standard errors:
library(glmmTMB)
glmmLep<-glmmTMB(Lepidoptera ~ DayL50,
data=Dat, family=nbinom2(link="log") )
dB_est<-(summary(glmmLep)$coeff$cond[2,1])
dB_SE<-(summary(glmmLep)$coeff$cond[2,2])
Int<-(summary(glmmLep)$coeff$cond[1,1])
Int_SE<-(summary(glmmLep)$coeff$cond[1,2])
Then, I create a sequence of x values to predict from
x<-seq(from=min(Dat$DayL50),to=max(Dat$DayL50),length.out = length(Dat$DayL50))
Then I predict the y values with two different methods (using predict
and writing the equation that should do the same thing)
ypred<-exp(dB_est*x+Int)
y<-predict(glmmLep,list(DayL50=x),type="response",se.fit = T)
We plot the two predicted lines (one as a smaller red line on top):
ggplot(aes(x=DayL50,y=Lepidoptera),data=Dat)+
geom_point(size=2)+
geom_line(aes(y=y$fit,x=x),size=2)+
geom_ribbon(aes(ymax=y$fit+1.96*y$se.fit,ymin=y$fit-1.96*y$se.fit,x=x),alpha=0.2)+
geom_line(aes(y=ypred,x=x),size=1,color="red")+
# geom_ribbon(aes(ymax=ymax,ymin=ymin,x=x),alpha=0.2,color="red")+
coord_cartesian(ylim=c(0,1000))
We see that the equation I wrote works the same as the predict
function. All good. However, when I go to add the SE / 95% CI ribbon around that line, I run into issues trying to recreate it (here I left as SE, since 95%CI leads to more of an unwieldy plot). I have played with the formula in many different ways, and can't seem to get it. For some reason, I cannot seem to find any posts about it, but perhaps I am not using the correct search terms. Can anyone explain to me what I am missing here. It seems as if I am missing quite a bit of complexity in my error ribbons (outlined in red).
ymin<-exp((dB_est-dB_SE)*x+(Int))
ymax<-exp((dB_est+dB_SE)*x+(Int))
ggplot(aes(x=DayL50,y=Lepidoptera),data=Dat)+
geom_point(size=2)+
geom_line(aes(y=y$fit,x=x),size=2)+
geom_ribbon(aes(ymax=y$fit+1.96*y$se.fit,ymin=y$fit-1.96*y$se.fit,x=x),alpha=0.2)+
geom_line(aes(y=ypred,x=x),size=1,color="red")+
geom_ribbon(aes(ymax=ymax,ymin=ymin,x=x),alpha=0.2,color="red")+
coord_cartesian(ylim=c(0,1000))
Or with 95%CI, like my predict
ribbon, which is even further off:
ymin<-exp((dB_est-1.96*dB_SE)*x+(Int))
ymax<-exp((dB_est+1.96*dB_SE)*x+(Int))
ggplot(aes(x=DayL50,y=Lepidoptera),data=Dat)+
geom_point(size=2)+
geom_line(aes(y=y$fit,x=x),size=2)+
geom_ribbon(aes(ymax=y$fit+1.96*y$se.fit,ymin=y$fit-1.96*y$se.fit,x=x),alpha=0.2)+
geom_line(aes(y=ypred,x=x),size=1,color="red")+
geom_ribbon(aes(ymax=ymax,ymin=ymin,x=x),alpha=0.2,color="red")+
coord_cartesian(ylim=c(0,1000))
Dat<-structure(list(Lepidoptera = c(0L, 0L, 1L, 0L, 1L, 1L, 807L,
103L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 63L, 0L, 0L, 3L, 1L, 94L,
0L, 0L, 0L, 0L, 27L, 0L, 0L, 117L, 0L, 0L, 95L, 0L, 0L, 0L, 11L,
0L, 0L, 0L, 0L, 0L, 0L, 2L, 11L, 0L, 0L, 0L, 5L, 26L, 0L, 0L,
0L, 0L, 0L, 76L, 0L, 610L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 56L, 0L,
1L, 119L, 0L, 14L, 0L, 0L, 302L, 0L, 0L, 113L, 312L, 0L, 0L,
0L, 1L, 323L, 53L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 2L, 720L, 0L,
2L, 0L, 2L, 152L, 0L, 1L, 0L, 2L, 172L, 0L, 0L, 55L, 0L, 136L,
0L, 5L, 0L, 108L, 0L, 0L, 912L, 34L, 0L, 1L, 6L, 1405L, 3L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 14L, 1236L, 7L, 8L, 11L, 231L, 1L, 0L,
163L, 531L, 7L, 2L, 155L, 3L, 0L, 16L, 69L, 2L, 1084L, 5L, 7L,
120L, 2L, 1L, 48L, 1L, 0L, 1303L, 107L, 0L, 0L, 0L, 463L, 13L,
36L, 2L, 0L, 0L, 2L, 0L, 77L, 0L, 0L, 374L, 0L, 0L, 18L, 1L,
0L, 0L, 158L, 269L, 0L, 0L, 0L, 1L, 16L, 6L, 0L, 1L, 258L, 0L,
8L, 0L, 22L, 2838L, 226L, 0L, 8L, 302L, 4196L, 16L, 1L, 0L, 0L,
1357L, 6L, 0L, 3L, 1L, 0L, 304L, 2257L, 0L, 0L, 2L, 34L, 142L,
0L, 0L, 2L, 0L, 402L, 154L, 480L, 461L, 1463L, 0L, 0L, 0L, 116L,
0L, 6L, 0L, 0L, 0L, 7L, 0L, 276L, 0L, 0L, 4L, 0L, 32L, 0L, 0L,
6L, 0L, 40L, 1L, 0L, 71L, 0L, 4L, 0L, 0L, 96L, 10L, 0L, 0L, 0L,
0L, 4L, 0L, 22L, 0L, 0L, 0L, 1L, 18L, 83L, 0L, 0L, 38L, 207L,
0L, 0L, 0L, 0L, 0L, 506L, 0L, 0L, 1L, 0L, 0L, 0L, 708L, 0L, 1L,
39L, 0L, 588L, 0L, 0L, 8L, 154L, 1L, 0L, 0L, 0L, 0L, 3L, 129L,
0L, 1L, 0L, 0L, 0L, 452L, 59L, 0L, 2L, 596L, 0L, 4L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 23L, 0L, 0L, 0L, 0L, 46L, 7L, 0L, 0L, 0L,
55L, 5L, 0L, 4L, 0L, 51L, 0L, 0L, 1L, 9L, 1L, 84L, 43L, 0L, 2L,
1L, 95L, 1L, 259L, 0L, 0L, 0L, 6L, 427L, 0L, 66L, 0L, 3L, 752L,
109L, 2L, 0L, 0L, 0L, 4L, 5L, 0L, 151L, 0L, 4L, 1L, 0L, 32L,
0L, 0L, 0L, 3L, 122L, 47L, 1L, 0L, 7L, 52L, 174L, 0L, 0L, 1L,
23L, 5L, 1L, 0L, 932L, 2L, 290L, 3L, 2078L, 48L, 0L, 3L, 0L,
0L, 37L, 0L, 169L, 0L, 0L, 142L, 2052L, 1L, 0L, 377L, 0L, 1L,
3857L, 19L, 220L, 2332L, 0L, 17L, 1L, 926L, 16L, 6815L, 39L,
0L, 6L, 289L, 626L, 1L, 1L, 0L, 1L, 0L, 30L, 0L, 0L, 395L, 0L,
450L, 1L, 679L, 0L, 0L, 17L, 817L, 4L, 10L, 300L, 41L, 1L, 1L,
164L), DayL50 = c(62.2, 45.4, 71.8, 60.4, 60.4, 60.4, 60.4, 60.4,
45.1, 45.1, 45.1, 45.1, 69.5, 71.3, 71.3, 71.3, 70.7, 74, 69.4,
69.4, 69.4, 69.4, 69.4, 67.3, 54.9, 71.5, 71.5, 71.5, 71.5, 71.5,
71.5, 74.1, 74.1, 74.1, 74.1, 66.5, 66.5, 66.5, 66.5, 66.5, 73.2,
55.8, 55.8, 70.3, 70.3, 70.3, 70.3, 68.2, 68.2, 68.2, 68.2, 68.2,
48.4, 50.6, 73.2, 73.2, 73.2, 73.2, 73.2, 52.2, 61.2, 66, 68.2,
58.1, 59.9, 59.9, 59.9, 59.9, 59.9, 54.8, 54.8, 54.8, 54.8, 54.8,
63.9, 63.9, 63.9, 63.9, 63.9, 69.8, 69.8, 69.8, 69.8, 69.8, 45.4,
47.2, 54.5, 48.8, 68.4, 39.7, 45.4, 45.4, 45.4, 45.4, 45.4, 46.8,
46.8, 46.8, 46.8, 46.8, 54.3, 54.3, 54.3, 54.3, 54.3, 49.2, 49.2,
49.2, 49.2, 49.2, 68.8, 68.8, 68.8, 68.8, 68.8, 39.6, 39.6, 39.6,
39.6, 39.6, 41.2, 70.7, 62.1, 44.5, 70.1, 49.8, 53.8, 72.5, 61.5,
61.5, 61.5, 61.5, 45.4, 45.4, 45.4, 45.4, 45.4, 69.5, 70.8, 70.8,
70.8, 70.8, 66.3, 73.2, 73.2, 73.2, 73.2, 73.2, 50.4, 50.4, 50.4,
50.4, 50.4, 54.1, 54.1, 54.1, 54.1, 54.1, 73.5, 67.9, 67.9, 67.9,
67.9, 67.9, 70.7, 74, 71.5, 74.1, 74.1, 74.1, 74.1, 74.1, 43.8,
71.5, 71.5, 71.5, 74.1, 74.1, 74.1, 74.1, 74.1, 48.7, 69, 69,
69, 69, 65.8, 45.4, 45.4, 45.4, 45.4, 47.9, 47.9, 47.9, 47.9,
39.9, 39.9, 39.9, 39.9, 39.9, 39.9, 67.7, 67.7, 67.7, 67.7, 70.9,
70.9, 70.9, 70.9, 70.9, 70.9, 57.3, 61.2, 59.9, 59.9, 59.9, 59.9,
63.9, 63.9, 63.9, 63.9, 63.9, 70, 70.4, 70.4, 63.6, 45.2, 45.2,
45.2, 45.2, 45.2, 55.1, 64.5, 64.1, 64.1, 64.1, 64.1, 54, 54,
54, 54, 54, 65, 65, 65, 65, 65, 61.9, 64.2, 62.3, 62.3, 62.3,
36.5, 64.2, 64.2, 64.2, 64.2, 64.2, 58.8, 38.3, 38.3, 38.3, 38.3,
38.3, 59.1, 59.1, 59.1, 59.1, 59.1, 58.6, 66.1, 66.1, 66.1, 66.1,
76.5, 76.5, 76.5, 76.5, 76.5, 76.5, 70.5, 72.7, 70.3, 70.3, 70.3,
70.3, 71.8, 71.8, 71.8, 71.8, 71.8, 45.4, 71, 37.2, 37.2, 37.2,
37.2, 61.2, 65, 69.8, 69.8, 69.8, 69.8, 69.8, 60.3, 60.3, 60.3,
60.3, 60.3, 64.9, 64.9, 64.9, 64.9, 64.9, 47.7, 54.3, 69.3, 54.5,
54.5, 54.5, 54.5, 54.5, 54.5, 47.8, 47.8, 47.8, 47.8, 47.8, 54.6,
54.6, 54.6, 54.6, 54.6, 69.1, 69.1, 69.1, 69.1, 69.1, 38.7, 57.1,
35.9, 35.9, 35.9, 35.9, 35.9, 56.7, 56.7, 56.7, 56.7, 56.7, 51.9,
61.8, 52.1, 52.1, 52.1, 52.1, 52.1, 63.2, 63.2, 63.2, 63.2, 63.2,
71.9, 74.7, 72, 72, 72, 72, 72, 74.6, 74.6, 74.6, 74.6, 74.6,
62, 69, 61.1, 61.1, 61.1, 61.1, 61.1, 68.4, 68.4, 68.4, 68.4,
68.4, 45.3, 58.6, 43.8, 43.8, 43.8, 43.8, 43.8, 60.3, 60.3, 60.3,
60.3, 60.3, 54, 54.4, 64.8, 55, 55, 55, 55, 55, 71, 71, 71, 71,
71, 52.8, 52.8, 52.8, 52.8, 52.8, 63.9, 63.9, 63.9, 63.9, 35.1,
35.1, 35.1, 35.1, 35.1, 35.1, 78.9, 78.9, 78.9, 78.9, 78.9, 48,
66.6, 54.2, 54.2, 54.2, 54.2, 54.2, 54.2, 49.5, 49.5, 49.5, 49.5,
49.5, 56.3, 56.3, 56.3, 56.3, 66.6, 66.6, 66.6, 66.6, 66.6)), class = "data.frame", row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L,
81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L,
94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L,
106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L,
117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L,
128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L,
139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L,
150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L,
161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L,
172L, 173L, 175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L,
184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L,
195L, 196L, 197L, 198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L,
206L, 207L, 208L, 209L, 210L, 211L, 212L, 213L, 214L, 215L, 216L,
217L, 218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L,
228L, 229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 238L,
239L, 240L, 241L, 242L, 243L, 244L, 245L, 246L, 247L, 248L, 249L,
250L, 251L, 252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L, 260L,
262L, 263L, 264L, 265L, 266L, 267L, 268L, 269L, 270L, 271L, 272L,
273L, 274L, 275L, 276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L,
284L, 285L, 286L, 287L, 288L, 289L, 290L, 291L, 292L, 293L, 294L,
295L, 296L, 297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 305L,
306L, 307L, 308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L,
317L, 318L, 319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L,
328L, 329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L,
339L, 340L, 341L, 342L, 343L, 344L, 345L, 346L, 347L, 348L, 349L,
350L, 351L, 352L, 353L, 354L, 355L, 356L, 357L, 358L, 359L, 360L,
361L, 362L, 363L, 364L, 365L, 366L, 367L, 368L, 369L, 370L, 371L,
372L, 373L, 374L, 375L, 376L, 377L, 378L, 379L, 380L, 381L, 382L,
383L, 384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L,
394L, 395L, 396L, 397L, 398L, 399L, 400L, 401L, 402L, 403L, 404L,
405L, 406L, 407L, 408L, 409L, 410L, 411L, 412L, 413L, 414L, 415L,
416L, 417L, 418L, 419L, 420L, 421L, 422L, 423L, 424L, 425L, 426L,
427L, 428L, 429L, 430L, 431L, 432L, 433L, 434L, 435L, 436L, 437L,
438L, 439L, 440L, 441L, 442L, 443L, 444L, 445L, 446L, 447L, 448L,
449L, 450L, 451L, 452L, 453L, 454L, 455L))
In addition to the quantile function, the prediction interval for any standard score can be calculated by (1 − (1 − Φµ,σ2(standard score))·2). For example, a standard score of x = 1.96 gives Φµ,σ2(1.96) = 0.9750 corresponding to a prediction interval of (1 − (1 − 0.9750)·2) = 0.9500 = 95%.
Regression analysis is used to predict future trends. A prediction interval is a type of confidence interval (CI) used with predictions in regression analysis; it is a range of values that predicts the value of a new observation, based on your existing model.
The prediction interval predicts in what range a future individual observation will fall, while a confidence interval shows the likely range of values associated with some statistical parameter of the data, such as the population mean.
The predict() function in R is used to predict the values based on the input data. All the modeling aspects in the R program will make use of the predict() function in its own way, but note that the functionality of the predict() function remains the same irrespective of the case.
Determines in which way estimates are sorted in the plot: If NULL (default), no sorting is done and estimates are sorted in the same order as they appear in the model formula. If TRUE, estimates are sorted in descending order, with highest estimate at the top.
Check model assumptions. Note: For mixed models, the diagnostic plots like linear relationship or check for Homoscedasticity, do not take the uncertainty of random effects into account, but is only based on the fixed effects part of the model. A character vector, naming a function that will be applied on estimates and confidence intervals.
There are several options to customize the plot appearance: The colors -argument either takes the name of a valid colorbrewer palette (see also the related vignette ), "bw" or "gs" for black/white or greyscaled colors, or a string with a color name. value.offset and value.size adjust the positioning and size of value labels, if shown.
If the fitted model only contains one predictor, slope-line is plotted. For mixed effects models, plots the random effects. Forest-plot of standardized beta values. Forest-plot of standardized beta values, however, standardization is done by dividing by two sd (see 'Details'). Predicted values (marginal effects) for specific model terms.
This is more of a stats question than a programming question. The only thing "wrong" with your code is your method of calculating the standard errors for the fit of your model.
The se.fit
of the prediction doesn't come from a single coefficient's standard error, but is calculated from a linear combination of the model's parameters (intercept and DayL50, or β₀ and β₁) using the model's variance-covariance matrix. So if I want to know the standard error of the fit with the given intercept i.e. (1 * β₀) and x = 50, i.e. (50 * β₁), I can do
fit_at <- c(1, 50)
covariance_matrix <- vcov(mod)$cond
se_50 <- sqrt(t(fit_at) %*% covariance_matrix %*% fit_at)
se_50
#> [,1]
#> [1,] 0.03696078
So, provided you are able to get the covariance matrix for your model, you can calculate the standard errors to match se.fit
as in the following worked example:
library(glmmTMB)
library(ggplot2)
# Generate our model
mod <- glmmTMB(Lepidoptera ~ DayL50, data = Dat, family = nbinom2())
# Create sequence for x variable in predictions
x <- with(Dat, seq(min(DayL50), max(DayL50), length.out = length(DayL50)))
# Generate a data frame of prediction with upper and lower SEM using predict()
pred <- predict(mod, list(DayL50 = x), type = "link", se.fit = TRUE)
pred <- data.frame(x = x, y = exp(pred$fit),
ymin = exp(pred$fit - 1.96 * pred$se.fit),
ymax = exp(pred$fit + 1.96 * pred$se.fit))
# Generate a data frame of prediction with upper and lower SEM manually
Int <- summary(mod)$coefficients$cond[1]
dB <- summary(mod)$coefficients$cond[2]
se <- sapply(x, function(y) sqrt(t(c(1, y)) %*% vcov(mod)$cond %*% c(1, y)))
manual <- data.frame(x = x, y = exp(x * dB + Int),
ymax = exp(x * dB + Int + 1.96 * se),
ymin = exp(x * dB + Int - 1.96 * se))
Now we have our data frames, we can make the plot. Here I will show the equivalence of the predict
and manual methods by overlapping blue (pred
) and dashed red (manual
) ribbons around the confidence intervals produced by both methods:
ggplot() +
geom_point(aes(x = DayL50, y = Lepidoptera), data = Dat, size = 2) +
geom_line(aes(x = x, y = y), data = pred, size = 2) +
geom_ribbon(aes(x = x, ymin = ymin, ymax = ymax), data = pred,
colour = "blue", size = 2, alpha = 0.2) +
geom_ribbon(aes(x = x, ymin = ymin, ymax = ymax), data = manual,
colour = "red", size = 2, linetype = 2, alpha = 0) +
coord_cartesian(ylim = c(0, 1000))
Created on 2020-05-03 by the reprex package (v0.3.0)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With