Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

R - Error using summary() from speedglm package

I'm using speedglm to estimate a logistic regression model on some data. I've created a reproducible example which generates the same error that I get using my original data.

library(speedglm)
n <- 10000
dtf <- data.frame( y = sample(c(0,1), n, 1),
                  x1 = as.factor(sample(c("a","b"), n, 1)),
                  x2 = rnorm(n, 30, 10))
m <- speedglm(y ~ x1 + x2, dtf, family=binomial())
summary(m)

The output is the following:

Generalized Linear Model of class 'speedglm':

Call:  speedglm(formula = y ~ x1 + x2, data = dtf, family = binomial()) 

Coefficients:
 ------------------------------------------------------------------ 
Error in data.frame(..., check.names = FALSE) : 
  arguments imply differing number of rows: 3, 0

I've checked the source code of summary.speedglm by executing getS3method("summary", "speedglm") and found the code line which generates the error, but it didn't help to solve the problem.

PS: someone with 1500+ rep should create the speedglm tag.

UPDATE

Marco Enea, the maintainer of speedglm, asked to post the following temporary fix for summary.speedglm and print.summary.speedglm.

summary.speedglm <- function (object, correlation = FALSE, ...) 
{
  if (!inherits(object, "speedglm")) 
    stop("object is not of class speedglm")
  z <- object
  var_res <- as.numeric(z$RSS/z$df)
  dispersion <- if (z$family$family %in% c("poisson", "binomial")) 1 else var_res
  if (z$method == "qr") {
    z$XTX <- z$XTX[z$ok, z$ok]
  }
  inv <- solve(z$XTX, tol = z$tol.solve)
  covmat <- diag(inv)
  se_coef <- rep(NA, length(z$coefficients))
  se_coef[z$ok] <- sqrt(dispersion * covmat)
  if (z$family$family %in% c("binomial", "poisson")) {
    z1 <- z$coefficients/se_coef
    p <- 2 * pnorm(abs(z1), lower.tail = FALSE)
  } else {
    t1 <- z$coefficients/se_coef
    p <- 2 * pt(abs(t1), df = z$df, lower.tail = FALSE)
  }
  ip <- !is.na(p)
  p[ip] <- as.numeric(format(p[ip], digits = 3))
  dn <- c("Estimate", "Std. Error")
  if (z$family$family %in% c("binomial", "poisson")) {
    format.coef <- if (any(na.omit(abs(z$coef)) < 1e-04)) 
      format(z$coefficients, scientific = TRUE, digits = 4) else 
        round(z$coefficients, digits = 7)
    format.se <- if (any(na.omit(se_coef) < 1e-04)) 
      format(se_coef, scientific = TRUE, digits = 4) else round(se_coef, digits = 7)
    format.pv <- if (any(na.omit(p) < 1e-04)) 
      format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
    param <- data.frame(format.coef, format.se, round(z1, 
                                                      digits = 4), format.pv)
    dimnames(param) <- list(names(z$coefficients), c(dn, 
                                                     "z value", "Pr(>|z|)"))
  } else {
    format.coef <- if (any(abs(na.omit(z$coefficients)) < 
                             1e-04)) 
      format(z$coefficients, scientific = TRUE, digits = 4) else 
        round(z$coefficients, digits = 7)
    format.se <- if (any(na.omit(se_coef) < 1e-04)) 
      format(se_coef, scientific = TRUE, digits = 4) else 
        round(se_coef, digits = 7)
    format.pv <- if (any(na.omit(p) < 1e-04)) 
      format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
    param <- data.frame(format.coef, format.se, round(t1, 
                                                      digits = 4), format.pv)
    dimnames(param) <- list(names(z$coefficients), c(dn, 
                                                     "t value", "Pr(>|t|)"))
  }
  eps <- 10 * .Machine$double.eps
  if (z$family$family == "binomial") {
    if (any(z$mu > 1 - eps) || any(z$mu < eps)) 
      warning("fitted probabilities numerically 0 or 1 occurred")
  }
  if (z$family$family == "poisson") {
    if (any(z$mu < eps)) 
      warning("fitted rates numerically 0 occurred")
  }
  keep <- match(c("call", "terms", "family", "deviance", "aic", 
                  "df", "nulldev", "nulldf", "iter", "tol", "n", "convergence", 
                  "ngoodobs", "logLik", "RSS", "rank"), names(object), 
                0)
  ans <- c(object[keep], list(coefficients = param, dispersion = dispersion, 
                              correlation = correlation, cov.unscaled = inv, cov.scaled = inv * 
                                var_res))
  if (correlation) {
    ans$correl <- (inv * var_res)/outer(na.omit(se_coef), 
                                        na.omit(se_coef))
  }
  class(ans) <- "summary.speedglm"
  return(ans)
}

print.summary.speedglm <- function (x, digits = max(3, getOption("digits") - 3), ...) 
{
  cat("Generalized Linear Model of class 'speedglm':\n")
  if (!is.null(x$call)) 
    cat("\nCall: ", deparse(x$call), "\n\n")
  if (length(x$coef)) {
    cat("Coefficients:\n")
    cat(" ------------------------------------------------------------------", 
        "\n")
    sig <- function(z){
      if (!is.na(z)){
        if (z < 0.001) 
          "***"
        else if (z < 0.01) 
          "** "
        else if (z < 0.05) 
          "*  "
        else if (z < 0.1) 
          ".  "
        else "   "
      } else "   "
    }
    options(warn=-1)
    sig.1 <- sapply(as.numeric(as.character(x$coefficients[,4])), 
                    sig)
    options(warn=0)
    est.1 <- cbind(format(x$coefficients, digits = digits), 
                   sig.1)
    colnames(est.1)[ncol(est.1)] <- ""
    print(est.1)
    cat("\n")
    cat("-------------------------------------------------------------------", 
        "\n")
    cat("Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1", 
        "\n")
    cat("\n")
  }
  else cat("No coefficients\n")
  cat("---\n")
  cat("null df: ", x$nulldf, "; null deviance: ", round(x$nulldev, 
                                                        digits = 2), ";\n", "residuals df: ", x$df, "; residuals deviance: ", 
      round(x$deviance, digits = 2), ";\n", "# obs.: ", x$n, 
      "; # non-zero weighted obs.: ", x$ngoodobs, ";\n", "AIC: ", 
      x$aic, "; log Likelihood: ", x$logLik, ";\n", "RSS: ", 
      round(x$RSS, digits = 1), "; dispersion: ", x$dispersion, 
      "; iterations: ", x$iter, ";\n", "rank: ", round(x$rank, 
                                                       digits = 1), "; max tolerance: ", format(x$tol, scientific = TRUE, 
                                                                                                digits = 3), "; convergence: ", x$convergence, ".\n", 
      sep = "")
  invisible(x)
  if (x$correlation) {
    cat("---\n")
    cat("Correlation of Coefficients:\n")
    x$correl[upper.tri(x$correl, diag = TRUE)] <- NA
    print(x$correl[-1, -nrow(x$correl)], na.print = "", digits = 2)
  }
}

Following 42' suggestion, I would also add the following:

environment(summary.speedglm) <- environment(speedglm)
environment(print.summary.speedglm) <- environment(speedglm)
like image 972
pietrop Avatar asked Jan 19 '16 21:01

pietrop


Video Answer


2 Answers

The print.summary.speedglm function has a tiny bug in it. If you change this line:

sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")), sig))

To this line:

 sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|z|)")), sig))

And also run:

environment(print.summary.speedglm) <- environment(speedglm)

You will not see the error message anymore.

The proper way to report bugs is to contact the maintainer (I'll send him an email):

maintainer('speedglm')
[1] "Marco Enea <[email protected]>"
like image 173
IRTFM Avatar answered Oct 18 '22 00:10

IRTFM


It appears that this is a bug; in speedglm:::print.summary.speedglm there is the line:

        sig.1 <- sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")), 
        sig)

but when you look at the object, you can see:

              Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.0546397  0.0655713 -0.8333    0.405
x1b         -0.0618225  0.0400126 -1.5451    0.122
x2           0.0020771  0.0019815  1.0483    0.295

which has a Pr(>|z|) instead of Pr(>|t|), so the sig stars fail.

like image 2
Neal Fultz Avatar answered Oct 18 '22 01:10

Neal Fultz