ycroissant · tappek · Jan 2, 2025 · Jan 2, 2025 · Jan 2, 2025 · Jan 2, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: plm
 Version: 2.6-9999
-Date: 2024-04-04
+Date: 2025-01-02
 Title: Linear Models for Panel Data
 Authors@R: c(person(given = "Yves",      family = "Croissant",    role = c("aut"), email = "[email protected]"),
              person(given = "Giovanni",  family = "Millo",        role = c("aut"), email = "[email protected]"),

diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,7 @@ subtitle: plm - Linear Models for Panel Data - A set of estimators and tests for
 # 2.6-9999 development version, changes since 2.6-4
 
 ### Fixes:
+* `pgmm`: for one-step GMM models: fix usual (non-robust) covariance matrix/standard errors.
 * `vcovXX`: FD models with only one observation per group prior to 
            first-differencing errored ([#58](https://github.com/ycroissant/plm/issues/58)).
 * `pggls`: FD models errored with the data constellation as described for `vcovXX`. 

diff --git a/R/est_gmm.R b/R/est_gmm.R
@@ -114,12 +114,25 @@
 #' 
 #' data("EmplUK", package = "plm")
 #' 
-#' ## Arellano and Bond (1991), table 4 col. b 
-#' z1 <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
-#'            + log(capital) + lag(log(output), 0:1) | lag(log(emp), 2:99),
+#'# Arellano/Bond 1991, Table 4, column (a1) [non-robust SEs]
+#'ab.a1 <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
+#'              + lag(log(capital), 0:2) + lag(log(output), 0:2) | lag(log(emp), 2:99),
+#'              data = EmplUK, effect = "twoways", model = "onestep")
+#'summary(ab.a1, robust = FALSE)
+#'
+#'# Arellano/Bond 1991, Table 4, column (a2) [non-robust SEs]
+#'ab.a2 <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
+#'              + lag(log(capital), 0:2) + lag(log(output), 0:2) | lag(log(emp), 2:99),
+#'              data = EmplUK, effect = "twoways", model = "twosteps")
+#'summary(ab.a2, robust = FALSE)
+#'
+## Arellano and Bond (1991), table 4 col. b / # Windmeijer (2025), table 2, std. errc
+#'ab.b <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
+#'             + log(capital) + lag(log(output), 0:1) | lag(log(emp), 2:99),
 #'             data = EmplUK, effect = "twoways", model = "twosteps")
-#' summary(z1, robust = FALSE)
-#' 
+#'summary(ab.b, robust = FALSE) # Arellano/Bond
+#'summary(ab.b, robust = TRUE)  # Windmeijer
+#'
 #' ## Blundell and Bond (1998) table 4 (cf. DPD for OX p. 12 col. 4)
 #' z2 <- pgmm(log(emp) ~ lag(log(emp), 1)+ lag(log(wage), 0:1) +
 #'            lag(log(capital), 0:1) | lag(log(emp), 2:99) +
@@ -528,19 +541,27 @@ pgmm <- function(formula, data, subset, na.action,
   } else solve(A1)
   A1 <- A1 * length(W)
 
+  # coefficients: Roodman (2019) formula (13, upper part for beta1) with B1 = (X'Z(Z'HZ)^(-1)Z'X)^(-1), A1 = (Z'HZ)^(-1)
   t.CP.WX.A1 <- t(crossprod(WX, A1))
   B1 <- solve(crossprod(WX, t.CP.WX.A1))
   Y1 <- crossprod(t.CP.WX.A1, Wy)
   coefficients <- as.numeric(crossprod(B1, Y1))
+
   if (effect == "twoways") names.coef <- c(names.coef, namesV)
   names(coefficients) <- names.coef
 
   residuals <- lapply(yX, function(x)
                       as.vector(x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coefficients)))
   outresid <- lapply(residuals, function(x) outer(x, x))
-
-  # A2 is also needed in vcovHC.pggm for "onestep" model, hence calc. here and 
-  # include in model object also for onestep model
+
+  # non-robust variance-covariance matrix of one-step GMM:
+  # see Doornik/Arellano/Bond (2012), p. 31 (formula for V^hat1 with sig2 as in (4) on p. 30)
+  CPresid <- crossprod(unlist(residuals))
+  sig2 <- as.numeric(CPresid / (pdim$nT$N - NCOL(B1)))
+  vcov <- sig2 * B1
+
+  # A2 is also needed  for "onestep" model in vcovHC.pgmm, hence calc. here and 
+  # always include in model object 
   A2 <- mapply(function(x, y) crossprod(t(crossprod(x, y)), x), W, outresid, SIMPLIFY = FALSE)
   A2 <- Reduce("+", A2)
   minevA2 <- min(eigen(A2)$values)
@@ -553,27 +574,25 @@ pgmm <- function(formula, data, subset, na.action,
     coef1s <- coefficients
     t.CP.WX.A2 <- t(crossprod(WX, A2))
     Y2 <- crossprod(t.CP.WX.A2, Wy)
-    B2 <- solve(crossprod(WX, t.CP.WX.A2))
-    coefficients <- as.numeric(crossprod(B2, Y2))
-    names(coefficients) <- names.coef
+    vcov <- solve(crossprod(WX, t.CP.WX.A2))
+    coef2s <- as.numeric(crossprod(vcov, Y2))
+    names(coef2s) <- names.coef
+    coefficients <- list("step1" = coef1s, "step2" = coef2s)
 
     # calc. residuals with coefs from 2nd step
     residuals <- lapply(yX, function(x){
                            nz <- rownames(x)
-                           z <- as.vector(x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coefficients))
+                           z <- as.vector(x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coef2s))
                            names(z) <- nz
                            z})
-    vcov <- B2
-  } else vcov <- B1
+  }
 
   rownames(vcov) <- colnames(vcov) <- names.coef
 
   # TODO: yX does not contain the original data (but first-diff-ed data) -> fitted.values not what you would expect
   fitted.values <- mapply(function(x, y) x[ , 1L] - y, yX, residuals)
   # fitted.values <- data[ , 1L] - unlist(residuals) # in 'data' is original data, but obs lost due to diff-ing are not dropped -> format incompatible
 
-  if(model == "twosteps") coefficients <- list(coef1s, coefficients)
-
   args <- list(model          = model,
                effect         = effect,
                transformation = transformation,
@@ -589,6 +608,7 @@ pgmm <- function(formula, data, subset, na.action,
                  W             = W,
                  A1            = A1,
                  A2            = A2,
+                 B1            = B1,
                  call          = cl,
                  args          = args)
 
@@ -783,9 +803,9 @@ summary.pgmm <- function(object, robust = TRUE, time.dummies = FALSE, ...) {
   K <- if(model == "onestep") length(object$coefficients)
        else                   length(object$coefficients[[2L]])
   object$sargan <- sargan(object, "twosteps")
-  object$m1 <- mtest(object, order = 1, vcov = vv)
+  object$m1 <- mtest(object, order = 1L, vcov = vv)
   # mtest with order = 2 is only feasible if more than 2 observations are present
-  if(NROW(object$model[[1]]) > 2) object$m2 <- mtest(object, order = 2, vcov = vv)
+  if(NROW(object$model[[1L]]) > 2L) object$m2 <- mtest(object, order = 2L, vcov = vv)
   object$wald.coef <- pwaldtest(object, param = "coef", vcov = vv)
   if(effect == "twoways") object$wald.td <- pwaldtest(object, param = "time", vcov = vv)
   Kt <- length(object$args$namest)
@@ -914,6 +934,8 @@ print.summary.pgmm <- function(x, digits = max(3, getOption("digits") - 2),
                       model.pgmm.transformation.list[transformation], sep = " ")
   cat(paste(model.text, "\n"))
   ## TODO: add info about collapse argument in printed output
+
+  ## TODO: print information about non-robust/robust SE, see, e.g., print.summary.plm
 
   cat("\nCall:\n")
   print(x$call)

diff --git a/R/tool_vcovG.R b/R/tool_vcovG.R
@@ -1225,6 +1225,7 @@ vcovHC.pgmm <- function(x, ...) {
   transformation <- describe(x, "transformation")
   A1 <- x$A1
   A2 <- x$A2
+  B1 <- x$B1
 
   if(transformation == "ld") {
 ##     yX <- lapply(x$model,function(x) rbind(diff(x),x))
@@ -1237,51 +1238,44 @@ vcovHC.pgmm <- function(x, ...) {
     yX <- x$model
     residuals <- x$residuals
   }
+
   minevA2 <- min(abs(Re(eigen(A2)$values)))
   eps <- 1E-9
 
   SA2 <- if(minevA2 < eps){
     warning("a general inverse is used")
     ginv(A2)
   } else solve(A2)
-
+
+  WX <- Reduce("+", mapply(function(w, y) crossprod(w, y[ , -1L, drop = FALSE]), x$W, yX, SIMPLIFY = FALSE))
+
+  # robust vcov for one-step GMM, see Roodman (2009), formula (15)
+  vcovr1s <- B1 %*% (t(WX) %*% A1 %*% SA2 %*% A1 %*% WX) %*% B1
+
   if(model == "twosteps") {
     coef1s <- x$coefficients[[1L]]
     res1s <- lapply(yX, function(x) x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coef1s))
     K <- ncol(yX[[1L]])
     D <- c()
-    WX <- Reduce("+",
-                 mapply(function(x, y) crossprod(x, y[ , -1L, drop = FALSE]), x$W, yX, SIMPLIFY = FALSE))
     We <- Reduce("+", mapply(function(x, y) crossprod(x, y), x$W, residuals, SIMPLIFY = FALSE))
-    B1 <- solve(t(WX) %*% A1 %*% WX)
-    vcov1s <- B1 %*% (t(WX) %*% A1 %*% SA2 %*% A1 %*% WX) %*% B1 # robust vcov for one-step model
     for (k in 2:K) {
-      exk <- mapply(
-                    function(x, y){
+      exk <- mapply(function(x, y){
                       z <- crossprod(t(x[ , k, drop = FALSE]), t(y))
                       return(- z - t(z))
                     },
                     yX, res1s, SIMPLIFY = FALSE)
-      wexkw <- Reduce("+",
-                      mapply(
-                             function(x, y)
-                             crossprod(x, crossprod(y, x)),
-                             x$W, exk, SIMPLIFY = FALSE))
+
+      wexkw <- Reduce("+", mapply(function(x, y) 
+                                    crossprod(x, crossprod(y, x)),
+                                  x$W, exk, SIMPLIFY = FALSE))
       B2 <- vcov(x)
       Dk <- -B2 %*% t(WX) %*% A2 %*% wexkw %*% A2 %*% We
       D <- cbind(D, Dk)
     }
-    vcovr <- B2 + crossprod(t(D), B2) + t(crossprod(t(D), B2)) + D %*% vcov1s %*% t(D) # robust vcov for twosteps model, Roodman (2019) form. (18)
-  }
-  else {
-    # model = "onestep"
-    res1s <- lapply(yX, function(z) z[ , 1L] - crossprod(t(z[ , -1L, drop = FALSE]), x$coefficients))
-    K <- ncol(yX[[1L]])
-    WX <- Reduce("+", mapply(function(z, y) crossprod(z[ , -1L, drop = FALSE], y), yX, x$W, SIMPLIFY = FALSE))
-    B1 <- vcov(x)
-    vcovr <- B1 %*% (WX %*% A1 %*% SA2 %*% A1 %*% t(WX)) %*% B1 # robust vcov onestep model, Roodman (2019) form. (15)
+    # robust vcov for twosteps GMM model, Roodman (2019) form. (18)
+    vcovr2s <- B2 + crossprod(t(D), B2) + t(crossprod(t(D), B2)) + D %*% vcovr1s %*% t(D)
   }
-  vcovr
+  if(model == "twosteps") vcovr2s else vcovr1s
 }
 
 

diff --git a/inst/tests/test_misc.R b/inst/tests/test_misc.R
@@ -288,7 +288,7 @@ z1 <- pgmm(log(emp) ~ log(wage) + log(capital) + log(output),
              gmm.inst = ~log(emp), lag.gmm = list(c(2,99)))
 summary(z1, robust = FALSE)
 
-## Blundell and Bond (1998) table 4 (cf DPD for OX p. 12 col. 4)
+## Blundell and Bond (1998) table 4 (cf. DPD for OX p. 12 col. 4)
 z2 <- pgmm(dynformula(log(emp) ~ log(wage) + log(capital), list(1,1,1)),
              data = EmplUK, effect = "twoways", model = "onestep",
              gmm.inst = ~log(emp) + log(wage) + log(capital),

diff --git a/inst/tests/test_misc.Rout.save b/inst/tests/test_misc.Rout.save
@@ -1,5 +1,5 @@
 
-R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life"
+R version 4.4.2 (2024-10-31 ucrt) -- "Pile of Leaves"
 Copyright (C) 2024 The R Foundation for Statistical Computing
 Platform: x86_64-w64-mingw32/x64
 
@@ -1493,8 +1493,8 @@ lag(log(capital), 0:1)1 -0.424393   0.058479 -7.2572 3.952e-13 ***
 Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 
 Sargan test: chisq(100) = 118.763 (p-value = 0.097096)
-Autocorrelation test (1): normal = -4.808434 (p-value = 1.5212e-06)
-Autocorrelation test (2): normal = -0.2800133 (p-value = 0.77947)
+Autocorrelation test (1): normal = -5.519159 (p-value = 3.4063e-08)
+Autocorrelation test (2): normal = -0.2524777 (p-value = 0.80067)
 Wald test for coefficients: chisq(5) = 11174.82 (p-value = < 2.22e-16)
 Wald test for time dummies: chisq(7) = 14.71138 (p-value = 0.039882)
 > 
@@ -1542,7 +1542,7 @@ Autocorrelation test (2): normal = -0.3325401 (p-value = 0.73948)
 Wald test for coefficients: chisq(7) = 371.9877 (p-value = < 2.22e-16)
 Wald test for time dummies: chisq(6) = 26.9045 (p-value = 0.0001509)
 > 
-> ## Blundell and Bond (1998) table 4 (cf DPD for OX p. 12 col. 4)
+> ## Blundell and Bond (1998) table 4 (cf. DPD for OX p. 12 col. 4)
 > z2 <- pgmm(dynformula(log(emp) ~ log(wage) + log(capital), list(1,1,1)),
 +              data = EmplUK, effect = "twoways", model = "onestep",
 +              gmm.inst = ~log(emp) + log(wage) + log(capital),
@@ -1579,8 +1579,8 @@ lag(log(capital), 1) -0.424393   0.058479 -7.2572 3.952e-13 ***
 Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 
 Sargan test: chisq(100) = 118.763 (p-value = 0.097096)
-Autocorrelation test (1): normal = -4.808434 (p-value = 1.5212e-06)
-Autocorrelation test (2): normal = -0.2800133 (p-value = 0.77947)
+Autocorrelation test (1): normal = -5.519159 (p-value = 3.4063e-08)
+Autocorrelation test (2): normal = -0.2524777 (p-value = 0.80067)
 Wald test for coefficients: chisq(5) = 11174.82 (p-value = < 2.22e-16)
 Wald test for time dummies: chisq(7) = 14.71138 (p-value = 0.039882)
 > 
@@ -1891,4 +1891,4 @@ Coefficients:
 > 
 > proc.time()
    user  system elapsed 
-  10.04    0.75   10.76 
+   9.45    0.90   10.32