Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pgmm: fix one-step covariance matrix #62

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: plm
Version: 2.6-9999
Date: 2024-04-04
Date: 2025-01-02
Title: Linear Models for Panel Data
Authors@R: c(person(given = "Yves", family = "Croissant", role = c("aut"), email = "[email protected]"),
person(given = "Giovanni", family = "Millo", role = c("aut"), email = "[email protected]"),
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ subtitle: plm - Linear Models for Panel Data - A set of estimators and tests for
# 2.6-9999 development version, changes since 2.6-4

### Fixes:
* `pgmm`: for one-step GMM models: fix usual (non-robust) covariance matrix/standard errors.
* `vcovXX`: FD models with only one observation per group prior to
first-differencing errored ([#58](https://github.com/ycroissant/plm/issues/58)).
* `pggls`: FD models errored with the data constellation as described for `vcovXX`.
Expand Down
58 changes: 40 additions & 18 deletions R/est_gmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,25 @@
#'
#' data("EmplUK", package = "plm")
#'
#' ## Arellano and Bond (1991), table 4 col. b
#' z1 <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
#' + log(capital) + lag(log(output), 0:1) | lag(log(emp), 2:99),
#'# Arellano/Bond 1991, Table 4, column (a1) [non-robust SEs]
#'ab.a1 <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
#' + lag(log(capital), 0:2) + lag(log(output), 0:2) | lag(log(emp), 2:99),
#' data = EmplUK, effect = "twoways", model = "onestep")
#'summary(ab.a1, robust = FALSE)
#'
#'# Arellano/Bond 1991, Table 4, column (a2) [non-robust SEs]
#'ab.a2 <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
#' + lag(log(capital), 0:2) + lag(log(output), 0:2) | lag(log(emp), 2:99),
#' data = EmplUK, effect = "twoways", model = "twosteps")
#'summary(ab.a2, robust = FALSE)
#'
## Arellano and Bond (1991), table 4 col. b / # Windmeijer (2025), table 2, std. errc
#'ab.b <- pgmm(log(emp) ~ lag(log(emp), 1:2) + lag(log(wage), 0:1)
#' + log(capital) + lag(log(output), 0:1) | lag(log(emp), 2:99),
#' data = EmplUK, effect = "twoways", model = "twosteps")
#' summary(z1, robust = FALSE)
#'
#'summary(ab.b, robust = FALSE) # Arellano/Bond
#'summary(ab.b, robust = TRUE) # Windmeijer
#'
#' ## Blundell and Bond (1998) table 4 (cf. DPD for OX p. 12 col. 4)
#' z2 <- pgmm(log(emp) ~ lag(log(emp), 1)+ lag(log(wage), 0:1) +
#' lag(log(capital), 0:1) | lag(log(emp), 2:99) +
Expand Down Expand Up @@ -528,19 +541,27 @@ pgmm <- function(formula, data, subset, na.action,
} else solve(A1)
A1 <- A1 * length(W)

# coefficients: Roodman (2019) formula (13, upper part for beta1) with B1 = (X'Z(Z'HZ)^(-1)Z'X)^(-1), A1 = (Z'HZ)^(-1)
t.CP.WX.A1 <- t(crossprod(WX, A1))
B1 <- solve(crossprod(WX, t.CP.WX.A1))
Y1 <- crossprod(t.CP.WX.A1, Wy)
coefficients <- as.numeric(crossprod(B1, Y1))

if (effect == "twoways") names.coef <- c(names.coef, namesV)
names(coefficients) <- names.coef

residuals <- lapply(yX, function(x)
as.vector(x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coefficients)))
outresid <- lapply(residuals, function(x) outer(x, x))

# A2 is also needed in vcovHC.pggm for "onestep" model, hence calc. here and
# include in model object also for onestep model

# non-robust variance-covariance matrix of one-step GMM:
# see Doornik/Arellano/Bond (2012), p. 31 (formula for V^hat1 with sig2 as in (4) on p. 30)
CPresid <- crossprod(unlist(residuals))
sig2 <- as.numeric(CPresid / (pdim$nT$N - NCOL(B1)))
vcov <- sig2 * B1

# A2 is also needed for "onestep" model in vcovHC.pgmm, hence calc. here and
# always include in model object
A2 <- mapply(function(x, y) crossprod(t(crossprod(x, y)), x), W, outresid, SIMPLIFY = FALSE)
A2 <- Reduce("+", A2)
minevA2 <- min(eigen(A2)$values)
Expand All @@ -553,27 +574,25 @@ pgmm <- function(formula, data, subset, na.action,
coef1s <- coefficients
t.CP.WX.A2 <- t(crossprod(WX, A2))
Y2 <- crossprod(t.CP.WX.A2, Wy)
B2 <- solve(crossprod(WX, t.CP.WX.A2))
coefficients <- as.numeric(crossprod(B2, Y2))
names(coefficients) <- names.coef
vcov <- solve(crossprod(WX, t.CP.WX.A2))
coef2s <- as.numeric(crossprod(vcov, Y2))
names(coef2s) <- names.coef
coefficients <- list("step1" = coef1s, "step2" = coef2s)

# calc. residuals with coefs from 2nd step
residuals <- lapply(yX, function(x){
nz <- rownames(x)
z <- as.vector(x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coefficients))
z <- as.vector(x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coef2s))
names(z) <- nz
z})
vcov <- B2
} else vcov <- B1
}

rownames(vcov) <- colnames(vcov) <- names.coef

# TODO: yX does not contain the original data (but first-diff-ed data) -> fitted.values not what you would expect
fitted.values <- mapply(function(x, y) x[ , 1L] - y, yX, residuals)
# fitted.values <- data[ , 1L] - unlist(residuals) # in 'data' is original data, but obs lost due to diff-ing are not dropped -> format incompatible

if(model == "twosteps") coefficients <- list(coef1s, coefficients)

args <- list(model = model,
effect = effect,
transformation = transformation,
Expand All @@ -589,6 +608,7 @@ pgmm <- function(formula, data, subset, na.action,
W = W,
A1 = A1,
A2 = A2,
B1 = B1,
call = cl,
args = args)

Expand Down Expand Up @@ -783,9 +803,9 @@ summary.pgmm <- function(object, robust = TRUE, time.dummies = FALSE, ...) {
K <- if(model == "onestep") length(object$coefficients)
else length(object$coefficients[[2L]])
object$sargan <- sargan(object, "twosteps")
object$m1 <- mtest(object, order = 1, vcov = vv)
object$m1 <- mtest(object, order = 1L, vcov = vv)
# mtest with order = 2 is only feasible if more than 2 observations are present
if(NROW(object$model[[1]]) > 2) object$m2 <- mtest(object, order = 2, vcov = vv)
if(NROW(object$model[[1L]]) > 2L) object$m2 <- mtest(object, order = 2L, vcov = vv)
object$wald.coef <- pwaldtest(object, param = "coef", vcov = vv)
if(effect == "twoways") object$wald.td <- pwaldtest(object, param = "time", vcov = vv)
Kt <- length(object$args$namest)
Expand Down Expand Up @@ -914,6 +934,8 @@ print.summary.pgmm <- function(x, digits = max(3, getOption("digits") - 2),
model.pgmm.transformation.list[transformation], sep = " ")
cat(paste(model.text, "\n"))
## TODO: add info about collapse argument in printed output

## TODO: print information about non-robust/robust SE, see, e.g., print.summary.plm

cat("\nCall:\n")
print(x$call)
Expand Down
38 changes: 16 additions & 22 deletions R/tool_vcovG.R
Original file line number Diff line number Diff line change
Expand Up @@ -1225,6 +1225,7 @@ vcovHC.pgmm <- function(x, ...) {
transformation <- describe(x, "transformation")
A1 <- x$A1
A2 <- x$A2
B1 <- x$B1

if(transformation == "ld") {
## yX <- lapply(x$model,function(x) rbind(diff(x),x))
Expand All @@ -1237,51 +1238,44 @@ vcovHC.pgmm <- function(x, ...) {
yX <- x$model
residuals <- x$residuals
}

minevA2 <- min(abs(Re(eigen(A2)$values)))
eps <- 1E-9

SA2 <- if(minevA2 < eps){
warning("a general inverse is used")
ginv(A2)
} else solve(A2)


WX <- Reduce("+", mapply(function(w, y) crossprod(w, y[ , -1L, drop = FALSE]), x$W, yX, SIMPLIFY = FALSE))

# robust vcov for one-step GMM, see Roodman (2009), formula (15)
vcovr1s <- B1 %*% (t(WX) %*% A1 %*% SA2 %*% A1 %*% WX) %*% B1

if(model == "twosteps") {
coef1s <- x$coefficients[[1L]]
res1s <- lapply(yX, function(x) x[ , 1L] - crossprod(t(x[ , -1L, drop = FALSE]), coef1s))
K <- ncol(yX[[1L]])
D <- c()
WX <- Reduce("+",
mapply(function(x, y) crossprod(x, y[ , -1L, drop = FALSE]), x$W, yX, SIMPLIFY = FALSE))
We <- Reduce("+", mapply(function(x, y) crossprod(x, y), x$W, residuals, SIMPLIFY = FALSE))
B1 <- solve(t(WX) %*% A1 %*% WX)
vcov1s <- B1 %*% (t(WX) %*% A1 %*% SA2 %*% A1 %*% WX) %*% B1 # robust vcov for one-step model
for (k in 2:K) {
exk <- mapply(
function(x, y){
exk <- mapply(function(x, y){
z <- crossprod(t(x[ , k, drop = FALSE]), t(y))
return(- z - t(z))
},
yX, res1s, SIMPLIFY = FALSE)
wexkw <- Reduce("+",
mapply(
function(x, y)
crossprod(x, crossprod(y, x)),
x$W, exk, SIMPLIFY = FALSE))

wexkw <- Reduce("+", mapply(function(x, y)
crossprod(x, crossprod(y, x)),
x$W, exk, SIMPLIFY = FALSE))
B2 <- vcov(x)
Dk <- -B2 %*% t(WX) %*% A2 %*% wexkw %*% A2 %*% We
D <- cbind(D, Dk)
}
vcovr <- B2 + crossprod(t(D), B2) + t(crossprod(t(D), B2)) + D %*% vcov1s %*% t(D) # robust vcov for twosteps model, Roodman (2019) form. (18)
}
else {
# model = "onestep"
res1s <- lapply(yX, function(z) z[ , 1L] - crossprod(t(z[ , -1L, drop = FALSE]), x$coefficients))
K <- ncol(yX[[1L]])
WX <- Reduce("+", mapply(function(z, y) crossprod(z[ , -1L, drop = FALSE], y), yX, x$W, SIMPLIFY = FALSE))
B1 <- vcov(x)
vcovr <- B1 %*% (WX %*% A1 %*% SA2 %*% A1 %*% t(WX)) %*% B1 # robust vcov onestep model, Roodman (2019) form. (15)
# robust vcov for twosteps GMM model, Roodman (2019) form. (18)
vcovr2s <- B2 + crossprod(t(D), B2) + t(crossprod(t(D), B2)) + D %*% vcovr1s %*% t(D)
}
vcovr
if(model == "twosteps") vcovr2s else vcovr1s
}


Expand Down
2 changes: 1 addition & 1 deletion inst/tests/test_misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ z1 <- pgmm(log(emp) ~ log(wage) + log(capital) + log(output),
gmm.inst = ~log(emp), lag.gmm = list(c(2,99)))
summary(z1, robust = FALSE)

## Blundell and Bond (1998) table 4 (cf DPD for OX p. 12 col. 4)
## Blundell and Bond (1998) table 4 (cf. DPD for OX p. 12 col. 4)
z2 <- pgmm(dynformula(log(emp) ~ log(wage) + log(capital), list(1,1,1)),
data = EmplUK, effect = "twoways", model = "onestep",
gmm.inst = ~log(emp) + log(wage) + log(capital),
Expand Down
14 changes: 7 additions & 7 deletions inst/tests/test_misc.Rout.save
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life"
R version 4.4.2 (2024-10-31 ucrt) -- "Pile of Leaves"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

Expand Down Expand Up @@ -1493,8 +1493,8 @@ lag(log(capital), 0:1)1 -0.424393 0.058479 -7.2572 3.952e-13 ***
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sargan test: chisq(100) = 118.763 (p-value = 0.097096)
Autocorrelation test (1): normal = -4.808434 (p-value = 1.5212e-06)
Autocorrelation test (2): normal = -0.2800133 (p-value = 0.77947)
Autocorrelation test (1): normal = -5.519159 (p-value = 3.4063e-08)
Autocorrelation test (2): normal = -0.2524777 (p-value = 0.80067)
Wald test for coefficients: chisq(5) = 11174.82 (p-value = < 2.22e-16)
Wald test for time dummies: chisq(7) = 14.71138 (p-value = 0.039882)
>
Expand Down Expand Up @@ -1542,7 +1542,7 @@ Autocorrelation test (2): normal = -0.3325401 (p-value = 0.73948)
Wald test for coefficients: chisq(7) = 371.9877 (p-value = < 2.22e-16)
Wald test for time dummies: chisq(6) = 26.9045 (p-value = 0.0001509)
>
> ## Blundell and Bond (1998) table 4 (cf DPD for OX p. 12 col. 4)
> ## Blundell and Bond (1998) table 4 (cf. DPD for OX p. 12 col. 4)
> z2 <- pgmm(dynformula(log(emp) ~ log(wage) + log(capital), list(1,1,1)),
+ data = EmplUK, effect = "twoways", model = "onestep",
+ gmm.inst = ~log(emp) + log(wage) + log(capital),
Expand Down Expand Up @@ -1579,8 +1579,8 @@ lag(log(capital), 1) -0.424393 0.058479 -7.2572 3.952e-13 ***
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sargan test: chisq(100) = 118.763 (p-value = 0.097096)
Autocorrelation test (1): normal = -4.808434 (p-value = 1.5212e-06)
Autocorrelation test (2): normal = -0.2800133 (p-value = 0.77947)
Autocorrelation test (1): normal = -5.519159 (p-value = 3.4063e-08)
Autocorrelation test (2): normal = -0.2524777 (p-value = 0.80067)
Wald test for coefficients: chisq(5) = 11174.82 (p-value = < 2.22e-16)
Wald test for time dummies: chisq(7) = 14.71138 (p-value = 0.039882)
>
Expand Down Expand Up @@ -1891,4 +1891,4 @@ Coefficients:
>
> proc.time()
user system elapsed
10.04 0.75 10.76
9.45 0.90 10.32
Loading
Loading