Skip to content

Commit

Permalink
pvcm: improved performance by collapse:rsplit, more elegant and effic…
Browse files Browse the repository at this point in the history
…ient approach tor check for sufficient data
  • Loading branch information
tappek committed Jul 6, 2024
1 parent 86b9e11 commit 7b674f6
Show file tree
Hide file tree
Showing 4 changed files with 440 additions and 432 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ importFrom(MASS,ginv)
importFrom(Rdpack,reprompt)
importFrom(bdsmatrix,bdsmatrix)
importFrom(collapse,GRP)
importFrom(collapse,GRPN)
importFrom(collapse,dapply)
importFrom(collapse,fbetween)
importFrom(collapse,fdroplevels)
Expand Down
2 changes: 1 addition & 1 deletion R/est_cce.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
#' \item{r.squared}{numeric, the R squared.}
#' @export
#' @importFrom MASS ginv
#' @importFrom collapse rsplit gsplit GRP
#' @importFrom collapse rsplit gsplit GRP GRPN
#' @author Giovanni Millo
#' @references
#'
Expand Down
45 changes: 26 additions & 19 deletions R/est_vcm.R
Original file line number Diff line number Diff line change
Expand Up @@ -496,25 +496,32 @@ print.summary.pvcm <- function(x, digits = max(3, getOption("digits") - 2),

est.ols <- function(mf, cond, effect, model) {
## helper function: estimate the single OLS regressions (used for pvcm's model = "random" as well as "within" )
ml <- split(mf, cond)
#ml <- collapse::rsplit(mf, cond) # does not yet work - TODO: check why (comment stemming from random model)
ols <- lapply(ml, function(x) {
X <- model.matrix(x)
if( (nrow(X) < ncol(X)) && model == "within"
|| (nrow(X) <= ncol(X)) && model == "random") {
## catch non-estimable model
## equality NROW NCOL: can estimate coefficients but not variance
## -> for "within" model: this is ok (output has variance NA/NaN)
# -> or "random" model: variance is needed for D2, chi-sq test, so rather be strict
error.msg <- paste0("insufficient number of observations for at least ",
"one group in ", effect, " dimension, so defined ",
"model is non-estimable")
stop(error.msg)
}
y <- pmodel.response(x)
r <- lm(y ~ X - 1, model = FALSE)
names(r$coefficients) <- colnames(X)
r})
mm <- model.matrix(mf)

## catch non-estimable model:
## check for obs. per individual vs. nr. of variables. equality: can estimate coefficients but not variance
## -> for "within" model: this is ok (output has variance NA/NaN)
# -> or "random" model: variance is needed for D2, chi-sq test, so rather be strict
error.msg <- paste0("insufficient number of observations for at least ",
"one group in ", effect, " dimension, so defined ",
"model is non-estimable")
cond <- GRP(cond)
Ti <- collapse::GRPN(cond, expand = FALSE)
ncolumns <- ncol(mm)
if(model == "within" && any(Ti < ncolumns)) stop(error.msg)
if(model == "random" && any(Ti <= ncolumns)) stop(error.msg)

# split data by group
X <- collapse::rsplit(mm, cond)
y <- collapse::rsplit(model.response(mf), cond)

# estimate OLS per group
ols <- mapply(function(X_i, y_i) {
r <- lm(y_i ~ X_i - 1, model = FALSE)
names(r$coefficients) <- colnames(X_i) # need to plug-in original coef names due to lm(., model = FALSE) losing them
r
}, X, y, SIMPLIFY = FALSE)

ols
}

Loading

0 comments on commit 7b674f6

Please sign in to comment.