Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

n_factors(): Add Variance_Cumulative (explained variance) to summary #921

Merged
merged 15 commits into from
Dec 3, 2023
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
Version: 0.21.3.4
Version: 0.21.3.5
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
* `model_parameters()` for models of package *survey* now gives informative
messages when `bootstrap = TRUE` (which is currently not supported).

* `n_factors()` now also returns the explained variance for the number of
factors as attributes.

# parameters 0.21.3

## Changes
Expand Down
9 changes: 9 additions & 0 deletions R/n_factors.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
#' implemented in the [**see**-package](https://easystats.github.io/see/).
#' `n_components()` is a convenient short-cut for `n_factors(type = "PCA")`.
#'
#' @examplesIf require("PCDimension", quietly = TRUE) && require("nFactors", quietly = TRUE) && require("EGAnet", quietly = TRUE) && require("psych", quietly = TRUE)

Check warning on line 48 in R/n_factors.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/n_factors.R,line=48,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 165 characters.
#' library(parameters)
#' n_factors(mtcars, type = "PCA")
#'
Expand Down Expand Up @@ -121,10 +121,10 @@

# Get number of observations
if (is.data.frame(x)) {
nobs <- nrow(x)

Check warning on line 124 in R/n_factors.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/n_factors.R,line=124,col=5,[object_overwrite_linter] 'nobs' is an exported object from package 'stats'. Avoid re-using such symbols.
} else {
if (is.numeric(x) && !is.null(cor)) {
nobs <- x

Check warning on line 127 in R/n_factors.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/n_factors.R,line=127,col=7,[object_overwrite_linter] 'nobs' is an exported object from package 'stats'. Avoid re-using such symbols.
package <- package[!package %in% c("pcdimension", "PCDimension")]
} else if (is.matrix(x) || inherits(x, "easycormatrix")) {
insight::format_error(
Expand Down Expand Up @@ -340,6 +340,15 @@
n_Methods = as.numeric(by(out, as.factor(out$n_Factors), function(out) n <- nrow(out)))
)

# Add cumulative percentage of variance explained
fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient)

Check warning on line 344 in R/n_factors.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/n_factors.R,line=344,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 146 characters.
varex <- attributes(fa)$summary
# Extract number of factors from EFA output (usually MR1, ML1, etc.)
varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE))
# Merge (and like that filter out empty methods)
by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors")

attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting)
attr(out, "summary") <- by_factors
attr(out, "n") <- min(as.numeric(as.character(
by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"]
Expand Down Expand Up @@ -394,7 +403,7 @@


# Text
text <- paste0(

Check warning on line 406 in R/n_factors.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/n_factors.R,line=406,col=3,[object_overwrite_linter] 'text' is an exported object from package 'graphics'. Avoid re-using such symbols.
"The choice of ",
as.character(best_n),
ifelse(type == "factor", " dimensions ", " clusters "),
Expand Down Expand Up @@ -751,7 +760,7 @@
CRMS <- rez[!is.na(rez$CRMS) & rez$CRMS <= target, "n"][1]
}
# BIC (this is a penalized method so we can just take the one that minimizes it)
BIC <- ifelse(all(is.na(rez$BIC)), NA, rez[!is.na(rez$BIC) & rez$BIC == min(rez$BIC, na.rm = TRUE), "n"])

Check warning on line 763 in R/n_factors.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/n_factors.R,line=763,col=3,[object_overwrite_linter] 'BIC' is an exported object from package 'stats'. Avoid re-using such symbols.

.data_frame(
n_Factors = c(fit_off, TLI, RMSEA, RMSR, CRMS, BIC),
Expand Down
16 changes: 8 additions & 8 deletions R/standardize_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
types <- parameters_type(model)
# model_matrix <- as.data.frame(stats::model.matrix(model))
model_matrix <- as.data.frame(insight::get_modelmatrix(model))
data <- insight::get_data(model, source = "mf", verbose = FALSE)

Check warning on line 55 in R/standardize_info.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/standardize_info.R,line=55,col=3,[object_overwrite_linter] 'data' is an exported object from package 'utils'. Avoid re-using such symbols.
wgts <- insight::get_weights(model, na_rm = TRUE)

# validation check for ZI
Expand Down Expand Up @@ -181,7 +181,7 @@
# Get deviations for all parameters
means <- deviations <- rep(NA_real_, times = length(params))
for (i in seq_along(params)) {
var <- params[i]

Check warning on line 184 in R/standardize_info.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/standardize_info.R,line=184,col=5,[object_overwrite_linter] 'var' is an exported object from package 'stats'. Avoid re-using such symbols.
info <- .std_info_predictor_smart(
data = data,
variable = types[types$Parameter == var, "Variable"],
Expand Down Expand Up @@ -213,7 +213,7 @@
two_sd = FALSE,
weights = NULL,
...) {
if (type == "intercept") {

Check warning on line 216 in R/standardize_info.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/standardize_info.R,line=216,col=3,[if_switch_linter] Prefer switch() statements over repeated if/else equality tests, e.g., switch(x, a = 1, b = 2) over if (x == "a") 1 else if (x == "b") 2.
info <- list(sd = 0, mean = 0)
} else if (type == "numeric") {
info <- .compute_std_info(
Expand Down Expand Up @@ -272,7 +272,7 @@
# Get deviations for all parameters
means <- deviations <- rep(NA_real_, length = length(names(model_matrix)))
for (i in seq_along(names(model_matrix))) {
var <- names(model_matrix)[i]

Check warning on line 275 in R/standardize_info.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/standardize_info.R,line=275,col=5,[object_overwrite_linter] 'var' is an exported object from package 'stats'. Avoid re-using such symbols.
if (types[i, "Type"] == "intercept") {
means[i] <- deviations[i] <- 0
} else {
Expand Down Expand Up @@ -382,12 +382,12 @@
}

if (info$is_linear) {
if (!robust) {
sd_y <- datawizard::weighted_sd(response, w)
mean_y <- datawizard::weighted_mean(response, w)
} else {
if (robust) {
sd_y <- datawizard::weighted_mad(response, w)
mean_y <- datawizard::weighted_median(response, w)
} else {
sd_y <- datawizard::weighted_sd(response, w)
mean_y <- datawizard::weighted_mean(response, w)
}
} else {
sd_y <- 1
Expand Down Expand Up @@ -567,12 +567,12 @@
response <- as.numeric(data[, variable])
}

if (!robust) {
sd_x <- datawizard::weighted_sd(response, weights)
mean_x <- datawizard::weighted_mean(response, weights)
} else {
if (robust) {
sd_x <- datawizard::weighted_mad(response, weights)
mean_x <- datawizard::weighted_median(response, weights)
} else {
sd_x <- datawizard::weighted_sd(response, weights)
mean_x <- datawizard::weighted_mean(response, weights)
}

list(sd = f * sd_x, mean = mean_x)
Expand Down
57 changes: 27 additions & 30 deletions R/utils_pca_efa.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ summary.parameters_efa <- function(object, ...) {


x <- as.data.frame(t(x[, cols]))
x <- cbind(data.frame("Parameter" = row.names(x), stringsAsFactors = FALSE), x)
x <- cbind(data.frame(Parameter = row.names(x), stringsAsFactors = FALSE), x)
names(x) <- c("Parameter", attributes(object)$summary$Component)
row.names(x) <- NULL

Expand Down Expand Up @@ -149,40 +149,37 @@ predict.parameters_efa <- function(object,
verbose = TRUE,
...) {
attri <- attributes(object)
if (is.null(newdata)) {
if ("scores" %in% names(attri)) {
out <- as.data.frame(attri$scores)
if (isTRUE(keep_na)) {
out <- .merge_na(object, out, verbose)
}
} else {
if ("dataset" %in% names(attri)) {
out <- as.data.frame(stats::predict(attri$model, data = attri$dataset))

if (inherits(attri$model, c("psych", "principal", "psych", "fa"))) {
if (is.null(newdata)) {
if ("scores" %in% names(attri)) {
out <- as.data.frame(attri$scores)
if (isTRUE(keep_na)) {
# Because pre-made scores don't preserve NA
out <- .merge_na(object, out)
}
} else {
insight::format_error(
"Could not retrieve data nor model. Please report an issue on {.url https://github.com/easystats/parameters/issues}."
)
d <- attri$data_set
d <- d[vapply(d, is.numeric, logical(1))]
out <- as.data.frame(stats::predict(attri$model, data = d))
}
}
} else {
if (inherits(attri$model, c("psych", "fa"))) {
# Clean-up newdata (keep only the variables used in the model)
newdata <- newdata[names(attri$model$complexity)] # assuming "complexity" info is there
# psych:::predict.fa(object, data)
out <- as.data.frame(stats::predict(attri$model, data = newdata))
} else if (inherits(attri$model, "spca")) {
# https://github.com/erichson/spca/issues/7
newdata <- newdata[names(attri$model$center)]
if (attri$standardize) {
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
}
out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
} else {
out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...))
# psych:::predict.principal(object, data)
out <- stats::predict(attri$model, data = newdata)
}
} else if (inherits(attri$model, "spca")) {
# https://github.com/erichson/spca/issues/7
newdata <- newdata[names(attri$model$center)]
if (attri$standardize) {
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
}
out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
} else {
out <- as.data.frame(stats::predict(attri$model, newdata = attri$dataset, ...))
}

if (!is.null(names)) {
names(out)[seq_along(names)] <- names
}
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-n_factors.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,16 @@ test_that("n_factors, no rotation, psych only", {
)
)
})

test_that("n_factors, variance explained", {
skip_on_cran()
skip_if_not_installed("nFactors")
skip_if_not_installed("psych")
set.seed(333)
x <- n_factors(mtcars[, 1:4], type = "PCA")
expect_equal(
attributes(x)$Variance_Explained$Variance_Cumulative,
c(0.84126, 0.85088, 0.85859, 0.85859),
tolerance = 1e-4
)
})
44 changes: 9 additions & 35 deletions tests/testthat/test-pca.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ test_that("principal_components", {

test_that("principal_components, n", {
data(iris)
x <- parameters::principal_components(iris[1:4], n = 2)
x <- principal_components(iris[1:4], n = 2)
expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))

x <- parameters::principal_components(iris[1:4], n = 1)
x <- principal_components(iris[1:4], n = 1)
expect_named(x, c("Variable", "PC1", "Complexity"))
})


test_that("principal_components", {
x <- parameters::principal_components(mtcars[, 1:7])
x <- principal_components(mtcars[, 1:7])

expect_equal(
x$PC1,
Expand All @@ -55,38 +55,17 @@ test_that("principal_components", {
)

expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))
})

test_that("principal_components", {
x <- model_parameters(principal_components(mtcars[, 1:7], nfactors = 2))
expect_equal(
x$RC1,
c(
-0.836114674884308,
0.766808147590597,
0.85441780762136,
0.548502661888057,
-0.889046093964722,
0.931879020871552,
-0.030485507571411
),
tolerance = 0.01
)

expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness"))
expect_identical(dim(suppressWarnings(predict(x))), c(32L, 2L))
expect_identical(dim(suppressWarnings(predict(x, newdata = mtcars[1:3, 1:7]))), c(3L, 2L))
expect_identical(dim(predict(x)), c(32L, 2L))
})


# predict ----------------------
# N.B tests will fail if `GPArotation` package is not installed

d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")

test_that("predict model_parameters fa", {
d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")
pr <- suppressMessages(
predict(mp, names = c("Neuroticism", "Conscientiousness", "Extraversion", "Agreeableness", "Opennness"))
)
Expand All @@ -103,17 +82,12 @@ test_that("predict model_parameters fa", {
)
expect_identical(nrow(predict(mp, keep_na = FALSE)), 2436L)
expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
})


test_that("predict factor_analysis", {
model <- factor_analysis(d, n = 5)
expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
expect_named(
predict(mp, names = c("A", "B", "C", "D", "E"), keep_na = FALSE),
c("A", "B", "C", "D", "E")
)
model <- factor_analysis(d, n = 5)
expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
})

unloadNamespace("GPArotation")
Loading