easystats · strengejacke · Dec 3, 2023 · Nov 30, 2023 · Nov 30, 2023 · Nov 30, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: parameters
 Title: Processing of Model Parameters
-Version: 0.21.3.4
+Version: 0.21.3.5
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",

diff --git a/NEWS.md b/NEWS.md
@@ -11,6 +11,9 @@
 * `model_parameters()` for models of package *survey* now gives informative
   messages when `bootstrap = TRUE` (which is currently not supported).
 
+* `n_factors()` now also returns the explained variance for the number of
+  factors as attributes.
+
 # parameters 0.21.3
 
 ## Changes

diff --git a/R/n_factors.R b/R/n_factors.R
@@ -45,7 +45,7 @@
 #'   implemented in the [**see**-package](https://easystats.github.io/see/).
 #'   `n_components()` is a convenient short-cut  for `n_factors(type = "PCA")`.
 #'
 #' @examplesIf require("PCDimension", quietly = TRUE) && require("nFactors", quietly = TRUE) && require("EGAnet", quietly = TRUE) && require("psych", quietly = TRUE)
 #' library(parameters)
 #' n_factors(mtcars, type = "PCA")
 #'
@@ -121,10 +121,10 @@

  # Get number of observations
  if (is.data.frame(x)) {
    nobs <- nrow(x)
  } else {
    if (is.numeric(x) && !is.null(cor)) {
      nobs <- x
      package <- package[!package %in% c("pcdimension", "PCDimension")]
    } else if (is.matrix(x) || inherits(x, "easycormatrix")) {
      insight::format_error(
@@ -340,6 +340,15 @@
     n_Methods = as.numeric(by(out, as.factor(out$n_Factors), function(out) n <- nrow(out)))
   )
 
+  # Add cumulative percentage of variance explained
+  fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient)
+  varex <- attributes(fa)$summary
+  # Extract number of factors from EFA output (usually MR1, ML1, etc.)
+  varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE))
+  # Merge (and like that filter out empty methods)
+  by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors")
+
+  attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting)
   attr(out, "summary") <- by_factors
   attr(out, "n") <- min(as.numeric(as.character(
     by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"]
@@ -394,7 +403,7 @@


  # Text
  text <- paste0(
    "The choice of ",
    as.character(best_n),
    ifelse(type == "factor", " dimensions ", " clusters "),
@@ -751,7 +760,7 @@
    CRMS <- rez[!is.na(rez$CRMS) & rez$CRMS <= target, "n"][1]
  }
  # BIC (this is a penalized method so we can just take the one that minimizes it)
  BIC <- ifelse(all(is.na(rez$BIC)), NA, rez[!is.na(rez$BIC) & rez$BIC == min(rez$BIC, na.rm = TRUE), "n"])

  .data_frame(
    n_Factors = c(fit_off, TLI, RMSEA, RMSR, CRMS, BIC),

diff --git a/R/standardize_info.R b/R/standardize_info.R
@@ -52,7 +52,7 @@
  types <- parameters_type(model)
  # model_matrix <- as.data.frame(stats::model.matrix(model))
  model_matrix <- as.data.frame(insight::get_modelmatrix(model))
  data <- insight::get_data(model, source = "mf", verbose = FALSE)
  wgts <- insight::get_weights(model, na_rm = TRUE)

  # validation check for ZI
@@ -181,7 +181,7 @@
  # Get deviations for all parameters
  means <- deviations <- rep(NA_real_, times = length(params))
  for (i in seq_along(params)) {
    var <- params[i]
    info <- .std_info_predictor_smart(
      data = data,
      variable = types[types$Parameter == var, "Variable"],
@@ -213,7 +213,7 @@
                                      two_sd = FALSE,
                                      weights = NULL,
                                      ...) {
  if (type == "intercept") {
    info <- list(sd = 0, mean = 0)
  } else if (type == "numeric") {
    info <- .compute_std_info(
@@ -272,7 +272,7 @@
  # Get deviations for all parameters
  means <- deviations <- rep(NA_real_, length = length(names(model_matrix)))
  for (i in seq_along(names(model_matrix))) {
    var <- names(model_matrix)[i]
    if (types[i, "Type"] == "intercept") {
      means[i] <- deviations[i] <- 0
    } else {
@@ -382,12 +382,12 @@
   }
 
   if (info$is_linear) {
-    if (!robust) {
-      sd_y <- datawizard::weighted_sd(response, w)
-      mean_y <- datawizard::weighted_mean(response, w)
-    } else {
+    if (robust) {
       sd_y <- datawizard::weighted_mad(response, w)
       mean_y <- datawizard::weighted_median(response, w)
+    } else {
+      sd_y <- datawizard::weighted_sd(response, w)
+      mean_y <- datawizard::weighted_mean(response, w)
     }
   } else {
     sd_y <- 1
@@ -567,12 +567,12 @@
     response <- as.numeric(data[, variable])
   }
 
-  if (!robust) {
-    sd_x <- datawizard::weighted_sd(response, weights)
-    mean_x <- datawizard::weighted_mean(response, weights)
-  } else {
+  if (robust) {
     sd_x <- datawizard::weighted_mad(response, weights)
     mean_x <- datawizard::weighted_median(response, weights)
+  } else {
+    sd_x <- datawizard::weighted_sd(response, weights)
+    mean_x <- datawizard::weighted_mean(response, weights)
   }
 
   list(sd = f * sd_x, mean = mean_x)

diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R
@@ -107,7 +107,7 @@ summary.parameters_efa <- function(object, ...) {
 
 
   x <- as.data.frame(t(x[, cols]))
-  x <- cbind(data.frame("Parameter" = row.names(x), stringsAsFactors = FALSE), x)
+  x <- cbind(data.frame(Parameter = row.names(x), stringsAsFactors = FALSE), x)
   names(x) <- c("Parameter", attributes(object)$summary$Component)
   row.names(x) <- NULL
 
@@ -149,40 +149,37 @@ predict.parameters_efa <- function(object,
                                    verbose = TRUE,
                                    ...) {
   attri <- attributes(object)
-  if (is.null(newdata)) {
-    if ("scores" %in% names(attri)) {
-      out <- as.data.frame(attri$scores)
-      if (isTRUE(keep_na)) {
-        out <- .merge_na(object, out, verbose)
-      }
-    } else {
-      if ("dataset" %in% names(attri)) {
-        out <- as.data.frame(stats::predict(attri$model, data = attri$dataset))
+
+  if (inherits(attri$model, c("psych", "principal", "psych", "fa"))) {
+    if (is.null(newdata)) {
+      if ("scores" %in% names(attri)) {
+        out <- as.data.frame(attri$scores)
+        if (isTRUE(keep_na)) {
+          # Because pre-made scores don't preserve NA
+          out <- .merge_na(object, out)
+        }
       } else {
-        insight::format_error(
-          "Could not retrieve data nor model. Please report an issue on {.url https://github.com/easystats/parameters/issues}."
-        )
+        d <- attri$data_set
+        d <- d[vapply(d, is.numeric, logical(1))]
+        out <- as.data.frame(stats::predict(attri$model, data = d))
       }
-    }
-  } else {
-    if (inherits(attri$model, c("psych", "fa"))) {
-      # Clean-up newdata (keep only the variables used in the model)
-      newdata <- newdata[names(attri$model$complexity)] # assuming "complexity" info is there
-      # psych:::predict.fa(object, data)
-      out <- as.data.frame(stats::predict(attri$model, data = newdata))
-    } else if (inherits(attri$model, "spca")) {
-      # https://github.com/erichson/spca/issues/7
-      newdata <- newdata[names(attri$model$center)]
-      if (attri$standardize) {
-        newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
-        newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
-      }
-      out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
-      out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
     } else {
-      out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...))
+      # psych:::predict.principal(object, data)
+      out <- stats::predict(attri$model, data = newdata)
+    }
+  } else if (inherits(attri$model, "spca")) {
+    # https://github.com/erichson/spca/issues/7
+    newdata <- newdata[names(attri$model$center)]
+    if (attri$standardize) {
+      newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
+      newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
     }
+    out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
+    out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
+  } else {
+    out <- as.data.frame(stats::predict(attri$model, newdata = attri$dataset, ...))
   }
+
   if (!is.null(names)) {
     names(out)[seq_along(names)] <- names
   }

diff --git a/tests/testthat/test-n_factors.R b/tests/testthat/test-n_factors.R
@@ -71,3 +71,16 @@ test_that("n_factors, no rotation, psych only", {
     )
   )
 })
+
+test_that("n_factors, variance explained", {
+  skip_on_cran()
+  skip_if_not_installed("nFactors")
+  skip_if_not_installed("psych")
+  set.seed(333)
+  x <- n_factors(mtcars[, 1:4], type = "PCA")
+  expect_equal(
+    attributes(x)$Variance_Explained$Variance_Cumulative,
+    c(0.84126, 0.85088, 0.85859, 0.85859),
+    tolerance = 1e-4
+  )
+})
diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R
@@ -29,16 +29,16 @@ test_that("principal_components", {
 
 test_that("principal_components, n", {
   data(iris)
-  x <- parameters::principal_components(iris[1:4], n = 2)
+  x <- principal_components(iris[1:4], n = 2)
   expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))
 
-  x <- parameters::principal_components(iris[1:4], n = 1)
+  x <- principal_components(iris[1:4], n = 1)
   expect_named(x, c("Variable", "PC1", "Complexity"))
 })
 
 
 test_that("principal_components", {
-  x <- parameters::principal_components(mtcars[, 1:7])
+  x <- principal_components(mtcars[, 1:7])
 
   expect_equal(
     x$PC1,
@@ -55,38 +55,17 @@ test_that("principal_components", {
   )
 
   expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))
-})
-
-test_that("principal_components", {
-  x <- model_parameters(principal_components(mtcars[, 1:7], nfactors = 2))
-  expect_equal(
-    x$RC1,
-    c(
-      -0.836114674884308,
-      0.766808147590597,
-      0.85441780762136,
-      0.548502661888057,
-      -0.889046093964722,
-      0.931879020871552,
-      -0.030485507571411
-    ),
-    tolerance = 0.01
-  )
-
-  expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness"))
-  expect_identical(dim(suppressWarnings(predict(x))), c(32L, 2L))
-  expect_identical(dim(suppressWarnings(predict(x, newdata = mtcars[1:3, 1:7]))), c(3L, 2L))
+  expect_identical(dim(predict(x)), c(32L, 2L))
 })
 
 
 # predict ----------------------
 # N.B tests will fail if `GPArotation` package is not installed
 
-d <- na.omit(psych::bfi[, 1:25])
-model <- psych::fa(d, nfactors = 5)
-mp <- model_parameters(model, sort = TRUE, threshold = "max")
-
 test_that("predict model_parameters fa", {
+  d <- na.omit(psych::bfi[, 1:25])
+  model <- psych::fa(d, nfactors = 5)
+  mp <- model_parameters(model, sort = TRUE, threshold = "max")
   pr <- suppressMessages(
     predict(mp, names = c("Neuroticism", "Conscientiousness", "Extraversion", "Agreeableness", "Opennness"))
   )
@@ -103,17 +82,12 @@ test_that("predict model_parameters fa", {
   )
   expect_identical(nrow(predict(mp, keep_na = FALSE)), 2436L)
   expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
-})
-
-
-test_that("predict factor_analysis", {
-  model <- factor_analysis(d, n = 5)
-  expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
-  expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
   expect_named(
     predict(mp, names = c("A", "B", "C", "D", "E"), keep_na = FALSE),
     c("A", "B", "C", "D", "E")
   )
+  model <- factor_analysis(d, n = 5)
+  expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
 })
 
 unloadNamespace("GPArotation")