From 87a86803a5bed331ee7f77866cbfab69acfd3850 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Thu, 30 Nov 2023 15:48:32 +0000 Subject: [PATCH 01/13] Add Variance_Cumulative (explained variance) to summary of n_factors() --- R/n_factors.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/n_factors.R b/R/n_factors.R index ca70ee042..5d14b3855 100644 --- a/R/n_factors.R +++ b/R/n_factors.R @@ -340,6 +340,14 @@ n_factors <- function(x, n_Methods = as.numeric(by(out, as.factor(out$n_Factors), function(out) n <- nrow(out))) ) + # Add cumulative percentage of variance explained + fa <- factor_analysis(x, cor=cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient) + varex <- attributes(fa)$summary + # Extract number of factors from EFA output (usually MR1, ML1, etc.) + varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl=TRUE)) + by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors") + + attr(out, "summary") <- by_factors attr(out, "n") <- min(as.numeric(as.character( by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"] From 1c89a11dc9b057bbea644c82f792af792204692a Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Thu, 30 Nov 2023 16:05:25 +0000 Subject: [PATCH 02/13] please lintr --- R/n_factors.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/n_factors.R b/R/n_factors.R index 5d14b3855..eaa210e39 100644 --- a/R/n_factors.R +++ b/R/n_factors.R @@ -341,10 +341,10 @@ n_factors <- function(x, ) # Add cumulative percentage of variance explained - fa <- factor_analysis(x, cor=cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient) + fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient) varex <- attributes(fa)$summary # Extract number of factors from EFA output (usually MR1, ML1, etc.) - varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl=TRUE)) + varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE)) by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors") From c815a159d6d1b8f9cb9676edaa87c57e2d9cbe54 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Thu, 30 Nov 2023 16:17:05 +0000 Subject: [PATCH 03/13] add also as attribute --- R/n_factors.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/n_factors.R b/R/n_factors.R index eaa210e39..6d6644258 100644 --- a/R/n_factors.R +++ b/R/n_factors.R @@ -345,9 +345,10 @@ n_factors <- function(x, varex <- attributes(fa)$summary # Extract number of factors from EFA output (usually MR1, ML1, etc.) varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE)) + # Merge (and like that filter out empty methods) by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors") - + attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting) attr(out, "summary") <- by_factors attr(out, "n") <- min(as.numeric(as.character( by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"] From 9a0dcc649e8240897a2e1742b272ccacb595385a Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 Dec 2023 09:01:55 +0100 Subject: [PATCH 04/13] add test --- tests/testthat/test-n_factors.R | 13 +++++++++++++ tests/testthat/test-pca.R | 7 +++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-n_factors.R b/tests/testthat/test-n_factors.R index 5480fdfaa..41198e9f1 100644 --- a/tests/testthat/test-n_factors.R +++ b/tests/testthat/test-n_factors.R @@ -71,3 +71,16 @@ test_that("n_factors, no rotation, psych only", { ) ) }) + +test_that("n_factors, variance explained", { + skip_on_cran() + skip_if_not_installed("nFactors") + skip_if_not_installed("psych") + set.seed(333) + x <- n_factors(mtcars[, 1:4], type = "PCA") + expect_equal( + attributes(x)$Variance_Explained$Variance_Cumulative, + c(0.84126, 0.85088, 0.85859, 0.85859), + tolerance = 1e-4 + ) +}) diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R index b6dba5bd1..85dbcd8f2 100644 --- a/tests/testthat/test-pca.R +++ b/tests/testthat/test-pca.R @@ -57,11 +57,10 @@ test_that("principal_components", { # predict ---------------------- # N.B tests will fail if `GPArotation` package is not installed -d <- na.omit(psych::bfi[, 1:25]) -model <- psych::fa(d, nfactors = 5) -mp <- model_parameters(model, sort = TRUE, threshold = "max") - test_that("predict model_parameters fa", { + d <- na.omit(psych::bfi[, 1:25]) + model <- psych::fa(d, nfactors = 5) + mp <- model_parameters(model, sort = TRUE, threshold = "max") pr <- suppressMessages( predict(mp, names = c("Neuroticism", "Conscientiousness", "Extraversion", "Agreeableness", "Opennness")) ) From af9871ff930ba60fca10d4b9c70eb4bd99b212ef Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 Dec 2023 09:02:05 +0100 Subject: [PATCH 05/13] update description --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index de8902ec4..731de5c86 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.21.3.4 +Version: 0.21.3.5 Authors@R: c(person(given = "Daniel", family = "Lüdecke", From bdcd05ef42964a1e260f27905af394e057fff9aa Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 Dec 2023 09:02:33 +0100 Subject: [PATCH 06/13] news --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 778eb912a..b8cb37e53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,9 @@ * `model_parameters()` for models of package *survey* now gives informative messages when `bootstrap = TRUE` (which is currently not supported). +* `n_factors()` now also returns the explained variance for the number of + factors as attributes. + # parameters 0.21.3 ## Changes From efa201b6e495ad60fbf0c4a4ed1fc6cb7871d196 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 Dec 2023 14:24:52 +0100 Subject: [PATCH 07/13] fix test --- tests/testthat/test-pca.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R index 006f5c18f..0e67750ac 100644 --- a/tests/testthat/test-pca.R +++ b/tests/testthat/test-pca.R @@ -102,17 +102,12 @@ test_that("predict model_parameters fa", { ) expect_identical(nrow(predict(mp, keep_na = FALSE)), 2436L) expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L) -}) - - -test_that("predict factor_analysis", { - model <- factor_analysis(d, n = 5) - expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L) - expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L) expect_named( predict(mp, names = c("A", "B", "C", "D", "E"), keep_na = FALSE), c("A", "B", "C", "D", "E") ) + model <- factor_analysis(d, n = 5) + expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L) }) unloadNamespace("GPArotation") From 47f0597bdbb2fa9da21124857ffecc8022a5a81d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 Dec 2023 16:23:38 +0100 Subject: [PATCH 08/13] implement #755 --- R/standardize_info.R | 16 ++++++------ R/utils_pca_efa.R | 59 ++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/R/standardize_info.R b/R/standardize_info.R index 9993c60b2..0d6fe5b9f 100644 --- a/R/standardize_info.R +++ b/R/standardize_info.R @@ -382,12 +382,12 @@ standardize_info.default <- function(model, } if (info$is_linear) { - if (!robust) { - sd_y <- datawizard::weighted_sd(response, w) - mean_y <- datawizard::weighted_mean(response, w) - } else { + if (robust) { sd_y <- datawizard::weighted_mad(response, w) mean_y <- datawizard::weighted_median(response, w) + } else { + sd_y <- datawizard::weighted_sd(response, w) + mean_y <- datawizard::weighted_mean(response, w) } } else { sd_y <- 1 @@ -567,12 +567,12 @@ standardize_info.default <- function(model, response <- as.numeric(data[, variable]) } - if (!robust) { - sd_x <- datawizard::weighted_sd(response, weights) - mean_x <- datawizard::weighted_mean(response, weights) - } else { + if (robust) { sd_x <- datawizard::weighted_mad(response, weights) mean_x <- datawizard::weighted_median(response, weights) + } else { + sd_x <- datawizard::weighted_sd(response, weights) + mean_x <- datawizard::weighted_mean(response, weights) } list(sd = f * sd_x, mean = mean_x) diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R index 56bfaf932..1f22586a8 100644 --- a/R/utils_pca_efa.R +++ b/R/utils_pca_efa.R @@ -107,7 +107,7 @@ summary.parameters_efa <- function(object, ...) { x <- as.data.frame(t(x[, cols])) - x <- cbind(data.frame("Parameter" = row.names(x), stringsAsFactors = FALSE), x) + x <- cbind(data.frame(Parameter = row.names(x), stringsAsFactors = FALSE), x) names(x) <- c("Parameter", attributes(object)$summary$Component) row.names(x) <- NULL @@ -149,40 +149,45 @@ predict.parameters_efa <- function(object, verbose = TRUE, ...) { attri <- attributes(object) - if (is.null(newdata)) { - if ("scores" %in% names(attri)) { - out <- as.data.frame(attri$scores) - if (isTRUE(keep_na)) { - out <- .merge_na(object, out, verbose) + + if (inherits(attri$model, c("psych", "principal"))) { + if (is.null(newdata)) { + if ("scores" %in% names(attri)) { + out <- as.data.frame(attri$scores) + if (isTRUE(keep_na)) { + # Because pre-made scores don't preserve NA + out <- .merge_na(object, out) + } + } else { + d <- attri$data_set + d <- d[vapply(d, is.numeric, logical(1))] + out <- as.data.frame(stats::predict(attri$model, data = d)) } } else { - if ("dataset" %in% names(attri)) { - out <- as.data.frame(stats::predict(attri$model, data = attri$dataset)) + # psych:::predict.principal(object, data) + out <- predict(attri$model, data = newdata) + } + } else if (inherits(attri$model, c("psych", "fa"))) { + if (is.null(newdata)) { + if ("scores" %in% names(attri)) { + out <- as.data.frame(attri$scores) + if (isTRUE(keep_na)) { + # Because pre-made scores don't preserve NA + out <- .merge_na(object, out) + } } else { - insight::format_error( - "Could not retrieve data nor model. Please report an issue on {.url https://github.com/easystats/parameters/issues}." - ) + d <- attri$data_set + d <- d[vapply(d, is.numeric, logical(1))] + out <- as.data.frame(stats::predict(attri$model, data = d)) } - } - } else { - if (inherits(attri$model, c("psych", "fa"))) { - # Clean-up newdata (keep only the variables used in the model) - newdata <- newdata[names(attri$model$complexity)] # assuming "complexity" info is there + } else { # psych:::predict.fa(object, data) out <- as.data.frame(stats::predict(attri$model, data = newdata)) - } else if (inherits(attri$model, "spca")) { - # https://github.com/erichson/spca/issues/7 - newdata <- newdata[names(attri$model$center)] - if (attri$standardize) { - newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE) - newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE) - } - out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings) - out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out)))) - } else { - out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...)) } + } else { + out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...)) } + if (!is.null(names)) { names(out)[seq_along(names)] <- names } From de8f464b52ff0376f2da2e2621d42b1450e13dfb Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 Dec 2023 16:25:43 +0100 Subject: [PATCH 09/13] fix spca --- R/utils_pca_efa.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R index 1f22586a8..6df0bc53b 100644 --- a/R/utils_pca_efa.R +++ b/R/utils_pca_efa.R @@ -184,6 +184,15 @@ predict.parameters_efa <- function(object, # psych:::predict.fa(object, data) out <- as.data.frame(stats::predict(attri$model, data = newdata)) } + } else if (inherits(attri$model, "spca")) { + # https://github.com/erichson/spca/issues/7 + newdata <- newdata[names(attri$model$center)] + if (attri$standardize) { + newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE) + newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE) + } + out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings) + out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out)))) } else { out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...)) } From c0837c35fdd80e58ff4feb0c38d1eda533de8d4c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 2 Dec 2023 14:48:05 +0100 Subject: [PATCH 10/13] fix? --- R/utils_pca_efa.R | 4 ++-- tests/testthat/test-pca.R | 28 ++++------------------------ 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R index 6df0bc53b..c95fb1167 100644 --- a/R/utils_pca_efa.R +++ b/R/utils_pca_efa.R @@ -182,7 +182,7 @@ predict.parameters_efa <- function(object, } } else { # psych:::predict.fa(object, data) - out <- as.data.frame(stats::predict(attri$model, data = newdata)) + out <- as.data.frame(stats::predict(attri$model, newdata = newdata)) } } else if (inherits(attri$model, "spca")) { # https://github.com/erichson/spca/issues/7 @@ -194,7 +194,7 @@ predict.parameters_efa <- function(object, out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings) out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out)))) } else { - out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...)) + out <- as.data.frame(stats::predict(attri$model, newdata = attri$dataset, ...)) } if (!is.null(names)) { diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R index 0e67750ac..014a3c102 100644 --- a/tests/testthat/test-pca.R +++ b/tests/testthat/test-pca.R @@ -29,16 +29,16 @@ test_that("principal_components", { test_that("principal_components, n", { data(iris) - x <- parameters::principal_components(iris[1:4], n = 2) + x <- principal_components(iris[1:4], n = 2) expect_named(x, c("Variable", "PC1", "PC2", "Complexity")) - x <- parameters::principal_components(iris[1:4], n = 1) + x <- principal_components(iris[1:4], n = 1) expect_named(x, c("Variable", "PC1", "Complexity")) }) test_that("principal_components", { - x <- parameters::principal_components(mtcars[, 1:7]) + x <- principal_components(mtcars[, 1:7]) expect_equal( x$PC1, @@ -55,27 +55,7 @@ test_that("principal_components", { ) expect_named(x, c("Variable", "PC1", "PC2", "Complexity")) -}) - -test_that("principal_components", { - x <- model_parameters(principal_components(mtcars[, 1:7], nfactors = 2)) - expect_equal( - x$RC1, - c( - -0.836114674884308, - 0.766808147590597, - 0.85441780762136, - 0.548502661888057, - -0.889046093964722, - 0.931879020871552, - -0.030485507571411 - ), - tolerance = 0.01 - ) - - expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness")) - expect_identical(dim(suppressWarnings(predict(x))), c(32L, 2L)) - expect_identical(dim(suppressWarnings(predict(x, newdata = mtcars[1:3, 1:7]))), c(3L, 2L)) + expect_identical(dim(predict(x)), c(32L, 2L)) }) From 94cc5cd9ebe7238aee91975b73b6537e184ebf2b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Dec 2023 12:19:02 +0100 Subject: [PATCH 11/13] fix --- R/utils_pca_efa.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R index c95fb1167..0336c3de4 100644 --- a/R/utils_pca_efa.R +++ b/R/utils_pca_efa.R @@ -165,7 +165,7 @@ predict.parameters_efa <- function(object, } } else { # psych:::predict.principal(object, data) - out <- predict(attri$model, data = newdata) + out <- stats::predict(attri$model, data = newdata) } } else if (inherits(attri$model, c("psych", "fa"))) { if (is.null(newdata)) { From 4de562fa21aa38a9f97755673d8005e79e0b193b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Dec 2023 12:23:28 +0100 Subject: [PATCH 12/13] simplify --- R/utils_pca_efa.R | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R index 0336c3de4..b8d0211fa 100644 --- a/R/utils_pca_efa.R +++ b/R/utils_pca_efa.R @@ -150,7 +150,7 @@ predict.parameters_efa <- function(object, ...) { attri <- attributes(object) - if (inherits(attri$model, c("psych", "principal"))) { + if (inherits(attri$model, c("psych", "principal", "psych", "fa"))) { if (is.null(newdata)) { if ("scores" %in% names(attri)) { out <- as.data.frame(attri$scores) @@ -167,23 +167,6 @@ predict.parameters_efa <- function(object, # psych:::predict.principal(object, data) out <- stats::predict(attri$model, data = newdata) } - } else if (inherits(attri$model, c("psych", "fa"))) { - if (is.null(newdata)) { - if ("scores" %in% names(attri)) { - out <- as.data.frame(attri$scores) - if (isTRUE(keep_na)) { - # Because pre-made scores don't preserve NA - out <- .merge_na(object, out) - } - } else { - d <- attri$data_set - d <- d[vapply(d, is.numeric, logical(1))] - out <- as.data.frame(stats::predict(attri$model, data = d)) - } - } else { - # psych:::predict.fa(object, data) - out <- as.data.frame(stats::predict(attri$model, newdata = newdata)) - } } else if (inherits(attri$model, "spca")) { # https://github.com/erichson/spca/issues/7 newdata <- newdata[names(attri$model$center)] From 4eb18d0c769bd58a2cdb115f7c9175586666eaf1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Dec 2023 12:29:59 +0100 Subject: [PATCH 13/13] styler --- R/n_factors.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/n_factors.R b/R/n_factors.R index 6d6644258..9b6c335af 100644 --- a/R/n_factors.R +++ b/R/n_factors.R @@ -341,14 +341,14 @@ n_factors <- function(x, ) # Add cumulative percentage of variance explained - fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient) + fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient) varex <- attributes(fa)$summary # Extract number of factors from EFA output (usually MR1, ML1, etc.) varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE)) # Merge (and like that filter out empty methods) by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors") - attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting) + attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting) attr(out, "summary") <- by_factors attr(out, "n") <- min(as.numeric(as.character( by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"]