From 0977008ade73a1b12a600e2f30869afb097a0539 Mon Sep 17 00:00:00 2001 From: Alex Strashny Date: Fri, 29 Mar 2024 11:32:50 -0400 Subject: [PATCH] p_adjust --- NAMESPACE | 1 + NEWS.md | 2 + R/surveytable.R | 2 +- R/tab.R | 7 ++- R/tab_subset.R | 16 ++++++- R/z_test_factor.R | 12 ++++- R/z_test_table.R | 12 ++++- R/zzz.R | 1 + docs/404.html | 2 +- docs/LICENSE.html | 2 +- ...tory-Medical-Care-Survey-NAMCS-tables.html | 2 +- docs/articles/index.html | 2 +- docs/articles/surveytable.html | 44 +++++++++---------- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/index.html | 6 ++- docs/pkgdown.yml | 2 +- docs/reference/codebook.html | 2 +- docs/reference/index.html | 2 +- docs/reference/namcs2019sv.html | 2 +- docs/reference/print.surveytable_table.html | 2 +- docs/reference/set_count_1k.html | 2 +- docs/reference/set_output.html | 2 +- docs/reference/set_survey.html | 2 +- docs/reference/show_options.html | 5 ++- docs/reference/survey_subset.html | 2 +- docs/reference/surveytable-options.html | 2 +- docs/reference/surveytable-package.html | 2 +- docs/reference/svyciprop_adjusted.html | 2 +- docs/reference/tab.html | 9 +++- docs/reference/tab_rate.html | 2 +- docs/reference/tab_subset.html | 21 +++++---- docs/reference/tab_subset_rate.html | 2 +- docs/reference/total.html | 2 +- docs/reference/total_rate.html | 2 +- docs/reference/uspop2019.html | 2 +- docs/reference/var_all.html | 2 +- docs/reference/var_any.html | 2 +- docs/reference/var_case.html | 2 +- docs/reference/var_collapse.html | 2 +- docs/reference/var_copy.html | 2 +- docs/reference/var_cross.html | 2 +- docs/reference/var_cut.html | 2 +- docs/reference/var_list.html | 2 +- docs/reference/var_not.html | 2 +- docs/search.json | 2 +- man/tab.Rd | 3 ++ man/tab_subset.Rd | 3 ++ 48 files changed, 134 insertions(+), 76 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 13d2284..a90bb41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ importFrom(kableExtra,kbl) importFrom(stats,as.formula) importFrom(stats,coef) importFrom(stats,confint) +importFrom(stats,p.adjust) importFrom(stats,pt) importFrom(stats,qt) importFrom(utils,capture.output) diff --git a/NEWS.md b/NEWS.md index 31bf311..05b0d64 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # surveytable (development version) +* Optionally adjust p-values for multiple comparisons (`p_adjust` argument) + # surveytable 0.9.3 * `codebook()` diff --git a/R/surveytable.R b/R/surveytable.R index 8e1c929..4971a1d 100644 --- a/R/surveytable.R +++ b/R/surveytable.R @@ -3,7 +3,7 @@ #' @import survey #' @importFrom huxtable guess_knitr_output_format hux set_all_borders caption caption<- number_format number_format<- fmt_pretty add_footnote print_screen print_html #' @importFrom kableExtra kbl kable_styling footnote column_spec -#' @importFrom stats as.formula confint qt coef pt +#' @importFrom stats as.formula confint qt coef pt p.adjust #' @importFrom utils write.table tail capture.output #' @keywords internal "_PACKAGE" diff --git a/R/tab.R b/R/tab.R index 466c848..4679e92 100644 --- a/R/tab.R +++ b/R/tab.R @@ -21,6 +21,7 @@ #' @param ... names of variables (in quotes) #' @param test perform hypothesis tests? #' @param alpha significance level for tests +#' @param p_adjust adjust p-values for multiple comparisons? #' @param drop_na drop missing values (`NA`)? Categorical variables only. #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables. #' @param csv name of a CSV file @@ -40,7 +41,7 @@ #' # Hypothesis testing with categorical variables #' tab("AGER", test = TRUE) tab = function(... - , test = FALSE, alpha = 0.05 + , test = FALSE, alpha = 0.05, p_adjust = FALSE , drop_na = getOption("surveytable.drop_na") , max_levels = getOption("surveytable.max_levels") , csv = getOption("surveytable.csv") @@ -48,7 +49,8 @@ tab = function(... ret = list() if (...length() > 0) { assert_that(test %in% c(TRUE, FALSE) - , alpha > 0, alpha < 0.5) + , alpha > 0, alpha < 0.5 + , p_adjust %in% c(TRUE, FALSE)) design = .load_survey() nm = names(design$variables) for (ii in 1:...length()) { @@ -69,6 +71,7 @@ tab = function(... , vr = vr , drop_na = drop_na , alpha = alpha + , p_adjust = p_adjust , csv = csv) } } else if (is.numeric(design$variables[,vr])) { diff --git a/R/tab_subset.R b/R/tab_subset.R index 3ec8713..22c72de 100644 --- a/R/tab_subset.R +++ b/R/tab_subset.R @@ -21,6 +21,7 @@ #' @param lvls (optional) only show these levels of `vrby` #' @param test perform hypothesis tests? #' @param alpha significance level for tests +#' @param p_adjust adjust p-values for multiple comparisons? #' @param drop_na drop missing values (`NA`)? Categorical variables only. #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables. #' @param csv name of a CSV file @@ -46,7 +47,7 @@ #' # Hypothesis testing #' tab_subset("NUMMED", "AGER", test = TRUE) tab_subset = function(vr, vrby, lvls = c() - , test = FALSE, alpha = 0.05 + , test = FALSE, alpha = 0.05, p_adjust = FALSE # , test_pairs = "depends" , drop_na = getOption("surveytable.drop_na") , max_levels = getOption("surveytable.max_levels") @@ -54,6 +55,7 @@ tab_subset = function(vr, vrby, lvls = c() ) { assert_that(test %in% c(TRUE, FALSE) , alpha > 0, alpha < 0.5 + , p_adjust %in% c(TRUE, FALSE) # , test_pairs %in% c("depends", "yes", "no") ) design = .load_survey() @@ -124,7 +126,8 @@ tab_subset = function(vr, vrby, lvls = c() # if (test && do_pairs) { if (test) { frm = as.formula(paste0("~ `", vr, "` + `", vrby, "`")) - fo = svychisq(frm, design, statistic = getOption("surveytable.svychisq_statistic")) + fo = svychisq(frm, design + , statistic = getOption("surveytable.svychisq_statistic", default = "F")) rT = data.frame(`p-value` = fo$p.value, check.names = FALSE) test_name = fo$method test_title = paste0("Association between " @@ -157,6 +160,7 @@ tab_subset = function(vr, vrby, lvls = c() , vr = vr , drop_na = drop_na , alpha = alpha + , p_adjust = p_adjust , csv = csv) } @@ -174,6 +178,7 @@ tab_subset = function(vr, vrby, lvls = c() , vr = vrby , drop_na = drop_na , alpha = alpha + , p_adjust = p_adjust , csv = csv) } } @@ -238,6 +243,13 @@ tab_subset = function(vr, vrby, lvls = c() } } test_name = xx$method + if (p_adjust) { + method = getOption("surveytable.p.adjust_method", default = "bonferroni") + rT$`p-adjusted` = p.adjust(rT$`p-value` + , method = method) + test_name %<>% paste0("; ", method, " adjustment") + } + test_title = paste0("Comparison of " , .getvarname(design, vr) , " across all possible pairs of ", .getvarname(design, vrby)) diff --git a/R/z_test_factor.R b/R/z_test_factor.R index 56ac951..d286322 100644 --- a/R/z_test_factor.R +++ b/R/z_test_factor.R @@ -1,5 +1,6 @@ -.test_factor = function(design, vr, drop_na, alpha, csv) { - assert_that(alpha > 0, alpha < 0.5) +.test_factor = function(design, vr, drop_na, alpha, p_adjust, csv) { + assert_that(alpha > 0, alpha < 0.5 + , p_adjust %in% c(TRUE, FALSE)) if ( !(alpha %in% c(0.05, 0.01, 0.001)) ) { warning("Value of alpha is not typical: ", alpha) } @@ -49,6 +50,13 @@ # survey:::svyttest.default test_name = "Design-based t-test" + if (p_adjust) { + method = getOption("surveytable.p.adjust_method", default = "bonferroni") + rT$`p-adjusted` = p.adjust(rT$`p-value` + , method = method) + test_name %<>% paste0("; ", method, " adjustment") + } + test_title = paste0("Comparison of all possible pairs of " , .getvarname(design, vr) ) .test_table(rT = rT diff --git a/R/z_test_table.R b/R/z_test_table.R index 1d70d43..74fec77 100644 --- a/R/z_test_table.R +++ b/R/z_test_table.R @@ -1,14 +1,22 @@ .test_table = function(rT, test_name, test_title, alpha, csv) { assert_that("p-value" %in% names(rT)) + bool.adj = ("p-adjusted" %in% names(rT)) rT$Flag = "" - idx = which(rT$`p-value` <= alpha) + idx = if (bool.adj) { + which(rT$`p-adjusted` <= alpha) + } else { + which(rT$`p-value` <= alpha) + } rT$Flag[idx] = "*" rT$`p-value` %<>% round(3) + if (bool.adj) { + rT$`p-adjusted` %<>% round(3) + } attr(rT, "title") = test_title - attr(rT, "footer") = paste0(test_name, ". *: p-value <= ", alpha) + attr(rT, "footer") = paste0(test_name, ". *: p <= ", alpha) .write_out(rT, csv = csv) } diff --git a/R/zzz.R b/R/zzz.R index de58c4c..8920ab9 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -47,6 +47,7 @@ env = new.env() , surveytable.adjust_svyciprop.df_method = "NHIS" , surveytable.svychisq_statistic = "F" + , surveytable.p.adjust_method = "bonferroni" ) # No - creates a startup message which cannot be suppressed. # set_count_1k() diff --git a/docs/404.html b/docs/404.html index d299e0e..8a12e50 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ surveytable - 0.9.3 + 0.9.3.9000