p_adjust

CDCgov · Mar 29, 2024 · 0977008 · 0977008
1 parent 3de96fb
commit 0977008
Show file tree

Hide file tree

Showing 48 changed files with 134 additions and 76 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -48,6 +48,7 @@ importFrom(kableExtra,kbl)
 importFrom(stats,as.formula)
 importFrom(stats,coef)
 importFrom(stats,confint)
+importFrom(stats,p.adjust)
 importFrom(stats,pt)
 importFrom(stats,qt)
 importFrom(utils,capture.output)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # surveytable (development version)
 
+* Optionally adjust p-values for multiple comparisons (`p_adjust` argument)
+
 # surveytable 0.9.3
 
 * `codebook()`

diff --git a/R/surveytable.R b/R/surveytable.R
@@ -3,7 +3,7 @@
 #' @import survey
 #' @importFrom huxtable guess_knitr_output_format hux set_all_borders caption caption<- number_format number_format<- fmt_pretty add_footnote print_screen print_html
 #' @importFrom kableExtra kbl kable_styling footnote column_spec
-#' @importFrom stats as.formula confint qt coef pt
+#' @importFrom stats as.formula confint qt coef pt p.adjust
 #' @importFrom utils write.table tail capture.output
 #' @keywords internal
 "_PACKAGE"

diff --git a/R/tab.R b/R/tab.R
@@ -21,6 +21,7 @@
 #' @param ...     names of variables (in quotes)
 #' @param test    perform hypothesis tests?
 #' @param alpha   significance level for tests
+#' @param p_adjust adjust p-values for multiple comparisons?
 #' @param drop_na drop missing values (`NA`)? Categorical variables only.
 #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables.
 #' @param csv     name of a CSV file
@@ -40,15 +41,16 @@
 #' # Hypothesis testing with categorical variables
 #' tab("AGER", test = TRUE)
 tab = function(...
-               , test = FALSE, alpha = 0.05
+               , test = FALSE, alpha = 0.05, p_adjust = FALSE
                , drop_na = getOption("surveytable.drop_na")
                , max_levels = getOption("surveytable.max_levels")
                , csv = getOption("surveytable.csv")
                ) {
 	ret = list()
 	if (...length() > 0) {
 	  assert_that(test %in% c(TRUE, FALSE)
-	              , alpha > 0, alpha < 0.5)
+	              , alpha > 0, alpha < 0.5
+	              , p_adjust %in% c(TRUE, FALSE))
 	  design = .load_survey()
 	  nm = names(design$variables)
 		for (ii in 1:...length()) {
@@ -69,6 +71,7 @@ tab = function(...
                                             , vr = vr
                                             , drop_na = drop_na
                                             , alpha = alpha
+                                            , p_adjust = p_adjust
                                             , csv = csv)
 			  }
 			} else if (is.numeric(design$variables[,vr])) {

diff --git a/R/tab_subset.R b/R/tab_subset.R
@@ -21,6 +21,7 @@
 #' @param lvls    (optional) only show these levels of `vrby`
 #' @param test    perform hypothesis tests?
 #' @param alpha   significance level for tests
+#' @param p_adjust adjust p-values for multiple comparisons?
 #' @param drop_na drop missing values (`NA`)? Categorical variables only.
 #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables.
 #' @param csv     name of a CSV file
@@ -46,14 +47,15 @@
 #' # Hypothesis testing
 #' tab_subset("NUMMED", "AGER", test = TRUE)
 tab_subset = function(vr, vrby, lvls = c()
-                , test = FALSE, alpha = 0.05
+                , test = FALSE, alpha = 0.05, p_adjust = FALSE
                 # , test_pairs = "depends"
                 , drop_na = getOption("surveytable.drop_na")
                 , max_levels = getOption("surveytable.max_levels")
                 , csv = getOption("surveytable.csv")
               ) {
   assert_that(test %in% c(TRUE, FALSE)
               , alpha > 0, alpha < 0.5
+              , p_adjust %in% c(TRUE, FALSE)
               # , test_pairs %in% c("depends", "yes", "no")
               )
   design = .load_survey()
@@ -124,7 +126,8 @@ tab_subset = function(vr, vrby, lvls = c()
     # if (test && do_pairs) {
     if (test) {
       frm = as.formula(paste0("~ `", vr, "` + `", vrby, "`"))
-      fo = svychisq(frm, design, statistic = getOption("surveytable.svychisq_statistic"))
+      fo = svychisq(frm, design
+        , statistic = getOption("surveytable.svychisq_statistic", default = "F"))
       rT = data.frame(`p-value` = fo$p.value, check.names = FALSE)
       test_name = fo$method
       test_title = paste0("Association between "
@@ -157,6 +160,7 @@ tab_subset = function(vr, vrby, lvls = c()
                                                     , vr = vr
                                                     , drop_na = drop_na
                                                     , alpha = alpha
+                                                    , p_adjust = p_adjust
                                                     , csv = csv)
       }
 
@@ -174,6 +178,7 @@ tab_subset = function(vr, vrby, lvls = c()
                                                     , vr = vrby
                                                     , drop_na = drop_na
                                                     , alpha = alpha
+                                                    , p_adjust = p_adjust
                                                     , csv = csv)
       }
     }
@@ -238,6 +243,13 @@ tab_subset = function(vr, vrby, lvls = c()
         }
       }
       test_name = xx$method
+      if (p_adjust) {
+        method = getOption("surveytable.p.adjust_method", default = "bonferroni")
+        rT$`p-adjusted` = p.adjust(rT$`p-value`
+                                   , method = method)
+        test_name %<>% paste0("; ", method, " adjustment")
+      }
+
       test_title = paste0("Comparison of "
                           , .getvarname(design, vr)
                           , " across all possible pairs of ", .getvarname(design, vrby))

diff --git a/R/z_test_factor.R b/R/z_test_factor.R
@@ -1,5 +1,6 @@
-.test_factor = function(design, vr, drop_na, alpha, csv) {
-  assert_that(alpha > 0, alpha < 0.5)
+.test_factor = function(design, vr, drop_na, alpha, p_adjust, csv) {
+  assert_that(alpha > 0, alpha < 0.5
+              , p_adjust %in% c(TRUE, FALSE))
   if ( !(alpha %in% c(0.05, 0.01, 0.001)) ) {
     warning("Value of alpha is not typical: ", alpha)
   }
@@ -49,6 +50,13 @@
 
   # survey:::svyttest.default
   test_name = "Design-based t-test"
+  if (p_adjust) {
+    method = getOption("surveytable.p.adjust_method", default = "bonferroni")
+    rT$`p-adjusted` = p.adjust(rT$`p-value`
+      , method = method)
+    test_name %<>% paste0("; ", method, " adjustment")
+  }
+
   test_title = paste0("Comparison of all possible pairs of "
                       , .getvarname(design, vr) )
   .test_table(rT = rT

diff --git a/R/z_test_table.R b/R/z_test_table.R
@@ -1,14 +1,22 @@
 .test_table = function(rT, test_name, test_title, alpha, csv) {
   assert_that("p-value" %in% names(rT))
+  bool.adj = ("p-adjusted" %in% names(rT))
 
   rT$Flag = ""
-  idx = which(rT$`p-value` <= alpha)
+  idx = if (bool.adj) {
+    which(rT$`p-adjusted` <= alpha)
+  } else {
+    which(rT$`p-value` <= alpha)
+  }
   rT$Flag[idx] = "*"
 
   rT$`p-value` %<>% round(3)
+  if (bool.adj) {
+    rT$`p-adjusted` %<>% round(3)
+  }
 
   attr(rT, "title") = test_title
-  attr(rT, "footer") = paste0(test_name, ". *: p-value <= ", alpha)
+  attr(rT, "footer") = paste0(test_name, ". *: p <= ", alpha)
 
   .write_out(rT, csv = csv)
 }
diff --git a/R/zzz.R b/R/zzz.R
@@ -47,6 +47,7 @@ env = new.env()
     , surveytable.adjust_svyciprop.df_method = "NHIS"
 
     , surveytable.svychisq_statistic = "F"
+    , surveytable.p.adjust_method = "bonferroni"
   )
   # No - creates a startup message which cannot be suppressed.
   # set_count_1k()

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/LICENSE.html b/docs/LICENSE.html
diff --git a/docs/articles/Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html b/docs/articles/Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html
diff --git a/docs/articles/index.html b/docs/articles/index.html