From 889d7fb1f1bd8d3174bbcad828ea6e5c54e0ac68 Mon Sep 17 00:00:00 2001 From: erawijantari <45685102+erawijantari@users.noreply.github.com> Date: Wed, 3 Apr 2024 12:19:18 +0300 Subject: [PATCH 1/4] Fix error, notes, and warning based on BiocCheck() (#469) Co-authored-by: Leo Lahti Co-authored-by: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Co-authored-by: ake <40662956+ake123@users.noreply.github.com> --- R/estimateDivergence.R | 4 +- R/estimateDiversity.R | 2 +- R/estimateDominance.R | 2 +- R/estimateRichness.R | 2 +- R/getExperimentCrossAssociation.R | 22 ++-- R/loadFromMetaphlan.R | 4 +- R/merge.R | 4 +- R/mergeSEs.R | 4 +- R/runCCA.R | 2 +- R/splitOn.R | 1 + R/subsampleCounts.R | 163 +++++++++++++++--------------- R/summaries.R | 2 +- R/transformCounts.R | 2 +- man/estimateDiversity.Rd | 2 +- man/estimateDominance.Rd | 2 +- man/estimateRichness.Rd | 2 +- man/subsampleCounts.Rd | 11 +- tests/testthat/test-8subsample.R | 11 +- 18 files changed, 119 insertions(+), 123 deletions(-) diff --git a/R/estimateDivergence.R b/R/estimateDivergence.R index 91b18c765..b37910cb0 100644 --- a/R/estimateDivergence.R +++ b/R/estimateDivergence.R @@ -145,13 +145,13 @@ setMethod("estimateDivergence", signature = c(x="SummarizedExperiment"), if( "median" %in% reference || "mean" %in% reference ){ reference <- apply(mat, 1, reference) } else if( !reference %in% colnames(mat) ) { - stop(paste("Reference", reference, "not recognized.")) + stop("Reference ", reference, " not recognized.", call. = FALSE) } } # Distance between all samples against one reference sample # FIXME: could be be optimzed with sweep / parallelization v <- seq_len(ncol(mat)) - sapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)}) + vapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)},FUN.VALUE = numeric(1)) } diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R index e9fcc4a50..ac722f4c9 100644 --- a/R/estimateDiversity.R +++ b/R/estimateDiversity.R @@ -216,7 +216,7 @@ #' plotColData(tse, "Shannon") #' # ... by sample type #' plotColData(tse, "Shannon", "SampleType") -#' \dontrun{ +#' \donttest{ #' # combining different plots #' library(patchwork) #' plot_index <- c("Shannon","GiniSimpson") diff --git a/R/estimateDominance.R b/R/estimateDominance.R index 934b129d0..733de5538 100644 --- a/R/estimateDominance.R +++ b/R/estimateDominance.R @@ -184,7 +184,7 @@ #' #' # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error #' # gets thrown -#' \dontrun{esophagus <- estimateDominance(esophagus, index="dbp")} +#' \donttest{esophagus <- estimateDominance(esophagus, index="dbp")} #' # Calculates dbp and Core Abundance indices #' esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance")) #' # Shows all indices diff --git a/R/estimateRichness.R b/R/estimateRichness.R index a7d438cf6..99f9c1d6d 100644 --- a/R/estimateRichness.R +++ b/R/estimateRichness.R @@ -178,7 +178,7 @@ #' #' # Indices must be written correctly (all lowercase), otherwise an error #' # gets thrown -#' \dontrun{esophagus <- estimateRichness(esophagus, index="ace")} +#' \donttest{esophagus <- estimateRichness(esophagus, index="ace")} #' #' # Calculates Chao1 and ACE indices only #' esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"), diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R index 5730da27d..2d816003f 100644 --- a/R/getExperimentCrossAssociation.R +++ b/R/getExperimentCrossAssociation.R @@ -1002,7 +1002,7 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), # If assays were identical, and duplicate variable pairs were dropped if( assays_identical ){ # Change names so that they are not equal to colnames of variable_pairs - colnames(variable_pairs)[1:2] <- c("Var1_", "Var2_") + colnames(variable_pairs)[c(1,2)] <- c("Var1_", "Var2_") # Combine feature-pair names with correlation values and p-values correlations_and_p_values <- cbind(variable_pairs, correlations_and_p_values) @@ -1170,9 +1170,9 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), do.call(association_FUN, args = c(list(feature_mat), list(...))) }, error = function(cond) { - stop(paste0("Error occurred during calculation. Check, e.g., that ", + stop("Error occurred during calculation. Check, e.g., that ", "'association_FUN' fulfills requirements. 'association_FUN' ", - "threw a following error:\n", cond), + "threw a following error:\n", cond, call. = FALSE) }) } else { @@ -1180,17 +1180,17 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), suppressWarnings( do.call(association_FUN, args = c(list(feature_mat), list(...))) ) }, error = function(cond) { - stop(paste0("Error occurred during calculation. Check, e.g., that ", + stop("Error occurred during calculation. Check, e.g., that ", "'association_FUN' fulfills requirements. 'association_FUN' ", - "threw a following error:\n", cond), + "threw a following error:\n", cond, call. = FALSE) }) } # If temp's length is not 1, then function does not return single numeric value for each pair if( length(temp) != 1 ){ - stop(paste0("Error occurred during calculation. Check that ", - "'association_FUN' fulfills requirements."), + stop("Error occurred during calculation. Check that ", + "'association_FUN' fulfills requirements.", call. = FALSE) } return(temp) @@ -1298,8 +1298,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), use="pairwise.complete.obs")))$order }, error = function(cond) { - stop(paste0("Error occurred during sorting. Possible reason is that ", - "correlation matrix includes NAs. Try with 'sort = FALSE'."), + stop("Error occurred during sorting. Possible reason is that ", + "correlation matrix includes NAs. Try with 'sort = FALSE'.", call. = FALSE) } ) @@ -1308,8 +1308,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), use="pairwise.complete.obs")))$order }, error = function(cond) { - stop(paste0("Error occurred during sorting. Possible reason is that ", - "correlation matrix includes NAs. Try with 'sort = FALSE'."), + stop("Error occurred during sorting. Possible reason is that ", + "correlation matrix includes NAs. Try with 'sort = FALSE'.", call. = FALSE) } ) diff --git a/R/loadFromMetaphlan.R b/R/loadFromMetaphlan.R index 2d974df22..0db66b343 100644 --- a/R/loadFromMetaphlan.R +++ b/R/loadFromMetaphlan.R @@ -307,13 +307,13 @@ loadFromMetaphlan <- function( sample_names <- rownames(coldata) names(sample_names) <- sample_names } else{ - sample_names <- sapply(rownames(coldata), function(x){ + sample_names <- vapply(rownames(coldata), function(x){ x <- colnames(tse)[grep(x, colnames(tse))] if( length(x) != 1 ){ x <- NULL } return(x) - }) + },FUN.VALUE = character(1)) sample_names <- unlist(sample_names) } diff --git a/R/merge.R b/R/merge.R index 31966b5f8..c9713e722 100644 --- a/R/merge.R +++ b/R/merge.R @@ -210,14 +210,14 @@ setGeneric("mergeSamples", .check_assays_for_merge <- function(assay.type, assay){ # Check if assays include binary or negative values if( all(assay == 0 | assay == 1) ){ - warning(paste0("'",assay.type,"'", " includes binary values."), + warning("'",assay.type,"'", " includes binary values.", "\nAgglomeration of it might lead to meaningless values.", "\nCheck the assay, and consider doing transformation again manually", " with agglomerated data.", call. = FALSE) } if( !all( assay >= 0 | is.na(assay) ) ){ - warning(paste0("'",assay.type,"'", " includes negative values."), + warning("'",assay.type,"'", " includes negative values.", "\nAgglomeration of it might lead to meaningless values.", "\nCheck the assay, and consider doing transformation again manually", " with agglomerated data.", diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 1f80a59e5..4b4cc5969 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -369,7 +369,7 @@ setMethod("right_join", signature = c(x = "ANY"), # Loop through individual TreeSEs and add them to tse if( length(x) > 0 ){ - for( i in 1:length(x) ){ + for( i in seq_len(length(x)) ){ # Give message if TRUE if( verbose ){ message("\r", i+1, "/", length(x)+1, appendLF = FALSE) @@ -765,7 +765,7 @@ setMethod("right_join", signature = c(x = "ANY"), # Get the shared class that is highest in hierarchy if( all( classes %in% allowed_classes[1] ) ){ class <- allowed_classes[1] - } else if( all( classes %in% allowed_classes[1:2] ) ){ + } else if( all( classes %in% allowed_classes[c(1,2)] ) ){ class <- allowed_classes[2] } else { class <- allowed_classes[3] diff --git a/R/runCCA.R b/R/runCCA.R index 4040c230f..acfae300f 100644 --- a/R/runCCA.R +++ b/R/runCCA.R @@ -452,7 +452,7 @@ setMethod("runCCA", "SingleCellExperiment", # Get the dissimilarity matrix based on original dissimilarity index # provided by user. If the analysis is CCA, disable method; calculate # always euclidean distances because CCA is based on Euclidean distances. - if( length(class(rda)) == 1 && class(rda) == "cca" ){ + if( length(class(rda)) == 1 && is(rda, 'cca') ){ dist_mat <- vegdist(mat, method = "euclidean") } else{ dist_mat <- vegdist(mat, method = method, ...) diff --git a/R/splitOn.R b/R/splitOn.R index 7464d0bdb..a81d9d9f6 100644 --- a/R/splitOn.R +++ b/R/splitOn.R @@ -291,6 +291,7 @@ setMethod("splitOn", signature = c(x = "TreeSummarizedExperiment"), # If the returned value is a list, go through all of them if( is(x, 'SimpleList') ){ x <- SimpleList(lapply(x, .agglomerate_trees)) + } else { # Otherwise, the returned value is TreeSE x <- .agglomerate_trees(x) diff --git a/R/subsampleCounts.R b/R/subsampleCounts.R index bca54cef8..e21909b42 100644 --- a/R/subsampleCounts.R +++ b/R/subsampleCounts.R @@ -10,6 +10,8 @@ #' instances where it can be useful. #' Note that the output of \code{subsampleCounts} is not the equivalent as the #' input and any result have to be verified with the original dataset. +#' To maintain the reproducibility, please define the seed using set.seed() +#' before implement this function. #' #' @param x A #' \code{SummarizedExperiment} object. @@ -28,7 +30,7 @@ #' simulated this can equal to lowest number of total counts #' found in a sample or a user specified number. #' -#' @param seed A random number seed for reproducibility of sampling. +#' #' #' @param replace Logical Default is \code{TRUE}. The default is with #' replacement (\code{replace=TRUE}). @@ -68,10 +70,11 @@ #' # they will be removed. #' data(GlobalPatterns) #' tse <- GlobalPatterns +#' set.seed(123) #' tse.subsampled <- subsampleCounts(tse, #' min_size = 60000, -#' name = "subsampled", -#' seed = 123) +#' name = "subsampled" +#' ) #' tse.subsampled #' dim(tse) #' dim(tse.subsampled) @@ -84,7 +87,7 @@ NULL setGeneric("subsampleCounts", signature = c("x"), function(x, assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, + replace = TRUE, name = "subsampled", verbose = TRUE, ...) standardGeneric("subsampleCounts")) @@ -94,86 +97,78 @@ setGeneric("subsampleCounts", signature = c("x"), #' @aliases rarifyCounts #' @export setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = assay_name, assay_name = "counts", - min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, - name = "subsampled", verbose = TRUE, ...){ - - warning("Subsampling/Rarefying may undermine downstream analyses ", - "and have unintended consequences. Therefore, make sure ", - "this normalization is appropriate for your data.", - call. = FALSE) - .check_assay_present(assay.type, x) - if(any(assay(x, assay.type) %% 1 != 0)){ - warning("assay contains non-integer values. Only counts table ", - "is applicable...") - } - if(!is.logical(verbose)){ - stop("`verbose` has to be logical i.e. TRUE or FALSE") - } - if(verbose){ - # Print to screen this value - message("`set.seed(", seed, ")` was used to initialize repeatable ", - "random subsampling.","\nPlease record this for your ", - "records so others can reproduce.") - } - if(!.is_numeric_string(seed)){ - stop("`seed` has to be an numeric value See `?set.seed`") - } - if(!is.logical(replace)){ - stop("`replace` has to be logical i.e. TRUE or FALSE") - } - # Check name - if(!.is_non_empty_string(name) || - name == assay.type){ - stop("'name' must be a non-empty single character value and be ", - "different from `assay.type`.", - call. = FALSE) - } - set.seed(seed) - # Make sure min_size is of length 1. - if(length(min_size) > 1){ - stop("`min_size` had more than one value. ", - "Specifiy a single integer value.") - min_size <- min_size[1] - } - if(!is.numeric(min_size) || - as.integer(min_size) != min_size && min_size <= 0){ - stop("min_size needs to be a positive integer value.") - } - # get samples with less than min number of reads - if(min(colSums2(assay(x, assay.type))) < min_size){ - rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] - # Return NULL, if no samples were found after subsampling - if( !any(!colnames(x) %in% rmsams) ){ - stop("No samples were found after subsampling.", - call. = FALSE) - } - if(verbose){ - message(length(rmsams), " samples removed ", - "because they contained fewer reads than `min_size`.") - } - # remove sample(s) - newtse <- x[, !colnames(x) %in% rmsams] - } else { - newtse <- x - } - newassay <- apply(assay(newtse, assay.type), 2, - .subsample_assay, - min_size=min_size, replace=replace) - rownames(newassay) <- rownames(newtse) - # remove features not present in any samples after subsampling - message(paste(length(which(rowSums2(newassay) == 0)), "features", - "removed because they are not present in all samples", - "after subsampling.")) - newassay <- newassay[rowSums2(newassay)>0,] - newtse <- newtse[rownames(newassay),] - assay(newtse, name, withDimnames=FALSE) <- newassay - newtse <- .add_values_to_metadata(newtse, - "subsampleCounts_min_size", - min_size) - return(newtse) - } + function(x, assay.type = assay_name, assay_name = "counts", + min_size = min(colSums2(assay(x))), + replace = TRUE, + name = "subsampled", verbose = TRUE, ...){ + + warning("Subsampling/Rarefying may undermine downstream analyses ", + "and have unintended consequences. Therefore, make sure ", + "this normalization is appropriate for your data.", + call. = FALSE) + .check_assay_present(assay.type, x) + if(any(assay(x, assay.type) %% 1 != 0)){ + warning("assay contains non-integer values. Only counts table ", + "is applicable...") + } + if(!is.logical(verbose)){ + stop("`verbose` has to be logical i.e. TRUE or FALSE") + } + + if(!is.logical(replace)){ + stop("`replace` has to be logical i.e. TRUE or FALSE") + } + # Check name + if(!.is_non_empty_string(name) || + name == assay.type){ + stop("'name' must be a non-empty single character value and be ", + "different from `assay.type`.", + call. = FALSE) + } + #set.seed(seed) + # Make sure min_size is of length 1. + if(length(min_size) > 1){ + stop("`min_size` had more than one value. ", + "Specifiy a single integer value.") + min_size <- min_size[1] + } + if(!is.numeric(min_size) || + as.integer(min_size) != min_size && min_size <= 0){ + stop("min_size needs to be a positive integer value.") + } + # get samples with less than min number of reads + if(min(colSums2(assay(x, assay.type))) < min_size){ + rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] + # Return NULL, if no samples were found after subsampling + if( !any(!colnames(x) %in% rmsams) ){ + stop("No samples were found after subsampling.", + call. = FALSE) + } + if(verbose){ + message(length(rmsams), " samples removed ", + "because they contained fewer reads than `min_size`.") + } + # remove sample(s) + newtse <- x[, !colnames(x) %in% rmsams] + } else { + newtse <- x + } + newassay <- apply(assay(newtse, assay.type), 2, + .subsample_assay, + min_size=min_size, replace=replace) + rownames(newassay) <- rownames(newtse) + # remove features not present in any samples after subsampling + message(paste(length(which(rowSums2(newassay) == 0)), "features", + "removed because they are not present in all samples", + "after subsampling.")) + newassay <- newassay[rowSums2(newassay)>0,] + newtse <- newtse[rownames(newassay),] + assay(newtse, name, withDimnames=FALSE) <- newassay + newtse <- .add_values_to_metadata(newtse, + "subsampleCounts_min_size", + min_size) + return(newtse) + } ) diff --git a/R/summaries.R b/R/summaries.R index a6ddad2e0..3a742db43 100644 --- a/R/summaries.R +++ b/R/summaries.R @@ -479,7 +479,7 @@ setMethod("summary", signature = c(object = "SummarizedExperiment"), .check_NAs_assay_counts <- function(x, assay.type){ assay.x <- .get_assay(x, assay.type) if(any(is.na(assay.x))) { - stop(paste0("There are samples with NAs in 'assay': ", assay.type), + stop("There are samples with NAs in 'assay': ", assay.type, " . This function is limited to sequencing data only. ", "Where raw counts do not usually have NAs. ", "Try to supply raw counts", diff --git a/R/transformCounts.R b/R/transformCounts.R index bd4cddacd..bb5d40b8a 100644 --- a/R/transformCounts.R +++ b/R/transformCounts.R @@ -565,7 +565,7 @@ setMethod("relAbundanceCounts", signature = c(x = "SummarizedExperiment"), pseudocount <- ifelse(pseudocount, min(mat[mat>0]), 0) # Report pseudocount if positive value if ( pseudocount > 0 ){ - message(paste("A pseudocount of", pseudocount, "was applied.")) + message("A pseudocount of ", pseudocount, " was applied.") } } # Give warning if pseudocount should not be added diff --git a/man/estimateDiversity.Rd b/man/estimateDiversity.Rd index bbe78e48d..ac033b912 100644 --- a/man/estimateDiversity.Rd +++ b/man/estimateDiversity.Rd @@ -241,7 +241,7 @@ library(scater) plotColData(tse, "Shannon") # ... by sample type plotColData(tse, "Shannon", "SampleType") -\dontrun{ +\donttest{ # combining different plots library(patchwork) plot_index <- c("Shannon","GiniSimpson") diff --git a/man/estimateDominance.Rd b/man/estimateDominance.Rd index 48f2b56c8..b53304c8f 100644 --- a/man/estimateDominance.Rd +++ b/man/estimateDominance.Rd @@ -187,7 +187,7 @@ colData(esophagus) # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error # gets thrown -\dontrun{esophagus <- estimateDominance(esophagus, index="dbp")} +\donttest{esophagus <- estimateDominance(esophagus, index="dbp")} # Calculates dbp and Core Abundance indices esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance")) # Shows all indices diff --git a/man/estimateRichness.Rd b/man/estimateRichness.Rd index 09c051792..2073d4c1f 100644 --- a/man/estimateRichness.Rd +++ b/man/estimateRichness.Rd @@ -172,7 +172,7 @@ colData(esophagus) <- NULL # Indices must be written correctly (all lowercase), otherwise an error # gets thrown -\dontrun{esophagus <- estimateRichness(esophagus, index="ace")} +\donttest{esophagus <- estimateRichness(esophagus, index="ace")} # Calculates Chao1 and ACE indices only esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"), diff --git a/man/subsampleCounts.Rd b/man/subsampleCounts.Rd index 7e9fabfc8..b93e28ed8 100644 --- a/man/subsampleCounts.Rd +++ b/man/subsampleCounts.Rd @@ -11,7 +11,6 @@ subsampleCounts( assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, name = "subsampled", verbose = TRUE, @@ -23,7 +22,6 @@ subsampleCounts( assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, name = "subsampled", verbose = TRUE, @@ -48,8 +46,6 @@ will be disabled.)} simulated this can equal to lowest number of total counts found in a sample or a user specified number.} -\item{seed}{A random number seed for reproducibility of sampling.} - \item{replace}{Logical Default is \code{TRUE}. The default is with replacement (\code{replace=TRUE}). See \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} @@ -77,6 +73,8 @@ we include the \code{subsampleCounts} function because there may be some instances where it can be useful. Note that the output of \code{subsampleCounts} is not the equivalent as the input and any result have to be verified with the original dataset. +To maintain the reproducibility, please define the seed using set.seed() +before implement this function. } \examples{ # When samples in TreeSE are less than specified min_size, they will be removed. @@ -84,10 +82,11 @@ input and any result have to be verified with the original dataset. # they will be removed. data(GlobalPatterns) tse <- GlobalPatterns +set.seed(123) tse.subsampled <- subsampleCounts(tse, min_size = 60000, - name = "subsampled", - seed = 123) + name = "subsampled" + ) tse.subsampled dim(tse) dim(tse.subsampled) diff --git a/tests/testthat/test-8subsample.R b/tests/testthat/test-8subsample.R index de0d77304..19a703162 100644 --- a/tests/testthat/test-8subsample.R +++ b/tests/testthat/test-8subsample.R @@ -1,13 +1,13 @@ context("subsampleCounts") test_that("subsampleCounts", { - + seed = 1938 + set.seed(seed) data(GlobalPatterns, package="mia") expect_warning(tse.subsampled <- subsampleCounts(GlobalPatterns, min_size = 60000, name = "subsampled", - replace = TRUE, - seed = 1938)) + replace = TRUE)) # check class expect_s4_class(tse.subsampled, "TreeSummarizedExperiment") expect_equal(nrow(tse.subsampled), 12403) @@ -35,11 +35,12 @@ test_that("subsampleCounts", { expect_equal(unname(colSums2(assay(tse.subsampled, "subsampled"))), expColSums) # When replace = FALSE + seed = 1938 + set.seed(seed) expect_warning(tse.subsampled.rp <- subsampleCounts(GlobalPatterns, min_size = 60000, name = "subsampled", - replace = FALSE, - seed = 1938)) + replace = FALSE)) # check number of features removed is correct expnFeaturesRemovedRp <- 6731 From 266637d6047b55b8bccde68326443e40400ed02e Mon Sep 17 00:00:00 2001 From: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Date: Wed, 3 Apr 2024 14:11:54 +0300 Subject: [PATCH 2/4] Start using rworkflows (#508) --- .github/workflows/check-bioc-devel.yml | 286 ------------------------- .github/workflows/rworkflows.yml | 58 +++++ 2 files changed, 58 insertions(+), 286 deletions(-) delete mode 100644 .github/workflows/check-bioc-devel.yml create mode 100644 .github/workflows/rworkflows.yml diff --git a/.github/workflows/check-bioc-devel.yml b/.github/workflows/check-bioc-devel.yml deleted file mode 100644 index 8ba724667..000000000 --- a/.github/workflows/check-bioc-devel.yml +++ /dev/null @@ -1,286 +0,0 @@ -## Read more about GitHub actions the features of this GitHub Actions workflow -## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action -## -## For more details, check the biocthis developer notes vignette at -## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html -## -## You can add this workflow to other packages using: -## > biocthis::use_bioc_github_action() -## -## Using GitHub Actions exposes you to many details about how R packages are -## compiled and installed in several operating system.s -### If you need help, please follow the steps listed at -## https://github.com/r-lib/actions#where-to-find-help -## -## If you found an issue specific to biocthis's GHA workflow, please report it -## with the information that will make it easier for others to help you. -## Thank you! - -## Acronyms: -## * GHA: GitHub Action -## * OS: operating system - -on: - push: - branches: - - master - pull_request: - branches: - - master - -name: R-CMD-check-bioc-devel - -## These environment variables control whether to run GHA code later on that is -## specific to testthat, covr, and pkgdown. -## -## If you need to clear the cache of packages, update the number inside -## cache-version as discussed at https://github.com/r-lib/actions/issues/86. -## Note that you can always run a GHA test without the cache by using the word -## "/nocache" in the commit message. -env: - has_testthat: 'true' - run_covr: 'true' - run_pkgdown: 'true' - has_RUnit: 'false' - cache-version: 'cache-v1' - -jobs: - build-check: - runs-on: ${{ matrix.config.os }} - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - container: ${{ matrix.config.cont }} - ## Environment variables unique to this job. - - strategy: - fail-fast: false - matrix: - config: - - { os: ubuntu-latest, r: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - # - { os: macOS-latest, r: 'devel', bioc: 'devel'} - # - { os: windows-latest, r: 'devel', bioc: 'devel'} - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - NOT_CRAN: true - TZ: UTC - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - ## Set the R library to the directory matching the - ## R packages cache step further below when running on Docker (Linux). - - name: Set R Library home on Linux - if: runner.os == 'Linux' - run: | - mkdir /__w/_temp/Library - echo ".libPaths('/__w/_temp/Library')" > ~/.Rprofile - - ## Most of these steps are the same as the ones in - ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml - ## If they update their steps, we will also need to update ours. - - name: Checkout Repository - uses: actions/checkout@v2 - - ## R is already included in the Bioconductor docker images - - name: Setup R from r-lib - if: runner.os != 'Linux' - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - ## pandoc is already included in the Bioconductor docker images - - name: Setup pandoc from r-lib - if: runner.os != 'Linux' - uses: r-lib/actions/setup-pandoc@v2 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - shell: Rscript {0} - - - name: Cache R packages - if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" - uses: actions/cache@v1 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-new3-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-new3- - - - name: Cache R packages on Linux - if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " - uses: actions/cache@v1 - with: - path: /home/runner/work/_temp/Library - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-new3-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-new3- - - - name: Install Linux system dependencies - if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" - sudo apt-get install -y libcurl4-openssl-dev libglpk-dev - - - name: Install macOS system dependencies - if: matrix.config.os == 'macOS-latest' - run: | - ## Enable installing XML from source if needed - brew install libxml2 - - ## Required to install magick as noted at - ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 - brew install imagemagick@6 - - - name: Install Windows system dependencies - if: runner.os == 'Windows' - run: | - ## Edit below if you have any Windows system dependencies - shell: Rscript {0} - - - name: Install BiocManager - if: matrix.config.cont == null - run: | - message(paste('****', Sys.time(), 'installing BiocManager ****')) - remotes::install_cran("BiocManager") - shell: Rscript {0} - - - name: Set BiocVersion - if: matrix.config.cont == null - run: | - BiocManager::install(version = "devel", ask = FALSE) - shell: Rscript {0} - - - name: Install dependencies - run: | - ## Try installing the package dependencies in steps. First the local - ## dependencies, then any remaining dependencies to avoid the - ## issues described at - ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016675.html - ## https://github.com/r-lib/remotes/issues/296 - ## Ideally, all dependencies should get installed in the first pass. - - # rspm is out of date - install.packages("matrixStats", repos = "https://cran.rstudio.com/") - # BiocManager::install("adw96/breakaway") - - BiocManager::install("vegandevs/vegan") - BiocManager::install("fionarhuang/TreeSummarizedExperiment") - BiocManager::install("stan-dev/rstantools") - BiocManager::install("cran/reldist") - BiocManager::install("cran/densEstBayes") - - # For loading data in test-3agglomerate.R - remotes::install_github("microbiome/miaTime") - - # For dmm examples - remotes::install_github("LTLA/bluster") - - ## Pass #1 at installing dependencies - message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) - remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) - - ## Pass #2 at installing dependencies - message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) - remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) - - remotes::install_deps(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) - - ## For running the checks - message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) - remotes::install_cran("rcmdcheck") - BiocManager::install("BiocCheck") - shell: Rscript {0} - - - name: Install BiocGenerics - if: env.has_RUnit == 'true' - run: | - ## Install BiocGenerics - BiocManager::install("BiocGenerics") - shell: Rscript {0} - - - name: Install covr - if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' - run: | - remotes::install_cran("covr") - shell: Rscript {0} - - - name: Install pkgdown - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' - run: | - remotes::install_cran("pkgdown") - shell: Rscript {0} - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - name: Run CMD check - env: - _R_CHECK_CRAN_INCOMING_: false - run: | - rcmdcheck::rcmdcheck( - args = c("--no-build-vignettes", "--no-manual", "--timings"), - build_args = c("--no-manual", "--no-resave-data"), - error_on = "warning", - check_dir = "check" - ) - shell: Rscript {0} - - ## Might need an to add this to the if: && runner.os == 'Linux' - - name: Reveal testthat details - if: env.has_testthat == 'true' - run: find . -name testthat.Rout -exec cat '{}' ';' - - - name: Run RUnit tests - if: env.has_RUnit == 'true' - run: | - BiocGenerics:::testPackage() - shell: Rscript {0} - - # - name: Run BiocCheck - # run: | - # BiocCheck::BiocCheck( - # dir('check', 'tar.gz$', full.names = TRUE), - # `quit-with-status` = TRUE, - # `no-check-R-ver` = TRUE, - # `no-check-bioc-help` = TRUE - # ) - # shell: Rscript {0} - - - name: Test coverage - if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' - run: | - covr::codecov() - shell: Rscript {0} - - - name: Install package - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' - run: R CMD INSTALL . - - - name: Deploy package - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' - run: | - git config --global user.email "action@github.com" - git config --global user.name "GitHub Action" - git config --global --add safe.directory /__w/mia/mia - Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)" - shell: bash {0} - ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) - ## at least one locally before this will work. This creates the gh-pages - ## branch (erasing anything you haven't version controlled!) and - ## makes the git history recognizable by pkgdown. - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@master - with: - name: ${{ runner.os }}-biocversion-devel-r-4.3-results - path: check diff --git a/.github/workflows/rworkflows.yml b/.github/workflows/rworkflows.yml new file mode 100644 index 000000000..ce93b914d --- /dev/null +++ b/.github/workflows/rworkflows.yml @@ -0,0 +1,58 @@ +name: rworkflows +'on': + push: + branches: + - master + - main + - devel + - RELEASE_** + pull_request: + branches: + - master + - main + - devel + - RELEASE_** +jobs: + rworkflows: + permissions: + contents: write + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + container: ${{ matrix.config.cont }} + strategy: + fail-fast: ${{ false }} + matrix: + config: + - os: ubuntu-latest + bioc: devel + r: auto + cont: bioconductor/bioconductor_docker:devel + rspm: https://packagemanager.rstudio.com/cran/__linux__/latest/release + - os: macOS-latest + bioc: devel + r: auto + cont: ~ + rspm: ~ + - os: windows-latest + bioc: devel + r: auto + cont: ~ + rspm: ~ + steps: + - uses: neurogenomics/rworkflows@master + with: + run_bioccheck: ${{ false }} + run_rcmdcheck: ${{ true }} + as_cran: ${{ true }} + run_vignettes: ${{ true }} + has_testthat: ${{ true }} + run_covr: ${{ true }} + run_pkgdown: ${{ true }} + has_runit: ${{ false }} + has_latex: ${{ false }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run_docker: ${{ false }} + DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} + runner_os: ${{ runner.os }} + cache_version: cache-v1 + enable_act: ${{ false }} From 32d85c6bfe311c9160a72bf718d5f36987bdc261 Mon Sep 17 00:00:00 2001 From: ake <40662956+ake123@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:29:41 +0300 Subject: [PATCH 3/4] taxonomyranks (#501) --- NAMESPACE | 3 +++ NEWS | 1 + R/loadFromMetaphlan.R | 24 +++++++++++++++++++++++- R/taxonomy.R | 40 +++++++++++++++++++++++++++++++++++++++- man/taxonomy-methods.Rd | 19 +++++++++++++++++++ 5 files changed, 85 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c1bb49521..154782d03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(getPrevalentFeatures) export(getPrevalentTaxa) export(getRareFeatures) export(getRareTaxa) +export(getTaxonomyRanks) export(getTopFeatures) export(getTopTaxa) export(getUniqueFeatures) @@ -78,6 +79,7 @@ export(runJSD) export(runNMDS) export(runOverlap) export(runUnifrac) +export(setTaxonomyRanks) export(splitByRanks) export(splitOn) export(subsampleCounts) @@ -308,6 +310,7 @@ importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) +importFrom(utils,assignInMyNamespace) importFrom(utils,combn) importFrom(utils,head) importFrom(utils,read.delim) diff --git a/NEWS b/NEWS index b052af1cd..e97cded4f 100644 --- a/NEWS +++ b/NEWS @@ -104,3 +104,4 @@ Changes in version 1.11.x + cluster: Overwrite old results instead of failing + getPrevalence: bugfix, if assay contains NA values, it does not end up to NA anymore. + getExperimentCrossCorrelation fix: enable using of sampleMap in MAE. ++ Implemented the setTaxonomyRanks function to specify which ranks are recognized as taxonomy ranks. diff --git a/R/loadFromMetaphlan.R b/R/loadFromMetaphlan.R index 0db66b343..dccf00a28 100644 --- a/R/loadFromMetaphlan.R +++ b/R/loadFromMetaphlan.R @@ -89,7 +89,7 @@ NULL loadFromMetaphlan <- function( - file, colData = sample_meta, sample_meta = NULL, phy_tree = NULL, ...){ + file, colData = sample_meta, sample_meta = NULL, phy_tree = NULL,...){ ################################ Input check ################################ if(!.is_non_empty_string(file)){ stop("'file' must be a single character value.", @@ -137,6 +137,8 @@ loadFromMetaphlan <- function( altExp(tse, rank) <- se_objects[[rank]] } } + # Set taxonomy ranks using .set_taxonomy_ranks + .set_ranks_based_on_rowdata(tse,...) # Load sample meta data if it is provided if( !is.null(colData) ) { @@ -392,3 +394,23 @@ loadFromMetaphlan <- function( } return(data) } +.set_ranks_based_on_rowdata <- function(tse,...){ + # Get ranks from rowData + ranks <- colnames(rowData(tse)) + # Ranks must be character columns + is_char <- lapply(rowData(tse), function(x) is.character(x) || is.factor(x)) + is_char <- unlist(is_char) + ranks <- ranks[ is_char ] + # rowData is empty, cannot set ranks + if( length(ranks) == 0 ){ + warning( + "Ranks cannot be set. rowData(x) does not include columns ", + "specifying character values.", call. = FALSE) + return(NULL) + } + # Finally, set ranks and give message + tse <- setTaxonomyRanks(ranks) + message("TAXONOMY_RANKS set to: '", paste0(ranks, collapse = "', '"), "'") + return(NULL) +} + diff --git a/R/taxonomy.R b/R/taxonomy.R index cc8d473c1..97fc0e6ca 100644 --- a/R/taxonomy.R +++ b/R/taxonomy.R @@ -71,7 +71,8 @@ #' (default: \code{FALSE}) #' #' @param ... optional arguments not used currently. -#' +#' +#' @param ranks Avector of ranks to be set #' @details #' Taxonomic information from the \code{IdTaxa} function of \code{DECIPHER} #' package are returned as a special class. With \code{as(taxa,"DataFrame")} @@ -116,6 +117,17 @@ #' mapTaxonomy(GlobalPatterns, taxa = "Escherichia") #' # returns information on a single output #' mapTaxonomy(GlobalPatterns, taxa = "Escherichia",to="Family") +#' +#' # setTaxonomyRanks +#' tse <- GlobalPatterns +#' colnames(rowData(tse))[1] <- "TAXA1" +#' +#' setTaxonomyRanks(colnames(rowData(tse))) +#' # Taxonomy ranks set to: taxa1 phylum class order family genus species +#' +#' # getTaxonomyRanks is to get/check if the taxonomic ranks is set to "TAXA1" +#' getTaxonomyRanks() +#' NULL #' @rdname taxonomy-methods @@ -197,6 +209,32 @@ setMethod("checkTaxonomy", signature = c(x = "SummarizedExperiment"), } ) +#' @rdname taxonomy-methods +#' @importFrom utils assignInMyNamespace +#' @aliases checkTaxonomy +#' @export +# Function to set taxonomy ranks +setTaxonomyRanks <- function(ranks) { + ranks <- tolower(ranks) + # Check if rank is a character vector with length >= 1 + if (!is.character(ranks) || length(ranks) < 1 + || any(ranks == "" | ranks == " " | ranks == "\t" | ranks == "-" | ranks == "_") + || any(grepl("\\s{2,}", ranks))) { + stop("Input 'rank' should be a character vector with non-empty strings, + no spaces, tabs, hyphens, underscores, and non-continuous spaces." + , call. = FALSE) + } + #Replace default value of mia::TAXONOMY_RANKS + assignInMyNamespace("TAXONOMY_RANKS", ranks) +} + +#' @rdname taxonomy-methods +#' @export +# Function to get taxonomy ranks +getTaxonomyRanks <- function() { + return(TAXONOMY_RANKS) +} + .check_taxonomic_rank <- function(rank, x){ if(length(rank) != 1L){ stop("'rank' must be a single character value.",call. = FALSE) diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd index d224bfa10..2e9c2a6b3 100644 --- a/man/taxonomy-methods.Rd +++ b/man/taxonomy-methods.Rd @@ -10,6 +10,8 @@ \alias{taxonomyRankEmpty,SummarizedExperiment-method} \alias{checkTaxonomy} \alias{checkTaxonomy,SummarizedExperiment-method} +\alias{setTaxonomyRanks} +\alias{getTaxonomyRanks} \alias{getTaxonomyLabels} \alias{getTaxonomyLabels,SummarizedExperiment-method} \alias{mapTaxonomy} @@ -43,6 +45,10 @@ checkTaxonomy(x, ...) \S4method{checkTaxonomy}{SummarizedExperiment}(x) +setTaxonomyRanks(ranks) + +getTaxonomyRanks() + getTaxonomyLabels(x, ...) \S4method{getTaxonomyLabels}{SummarizedExperiment}( @@ -74,6 +80,8 @@ removed if \code{na.rm = TRUE} before agglomeration} \item{...}{optional arguments not used currently.} +\item{ranks}{Avector of ranks to be set} + \item{with_rank}{\code{TRUE} or \code{FALSE}: Should the level be add as a suffix? For example: "Phylum:Crenarchaeota" (default: \code{with_rank = FALSE})} @@ -170,6 +178,17 @@ mapTaxonomy(GlobalPatterns) mapTaxonomy(GlobalPatterns, taxa = "Escherichia") # returns information on a single output mapTaxonomy(GlobalPatterns, taxa = "Escherichia",to="Family") + +# setTaxonomyRanks +tse <- GlobalPatterns +colnames(rowData(tse))[1] <- "TAXA1" + +setTaxonomyRanks(colnames(rowData(tse))) +# Taxonomy ranks set to: taxa1 phylum class order family genus species + +# getTaxonomyRanks is to get/check if the taxonomic ranks is set to "TAXA1" +getTaxonomyRanks() + } \seealso{ \code{\link[=agglomerate-methods]{agglomerateByRank}}, From 1d01162117125c4f38b8fcd87b9f8ef3584f4242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9otime=20Pralas?= <151254073+thpralas@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:34:37 +0300 Subject: [PATCH 4/4] Rename cluster to addCluster (#502) --- NAMESPACE | 3 +- NEWS | 1 + R/calculateDMM.R | 166 ++++++++++++++++-------------- R/cluster.R | 19 ++-- R/deprecate.R | 36 +++++-- man/{cluster.Rd => addCluster.Rd} | 16 +-- man/deprecate.Rd | 18 ++-- pkgdown/_pkgdown.yml | 2 +- tests/testthat/test-9cluster.R | 58 +++++------ 9 files changed, 177 insertions(+), 142 deletions(-) rename man/{cluster.Rd => addCluster.Rd} (89%) diff --git a/NAMESPACE b/NAMESPACE index 154782d03..5151873c5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(IdTaxaToDataFrame) export(TAXONOMY_RANKS) export(ZTransform) +export(addCluster) export(addContaminantQC) export(addNotContaminantQC) export(addPerSampleDominantFeatures) @@ -17,7 +18,6 @@ export(calculateJSD) export(calculateNMDS) export(calculateOverlap) export(calculateUnifrac) -export(cluster) export(countDominantFeatures) export(countDominantTaxa) export(estimateDivergence) @@ -100,6 +100,7 @@ export(unsplitByRanks) export(unsplitOn) exportMethods("relabundance<-") exportMethods(ZTransform) +exportMethods(addCluster) exportMethods(addContaminantQC) exportMethods(addHierarchyTree) exportMethods(addNotContaminantQC) diff --git a/NEWS b/NEWS index e97cded4f..65af1cb98 100644 --- a/NEWS +++ b/NEWS @@ -105,3 +105,4 @@ Changes in version 1.11.x + getPrevalence: bugfix, if assay contains NA values, it does not end up to NA anymore. + getExperimentCrossCorrelation fix: enable using of sampleMap in MAE. + Implemented the setTaxonomyRanks function to specify which ranks are recognized as taxonomy ranks. ++ Rename cluster to addCluster diff --git a/R/calculateDMM.R b/R/calculateDMM.R index c73409383..179737424 100644 --- a/R/calculateDMM.R +++ b/R/calculateDMM.R @@ -104,20 +104,20 @@ NULL #' @rdname calculateDMN #' @export setGeneric("calculateDMN", signature = c("x"), - function(x, ...) - standardGeneric("calculateDMN")) + function(x, ...) + standardGeneric("calculateDMN")) #' @importFrom DirichletMultinomial dmn #' @importFrom stats runif .calculate_DMN <- function(x, k = 1, BPPARAM = SerialParam(), - seed = runif(1, 0, .Machine$integer.max), ...){ + seed = runif(1, 0, .Machine$integer.max), ...){ if(!is.numeric(k) || - length(k) == 0 || - anyNA(k) || - any(k <= 0) || - any(k != as.integer(k))){ + length(k) == 0 || + anyNA(k) || + any(k <= 0) || + any(k != as.integer(k))){ stop("'k' must be an integer vector with positive values only.", - call. = FALSE) + call. = FALSE) } # old <- getAutoBPPARAM() @@ -127,10 +127,10 @@ setGeneric("calculateDMN", signature = c("x"), bpstart(BPPARAM) on.exit(bpstop(BPPARAM), add = TRUE) } - + ans <- BiocParallel::bplapply(k, DirichletMultinomial::dmn, count = x, - seed = seed, ..., - BPPARAM = BPPARAM) + seed = seed, ..., + BPPARAM = BPPARAM) ans } @@ -141,10 +141,11 @@ setMethod("calculateDMN", signature = c(x = "ANY"), .calculate_DMN) #' @rdname calculateDMN #' @export setMethod("calculateDMN", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", - transposed = FALSE, ...){ + function(x, assay.type = assay_name, assay_name = exprs_values, + exprs_values = "counts", transposed = FALSE, ...){ .Deprecated(old="calculateDMN", new="cluster", - "Now calculateDMN is deprecated. Use cluster with DMMParam parameter instead.") + "Now calculateDMN is deprecated. + Use cluster with DMMParam parameter instead.") mat <- assay(x, assay.type) if(!transposed){ mat <- t(mat) @@ -158,7 +159,8 @@ setMethod("calculateDMN", signature = c(x = "SummarizedExperiment"), #' @export runDMN <- function(x, name = "DMN", ...){ .Deprecated(old="runDMN", new="cluster", - "Now runDMN is deprecated. Use cluster with DMMParam parameter instead.") + "Now runDMN is deprecated. + Use cluster with DMMParam parameter instead.") if(!is(x,"SummarizedExperiment")){ stop("'x' must be a SummarizedExperiment") } @@ -185,27 +187,29 @@ runDMN <- function(x, name = "DMN", ...){ .get_dmn_fit_FUN <- function(type){ type <- match.arg(type, c("laplace","AIC","BIC")) fit_FUN <- switch(type, - laplace = DirichletMultinomial::laplace, - AIC = DirichletMultinomial::AIC, - BIC = DirichletMultinomial::BIC) + laplace = DirichletMultinomial::laplace, + AIC = DirichletMultinomial::AIC, + BIC = DirichletMultinomial::BIC) fit_FUN } #' @rdname calculateDMN #' @export setGeneric("getDMN", signature = "x", - function(x, name = "DMN", ...) - standardGeneric("getDMN")) + function(x, name = "DMN", ...) + standardGeneric("getDMN")) #' @rdname calculateDMN #' @importFrom DirichletMultinomial laplace AIC BIC #' @export setMethod("getDMN", signature = c(x = "SummarizedExperiment"), - function(x, name = "DMN"){ - .Deprecated(old="getDMN", new="cluster", - "Now getDMN is deprecated. Use cluster with DMMParam parameter and full parameter set as true instead.") - .get_dmn(x, name) - } + function(x, name = "DMN"){ + .Deprecated(old="getDMN", new="cluster", + "Now getDMN is deprecated. + Use cluster with DMMParam parameter + and full parameter set as true instead.") + .get_dmn(x, name) + } ) @@ -217,41 +221,45 @@ setMethod("getDMN", signature = c(x = "SummarizedExperiment"), #' @rdname calculateDMN #' @export setGeneric("bestDMNFit", signature = "x", - function(x, name = "DMN", type = c("laplace","AIC","BIC"), ...) - standardGeneric("bestDMNFit")) + function(x, name = "DMN", type = c("laplace","AIC","BIC"), ...) + standardGeneric("bestDMNFit")) #' @rdname calculateDMN #' @importFrom DirichletMultinomial laplace AIC BIC #' @export setMethod("bestDMNFit", signature = c(x = "SummarizedExperiment"), - function(x, name = "DMN", type = c("laplace","AIC","BIC")){ - .Deprecated(old="bestDMNFit", new="cluster", - "Now bestDMNFit is deprecated. Use cluster with DMMParam parameter and full parameter set as true instead.") - # - dmn <- getDMN(x, name) - fit_FUN <- .get_dmn_fit_FUN(type) - # - .get_best_dmn_fit(dmn, fit_FUN) - } + function(x, name = "DMN", type = c("laplace","AIC","BIC")){ + .Deprecated(old="bestDMNFit", new="cluster", + "Now bestDMNFit is deprecated. + Use cluster with DMMParam parameter + and full parameter set as true instead.") + # + dmn <- getDMN(x, name) + fit_FUN <- .get_dmn_fit_FUN(type) + # + .get_best_dmn_fit(dmn, fit_FUN) + } ) #' @rdname calculateDMN #' @export setGeneric("getBestDMNFit", signature = "x", - function(x, name = "DMN", type = c("laplace","AIC","BIC"), ...) - standardGeneric("getBestDMNFit")) + function(x, name = "DMN", type = c("laplace","AIC","BIC"), ...) + standardGeneric("getBestDMNFit")) #' @rdname calculateDMN #' @importFrom DirichletMultinomial laplace AIC BIC #' @export setMethod("getBestDMNFit", signature = c(x = "SummarizedExperiment"), - function(x, name = "DMN", type = c("laplace","AIC","BIC")){ - .Deprecated(old="getBestDMNFit", new="cluster", - "Now getBestDMNFit is deprecated. Use cluster with DMMParam parameter and full parameter set as true instead.") - dmn <- getDMN(x, name) - fit_FUN <- .get_dmn_fit_FUN(type) - dmn[[.get_best_dmn_fit(dmn, fit_FUN)]] - } + function(x, name = "DMN", type = c("laplace","AIC","BIC")){ + .Deprecated(old="getBestDMNFit", new="cluster", + "Now getBestDMNFit is deprecated. + Use cluster with DMMParam parameter + and full parameter set as true instead.") + dmn <- getDMN(x, name) + fit_FUN <- .get_dmn_fit_FUN(type) + dmn[[.get_best_dmn_fit(dmn, fit_FUN)]] + } ) ################################################################################ @@ -260,8 +268,8 @@ setMethod("getBestDMNFit", signature = c(x = "SummarizedExperiment"), #' @rdname calculateDMN #' @export setGeneric("calculateDMNgroup", signature = c("x"), - function(x, ...) - standardGeneric("calculateDMNgroup")) + function(x, ...) + standardGeneric("calculateDMNgroup")) #' @importFrom DirichletMultinomial dmngroup #' @importFrom stats runif @@ -285,20 +293,20 @@ setMethod("calculateDMNgroup", signature = c(x = "ANY"), .calculate_DMNgroup) #' @rdname calculateDMN #' @export setMethod("calculateDMNgroup", signature = c(x = "SummarizedExperiment"), - function(x, variable, - assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", - transposed = FALSE, ...){ - mat <- assay(x, assay.type) - if(!transposed){ - mat <- t(mat) - } - variable <- colData(x)[,variable] - if(is.null(variable)){ - stop("No data found in '",variable,"' column of colData(x).", - call. = FALSE) - } - calculateDMNgroup(x = mat, variable = variable, ...) - } + function(x, variable, + assay.type = assay_name, assay_name = exprs_values, + exprs_values = "counts", transposed = FALSE, ...){ + mat <- assay(x, assay.type) + if(!transposed){ + mat <- t(mat) + } + variable <- colData(x)[,variable] + if(is.null(variable)){ + stop("No data found in '",variable, + "' column of colData(x).", call. = FALSE) + } + calculateDMNgroup(x = mat, variable = variable, ...) + } ) ################################################################################ @@ -307,13 +315,13 @@ setMethod("calculateDMNgroup", signature = c(x = "SummarizedExperiment"), #' @rdname calculateDMN #' @export setGeneric("performDMNgroupCV", signature = c("x"), - function(x, ...) - standardGeneric("performDMNgroupCV")) + function(x, ...) + standardGeneric("performDMNgroupCV")) #' @importFrom DirichletMultinomial cvdmngroup #' @importFrom stats runif .perform_DMNgroup_cv <- function(x, variable, k = 1, - seed = runif(1, 0, .Machine$integer.max), ...){ + seed = runif(1, 0, .Machine$integer.max), ...){ # input check if(!is.factor(variable) && is.character(variable)){ variable <- factor(variable, unique(variable)) @@ -323,7 +331,7 @@ setGeneric("performDMNgroupCV", signature = c("x"), variable <- droplevels(variable) if(is.null(names(k)) || !all(names(k) %in% levels(variable))){ stop("'k' must be named. Names must fit the levels of 'variable'.", - call. = FALSE) + call. = FALSE) } # cvdmngroup(nrow(x), x, variable, k = k, seed = seed, ...) @@ -336,18 +344,18 @@ setMethod("performDMNgroupCV", signature = c(x = "ANY"), .perform_DMNgroup_cv) #' @rdname calculateDMN #' @export setMethod("performDMNgroupCV", signature = c(x = "SummarizedExperiment"), - function(x, variable, - assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", - transposed = FALSE, ...){ - mat <- assay(x, assay.type) - if(!transposed){ - mat <- t(mat) - } - variable <- colData(x)[,variable] - if(is.null(variable)){ - stop("No data found in '",variable,"' column of colData(x).", - call. = FALSE) - } - performDMNgroupCV(x = mat, variable = variable, ...) - } + function(x, variable, + assay.type = assay_name, assay_name = exprs_values, + exprs_values = "counts", transposed = FALSE, ...){ + mat <- assay(x, assay.type) + if(!transposed){ + mat <- t(mat) + } + variable <- colData(x)[,variable] + if(is.null(variable)){ + stop("No data found in '",variable, + "' column of colData(x).", call. = FALSE) + } + performDMNgroupCV(x = mat, variable = variable, ...) + } ) diff --git a/R/cluster.R b/R/cluster.R index 3b4b51132..737053d34 100644 --- a/R/cluster.R +++ b/R/cluster.R @@ -27,11 +27,11 @@ #' By default, clustering is done on the features. #' #' @return -#' \code{cluster} returns an object of the same type as the \code{x} parameter +#' \code{addCluster} returns an object of the same type as the \code{x} parameter #' with clustering information named \code{clusters} stored in \code{colData} #' or \code{rowData}. #' -#' @name cluster +#' @name addCluster #' @export #' #' @author Basil Courbayre @@ -42,10 +42,10 @@ #' tse <- GlobalPatterns #' #' # Cluster on rows using Kmeans -#' tse <- cluster(tse, KmeansParam(centers = 3)) +#' tse <- addCluster(tse, KmeansParam(centers = 3)) #' #' # Clustering done on the samples using Hclust -#' tse <- cluster(tse, +#' tse <- addCluster(tse, #' MARGIN = "samples", #' HclustParam(metric = "bray", dist.fun = vegan::vegdist)) #' @@ -54,19 +54,20 @@ #' NULL -#' @rdname cluster +#' @rdname addCluster #' @export -setGeneric("cluster", signature = c("x"), +setGeneric("addCluster", signature = c("x"), function( x, BLUSPARAM, assay.type = assay_name, assay_name = "counts", MARGIN = "features", full = FALSE, name = "clusters", clust.col = "clusters", ...) - standardGeneric("cluster")) + standardGeneric("addCluster")) -#' @rdname cluster + +#' @rdname addCluster #' @export #' @importFrom bluster clusterRows -setMethod("cluster", signature = c(x = "SummarizedExperiment"), +setMethod("addCluster", signature = c(x = "SummarizedExperiment"), function( x, BLUSPARAM, assay.type = assay_name, assay_name = "counts", MARGIN = "features", full = FALSE, diff --git a/R/deprecate.R b/R/deprecate.R index c2cbce75c..b6aa35b9d 100644 --- a/R/deprecate.R +++ b/R/deprecate.R @@ -1,12 +1,30 @@ #' These functions will be deprecated. Please use other functions instead. #' -#' @param x a \code{\link{SummarizedExperiment}} object - -#' -#' @param ... - +#' @param x A +#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} +#' object. +#' +#' @param ... Additional parameters. See dedicated function. #' #' @name deprecate NULL +#' @rdname deprecate +setGeneric("cluster", signature = c("x"), + function(x,...) + standardGeneric("cluster")) + +#' @rdname deprecate +#' @export +#' @importFrom bluster clusterRows +setMethod("cluster", signature = c(x = "SummarizedExperiment"), + function(x,...){ + .Deprecated(msg = paste0("'cluster' is deprecated. ", + "Use 'addCluster' instead.")) + addCluster(x,...) + } +) + #' @rdname deprecate setGeneric("addTaxonomyTree", signature = "x", @@ -16,10 +34,10 @@ setGeneric("addTaxonomyTree", #' @rdname deprecate #' @export setMethod("addTaxonomyTree", signature = c(x = "SummarizedExperiment"), - function(x){ - .Deprecated(msg = paste0("'addTaxonomyTree' is deprecated.", + function(x,...){ + .Deprecated(msg = paste0("'addTaxonomyTree' is deprecated. ", "Use 'addHierarchyTree' instead.")) - addHierarchyTree(x) + addHierarchyTree(x,...) } ) @@ -32,9 +50,9 @@ setGeneric("taxonomyTree", #' @rdname deprecate #' @export setMethod("taxonomyTree", signature = c(x = "SummarizedExperiment"), - function(x){ - .Deprecated(msg = paste0("'taxonomyTree' is deprecated.", + function(x,...){ + .Deprecated(msg = paste0("'taxonomyTree' is deprecated. ", "Use 'getHierarchyTree' instead.")) - getHierarchyTree(x) + getHierarchyTree(x,...) } ) diff --git a/man/cluster.Rd b/man/addCluster.Rd similarity index 89% rename from man/cluster.Rd rename to man/addCluster.Rd index 0ab9513f6..b5699c46f 100644 --- a/man/cluster.Rd +++ b/man/addCluster.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster.R -\name{cluster} -\alias{cluster} -\alias{cluster,SummarizedExperiment-method} +\name{addCluster} +\alias{addCluster} +\alias{addCluster,SummarizedExperiment-method} \title{Clustering wrapper} \usage{ -cluster( +addCluster( x, BLUSPARAM, assay.type = assay_name, @@ -17,7 +17,7 @@ cluster( ... ) -\S4method{cluster}{SummarizedExperiment}( +\S4method{addCluster}{SummarizedExperiment}( x, BLUSPARAM, assay.type = assay_name, @@ -59,7 +59,7 @@ transformation is applied sample (column) or feature (row) wise. \item{...}{Additional parameters to use altExps for example} } \value{ -\code{cluster} returns an object of the same type as the \code{x} parameter +\code{addCluster} returns an object of the same type as the \code{x} parameter with clustering information named \code{clusters} stored in \code{colData} or \code{rowData}. } @@ -83,10 +83,10 @@ data(GlobalPatterns, package = "mia") tse <- GlobalPatterns # Cluster on rows using Kmeans -tse <- cluster(tse, KmeansParam(centers = 3)) +tse <- addCluster(tse, KmeansParam(centers = 3)) # Clustering done on the samples using Hclust -tse <- cluster(tse, +tse <- addCluster(tse, MARGIN = "samples", HclustParam(metric = "bray", dist.fun = vegan::vegdist)) diff --git a/man/deprecate.Rd b/man/deprecate.Rd index 56ec43586..4c394e4fc 100644 --- a/man/deprecate.Rd +++ b/man/deprecate.Rd @@ -2,26 +2,32 @@ % Please edit documentation in R/deprecate.R \name{deprecate} \alias{deprecate} +\alias{cluster} +\alias{cluster,SummarizedExperiment-method} \alias{addTaxonomyTree} \alias{addTaxonomyTree,SummarizedExperiment-method} \alias{taxonomyTree} \alias{taxonomyTree,SummarizedExperiment-method} \title{These functions will be deprecated. Please use other functions instead.} \usage{ +cluster(x, ...) + +\S4method{cluster}{SummarizedExperiment}(x, ...) + addTaxonomyTree(x, ...) -\S4method{addTaxonomyTree}{SummarizedExperiment}(x) +\S4method{addTaxonomyTree}{SummarizedExperiment}(x, ...) taxonomyTree(x, ...) -\S4method{taxonomyTree}{SummarizedExperiment}(x) +\S4method{taxonomyTree}{SummarizedExperiment}(x, ...) } \arguments{ -\item{x}{a \code{\link{SummarizedExperiment}} object -} +\item{x}{A +\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} +object.} -\item{...}{\itemize{ -\item -}} +\item{...}{Additional parameters. See dedicated function.} } \description{ These functions will be deprecated. Please use other functions instead. diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index dd53ec81a..b5dc9c0b4 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -58,7 +58,7 @@ reference: - subtitle: Clustering - contents: - calculateDMN - - cluster + - addCluster - subtitle: decontam - contents: - isContaminant diff --git a/tests/testthat/test-9cluster.R b/tests/testthat/test-9cluster.R index 035a7ca42..2319b0993 100644 --- a/tests/testthat/test-9cluster.R +++ b/tests/testthat/test-9cluster.R @@ -4,46 +4,46 @@ test_that("subsampleCounts", { data(GlobalPatterns, package="mia") # Parameters validity check - expect_error(cluster(GlobalPatterns, - KmeansParam(centers = 3), - assay.type = "error")) + expect_error(addCluster(GlobalPatterns, + KmeansParam(centers = 3), + assay.type = "error")) # Checking wrong MARGIN (char) - expect_error(cluster(GlobalPatterns, - KmeansParam(centers = 3), - MARGIN = "error")) + expect_error(addCluster(GlobalPatterns, + KmeansParam(centers = 3), + MARGIN = "error")) # Checking wrong MARGIN (number) - expect_error(cluster(GlobalPatterns, + expect_error(addCluster(GlobalPatterns, KmeansParam(centers = 3), MARGIN = 3)) - tse <- cluster(GlobalPatterns, + tse <- addCluster(GlobalPatterns, KmeansParam(centers = 3), name = "custommetadata", full = TRUE, clust.col = "customdataname") altExp(tse, "test") <- tse[1:1000,] # Checking same name that is already present - expect_warning(cluster(tse, + expect_warning(addCluster(tse, KmeansParam(centers = 3), name = "custommetadata", full = TRUE)) # Checking wrong clust.col with already-present name - expect_warning(cluster(tse, + expect_warning(addCluster(tse, KmeansParam(centers = 3), clust.col = "customdataname")) # Checking wrong altexp - expect_error(cluster(tse, - KmeansParam(centers = 3), - altexp = "error")) + expect_error(addCluster(tse, + KmeansParam(centers = 3), + altexp = "error")) # Parameters check tse <- GlobalPatterns altExp(tse, "test") <- tse[1:1000,] - tse <- cluster(tse, + tse <- addCluster(tse, KmeansParam(centers = 3), name = "custommetadata", full = TRUE, clust.col = "customdataname") - tse <- cluster(tse, + tse <- addCluster(tse, KmeansParam(centers = 3), name = "custommetadata", full = TRUE, @@ -56,14 +56,14 @@ test_that("subsampleCounts", { expect_true("customdataname" %in% names(rowData(altExp(tse, "test")))) # Checking existing custom metadata/dataname in main/altExp - expect_warning(cluster(tse, + expect_warning(addCluster(tse, KmeansParam(centers = 3), name = "custommetadata", full = TRUE)) - expect_warning(cluster(tse, + expect_warning(addCluster(tse, KmeansParam(centers = 3), clust.col = "customdataname")) - expect_warning(cluster(tse, + expect_warning(addCluster(tse, KmeansParam(centers = 3), name = "custommetadata", full = TRUE, @@ -72,23 +72,23 @@ test_that("subsampleCounts", { # Checking working MARGIN tse <- GlobalPatterns altExp(tse, "test") <- tse[1:1000,] - tse <- cluster(tse, - KmeansParam(centers = 3), - MARGIN = "col") - tse <- cluster(tse, - KmeansParam(centers = 3), - altexp = "test", - MARGIN = 2) + tse <- addCluster(tse, + KmeansParam(centers = 3), + MARGIN = "col") + tse <- addCluster(tse, + KmeansParam(centers = 3), + altexp = "test", + MARGIN = 2) expect_true("clusters" %in% names(colData(tse))) expect_true("clusters" %in% names(colData(altExp(tse, "test")))) # Checking wrapper operational tse <- GlobalPatterns altExp(tse, "test") <- tse[1:2000,] - tse <- cluster(tse, + tse <- addCluster(tse, HclustParam(), MARGIN = "col") - tse <- cluster(tse, + tse <- addCluster(tse, HclustParam(), MARGIN = "row", altexp = "test", @@ -100,8 +100,8 @@ test_that("subsampleCounts", { expect_identical(expectedCol, colData(tse)$clusters) # Checking same output on rows expect_identical(expectedRow$clusters, - rowData(altExp(tse, "test"))$clusters) + rowData(altExp(tse, "test"))$clusters) # Checking same metdata output on rows expect_identical(expectedRow$objects, - metadata(altExp(tse, "test"))$clusters) + metadata(altExp(tse, "test"))$clusters) })