Skip to content

Commit

Permalink
Merge pull request #128 from gdrplatform/GDR-2558.2
Browse files Browse the repository at this point in the history
Gdr 2558.2
  • Loading branch information
bczech authored Aug 20, 2024
2 parents 79ba8c0 + 65ddfe3 commit 9d7cbf2
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 20 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: gDRutils
Type: Package
Title: A package with helper functions for processing drug response data
Version: 1.3.8
Date: 2024-08-06
Version: 1.3.9
Date: 2024-08-14
Authors@R: c(person("Bartosz", "Czech", role=c("aut"),
comment = c(ORCID = "0000-0002-9908-3007")),
person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="[email protected]",
Expand Down Expand Up @@ -60,7 +60,7 @@ biocViews: Software, Infrastructure
VignetteBuilder: knitr
ByteCompile: TRUE
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
SwitchrLibrary: gDRutils
DeploySubPath: gDRutils
Encoding: UTF-8
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export(aggregate_assay)
export(apply_bumpy_function)
export(assert_choices)
export(average_biological_replicates_dt)
export(calc_sd)
export(capVals)
export(cap_xc50)
export(convert_colData_to_json)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## gDRutils 1.3.9 - 2024-08-14
* extend the logic of `average_biological_replicates_dt` to calculate standard deviation

## gDRutils 1.3.8 - 2024-08-06
* add functions for setting unique identifiers in the colData and rowData of SE

Expand Down
3 changes: 2 additions & 1 deletion R/packages.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ if (getRversion() >= "2.15.1") {
"cId",
"concs",
"type",
"name"
"name",
"count"
),
utils::packageName())
}
58 changes: 45 additions & 13 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,6 @@ geometric_mean <- function(x, fixed = TRUE, maxlog10Concentration = 1) {
exp(mean(log(x)))
}

#' Average biological replicates.
#'
#' Average biological replicates on the data table side.
#'
#' @param dt data.table with Metric data
Expand All @@ -435,6 +433,7 @@ geometric_mean <- function(x, fixed = TRUE, maxlog10Concentration = 1) {
#' @param fixed Flag indicating whether to add a fix for -Inf in the geometric mean.
#' @param geometric_average_fields Character vector of column names in \code{dt}
#' to take the geometric average of.
#' @param add_sd Flag indicating whether to add standard deviation and count columns.
#'
#' @examples
#' dt <- data.table::data.table(a = c(1:10, 1),
Expand All @@ -449,28 +448,39 @@ average_biological_replicates_dt <- function(
var,
prettified = FALSE,
fixed = TRUE,
geometric_average_fields = get_header("metric_average_fields")$geometric_mean) {
geometric_average_fields = get_header("metric_average_fields")$geometric_mean,
add_sd = FALSE) {

data <- data.table::copy(dt)

if (prettified) {
pidfs <- get_prettified_identifiers()
iso_cols <- prettify_flat_metrics(get_header("iso_position"),
human_readable = TRUE)
iso_cols <- prettify_flat_metrics(get_header("iso_position"), human_readable = TRUE)
id_cols <- prettify_flat_metrics(get_header("id"), human_readable = TRUE)

} else {
pidfs <- get_env_identifiers()
iso_cols <- get_header("iso_position")
id_cols <- prettify_flat_metrics(get_header("id"))
}

average_fields <- setdiff(names(Filter(is.numeric, data)), c(unlist(pidfs),
var,
iso_cols))
average_fields <- setdiff(names(Filter(is.numeric, data)), c(unlist(pidfs), var, iso_cols))
geometric_average_fields <- intersect(geometric_average_fields, names(dt))
group_by <- setdiff(names(data),
c(average_fields, var, id_cols))
group_by <- grep("Fit Type", group_by, invert = TRUE, value = TRUE)
group_by <- setdiff(names(data), c(average_fields, var, id_cols, "fit_type", "Fit Type"))

if (add_sd) {
# Calculate standard deviation for both average_fields and geometric_average_fields
sd_fields <- paste0(average_fields, "_sd")
geom_sd_fields <- paste0(geometric_average_fields, "_sd")

data <- data[, (sd_fields) := lapply(.SD, calc_sd),
.SDcols = average_fields, by = group_by]
data <- data[, (geom_sd_fields) := lapply(.SD, calc_sd),
.SDcols = geometric_average_fields, by = group_by]

# Calculate count and add as a single column
data <- data[, count := .N, by = group_by]
}

data <- data[, (var) := NULL][,
(average_fields) := lapply(.SD, mean, na.rm = TRUE),
.SDcols = average_fields,
Expand All @@ -480,9 +490,9 @@ average_biological_replicates_dt <- function(
}),
.SDcols = geometric_average_fields,
by = group_by]

unique(data, by = group_by)
}

#' Helper function to find duplicated rows
#'
#' @param x data frame
Expand Down Expand Up @@ -733,3 +743,25 @@ get_additional_variables <- function(dt_list,
}
}

#' Calculate Standard Deviation or Return Zero
#'
#' This function calculates the standard deviation of a numeric vector.
#' If the vector has a length of 1 and it is numeric, it returns 0.
#'
#' @param x A numeric vector.
#' @return The standard deviation of the vector if its length is greater than 1 or it is not numeric, otherwise 0.
#' @examples
#' calc_sd(c(1, 2, 3, 4, 5)) # Should return the standard deviation
#' calc_sd(c(1)) # Should return 0
#' calc_sd(numeric(0)) # Should return NA
#' calc_sd(c("a", "b", "c")) # Should return NA
#' @keywords package_utils
#' @export
calc_sd <- function(x) {
if (length(x) == 1 && is.numeric(x) && !is.na(x)) {
return(0)
} else {
return(stats::sd(x, na.rm = TRUE))
}
}

7 changes: 5 additions & 2 deletions man/average_biological_replicates_dt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/calc_sd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,15 @@ test_that("average_biological_replicates_dt works as expected", {
expect_equal(dim(metrics_data), c(60, 29))
expect_equal(dim(avg_metrics_data), c(40, 28))
expect_true(!"Ligand" %in% names(avg_metrics_data))

avg_metrics_data2 <- average_biological_replicates_dt(dt = metrics_data,
var = "Ligand",
prettified = TRUE,
add_sd = TRUE)

expect_equal(dim(avg_metrics_data2), c(40, 44))
expect_equal(sum(grepl("_sd", names(avg_metrics_data2))), 15)
expect_true("count" %in% names(avg_metrics_data2))
})

test_that("get_duplicated_rows works as expected", {
Expand Down Expand Up @@ -557,4 +566,15 @@ test_that("capVals works as expected", {
expect_equal(dt1c, dt2)

expect_error(capVals(as.list(dt1)), "Must be a data.table")
})
})

test_that("calc_sd works as expected", {
expect_equal(calc_sd(c(1, 2, 3, 4, 5)), sd(c(1, 2, 3, 4, 5), na.rm = TRUE))
expect_equal(calc_sd(c(10, 20, 30)), sd(c(10, 20, 30), na.rm = TRUE))
expect_equal(calc_sd(c(1)), 0)
expect_true(is.na(calc_sd("2")))
expect_true(is.na(calc_sd(TRUE)))
expect_true(is.na(calc_sd(numeric(0))))
expect_equal(calc_sd(c(1, 2, NA, 4, 5)), sd(c(1, 2, NA, 4, 5), na.rm = TRUE))
expect_true(is.na(calc_sd(c(NA, NA, NA))))
})

0 comments on commit 9d7cbf2

Please sign in to comment.