Skip to content

Commit

Permalink
Merge pull request #141 from gdrplatform/GDR-2801
Browse files Browse the repository at this point in the history
feat: make split_SE_components working correctly for sa assay data, m…
  • Loading branch information
gladkia authored Dec 17, 2024
2 parents 3650f71 + d735bc0 commit 10a0dcc
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 13 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: gDRutils
Type: Package
Title: A package with helper functions for processing drug response data
Version: 1.5.4
Date: 2024-12-09
Version: 1.5.5
Date: 2024-12-10
Authors@R: c(person("Bartosz", "Czech", role=c("aut"),
comment = c(ORCID = "0000-0002-9908-3007")),
person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="[email protected]",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## gDRutils 1.5.5 - 2024-12-10
* make `split_SE_components` work correctly for sa assay data modified with `average_biological_replicates`

## gDRutils 1.5.4 - 2024-12-09
* minor improvement in the logic of `average_biological_replicates` (new blacklisted column)

Expand Down
11 changes: 8 additions & 3 deletions R/fit_curves.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ fit_curves <- function(df_,
if (length(setdiff(opt_fields, colnames(df_))) > 0L) {
df_[, setdiff(opt_fields, colnames(df_))] <- NA
}
df_metrics <- .apllyLogisticFit(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit,
df_metrics <- .applyLogisticFit(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit,
pcutoff, cap, n_point_cutoff)

is_unique_normalization_type_and_fit_source <-
Expand All @@ -94,7 +94,7 @@ fit_curves <- function(df_,
}

#' @keywords internal
.apllyLogisticFit <- function(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit,
.applyLogisticFit <- function(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit,
pcutoff, cap, n_point_cutoff) {

df_metrics <- NULL
Expand Down Expand Up @@ -487,7 +487,12 @@ logistic_metrics <- function(c, x_metrics) {
#' @keywords fit_curves
#' @export
.setup_metric_output <- function() {
resp_metric_cols <- c(get_header("response_metrics"), "maxlog10Concentration", "N_conc")
resp_metric_all_cols <- c(get_header("response_metrics"), "maxlog10Concentration", "N_conc")
# drop columns whose names end with "_sd":
# they are absent from the primary assays and appear
# only in assays produced by averaging biological replicates
resp_metric_cols <- resp_metric_all_cols[!endsWith(resp_metric_all_cols, "_sd")]

out <- as.list(rep(NA, length(resp_metric_cols)))
names(out) <- resp_metric_cols
out
Expand Down
4 changes: 3 additions & 1 deletion R/flatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ flatten <- function(tbl, groups, wide_cols, sep = "_") {
uniquifying <- unique(uniquifying)

out <- split(subset(tbl, select = -idx), subset(tbl, select = idx), sep = sep)
missing <- setdiff(wide_cols, colnames(tbl))

# in original assays there are no columns with SD-related data (with names ending with "_sd")
missing <- setdiff(wide_cols[!grepl("_sd$", wide_cols)], colnames(tbl))
if (length(missing) != 0L) {
warning(sprintf("missing listed wide_cols columns: '%s'", paste0(missing, collapse = ", ")))
}
Expand Down
48 changes: 44 additions & 4 deletions R/headers_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@

# Column names registered under the "metrics_results" header: a set of literal
# column names followed by the "response_metrics" entry and the (character-coerced)
# "metrics_names" entry of HEADERS_LIST.
# NOTE(review): the "*_sd" and "count" entries presumably only occur in data
# produced by averaging biological replicates -- confirm against the caller.
HEADERS_LIST[["metrics_results"]] <- c(
"maxlog10Concentration",
"maxlog10Concentration_sd",
"N_conc",
"N_conc_sd",
"cotrt_value",
"source",
"count",
HEADERS_LIST[["response_metrics"]],
as.character(HEADERS_LIST[["metrics_names"]])
)
Expand Down Expand Up @@ -112,7 +115,11 @@
"x",
"x_std",
"std_RelativeViability",
"std_GRvalue"
"std_GRvalue",
# after averaging for biological replicates
"count",
"x_sd",
"x_std_sd"
)
}

Expand All @@ -132,7 +139,18 @@
"p_value",
"rss",
"x_sd_avg",
"fit_type"
"fit_type",
"x_mean_sd",
"x_AOC_sd",
"x_AOC_range_sd",
"xc50_sd",
"x_max_sd",
"ec50_sd",
"x_inf_sd",
"x_0_sd",
"h_sd",
"r2_sd",
"x_sd_avg_sd"
)
}

Expand All @@ -153,7 +171,18 @@
"RV_p_value",
"RV_rss",
"RV_sd_avg",
"fit_type_RV"
"fit_type_RV",
"RV_mean_sd",
"RV_AOC_sd",
"RV_AOC_range_sd",
"IC50_sd",
"E_max_sd",
"EC50_sd",
"E_inf_sd",
"E_0_sd",
"h_RV_sd",
"RV_r2_sd",
"RV_sd_avg_sd"
),
GR = c(
"GR_mean",
Expand All @@ -169,7 +198,18 @@
"GR_p_value",
"GR_rss",
"GR_sd_avg",
"fit_type_GR"
"fit_type_GR",
"GR_mean_sd",
"GR_AOC_sd",
"GR_AOC_range_sd",
"GR50_sd",
"GR_max_sd",
"GEC50_sd",
"GR_inf_sd",
"GR_0_sd",
"h_GR_sd",
"GR_r2_sd",
"GR_sd_avg_sd"
)
)
}
Expand Down
5 changes: 2 additions & 3 deletions R/split_SE_components.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,11 @@ split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) {
df_ <- S4Vectors::DataFrame(df_, check.names = FALSE)
all_cols <- colnames(df_)
# Identify known data fields.
data_fields <- c(get_header("raw_data"), get_header("normalized_results"),
data_fields <- unique(c(get_header("raw_data"), get_header("normalized_results"),
get_header("averaged_results"),
get_header("metrics_results"), get_env_identifiers("concentration", simplify = TRUE),
identifiers_md$well_position, identifiers_md$template, nested_keys,
get_header("scores"), get_header("excess"), get_header("isobolograms"))
data_fields <- unique(data_fields)
get_header("scores"), get_header("excess"), get_header("isobolograms")))
data_cols <- data_fields[data_fields %in% all_cols]
md_cols <- setdiff(all_cols, data_cols)
md <- unique(df_[, md_cols])
Expand Down

0 comments on commit 10a0dcc

Please sign in to comment.