Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gdr 2224 #98

Merged
merged 30 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b608f46
fix: bug with wrong refining rowdata
bczech Nov 14, 2023
b567cbe
chore: bump version and update NEWS.md
bczech Nov 15, 2023
1f16cb4
refactor: get rid of transforming rowData into S4Vectors
bczech Nov 15, 2023
e2d2410
test: fix unit tests
bczech Nov 20, 2023
d06c76d
refactor: add more headers
bczech Nov 24, 2023
dca7dbc
refactor: fix typo
bczech Nov 29, 2023
f799317
refactor: move average biological replicates function
bczech Nov 29, 2023
9d70c72
refactor: move functions for supporting additional metadata
bczech Dec 1, 2023
8ff9d41
chore: bump version
bczech Dec 1, 2023
38231f7
test: fix unit tests
bczech Dec 1, 2023
1328469
test: update unit tests
bczech Dec 1, 2023
8f20b9f
refactor: add missing asserts
bczech Dec 1, 2023
01444b9
doc: reoxygenate
bczech Dec 1, 2023
ff6992a
refactor: fix averaging
bczech Dec 1, 2023
dc76673
refactor: add support for ids
bczech Dec 1, 2023
2e88c68
refactor: extend the list of metrics for averaging
bczech Dec 4, 2023
32b9f95
refactor: update headers
bczech Dec 4, 2023
5545967
refactor: update logic for averaging
bczech Dec 4, 2023
5f1a607
test: fix unit tests
bczech Dec 4, 2023
8aab64c
test: add unit tests
bczech Dec 4, 2023
2ed154a
test: add unit tests
bczech Dec 4, 2023
8e37463
test: fix unit tests
bczech Dec 5, 2023
2efbf46
test: update unit tests
bczech Dec 5, 2023
73667aa
refactor: add missing namespace
bczech Dec 5, 2023
2356975
ci: trigger build
bczech Dec 5, 2023
92adbb7
refactor: move function
bczech Dec 6, 2023
3ac0dc8
refactor: update namespace
bczech Dec 6, 2023
cd0591d
test: update unit test
bczech Dec 6, 2023
5f2920b
doc: add missing examples
bczech Dec 6, 2023
42a60f4
doc: reoxygenate
bczech Dec 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: gDRutils
Type: Package
Title: A package with helper functions for processing drug response data
Version: 1.1.1
Date: 2023-11-22
Version: 1.1.2
Date: 2023-12-01
Authors@R: c(person("Bartosz", "Czech", role=c("aut")),
person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="[email protected]"),
person("Aleksander", "Chlebowski", role=c("aut")),
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
# Generated by roxygen2: do not edit by hand

S3method(modifyData,data_source)
S3method(modifyData,default)
S3method(modifyData,drug_name2)
export(.calculate_complement)
export(.clean_key_inputs)
export(.set_constant_fit_params)
export(.set_invalid_fit_params)
export(.set_mean_params)
export(.setup_metric_output)
export(MAEpply)
export(addClass)
export(aggregate_assay)
export(apply_bumpy_function)
export(assert_choices)
export(average_biological_replicates_dt)
export(cap_xc50)
export(convert_combo_data_to_dt)
export(convert_mae_assay_to_dt)
Expand All @@ -36,6 +41,7 @@ export(get_combo_base_assay_names)
export(get_combo_col_settings)
export(get_combo_score_assay_names)
export(get_default_identifiers)
export(get_duplicated_rows)
export(get_env_assay_names)
export(get_env_identifiers)
export(get_expect_one_identifiers)
Expand All @@ -59,6 +65,7 @@ export(mcolData)
export(merge_SE)
export(merge_assay)
export(merge_metadata)
export(modifyData)
export(mrowData)
export(predict_conc_from_efficacy)
export(predict_efficacy_from_conc)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.1.2 (2023-12-01)
- fix bug with refining rowData
- extend the list of headers

## 1.1.1 (2023-11-22)
- sync master with devel branch
- update schema to support NA in reference division time
Expand Down
35 changes: 31 additions & 4 deletions R/headers_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
normalized_results = .getNormalizedResultsList(),
averaged_results = .getAveragedResultsList(),
response_metrics = .getResponseMetricsList(),
metric_average_filds = .getMetricAverageFilds(),
metric_average_fields = .getMetricAverageFields(),
# corresponds to the field "celllinename", "primarytissue", "doublingtime" from gneDB CLIDs
add_clid = get_env_identifiers(c("cellline_name", "cellline_tissue",
"cellline_parental_identifier",
Expand Down Expand Up @@ -42,6 +42,7 @@
get_env_identifiers("drug_name", simplify = TRUE),
get_env_identifiers("masked_tag", simplify = TRUE),
paste0(get_env_identifiers("drug_name", simplify = TRUE), "_", 2:10),
paste0(get_env_identifiers("drug_moa", simplify = TRUE), "_", 2:10),
HEADERS_LIST[["raw_data"]],
HEADERS_LIST[["normalized_results"]],
HEADERS_LIST[["averaged_results"]],
Expand All @@ -51,6 +52,26 @@

HEADERS_LIST[["ordered_1"]] <- .orderHeaderList(HEADERS_LIST, 1)
HEADERS_LIST[["ordered_2"]] <- .orderHeaderList(HEADERS_LIST, 2)

HEADERS_LIST[["id"]] <- c("rId", "cId")


HEADERS_LIST[["iso_position"]] <- c("iso_level",
"pos_x",
"pos_y",
"pos_x_ref",
"pos_y_ref")


HEADERS_LIST[["isobolograms"]] <- c("normalization_type",
HEADERS_LIST[["iso_position"]],
"log2_CI",
"log10_ratio_conc")

HEADERS_LIST[["obsolete"]] <- c("RV",
"GR",
"Excess")


HEADERS_LIST
}
Expand Down Expand Up @@ -83,7 +104,9 @@
.getAveragedResultsList <- function() {
c(
"x",
"x_std"
"x_std",
"std_RelativeViability",
"std_GRvalue"
)
}

Expand Down Expand Up @@ -140,7 +163,7 @@
}

#' @keywords internal
.getMetricAverageFilds <- function() {
.getMetricAverageFields <- function() {
list(
mean = c(
"x_mean",
Expand All @@ -152,7 +175,11 @@
),
geometric_mean = c(
"xc50",
"ec50"
"ec50",
"GR50",
"GEC50",
"IC50",
"EC50"
)
)
}
Expand Down
199 changes: 199 additions & 0 deletions R/manage_additional_metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#' add arbitrary S3 class to an object
#'
#' Modify and object's \code{class} attribute.
#'
#' This is a simple convenience function that an item to the \code{class} attribute of an object
#' so that it can be dispatched to a proper S3 method. This is purely for code clarity,
#' so that individual methods do not clutter the definitions of higher order functions.
#'
#' @param x an object
#' @param newClass character string; class to be added
#'
#' @return The same object with an added S3 class.
#'
#' @examples
#' addClass(data.table::data.table(), "someClass")
#'
#' @export
#'
addClass <- function(x, newClass) {
checkmate::assert_string(newClass)
if (!is(x, newClass)) {
class(x) <- c(newClass, class(x))
}
return(x)
}

#' modify assay with additional data
#'
#' Reduce dimensionality of an assay by dropping extra data or combining variables.
#'
#' If an essay extracted from a \code{SummarizedExperiment} contains additional information,
#' i.e. factors beyond \code{DrugName} and \code{CellLineName}, that information will be treated
#' in one of three ways, depending on the value of \code{option}:
#'
#' \itemize{
#' \item{\code{drop}: Some information will be discarded and only one value
#' of the additional variable (chosen by the user) will be kept.
#' }
#' \item{\code{toDrug}: The information is pasted together with the primary drug name.
#' All observations are kept.
#' }
#' \item{\code{toCellLine}: As above, but pasting is done with cell line name.
#' }

#' }
#'
#' Depending on the type of additional information, the exact details will differ.
#' This is handled in the app by adding special classes to the data tables and dispatching to S3 methods.
#'
#' Following modification, the additional columns are discarded.
#'
#' @param x a \code{data.table} containing an assay
#' @param ... additional arguments passed to methods
#' @param option character string specifying the action to be taken, see \code{Details}
#' @param keep character string specifying the value of the active variable that will be kept
#'
#' @examples
#' dt <- data.table::data.table(a = as.character(1:10), b = "data")
#' dt <- addClass(dt, "a")
#' modifyData(dt, "average", keep = "b")
#'
#' @export
#'
modifyData <- function(x, ...) {
UseMethod("modifyData")
}

#' @export
#' @describeIn modifyData includes the name and concentration of the second drug
modifyData.drug_name2 <- function(x, option, keep, ...) {
checkmate::assert_data_table(x)
checkmate::assert_string(option)
checkmate::assert_choice(option, c("average", "toDrug", "toCellLine"))
checkmate::assert_string(keep, null.ok = TRUE)

pidfs <- get_prettified_identifiers(simplify = TRUE)
drug_name <- pidfs[["drug_name"]]
drug_name2 <- pidfs[["drug_name2"]]
conc2 <- pidfs[["concentration2"]]
drug2 <- pidfs[["drug2"]]
cell_name <- pidfs[["cellline_name"]]

if (option == "average") {
# drop data and keep only the requested value
x <- average_biological_replicates_dt(x, drug_name2)
} else {
# ensure concentration of co-drug is a numeric value
if (is.factor(x[[conc2]])) {
x[[conc2]] <- as.character(x[[conc2]])
}
if (is.character(x[[conc2]])) {
x[[conc2]] <- as.numeric(x[[conc2]])
}
if (option == "toDrug") {
x[[drug_name]] <-
sprintf("%s (%s = %s at %.4f &mu;M)", x[[drug_name]], drug_name2, x[[drug_name2]], x[[conc2]])
x[[drug_name]] <-
sub(" \\(.*? at 0\\.?0* &mu;M\\)", "", x[[drug_name]])
} else if (option == "toCellLine") {
x[[cell_name]] <-
sprintf("%s (%s = %s at %.4f &mu;M)", x[[cell_name]], drug_name2, x[[drug_name2]], x[[conc2]])
x[[cell_name]] <-
sub(" \\(.*? at 0\\.?0* &mu;M\\)", "", x[[cell_name]])
}
}

# drop the additional columns
x[c(drug_name2, conc2, drug2)] <- NULL
# remove special class
class(x) <- setdiff(class(x), "drug_name2")
return(x)
}

#' @export
#' @describeIn modifyData includes the data source
modifyData.data_source <- function(x, option, keep, ...) {
checkmate::assert_data_table(x)
checkmate::assert_string(option)
checkmate::assert_choice(option, c("average", "toDrug", "toCellLine"))
checkmate::assert_string(keep, null.ok = TRUE)

pidfs <- get_prettified_identifiers(simplify = TRUE)
dt_src <- pidfs[["data_source"]]
drug <- pidfs[["drug_name"]]
clid <- pidfs[["cellline"]]
cl_name <- pidfs[["cellline_name"]]

if (option == "average") {
# drop data and keep only the requested value
x <- average_biological_replicates_dt(x, dt_src)
} else {
duplicated_rows <- get_duplicated_rows(x, c(drug, clid))
if (option == "toDrug") {
drugs_to_combine <- unique(x[duplicated_rows, drug])
drug_idx <- which(x[[drug]] %in% drugs_to_combine)
drug_to_replace <- x[drug_idx, drug]
x[drug_idx, drug] <-
vapply(
seq_len(length(drug_to_replace)),
function(y) sprintf("%s (%s)", drug_to_replace[y], x[, dt_src][y]), "string")
} else if (option == "toCellLine") {
cell_lines_to_combine <- unique(x[duplicated_rows, cl_name])
cell_line_idx <- which(x[[cl_name]] %in% cell_lines_to_combine)
x[cell_line_idx, cl_name] <-
sprintf("%s (%s)", x[cell_line_idx, cl_name], x[cell_line_idx, dt_src])
}
}
# drop the additional columns
x[, c(dt_src) := NULL]
# remove special class
class(x) <- setdiff(class(x), dt_src)
return(x)
}


#' @export
#' @describeIn modifyData includes the name of other additional variables
modifyData.default <- function(x, option, keep, ...) {
checkmate::assert_data_table(x)
checkmate::assert_string(option)
checkmate::assert_choice(option, c("average", "toDrug", "toCellLine"))
checkmate::assert_string(keep, null.ok = TRUE)
pidfs <- get_prettified_identifiers(simplify = TRUE)
additional_var_names <- class(x)[[1]]
additional_var <- ifelse(additional_var_names %in% names(pidfs),
pidfs[[additional_var_names]],
additional_var_names)
cell_name <- pidfs[["cellline_name"]]
drug_name <- pidfs[["drug_name"]]

if (option == "average") {
# drop data and keep only the requested value
x <- average_biological_replicates_dt(x, additional_var)
} else {
if (option == "toDrug") {
x <- modify_label(x, drug_name, additional_var)
} else if (option == "toCellLine") {
x <- modify_label(x, cell_name, additional_var)
}
}
# drop the additional columns
if ("data.table" %in% class(x)) {
x[[additional_var]] <- NULL
} else {
x[additional_var] <- NULL
}
# remove special class
class(x) <- setdiff(class(x), additional_var_names)
return(x)
}

#' @keywords internal
modify_label <- function(x, option, var_name) {
bczech marked this conversation as resolved.
Show resolved Hide resolved
x[[option]] <-
sprintf("%s (%s = %s)", x[[option]], var_name, as.character(x[[var_name]]))
x[[option]] <-
sub(" \\(.*? at 0\\.?0* &mu;M\\)", "", x[[option]])
x
}
7 changes: 5 additions & 2 deletions R/standardize_MAE.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ rename_bumpy <- function(bumpy, mapping_vector) {
#' @return a charvec containing the names of the optional identifiers in the SE colData
#'
get_optional_coldata_fields <- function(se) {
checkmate::assert_class(se, "SummarizedExperiment")
idfs <- get_SE_identifiers(se)

as.character(idfs["cellline_tissue"])
Expand All @@ -197,14 +198,16 @@ get_optional_coldata_fields <- function(se) {
#' @return a charvec containing the names of the optional identifiers in the SE rowData
#'
get_optional_rowdata_fields <- function(se) {
bczech marked this conversation as resolved.
Show resolved Hide resolved
checkmate::assert_class(se, "SummarizedExperiment")
idfs <- get_SE_identifiers(se)
rowdata <- SummarizedExperiment::rowData(se)

out <- c(idfs["drug_moa"])

if (!is.null(idfs["drug2"])) {
if (!is.null(rowdata[[idfs[["drug2"]]]])) {
out <- c(out, idfs["drug_moa2"])
}
if (!is.null(idfs["drug3"])) {
if (!is.null(rowdata[[idfs[["drug3"]]]])) {
out <- c(out, idfs["drug_moa3"])
}

Expand Down
Loading
Loading