gdrplatform · bczech · Dec 8, 2023 · Nov 14, 2023 · Nov 15, 2023 · Nov 15, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: gDRutils
 Type: Package
 Title: A package with helper functions for processing drug response data
-Version: 1.1.1
-Date: 2023-11-22
+Version: 1.1.2
+Date: 2023-12-01
 Authors@R: c(person("Bartosz", "Czech", role=c("aut")),
              person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="[email protected]"),
              person("Aleksander", "Chlebowski", role=c("aut")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,15 +1,20 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(modifyData,data_source)
+S3method(modifyData,default)
+S3method(modifyData,drug_name2)
 export(.calculate_complement)
 export(.clean_key_inputs)
 export(.set_constant_fit_params)
 export(.set_invalid_fit_params)
 export(.set_mean_params)
 export(.setup_metric_output)
 export(MAEpply)
+export(addClass)
 export(aggregate_assay)
 export(apply_bumpy_function)
 export(assert_choices)
+export(average_biological_replicates_dt)
 export(cap_xc50)
 export(convert_combo_data_to_dt)
 export(convert_mae_assay_to_dt)
@@ -36,6 +41,7 @@ export(get_combo_base_assay_names)
 export(get_combo_col_settings)
 export(get_combo_score_assay_names)
 export(get_default_identifiers)
+export(get_duplicated_rows)
 export(get_env_assay_names)
 export(get_env_identifiers)
 export(get_expect_one_identifiers)
@@ -59,6 +65,7 @@ export(mcolData)
 export(merge_SE)
 export(merge_assay)
 export(merge_metadata)
+export(modifyData)
 export(mrowData)
 export(predict_conc_from_efficacy)
 export(predict_efficacy_from_conc)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+## 1.1.2 (2023-12-01)
+- fix bug with refining rowData
+- extend the list of headers
+
 ## 1.1.1 (2023-11-22)
 - sync master with devel branch
 - update schema to support NA in reference division time

diff --git a/R/headers_list.R b/R/headers_list.R
@@ -9,7 +9,7 @@
     normalized_results = .getNormalizedResultsList(),
     averaged_results = .getAveragedResultsList(),
     response_metrics = .getResponseMetricsList(),
-    metric_average_filds = .getMetricAverageFilds(),
+    metric_average_fields = .getMetricAverageFields(),
     # corresponds to the field "celllinename", "primarytissue", "doublingtime" from gneDB CLIDs
     add_clid = get_env_identifiers(c("cellline_name", "cellline_tissue",
                                      "cellline_parental_identifier",
@@ -42,6 +42,7 @@
     get_env_identifiers("drug_name", simplify = TRUE),
     get_env_identifiers("masked_tag", simplify = TRUE),
     paste0(get_env_identifiers("drug_name", simplify = TRUE), "_", 2:10),
+    paste0(get_env_identifiers("drug_moa", simplify = TRUE), "_", 2:10),
     HEADERS_LIST[["raw_data"]],
     HEADERS_LIST[["normalized_results"]],
     HEADERS_LIST[["averaged_results"]],
@@ -51,6 +52,26 @@
 
   HEADERS_LIST[["ordered_1"]] <- .orderHeaderList(HEADERS_LIST, 1)
   HEADERS_LIST[["ordered_2"]] <- .orderHeaderList(HEADERS_LIST, 2)
+
+  HEADERS_LIST[["id"]] <- c("rId", "cId")
+
+
+  HEADERS_LIST[["iso_position"]] <- c("iso_level",
+                                      "pos_x",
+                                      "pos_y",
+                                      "pos_x_ref",
+                                      "pos_y_ref")
+
+
+  HEADERS_LIST[["isobolograms"]] <- c("normalization_type",
+                                      HEADERS_LIST[["iso_position"]],
+                                      "log2_CI",
+                                      "log10_ratio_conc")
+
+  HEADERS_LIST[["obsolete"]] <- c("RV",
+                                  "GR",
+                                  "Excess")
+
 
   HEADERS_LIST
 }
@@ -83,7 +104,9 @@
 .getAveragedResultsList <- function() {
   c(
     "x",
-    "x_std"
+    "x_std",
+    "std_RelativeViability",
+    "std_GRvalue"
   )
 }
 
@@ -140,7 +163,7 @@
 }
 
 #' @keywords internal
-.getMetricAverageFilds <- function() {
+.getMetricAverageFields <- function() {
   list(
     mean = c(
       "x_mean", 
@@ -152,7 +175,11 @@
     ),
     geometric_mean = c(
       "xc50", 
-      "ec50"
+      "ec50",
+      "GR50",
+      "GEC50",
+      "IC50",
+      "EC50"
     )
   )
 }

diff --git a/R/manage_additional_metadata.R b/R/manage_additional_metadata.R
@@ -0,0 +1,199 @@
+#' add arbitrary S3 class to an object
+#'
+#' Modify and object's \code{class} attribute.
+#'
+#' This is a simple convenience function that an item to the \code{class} attribute of an object
+#' so that it can be dispatched to a proper S3 method. This is purely for code clarity,
+#' so that individual methods do not clutter the definitions of higher order functions.
+#'
+#' @param x an object
+#' @param newClass character string; class to be added
+#'
+#' @return The same object with an added S3 class.
+#' 
+#' @examples 
+#' addClass(data.table::data.table(), "someClass")
+#'
+#' @export
+#'
+addClass <- function(x, newClass) {
+  checkmate::assert_string(newClass)
+  if (!is(x, newClass)) {
+    class(x) <- c(newClass, class(x))
+  }
+  return(x)
+}
+
+#' modify assay with additional data
+#'
+#' Reduce dimensionality of an assay by dropping extra data or combining variables.
+#'
+#' If an essay extracted from a \code{SummarizedExperiment} contains additional information,
+#' i.e. factors beyond \code{DrugName} and \code{CellLineName}, that information will be treated
+#' in one of three ways, depending on the value of \code{option}:
+#'
+#' \itemize{
+#'   \item{\code{drop}: Some information will be discarded and only one value
+#'                      of the additional variable (chosen by the user) will be kept.
+#'   }
+#'   \item{\code{toDrug}: The information is pasted together with the primary drug name.
+#'                        All observations are kept.
+#'   }
+#'   \item{\code{toCellLine}: As above, but pasting is done with cell line name.
+#'   }
+
+#' }
+#'
+#' Depending on the type of additional information, the exact details will differ.
+#' This is handled in the app by adding special classes to the data tables and dispatching to S3 methods.
+#'
+#' Following modification, the additional columns are discarded.
+#'
+#' @param x a \code{data.table} containing an assay
+#' @param ... additional arguments passed to methods
+#' @param option character string specifying the action to be taken, see \code{Details}
+#' @param keep character string specifying the value of the active variable that will be kept
+#'
+#' @examples
+#' dt <- data.table::data.table(a = as.character(1:10), b = "data")
+#' dt <- addClass(dt, "a")
+#' modifyData(dt, "average", keep = "b")
+#'
+#' @export
+#'
+modifyData <- function(x, ...) {
+  UseMethod("modifyData")
+}
+
+#' @export
+#' @describeIn modifyData includes the name and concentration of the second drug
+modifyData.drug_name2 <- function(x, option, keep, ...) {
+  checkmate::assert_data_table(x)
+  checkmate::assert_string(option)
+  checkmate::assert_choice(option, c("average", "toDrug", "toCellLine"))
+  checkmate::assert_string(keep, null.ok = TRUE)
+
+  pidfs <- get_prettified_identifiers(simplify = TRUE)
+  drug_name <- pidfs[["drug_name"]]
+  drug_name2 <- pidfs[["drug_name2"]]
+  conc2 <- pidfs[["concentration2"]]
+  drug2 <- pidfs[["drug2"]]
+  cell_name <- pidfs[["cellline_name"]]
+
+  if (option == "average") {
+    # drop data and keep only the requested value
+    x <- average_biological_replicates_dt(x, drug_name2)
+  } else {
+    # ensure concentration of co-drug is a numeric value
+    if (is.factor(x[[conc2]])) {
+      x[[conc2]] <- as.character(x[[conc2]])
+    }
+    if (is.character(x[[conc2]])) {
+      x[[conc2]] <- as.numeric(x[[conc2]])
+    }
+    if (option == "toDrug") {
+      x[[drug_name]] <-
+        sprintf("%s (%s = %s at %.4f &mu;M)", x[[drug_name]], drug_name2, x[[drug_name2]], x[[conc2]])
+      x[[drug_name]] <-
+        sub(" \\(.*? at 0\\.?0* &mu;M\\)", "", x[[drug_name]])
+    } else if (option == "toCellLine") {
+      x[[cell_name]] <-
+        sprintf("%s (%s = %s at %.4f &mu;M)", x[[cell_name]], drug_name2, x[[drug_name2]], x[[conc2]])
+      x[[cell_name]] <-
+        sub(" \\(.*? at 0\\.?0* &mu;M\\)", "", x[[cell_name]])
+    }
+  }
+
+  # drop the additional columns
+  x[c(drug_name2, conc2, drug2)] <- NULL
+  # remove special class
+  class(x) <- setdiff(class(x), "drug_name2")
+  return(x)
+}
+
+#' @export
+#' @describeIn modifyData includes the data source
+modifyData.data_source <- function(x, option, keep, ...) {
+  checkmate::assert_data_table(x)
+  checkmate::assert_string(option)
+  checkmate::assert_choice(option, c("average", "toDrug", "toCellLine"))
+  checkmate::assert_string(keep, null.ok = TRUE)
+
+  pidfs <- get_prettified_identifiers(simplify = TRUE)
+  dt_src <- pidfs[["data_source"]]
+  drug <- pidfs[["drug_name"]]
+  clid <- pidfs[["cellline"]]
+  cl_name <- pidfs[["cellline_name"]]
+
+  if (option == "average") {
+    # drop data and keep only the requested value
+    x <- average_biological_replicates_dt(x, dt_src)
+  } else {
+    duplicated_rows <- get_duplicated_rows(x, c(drug, clid))
+    if (option == "toDrug") {
+      drugs_to_combine <- unique(x[duplicated_rows, drug])
+      drug_idx <- which(x[[drug]] %in% drugs_to_combine)
+      drug_to_replace <- x[drug_idx, drug]
+      x[drug_idx, drug] <-
+        vapply(
+          seq_len(length(drug_to_replace)),
+          function(y) sprintf("%s (%s)", drug_to_replace[y], x[, dt_src][y]), "string")
+    } else if (option == "toCellLine") {
+      cell_lines_to_combine <- unique(x[duplicated_rows, cl_name])
+      cell_line_idx <- which(x[[cl_name]] %in% cell_lines_to_combine)
+      x[cell_line_idx, cl_name] <-
+        sprintf("%s (%s)", x[cell_line_idx, cl_name], x[cell_line_idx, dt_src])
+    }
+  }
+  # drop the additional columns
+  x[, c(dt_src) := NULL]
+  # remove special class
+  class(x) <- setdiff(class(x), dt_src)
+  return(x)
+}
+
+
+#' @export
+#' @describeIn modifyData includes the name of other additional variables
+modifyData.default <- function(x, option, keep, ...) {
+  checkmate::assert_data_table(x)
+  checkmate::assert_string(option)
+  checkmate::assert_choice(option, c("average", "toDrug", "toCellLine"))
+  checkmate::assert_string(keep, null.ok = TRUE)
+  pidfs <- get_prettified_identifiers(simplify = TRUE)
+  additional_var_names <- class(x)[[1]]
+  additional_var <- ifelse(additional_var_names %in% names(pidfs),
+                           pidfs[[additional_var_names]],
+                           additional_var_names)
+  cell_name <- pidfs[["cellline_name"]]
+  drug_name <- pidfs[["drug_name"]]
+
+  if (option == "average") {
+    # drop data and keep only the requested value
+    x <- average_biological_replicates_dt(x, additional_var)
+  } else {
+    if (option == "toDrug") {
+      x <- modify_label(x, drug_name, additional_var)
+    } else if (option == "toCellLine") {
+      x <- modify_label(x, cell_name, additional_var)
+    }
+  }
+  # drop the additional columns
+  if ("data.table" %in% class(x)) {
+    x[[additional_var]] <- NULL
+  } else {
+    x[additional_var] <- NULL
+  }
+  # remove special class
+  class(x) <- setdiff(class(x), additional_var_names)
+  return(x)
+}
+
+#' @keywords internal
+modify_label <- function(x, option, var_name) {
+  x[[option]] <-
+    sprintf("%s (%s = %s)", x[[option]], var_name, as.character(x[[var_name]]))
+  x[[option]] <-
+    sub(" \\(.*? at 0\\.?0* &mu;M\\)", "", x[[option]])
+  x
+}
diff --git a/R/standardize_MAE.R b/R/standardize_MAE.R
@@ -185,6 +185,7 @@ rename_bumpy <- function(bumpy, mapping_vector) {
 #' @return a charvec containing the names of the optional identifiers in the SE colData
 #'
 get_optional_coldata_fields <- function(se) {
+  checkmate::assert_class(se, "SummarizedExperiment")
   idfs <- get_SE_identifiers(se)
 
   as.character(idfs["cellline_tissue"])
@@ -197,14 +198,16 @@ get_optional_coldata_fields <- function(se) {
 #' @return a charvec containing the names of the optional identifiers in the SE rowData
 #'
 get_optional_rowdata_fields <- function(se) {
+  checkmate::assert_class(se, "SummarizedExperiment")
   idfs <- get_SE_identifiers(se)
+  rowdata <- SummarizedExperiment::rowData(se)
 
   out <- c(idfs["drug_moa"])
 
-  if (!is.null(idfs["drug2"])) {
+  if (!is.null(rowdata[[idfs[["drug2"]]]])) {
     out <- c(out, idfs["drug_moa2"])
   }
-  if (!is.null(idfs["drug3"])) {
+  if (!is.null(rowdata[[idfs[["drug3"]]]])) {
     out <- c(out, idfs["drug_moa3"])
   }