Skip to content

Commit

Permalink
Merge pull request #47 from bhklab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jjjermiah authored Mar 27, 2024
2 parents e645297 + f09eac0 commit 1014ee1
Show file tree
Hide file tree
Showing 43 changed files with 1,075 additions and 423 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ docs
/Meta/
.lintr
covr
TRASH
TRASH
Treatment-Annotation*.Rmd

./*.csv
CCLE_treatmentMetadata.csv
8 changes: 3 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: AnnotationGx
Title: AnnotationGx: A package for building, updating and querying an
annotation database for pharmaco-genomic data
Version: 0.0.0.9090
Version: 0.0.0.9095
Authors@R: c(
person("Jermiah", "Joseph", role = c("aut", "cre"),
email = "[email protected]"),
Expand All @@ -22,17 +22,15 @@ Imports:
crayon,
httr2,
data.table,
options,
BiocParallel,
readr,
xml2
xml2,
memoise
Suggests:
testthat (>= 3.0.0),
covr,
readxl,
knitr,
rmarkdown,
BiocStyle,
RefManageR,
sessioninfo
Config/testthat/edition: 3
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ COPY . /app
WORKDIR /app

# RUN R -e 'install.packages(c("BiocManager", "devtools", "jsonlite", "qpdf"), repos=c("https://cloud.r-project.org/", "https://cran.rstudio.com/"))'
# RUN R -e 'BiocManager::install("BiocParallel")'
RUN R -e 'install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))'
RUN R -e 'pak::pkg_install(".", dependencies=TRUE, upgrade=TRUE, ask = FALSE)'
RUN R -e 'pak::cache_clean(); pak::meta_clean(force = TRUE)'

# RUN install2.r --error --deps TRUE \
# qpdf \
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ export(standardize_names)
export(strSplit)
export(unlistNested)
exportMethods(matchNested)
import(BiocParallel)
importFrom(checkmate,assert)
importFrom(checkmate,assert_atomic)
importFrom(checkmate,assert_choice)
Expand Down
11 changes: 5 additions & 6 deletions R/data.R → R/AnnotationGx-data.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#' gdsc_sampleMetadata is some preprocessed sample metadata from the GDSC dataset
#'
#' A preprocessed version of the sample metadata from the GDSC dataset. This dataset
#' contains the following columns: GDSC.Sample_Name, GDSC.BROAD_ID, GDSC.RRID, GDSC.COSMIC_ID, and CCLE.sampleid.
#' This dataset is used in the AnnotationGx package to map cell line names from various sources to the
#' Cellosaurus database.
#' A preprocessed version of the sample metadata from the GDSC dataset.
#' This dataset is included so that the package's functionality can be tested.
#' The original dataset can be downloaded from the CancerRxGene website.
#'
#' @format A data table with 5 columns and 1001 rows.
#' \describe{
Expand Down Expand Up @@ -38,10 +37,10 @@
#'
"CTRP_treatmentMetadata"

#' gCSI_sampleMetadata is some preprocessed sample metadata from the NCI60 dataset
#' gCSI_sampleMetadata is some preprocessed sample metadata from the gCSI dataset
#'
"gCSI_sampleMetadata"

#' gCSI_treatmentMetadata is some preprocessed treatment metadata from the NCI60 dataset
#' gCSI_treatmentMetadata is some preprocessed treatment metadata from the gCSI dataset
#'
"gCSI_treatmentMetadata"
15 changes: 9 additions & 6 deletions R/cellosaurus.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ mapCell2Accession <- function(
result$query <- name
return(result)
}
response_dt <- parse_cellosaurus_text(resp, name, parsed, keep_duplicates)
response_dt <- .parse_cellosaurus_text(resp, name, parsed, keep_duplicates)
response_dt
})
})


responses_dt <- data.table::rbindlist(responses_dt, fill = TRUE)
Expand Down Expand Up @@ -168,20 +168,23 @@ mapCell2Accession <- function(
#'
#' @noRd
#' @keywords internal
parse_cellosaurus_text <- function(resp, name, parsed = FALSE, keep_duplicates = FALSE){
.parse_cellosaurus_text <- function(resp, name, parsed = FALSE, keep_duplicates = FALSE){

responses_dt <- lapply(
X = resp,
FUN = .processEntry
)

responses_dt <- data.table::rbindlist(responses_dt, fill = TRUE)
tryCatch({
responses_dt <- data.table::rbindlist(responses_dt, fill = TRUE)
}, error = function(e) {
.err(paste0("Error parsing response for ", name, ": ", e$message))
})

responses_dt <- .formatSynonyms(responses_dt)

if(!parsed) {
responses_dt$query <- name
return(responses_dt)
return(responses_dt[, c("cellLineName", "accession", "query")])
}


Expand Down
22 changes: 0 additions & 22 deletions R/options.R

This file was deleted.

16 changes: 13 additions & 3 deletions R/pubchem_status.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,20 @@ getPubchemStatus <- function(
funContext <- .funContext("getPubchemStatus")

request <- .buildURL(url) |> .build_pubchem_request()
response <- httr2::req_perform(request)

status_code <- httr2::resp_status(response)
message <- response$headers[["X-Throttling-Control"]]
# Start with a NULL message and repeat the request until the header is found,
# because X-Throttling-Control is sometimes missing from the response.
message <- NULL

while(is.null(message)) {
response <- httr2::req_perform(request)

if (httr2::resp_status(response) == 200) {
message <- response$headers[["X-Throttling-Control"]]
} else {
.warn("Request failed. Retrying...")
Sys.sleep(1)
}
}
parsed_info <- .checkThrottlingStatus2(message, printMessage)
if (returnMessage) {
return(parsed_info)
Expand Down
39 changes: 25 additions & 14 deletions R/pubchem_view.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
getPubchemAnnotationHeadings <- function(
type = "all", heading = NULL) {
funContext <- .funContext("getPubchemAnnotationHeadings")
.debug(funContext, " type: ", type, " heading: ", heading)

.debug(funContext, " type: ", type, " heading: ", heading)
# TODO: the argument validation below is messy; simplify it.
checkmate::assert(
checkmate::test_choice(
Expand Down Expand Up @@ -56,6 +56,7 @@ getPubchemAnnotationHeadings <- function(
#' @param parse_function A custom parsing function to process the response. Default is the identity function.
#' @param query_only Logical indicating whether to return the query URL only. Default is FALSE.
#' @param raw Logical indicating whether to return the raw response. Default is FALSE.
#' @param nParallel The number of parallel processes to use. Default is 1.
#'
#' @return The annotated information about the PubChem compound.
#'
Expand All @@ -66,29 +67,36 @@ getPubchemAnnotationHeadings <- function(
#' @export
annotatePubchemCompound <- function(
cids, heading = "ChEMBL ID", source = NULL, parse_function = identity,
query_only = FALSE, raw = FALSE) {
query_only = FALSE, raw = FALSE, nParallel = 1
) {
funContext <- .funContext("annotatePubchemCompound")

.info(funContext, sprintf("Building requests for %s CIDs", length(cids)))
requests <- lapply(cids, function(cid) {
.build_pubchem_view_query(
id = cid, record = "compound", heading = heading,
output = "JSON", source = source
)
})
)
}
)

.debug(funContext, paste0("query:", sapply(requests, `[[`, i = "url")))
.debug(funContext, paste0("query: ", sapply(requests, `[[`, i = "url")))
if (query_only) return(requests)

if (query_only) {
return(requests)
}
tryCatch({
resp_raw <- httr2::req_perform_sequential(
reqs = requests,
on_error = "continue",
progress = "Performing API requests..."
)}, error = function(e) {
.err(funContext, "An error occurred while performing requests:\n", e)
})

resp_raw <- httr2::req_perform_sequential(requests, on_error = "continue")
if (raw) {
return(resp_raw)
}
if (raw) return(resp_raw)

responses <- lapply(seq_along(resp_raw), function(i){
resp <- resp_raw[[i]]
if(is.null(resp)) return(NA_character_)
tryCatch(
{
.parse_resp_json(resp)
Expand All @@ -107,7 +115,7 @@ annotatePubchemCompound <- function(
})

# apply the parse function to each response depending on heading
parsed_responses <- .bplapply(responses, function(response) {
parsed_responses <- parallel::mclapply(responses, function(response) {
switch(heading,
"ChEMBL ID" = .parseCHEMBLresponse(response),
"CAS" = .parseCASresponse(response),
Expand All @@ -128,7 +136,10 @@ annotatePubchemCompound <- function(
}
)
)
})
},
mc.cores = nParallel
)


sapply(parsed_responses, .replace_null)

Expand Down
22 changes: 9 additions & 13 deletions R/pubchem_view_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
#'
#' @keywords internal
#' @noRd
.get_all_heading_types <- function() {
.get_all_heading_types_base <- function() {
url <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug/annotations/headings/JSON"
req <- .build_pubchem_request(url)
response <- httr2::req_perform(req) |> .parse_resp_json()
.asDT(response[[1]][[1]])
}

#' @keywords internal
.get_all_heading_types <- memoise::memoise(.get_all_heading_types_base)

#' Build a PubChem REST query URL
#'
Expand Down Expand Up @@ -41,7 +43,10 @@
#' @keywords internal
#' @noRd
.build_pubchem_view_query <- function(
id, annotation = "data", record = "compound", page = NULL, version = NULL, heading = NULL, source = NULL, output = "JSON", ...) {
id, annotation = "data", record = "compound",
page = NULL, version = NULL, heading = NULL, source = NULL,
output = "JSON", ...
) {
funContext <- .funContext(".build_pubchem_view_query")


Expand All @@ -60,16 +65,7 @@
has no substance headings"
)
} else {
check <- checkmate::check_character(
unique(getPubchemAnnotationHeadings(record, heading)$Heading),
min.chars = 1, min.len = 1
)
if (!isTRUE(check)) {
.err(
funContext, "Invalid heading: ", heading,
". Use getPubchemAnnotationHeadings() to get valid headings."
)
}
checkmate::assert(heading %in% .get_all_heading_types()$Heading)
}
opts_ <- c(opts_, list(heading = heading))
}
Expand Down Expand Up @@ -99,7 +95,7 @@

url |>
httr2::url_build() |>
.build_pubchem_request()
.build_request()
}

#' Generic function to parse one of the annotation helpers
Expand Down
31 changes: 0 additions & 31 deletions R/utils-general.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,6 @@
#' @noRd
.asDT <- function(x, ...) data.table::as.data.table(x, ...)



#' Custom wrapper function for parallelizing lapply using BiocParallel.
#'
#' This function provides a convenient way to parallelize the lapply function
#' using the BiocParallel package. It takes a list or vector \code{X} and applies
#' the function \code{FUN} to each element in parallel. The parallelization is
#' controlled by the \code{BPPARAM} argument, which defaults to the SerialParam
#' object from BiocParallel.
#'
#' @param X A list or vector to apply the function to.
#' @param FUN The function to apply to each element of \code{X}.
#' @param ... Additional arguments to pass to \code{FUN}.
#' @param BPPARAM A BiocParallel parameter object controlling the parallelization.
#' @inheritParams BiocParallel::bplapply
#' @return A list containing the results of applying \code{FUN} to each element of \code{X}.
#'
#' @import BiocParallel
#'
#' @examples
#' # Apply a function to a list in parallel
#' x <- list(1, 2, 3, 4, 5)
#' .bplapply(x, function(x) x^2)
#'
#' @keywords internal
#' @noRd
.bplapply <- function(X, FUN, ..., BPPARAM = BiocParallel::SerialParam()) {
BiocParallel::bplapply(X, FUN, ..., BPPARAM = BPPARAM)
}


#' Parses the query response into a data table
#'
#' This function takes a query response and converts it into a data table using the `as.data.table` function from the `data.table` package.
Expand Down
2 changes: 1 addition & 1 deletion R/utils-httr2.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' @keywords internal
.build_request <- function(url) {
httr2::request(url) |>
httr2::req_retry(max_tries = 3) |>
httr2::req_retry(max_tries = 5, backoff = ~ 10) |>
httr2::req_error(is_error = \(resp) FALSE)
}

Expand Down
Loading

0 comments on commit 1014ee1

Please sign in to comment.