Skip to content

Commit

Permalink
refactor: fix error and add new functions for PubChem API. Start vign…
Browse files Browse the repository at this point in the history
…ette for pubchem.
  • Loading branch information
jjjermiah committed Mar 7, 2024
1 parent 109f3ed commit 0668580
Show file tree
Hide file tree
Showing 10 changed files with 194 additions and 32 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: AnnotationGx
Title: AnnotationGx: A package for building, updating and querying an
annotation database for pharmaco-genomic data
Version: 0.0.0.9071
Version: 0.0.0.9073
Authors@R: c(
person("Jermiah", "Joseph", role = c("aut", "cre"),
email = "[email protected]"),
Expand Down Expand Up @@ -33,6 +33,8 @@ Suggests:
knitr,
rmarkdown
Config/testthat/edition: 3
Config/testthat/parallel: true
Config/testthat/start-first: watcher, parallel*
License: GPL (>= 3) + file LICENSE
LazyData: true
VignetteBuilder: knitr
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ export(getChemblResourceFields)
export(getPubchemAnnotationHeadings)
export(getPubchemCompound)
export(mapCell2Accession)
export(mapCompound2CID)
export(standardize_names)
import(BiocParallel)
import(data.table)
importFrom(checkmate,assert)
importFrom(checkmate,assert_atomic)
importFrom(checkmate,assert_choice)
importFrom(checkmate,assert_integerish)
importFrom(checkmate,assert_logical)
importFrom(checkmate,test_atomic)
importFrom(checkmate,test_choice)
importFrom(data.table,":=")
4 changes: 2 additions & 2 deletions R/3_httr2_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
#' @param resp The response object from the HTTP request.
#' @return The parsed JSON response.
#' @noRd
.parse_resp_json <- function(resp){
httr2::resp_body_json(resp, simplifyVector = TRUE)
.parse_resp_json <- function(resp, simplifyVector = TRUE){
httr2::resp_body_json(resp, simplifyVector = simplifyVector)
}


Expand Down
6 changes: 3 additions & 3 deletions R/pubchem_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ getPubchemStatus <- function(
}
# Check whether the service status shows the request limit is exceeded or close to being reached
if(parsed_info$service$status == "Black"){
message("WARNING: The request limit has been exceeded and requests are being blocked.")
.warn("The request limit has been exceeded and requests are being blocked.")
}else if(parsed_info$service$status %in% c("Red", "Yellow")){
message("WARNING: The request limit has been reached or is close to being reached.")
.warn("The request limit has been reached or is close to being reached.")
}else{
message("The request limit is not close to being reached.")
.warn("The request limit is not close to being reached.")
}
return(parsed_info)
}
Expand Down
74 changes: 64 additions & 10 deletions R/pubchem_rest.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,37 @@ getPubchemCompound <- function(
){
funContext <- .funContext("getPubchemCompound")

if(to == 'property'){

to_ <- if(to == 'property'){
checkmate::assert_atomic(properties, all.missing = FALSE)
checkmate::assert_character(properties)
to <- paste0(to, '/', paste0(properties, collapse = ','))
}
}else to

requests <- lapply(ids, function(x) {
.build_pubchem_rest_query(
id = x, domain = 'compound', namespace = from, operation = to, output = output,
id = x, domain = 'compound', namespace = from, operation = to_, output = output,
raw = raw, query_only = query_only, ...)
}
)
if(query_only) return(requests)

resps_raw <- httr2::req_perform_sequential(requests, on_error = "continue")
.debug(funContext, " Number of responses: ", length(resps_raw))

names(resps_raw) <- ids
if(raw) return(resps_raw)
resps <- lapply(resps_raw, function(x){
.parse_resp_json(x) |> .parseQueryToDT()
})

names(resps) <- ids

# Parse the responses
resps <- .parse_pubchem_rest_responses(resps_raw)
failed <- sapply(resps_raw, httr2::resp_is_error, USE.NAMES = T)

if(any(failed)){
.warn(funContext, " Some queries failed. See the 'failed' object for details.")
failures <- lapply(resps_raw[failed], function(resp){
.parse_resp_json(resp)$Fault
})
}else failures <- NULL

if(from != 'name'){
responses <- data.table::rbindlist(resps, fill= TRUE)
Expand All @@ -55,10 +63,55 @@ getPubchemCompound <- function(
}
data.table::setnames(responses, 'V1', to, skip_absent=TRUE)

attributes(responses)$failed <- failures

responses
}


#' Map compound names to PubChem CIDs
#'
#' Thin wrapper around `getPubchemCompound()` that fixes `from = 'name'` and
#' `to = 'cids'`, so every compound name is looked up through the PubChem
#' REST API.
#'
#' @param names A character vector of compound names.
#' @param raw Logical; if `TRUE`, the unparsed responses are returned as a list
#'   named by the input names (default is `FALSE`).
#' @param query_only Logical; if `TRUE`, the built requests are returned
#'   without being sent to the API (default is `FALSE`). Takes precedence
#'   over `raw`.
#' @param output The output format requested from the API (default is 'JSON').
#' @param ... Additional arguments to be passed to the getPubchemCompound function.
#'
#' @return By default, a `data.table` pairing each input name with its PubChem
#'   CID; names that could not be mapped get an `NA` CID and the details of the
#'   failed queries are attached as the `failed` attribute (a list named by the
#'   failing input, `NULL` when nothing failed). With `query_only = TRUE` the
#'   list of requests is returned instead, and with `raw = TRUE` the list of
#'   raw responses.
#'
#' @examples
#' mapCompound2CID(c("aspirin", "caffeine"))
#'
#' @export
mapCompound2CID <- function(names, raw = FALSE, query_only = FALSE, output = 'JSON', ...){
  getPubchemCompound(
    ids = names,
    from = 'name',
    to = 'cids',
    raw = raw,
    query_only = query_only,
    output = output,
    ...
  )
}

#' Parse a list of PubChem REST responses
#'
#' Converts every response into the result of `.parseQueryToDT()`. A response
#' that is an HTTP error is represented by `.parseQueryToDT(NA_integer_)`
#' instead of its parsed body.
#'
#' @param responses A named list of at least one response object, with no
#'   missing elements.
#' @return A list of parsed results carrying the same names, in the same
#'   order, as `responses`.
#' @noRd
.parse_pubchem_rest_responses <- function(responses){
  checkmate::assert_list(
    x = responses,
    any.missing = FALSE,
    names = 'named',
    min.len = 1
  )

  # Iterate over the list itself rather than over its names: lapply() keeps the
  # names, and a lookup by name would always pick the first match when the
  # same id was queried more than once.
  lapply(responses, function(resp){
    # Check the status before touching the body, so an error response whose
    # body is not valid JSON cannot abort the parsing of the whole batch.
    if(httr2::resp_is_error(resp)) return(.parseQueryToDT(NA_integer_))

    .parseQueryToDT(.parse_resp_json(resp))
  })
}





#' Build a query for the PubChem REST API
#'
Expand All @@ -76,7 +129,7 @@ getPubchemCompound <- function(
#'
#' @return The query URL or the parsed response, depending on the arguments.
#'
#' @importFrom checkmate assert assert_choice assert_logical assert_atomic test_choice
#' @importFrom checkmate assert assert_choice assert_logical assert_atomic test_choice assert_integerish test_atomic
#'
#' @keywords internal
.build_pubchem_rest_query <- function(
Expand All @@ -102,8 +155,9 @@ getPubchemCompound <- function(
)
assert_choice(output, c('JSON', 'XML', 'SDF', 'TXT', 'CSV'))
assert_logical(raw, query_only)
assert_atomic(id, all.missing = FALSE)
if(!test_atomic(id, any.missing = FALSE)) .err("id must be an atomic vector with no missing/NA values")

if(namespace == 'cid') assert_integerish(id)

# -------------------------------------- Function context --------------------------------------
funContext <- .funContext("query_pubchem_rest")
Expand Down
29 changes: 29 additions & 0 deletions man/mapCompound2CID.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions tests/testthat/test_pubchem_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,20 @@ resp_ <- .buildURL(url) |> .build_pubchem_request() |> httr2::req_perform()
test_that("checkThrottlingStatus Works", {
response <- resp_

response$headers$`x-throttling-control` <-
"Request Count status: Yellow (60%), Request Time status: Yellow (60%), Service status: Yellow (60%)"
response$headers["X-Throttling-Control"] <- "Request Count status: Yellow (60%), Request Time status: Yellow (60%), Service status: Yellow (60%)"
parsed_info <- AnnotationGx:::.checkThrottlingStatus2(response, printMessage = FALSE)
expect_equal(parsed_info, list(request_count = list(status = "Yellow", percent = 60),
request_time = list(status = "Yellow", percent = 60),
service = list(status = "Yellow", percent = 60)))

response$headers$`x-throttling-control` <-
response$headers["X-Throttling-Control"] <-
"Request Count status: Red (80%), Request Time status: Red (80%), Service status: Red (80%)"
parsed_info <- AnnotationGx:::.checkThrottlingStatus2(response, printMessage = FALSE)
expect_equal(parsed_info, list(request_count = list(status = "Red", percent = 80),
request_time = list(status = "Red", percent = 80),
service = list(status = "Red", percent = 80)))

response$headers$`x-throttling-control` <-
response$headers["X-Throttling-Control"] <-
"Request Count status: Black (100%), Request Time status: Red (80%), Service status: Red (80%)"
parsed_info <- AnnotationGx:::.checkThrottlingStatus2(response, printMessage = FALSE)
expect_equal(parsed_info, list(request_count = list(status = "Black", percent = 100),
Expand Down
66 changes: 54 additions & 12 deletions tests/testthat/test_pubchem_rest.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@ library(checkmate)

compounds <- c('temozolomide', 'erlotinib', 'TRETINOIN', 'TRAMETINIB', 'epigallocatechin-3-monogallate')


# Comprehensive Tests:
test_that("AnnotationGx::getPubchemCompound 5 Correct Drugs", {
# Test for all possible combinations of domain, namespace, operation, and output
domains <- c('compound')

expected_cids <- c(5394, 176870, 444795, 11707110, 65064)

result <- getPubchemCompound(ids = compounds, from = 'name', to = 'cids')
Expand All @@ -24,22 +22,66 @@ test_that("AnnotationGx::getPubchemCompound 5 Correct Drugs", {

test_that("AnnotationGx::getPubchemCompound 1 Incorrect Drug", {
# Test for an incorrect drug, scoped so it doesn't affect the other tests
compounds <- c('BAD_DRUG_NAME', compounds)
getPubchemCompound(ids = compounds, from = 'name', to = 'cids')
compounds <- c('BAD_DRUG_NAME')
result <- getPubchemCompound(ids = compounds, from = 'name', to = 'cids')
expect_data_table(
x = result,
types = c('character', 'integer'),
ncols = 2,
nrows = length(compounds),
col.names = 'named'
)

failed_queries <- attributes(result)$failed

expect_list(
failed_queries,
len = 1,
any.missing = FALSE,
names = 'named'
)

expect_equal(names(failed_queries), c('BAD_DRUG_NAME'))
})

result <- getPubchemCompound('BAD', from='name', to = 'cids', raw = T)[[1]] |>
AnnotationGx:::.parse_resp_json() |>
AnnotationGx:::.parseQueryToDT()
test_that("AnnotationGx::getPubchemCompound bad input", {

data(ctrp_treatmentIDs)
expect_error(getPubchemCompound(ctrp_treatmentIDs))

})

test_that("AnnotationGx::getPubchemCompound 2 Incorrect Drugs in a list", {
# Test for two incorrect drugs mixed in with valid ones, scoped so it doesn't affect the other tests
compounds <- c('BAD_DRUG_NAME', compounds, 'Another bad drug')
result <- getPubchemCompound(ids = compounds, from = 'name', to = 'cids')
expect_data_table(
x = result,
types = c('character', 'integer'),
any.missing = FALSE,
ncols = 1,
nrows = 1,
ncols = 2,
nrows = length(compounds),
col.names = 'named'
)


failed_queries <- attributes(result)$failed

expect_list(
failed_queries,
len = 2,
any.missing = FALSE,
names = 'named'
)

expect_equal(names(failed_queries), c('BAD_DRUG_NAME', 'Another bad drug'))
})

test_that("AnnotationGx::getPubchemCompound errors if cid and not integer", {
  # c() coerces the mixed vector to character, so the ids are no longer
  # integer-like even though the first one was written as a number.
  bad_cids <- c(5394, 'PUGREST.BadRequest')
  wanted_properties <- c('Title', 'MolecularFormula', 'InChIKey', 'CanonicalSMILES')

  expect_error(
    AnnotationGx::getPubchemCompound(
      ids = bad_cids,
      from = 'cid',
      to = 'property',
      properties = wanted_properties
    )
  )
})


Expand Down
Binary file added tests/testthat/testthat-problems.rds
Binary file not shown.
33 changes: 33 additions & 0 deletions vignettes/PubChemAPI.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,44 @@ UniProt, ChEBI, and ChEMBL, given a specific identifier.

```{r setup}
library(AnnotationGx)
```


#

### Mapping from chemical name to PubChem CID
The main function that is provided by the package is `mapCompound2CID`.

``` {r map aspirin to cid}
mapCompound2CID("aspirin")
```

If a compound can't be mapped, its CID is returned as `NA` and a warning is issued. The details of each failed query are stored in the `failed` attribute of the result.

``` {r map non existent compound to cid}
(result <- mapCompound2CID(c("non existent compound", "another bad compound")))
failed <- attributes(result)$failed
# get the list of failed inputs
names(failed)
# get the error message for the failed input
failed[1]
```

### Mapping from PubChem CID to Properties
TODO::


### Mapping from PubChem CID to Annotations
TODO::


# References
1. PUG REST. PubChem Docs [website]. Retrieved from https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest.
Expand Down

0 comments on commit 0668580

Please sign in to comment.