Skip to content

Commit

Permalink
Merge pull request #26 from bhklab/unichem
Browse files Browse the repository at this point in the history
Unichem
  • Loading branch information
jjjermiah authored Mar 11, 2024
2 parents a1db08c + efa3208 commit a15893f
Show file tree
Hide file tree
Showing 9 changed files with 475 additions and 1 deletion.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: AnnotationGx
Title: AnnotationGx: A package for building, updating and querying an
annotation database for pharmaco-genomic data
Version: 0.0.0.9077
Version: 0.0.0.9080
Authors@R: c(
person("Jermiah", "Joseph", role = c("aut", "cre"),
email = "[email protected]"),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ export(getPubchemAnnotationHeadings)
export(getPubchemCompound)
export(getPubchemProperties)
export(getPubchemStatus)
export(getUnichemSources)
export(mapCID2Properties)
export(mapCell2Accession)
export(mapCompound2CID)
export(queryUnichem)
export(standardize_names)
import(BiocParallel)
import(data.table)
Expand Down
126 changes: 126 additions & 0 deletions R/unichem.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@

# Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices


#' Get the list of sources in UniChem.
#'
#' Queries the UniChem `sources` endpoint and returns a `data.table` with the
#' following columns (in this order):
#' - `Name` (string): Short name of the source database
#' - `NameLabel` (string): Machine readable label name of the source database
#' - `NameLong` (string): Full name of the source database
#' - `SourceID` (integer): Unique ID for the source database
#' - `CompoundCount` (integer): Total of compounds provided by that source
#' - `BaseURL` (string): Source Base URL for compounds
#' - `URL` (string): Main URL for the source
#' - `Details` (string): Notes about the source
#' - `Description` (string): Source database description
#' - `ReleaseNumber` (integer): Release number of the source database data stored in UniChem
#' - `ReleaseDate` (string): Date in which the source database was released
#' - `LastUpdated` (string): Date in which the source database was last updated
#' - `UpdateComments` (string): Notes about the update process of that source to UniChem
#'
#' A response whose `response` field is not `"Success"` is reported through
#' the package's `.err()` helper.
#'
#' @return A data.table with the list of sources in UniChem.
#'
#' @export
getUnichemSources <- function() {
  funContext <- .funContext("AnnotationGx::getUnichemSources")

  response <- .build_unichem_query("sources") |>
    .build_request() |>
    .perform_request() |>
    .parse_resp_json()

  # isTRUE() keeps the check well-defined when the `response` field is missing
  # or NA; a bare `!=` would hand `if` a zero-length or NA condition.
  if (!isTRUE(response$response == "Success")) {
    .err(funContext, "Unichem API request failed.")
  }

  .debug(funContext, sprintf("Unichem sourceCount: %s", response$totalSources))

  sources_dt <- .asDT(response$sources)

  # Rename the API field names to the naming used by the rest of the package.
  old_names <- c(
    "UCICount", "baseIdUrl", "description", "lastUpdated", "name",
    "nameLabel", "nameLong", "sourceID", "srcDetails", "srcReleaseDate",
    "srcReleaseNumber", "srcUrl", "updateComments"
  )

  new_names <- c(
    "CompoundCount", "BaseURL", "Description", "LastUpdated", "Name",
    "NameLabel", "NameLong", "SourceID", "Details", "ReleaseDate",
    "ReleaseNumber", "URL", "UpdateComments"
  )

  setnames(sources_dt, old_names, new_names)

  # Select (and thereby order) only the documented columns.
  new_order <- c(
    "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
    "BaseURL", "URL", "Details",
    "Description", "ReleaseNumber", "ReleaseDate", "LastUpdated",
    "UpdateComments"
  )

  sources_dt[, ..new_order]
}

#' Query UniChem for a compound.
#'
#' This function queries the UniChem API for a compound based on the provided parameters.
#'
#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound `character` or `integer` The compound identifier to search for.
#' @param sourceID `integer` The source ID to search for if the type is "sourceID". Defaults to `NA_integer_`.
#' @param request_only `boolean` Whether to return the request only. Defaults to FALSE.
#' @param raw `boolean` Whether to return the raw response. Defaults to FALSE.
#' @param ... Additional arguments forwarded to the internal request builder.
#'
#' @return If `request_only` is TRUE, the request object. If `raw` is TRUE,
#'   the parsed response as returned by the API. Otherwise a list with the
#'   external mappings (`External_Mappings`) and the UniChem mappings
#'   (`UniChem_Mappings`).
#'
#' @examples
#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
#'
#' @export
queryUnichem <- function(
    type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
) {
  funContext <- .funContext("AnnotationGx::queryUnichem")

  checkmate::assert_string(type)
  checkmate::assert_atomic(compound)
  checkmate::assert_integerish(sourceID)
  # Both switches are used directly in `if`, so they must be single non-NA
  # logicals rather than arbitrary logical vectors.
  checkmate::assert_flag(request_only)
  checkmate::assert_flag(raw)

  request <- .build_unichem_compound_req(type, compound, sourceID, ...)
  if (request_only) {
    return(request)
  }

  response <- request |>
    .perform_request() |>
    .parse_resp_json()

  # The raw response is handed back before the status check so callers can
  # inspect unsuccessful responses themselves.
  if (raw) {
    return(response)
  }

  # isTRUE() keeps the check well-defined when the `response` field is missing.
  if (!isTRUE(response$response == "Success")) {
    .err(funContext, "Unichem API request failed.")
  }

  # Mapping names to be consistent with other API calls.
  # NOTE: "sourcURL" (sic) is part of the returned column names and is relied
  # on by the package tests, so the spelling is kept as is.
  mapped_sources_dt <- .asDT(response$compounds$sources)
  old_names <- c("compoundId", "shortName", "longName", "id", "url")
  new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
  setnames(mapped_sources_dt, old = old_names, new = new_names)

  External_Mappings <- mapped_sources_dt[, ..new_names]

  UniChem_Mappings <- list(
    UniChem.UCI = response$compounds$uci,
    UniChem.InchiKey = response$compounds$standardInchiKey,
    UniChem.Inchi = response$compounds$inchi$inchi,
    UniChem.formula = response$compounds$inchi$formula,
    UniChem.connections = response$compounds$inchi$connections,
    UniChem.hAtoms = response$compounds$inchi$hAtoms
  )

  list(
    External_Mappings = External_Mappings,
    UniChem_Mappings = UniChem_Mappings
  )
}
87 changes: 87 additions & 0 deletions R/unichem_helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#' Build a UniChem query URL
#'
#' This function builds a UniChem query URL based on the specified endpoint.
#'
#' @param endpoint A single UniChem endpoint to query (valid options: "compounds", "connectivity", "images", "sources")
#' @param query_only Logical indicating whether to return the parsed URL object instead of the built URL string (default: FALSE)
#'
#' @return The parsed URL object (from `httr2::url_parse()`) if `query_only`
#'   is TRUE, otherwise the URL built with `httr2::url_build()`.
#'
#' @examples
#' .build_unichem_query("sources")
#' .build_unichem_query("connectivity", query_only = TRUE)
#'
#' @noRd
#' @keywords internal
.build_unichem_query <- function(
    endpoint, query_only = FALSE
) {
  funContext <- .funContext("AnnotationGx:::.build_unichem_query")

  # Exactly one endpoint is required: assert_subset() would also accept an
  # empty vector or several endpoints at once.
  valid_endpoints <- c("compounds", "connectivity", "images", "sources")
  checkmate::assert_choice(endpoint, valid_endpoints)

  unichem_api <- "https://www.ebi.ac.uk/unichem/api/v1"
  url <- httr2::url_parse(unichem_api)
  url$path <- .buildURL(url$path, endpoint)

  .debug(funContext, "URL: ", capture.output(show(url)))

  if (query_only) {
    return(url)
  }

  httr2::url_build(url)
}


#' Build a UniChem compound request
#'
#' This function builds a UniChem compound request based on the provided parameters.
#'
#' @param type The type of compound identifier to search for. Must be exactly one of "uci", "inchi", "inchikey", and "sourceID".
#' @param compound The compound identifier to search for.
#' @param sourceID The source ID to search for if the type is "sourceID". Defaults to NULL.
#'   When `type` is "sourceID" it must be a single, non-missing whole number
#'   between 1 and the largest `SourceID` reported by `getUnichemSources()`.
#' @param ... Additional arguments (currently unused).
#'
#' @return A `httr2_request` request object for the UniChem compound query.
#'
#' @examples
#' .build_unichem_compound_req(type = "uci", compound = "538323")
#' .build_unichem_compound_req(type = "sourceID", sourceID = 22, compound = "2244")
#'
#' @noRd
#' @keywords internal
.build_unichem_compound_req <- function(
    type, compound, sourceID = NULL, ...
) {
  funContext <- .funContext("AnnotationGx:::.build_unichem_compound_req")

  # `type` is compared with `==` inside `if` below, so it has to be a single
  # value; assert_subset() would let vectors and character(0) through.
  valid_types <- c("uci", "inchi", "inchikey", "sourceID")
  checkmate::assert_choice(type, valid_types)

  base_url <- .build_unichem_query("compounds")

  .debug(funContext, "Base URL: ", capture.output(show(base_url)))

  body <- list(
    type = type,
    compound = compound
  )

  if (type == "sourceID") {
    # NOTE: the upper bound is fetched from the UniChem sources endpoint on
    # every call, which costs one extra API round-trip per request.
    checkmate::assert_integerish(
      x = sourceID,
      lower = 1,
      upper = max(getUnichemSources()$SourceID),
      # Reject NA explicitly: it is accepted by default and would otherwise
      # be sent to the API as a missing source ID.
      any.missing = FALSE,
      len = 1
    )
    body$sourceID <- sourceID
  }

  request <- base_url |>
    .build_request() |>
    httr2::req_body_json(body)

  .debug(funContext, "Request: ", capture.output(show(request)))
  request
}
29 changes: 29 additions & 0 deletions man/getUnichemSources.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/queryUnichem.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 73 additions & 0 deletions tests/testthat/test_unichem.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
library(testthat)
library(AnnotationGx)
library(checkmate)

# Checks the shape of the sources table and that every documented column is
# present. Requires network access to the UniChem API.
test_that("getUnichemSources returns a data.table with the correct columns", {
  sources <- getUnichemSources()

  expected_columns <- c(
    "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
    "BaseURL", "URL", "Details", "Description", "ReleaseNumber",
    "ReleaseDate", "LastUpdated", "UpdateComments"
  )

  expect_data_table(
    sources,
    all.missing = FALSE,
    min.rows = 40, # As of March 2024
    min.cols = 13, # As of March 2024
    col.names = "named",
    info = "The data.table should have the correct columns.
The min number of rows and columns may change over time and is set on
from UniChem as of March 2024."
  )

  # Assert the documented column set itself, not just the column count.
  expect_names(names(sources), must.include = expected_columns)
})


# Smoke test for queryUnichem(): a valid sourceID lookup yields both mapping
# tables, and a malformed InChIKey lookup raises an error. Requires network
# access to the UniChem API.
test_that("queryUnichem returns the expected results", {
  # Valid lookup by source ID
  by_source <- queryUnichem(
    type = "sourceID",
    compound = "444795",
    sourceID = 22
  )
  expect_true(is.list(by_source))
  for (field in c("External_Mappings", "UniChem_Mappings")) {
    expect_true(field %in% names(by_source))
  }

  # Invalid InChIKey must fail
  expect_error(
    queryUnichem(type = "inchikey", compound = "InchiKey123")
  )
})

# Checks the structure of both the raw and the processed result of an
# InChIKey lookup. Requires network access to the UniChem API.
test_that("queryUnichem returns the expected results 2", {
  inchikey <- "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"

  # Raw response: field names as delivered by the API.
  # TRUE/FALSE are spelled out because T and F are ordinary, reassignable
  # variables.
  result1 <- queryUnichem(type = "inchikey", compound = inchikey, raw = TRUE)

  expect_true(is.list(result1))

  checkmate::expect_names(
    names(result1),
    subset.of = c("compounds", "notFound", "response", "totalCompounds")
  )

  checkmate::expect_names(
    names(result1$compounds),
    subset.of = c("inchi", "sources", "standardInchiKey", "uci")
  )

  # Processed response: renamed mapping tables.
  result2 <- queryUnichem(type = "inchikey", compound = inchikey, raw = FALSE)

  expect_true(is.list(result2))

  checkmate::expect_names(
    names(result2$External_Mappings),
    subset.of = c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
  )

  checkmate::expect_names(
    names(result2$UniChem_Mappings),
    subset.of = c(
      "UniChem.UCI", "UniChem.InchiKey", "UniChem.Inchi",
      "UniChem.formula", "UniChem.connections", "UniChem.hAtoms"
    )
  )
})
Loading

0 comments on commit a15893f

Please sign in to comment.