-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #26 from bhklab/unichem
Unichem
- Loading branch information
Showing
9 changed files
with
475 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: AnnotationGx | ||
Title: AnnotationGx: A package for building, updating and querying an | ||
annotation database for pharmaco-genomic data | ||
Version: 0.0.0.9077 | ||
Version: 0.0.0.9080 | ||
Authors@R: c( | ||
person("Jermiah", "Joseph", role = c("aut", "cre"), | ||
email = "[email protected]"), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
|
||
# Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices | ||
|
||
|
||
#' Get the list of sources in UniChem.
#'
#' Queries the UniChem `sources` endpoint and returns one row per source
#' database. The API field names are renamed and the columns reordered so the
#' result has the following columns:
#' - `Name` (string): Short name of the source database
#' - `NameLabel` (string): Machine readable label name of the source database
#' - `NameLong` (string): Full name of the source database
#' - `SourceID` (integer): Unique ID for the source database
#' - `CompoundCount` (integer): Total of compounds provided by that source
#' - `BaseURL` (string): Source Base URL for compounds
#' - `URL` (string): Main URL for the source
#' - `Details` (string): Notes about the source
#' - `Description` (string): Source database description
#' - `ReleaseNumber` (integer): Release number of the source database data
#'   stored in UniChem
#' - `ReleaseDate` (string): Date in which the source database was released
#' - `LastUpdated` (string): Date in which the source database was last updated
#' - `UpdateComments` (string): Notes about the update process of that source
#'   to UniChem
#'
#' An error is raised through `.err()` when the API response status is not
#' `"Success"`.
#'
#' @return A data.table with the list of sources in UniChem.
#'
#' @export
getUnichemSources <- function() {
  funContext <- .funContext("AnnotationGx::getUnichemSources")

  # Build, send and decode the request for the "sources" endpoint.
  sources_url <- .build_unichem_query("sources")
  sources_req <- .build_request(sources_url)
  sources_resp <- .perform_request(sources_req)
  response <- .parse_resp_json(sources_resp)

  if (response$response != "Success") {
    .err(funContext, "Unichem API request failed.")
  }

  .debug(funContext, sprintf("Unichem sourceCount: %s", response$totalSources))

  sources_dt <- .asDT(response$sources)

  # API field name (left) -> column name exposed by this package (right).
  column_map <- c(
    UCICount = "CompoundCount",
    baseIdUrl = "BaseURL",
    description = "Description",
    lastUpdated = "LastUpdated",
    name = "Name",
    nameLabel = "NameLabel",
    nameLong = "NameLong",
    sourceID = "SourceID",
    srcDetails = "Details",
    srcReleaseDate = "ReleaseDate",
    srcReleaseNumber = "ReleaseNumber",
    srcUrl = "URL",
    updateComments = "UpdateComments"
  )
  setnames(sources_dt, names(column_map), unname(column_map))

  # Identification columns first, then URLs, then release/update metadata.
  column_order <- c(
    "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
    "BaseURL", "URL", "Details",
    "Description", "ReleaseNumber", "ReleaseDate", "LastUpdated",
    "UpdateComments"
  )

  sources_dt[, ..column_order]
}
|
||
#' Query UniChem for a compound.
#'
#' This function queries the UniChem API for a compound based on the provided
#' parameters.
#'
#' @param type `character(1)` The type of compound identifier to search for.
#'   Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound `character` or `integer` The compound identifier to search
#'   for.
#' @param sourceID `integer` The source ID to search for if the type is
#'   "sourceID". Defaults to `NA_integer_`.
#' @param request_only `logical(1)` Whether to return the request only,
#'   without performing it. Defaults to FALSE.
#' @param raw `logical(1)` Whether to return the parsed response as-is.
#'   Defaults to FALSE.
#' @param ... Additional arguments passed on to the internal request builder.
#'
#' @return
#' - If `request_only` is TRUE: the request object, not yet performed.
#' - If `raw` is TRUE: the parsed JSON response, returned before the response
#'   status is checked.
#' - Otherwise: a list with two elements, `External_Mappings` (a data.table
#'   with columns `compoundID`, `Name`, `NameLong`, `sourceID`, `sourcURL`) and
#'   `UniChem_Mappings` (a list with the UniChem UCI, InChIKey, InChI, formula,
#'   connections and hAtoms fields of the response).
#'
#' @examples
#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
#'
#' @export
queryUnichem <- function(
  type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
) {
  funContext <- .funContext("AnnotationGx::queryUnichem")

  checkmate::assert_string(type)
  checkmate::assert_atomic(compound)
  checkmate::assert_integerish(sourceID)
  # Both switches drive `if()` below, so they must be single non-NA logicals.
  checkmate::assert_flag(request_only)
  checkmate::assert_flag(raw)

  request <- .build_unichem_compound_req(type, compound, sourceID, ...)
  if (request_only) return(request)

  response <- request |>
    .perform_request() |>
    .parse_resp_json()

  if (raw) return(response)

  if (response$response != "Success") {
    .err(funContext, "Unichem API request failed.")
  }

  # Mapping names to be consistent with other API calls.
  # NOTE: "sourcURL" (sic) is the column name returned to callers; the
  # spelling is kept as-is for backward compatibility.
  mapped_sources_dt <- .asDT(response$compounds$sources)
  old_names <- c("compoundId", "shortName", "longName", "id", "url")
  new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
  setnames(mapped_sources_dt, old = old_names, new = new_names)

  External_Mappings <- mapped_sources_dt[, ..new_names]

  UniChem_Mappings <- list(
    UniChem.UCI = response$compounds$uci,
    UniChem.InchiKey = response$compounds$standardInchiKey,
    UniChem.Inchi = response$compounds$inchi$inchi,
    UniChem.formula = response$compounds$inchi$formula,
    UniChem.connections = response$compounds$inchi$connections,
    UniChem.hAtoms = response$compounds$inchi$hAtoms
  )

  list(
    External_Mappings = External_Mappings,
    UniChem_Mappings = UniChem_Mappings
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#' Build a UniChem query URL
#'
#' This function builds a UniChem query URL based on the specified endpoint.
#' The endpoint is appended to the path of the UniChem API base URL
#' (`https://www.ebi.ac.uk/unichem/api/v1`).
#'
#' @param endpoint `character(1)` The UniChem endpoint to query (valid
#'   options: "compounds", "connectivity", "images", "sources").
#' @param query_only `logical(1)` If TRUE, return the parsed URL object
#'   instead of the URL string (default: FALSE).
#'
#' @return The parsed URL object produced by `httr2::url_parse()` (with the
#'   endpoint added to its path) if `query_only` is TRUE, otherwise the URL
#'   string produced by `httr2::url_build()`.
#'
#' @examples
#' .build_unichem_query("sources")
#' .build_unichem_query("connectivity", query_only = TRUE)
#'
#' @noRd
#' @keywords internal
.build_unichem_query <- function(
  endpoint, query_only = FALSE
) {
  funContext <- .funContext("AnnotationGx:::.build_unichem_query")

  valid_endpoints <- c("compounds", "connectivity", "images", "sources")
  # Exactly one endpoint is required: `assert_subset()` would also accept
  # `character(0)` or several endpoints, neither of which maps to a single
  # API path.
  checkmate::assert_choice(endpoint, valid_endpoints)

  unichem_api <- "https://www.ebi.ac.uk/unichem/api/v1"
  url <- httr2::url_parse(unichem_api)
  url$path <- .buildURL(url$path, endpoint)

  .debug(funContext, "URL: ", capture.output(show(url)))

  if (query_only) return(url)

  httr2::url_build(url)
}
|
||
|
||
#' Build a UniChem compound request
#'
#' This function builds a UniChem compound request based on the provided
#' parameters. The request targets the "compounds" endpoint and carries a JSON
#' body with `type`, `compound` and, for `type = "sourceID"`, `sourceID`.
#'
#' @param type `character(1)` The type of compound identifier to search for.
#'   Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound The compound identifier to search for.
#' @param sourceID The source ID to search for if the type is "sourceID".
#'   Defaults to NULL. Only validated and used when `type` is "sourceID", in
#'   which case it must be a single, non-missing whole number between 1 and
#'   the largest `SourceID` reported by `getUnichemSources()`.
#' @param ... Additional arguments. Currently unused.
#'
#' @return A `httr2_request` request object for the UniChem compound query.
#'
#' @examples
#' .build_unichem_compound_req(type = "uci", compound = "538323")
#' .build_unichem_compound_req(type = "sourceID", sourceID = 22, compound = "2244")
#'
#' @noRd
#' @keywords internal
.build_unichem_compound_req <- function(
  type, compound, sourceID = NULL, ...
) {
  funContext <- .funContext("AnnotationGx:::.build_unichem_compound_req")

  valid_types <- c("uci", "inchi", "inchikey", "sourceID")
  # `type` is used in a scalar `if()` below, so it must be exactly one of the
  # valid types (not an empty or multi-element vector).
  checkmate::assert_choice(type, valid_types)

  base_url <- .build_unichem_query("compounds")

  .debug(funContext, "Base URL: ", capture.output(show(base_url)))

  body <- list(
    type = type,
    compound = compound
  )

  if (type == "sourceID") {
    # `any.missing = FALSE` rejects NA, which would otherwise pass the range
    # check and be sent in the request body.
    # Note: the upper bound is looked up via getUnichemSources() on each call.
    checkmate::assert_integerish(
      x = sourceID,
      lower = 1,
      upper = max(getUnichemSources()$SourceID),
      any.missing = FALSE,
      len = 1
    )
    body$sourceID <- sourceID
  }

  request <- base_url |>
    .build_request() |>
    httr2::req_body_json(body)

  .debug(funContext, "Request: ", capture.output(show(request)))
  request
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
library(testthat) | ||
library(AnnotationGx) | ||
library(checkmate) | ||
|
||
# Checks the shape of the sources table and that every documented column is
# present. Calls the UniChem service through getUnichemSources().
test_that("getUnichemSources returns a data.table with the correct columns", {
  sources <- getUnichemSources()

  expected_columns <- c(
    "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
    "BaseURL", "URL", "Details", "Description", "ReleaseNumber",
    "ReleaseDate", "LastUpdated", "UpdateComments"
  )

  expect_data_table(
    sources,
    all.missing = FALSE,
    min.rows = 40, # As of March 2024
    min.cols = 13, # As of March 2024
    col.names = "named",
    info = "The data.table should have the correct columns.
    The min number of rows and columns may change over time and is set on
    from UniChem as of March 2024."
  )

  # `expected_columns` was previously defined but never asserted; this is the
  # check that the test description promises.
  checkmate::expect_names(names(sources), must.include = expected_columns)
})
|
||
|
||
# Exercises queryUnichem() with one valid source-based lookup and one
# malformed InChIKey.
test_that("queryUnichem returns the expected results", {
  # Valid lookup: the result is a list holding both mapping components.
  lookup <- queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
  expect_true(is.list(lookup))
  for (component in c("External_Mappings", "UniChem_Mappings")) {
    expect_true(component %in% names(lookup))
  }

  # Malformed InChIKey: the call is expected to raise an error.
  expect_error(queryUnichem(type = "inchikey", compound = "InchiKey123"))
})
|
||
# Checks the element names of both the raw and the processed output of
# queryUnichem() for a single InChIKey lookup.
test_that("queryUnichem returns the expected results 2", {
  inchikey <- "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"

  # Raw output: the parsed API response is returned unchanged.
  result1 <- queryUnichem(type = "inchikey", compound = inchikey, raw = TRUE)

  expect_true(is.list(result1))

  checkmate::expect_names(
    names(result1),
    subset.of = c("compounds", "notFound", "response", "totalCompounds")
  )

  checkmate::expect_names(
    names(result1$compounds),
    subset.of = c("inchi", "sources", "standardInchiKey", "uci")
  )

  # Processed output: renamed external mappings plus UniChem mappings.
  result2 <- queryUnichem(type = "inchikey", compound = inchikey, raw = FALSE)

  expect_true(is.list(result2))

  checkmate::expect_names(
    names(result2$External_Mappings),
    subset.of = c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
  )

  checkmate::expect_names(
    names(result2$UniChem_Mappings),
    subset.of = c(
      "UniChem.UCI", "UniChem.InchiKey", "UniChem.Inchi",
      "UniChem.formula", "UniChem.connections", "UniChem.hAtoms"
    )
  )
})
Oops, something went wrong.