Skip to content

Commit

Permalink
Add new function getUnichemSources() with optional parameter all_colu…
Browse files Browse the repository at this point in the history
…mns. Modify queryUnichem() to queryUnichemCompound(). Update function and variable names for consistency. Update documentation and examples.
  • Loading branch information
jjjermiah committed Mar 15, 2024
1 parent ce9086a commit db6facc
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 22 deletions.
24 changes: 16 additions & 8 deletions R/unichem.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

# Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices


#' Get the list of sources in UniChem.
#'
#' @param all_columns `boolean` Whether to return all columns. Defaults to FALSE.
#'

#'
#' Returns a `data.table` with the following columns:
#' - `CompoundCount` (integer): Total of compounds provided by that source
Expand All @@ -23,7 +26,7 @@
#' @return A data.table with the list of sources in UniChem.
#'
#' @export
getUnichemSources <- function() {
getUnichemSources <- function(all_columns = FALSE) {
funContext <- .funContext("AnnotationGx::getUnichemSources")

response <- .build_unichem_query("sources") |>
Expand Down Expand Up @@ -58,16 +61,21 @@ getUnichemSources <- function() {
"UpdateComments"
)

sources_dt[, new_order, with = FALSE]

sources_dt <- sources_dt[, new_order, with = FALSE]

if(all_columns) return(sources_dt)

sources_dt[, c("Name", "SourceID")]

}

#' Query UniChem for a compound.
#'
#' This function queries the UniChem API for a compound based on the provided parameters.
#'
#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param compound `character` or `integer` The compound identifier to search for.
#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID".
#' @param sourceID `integer` The source ID to search for if the type is "sourceID". Defaults to NULL.
#' @param request_only `boolean` Whether to return the request only. Defaults to FALSE.
#' @param raw `boolean` Whether to return the raw response. Defaults to FALSE.
Expand All @@ -76,11 +84,11 @@ getUnichemSources <- function() {
#' @return A list with the external mappings and the UniChem mappings.
#'
#' @examples
#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
#' queryUnichemCompound(type = "sourceID", compound = "444795", sourceID = 22)
#'
#' @export
queryUnichem <- function(
type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
queryUnichemCompound <- function(
compound, type, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ...
){
checkmate::assert_string(type)
checkmate::assert_atomic(compound)
Expand All @@ -105,7 +113,7 @@ queryUnichem <- function(
mapped_sources_dt <- .asDT(response$compounds$sources)
old_names <- c("compoundId", "shortName", "longName", "id", "url")

new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL")
new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourceURL")
setnames(mapped_sources_dt, old = old_names, new = new_names)

External_Mappings <- mapped_sources_dt[, new_names, with = FALSE]
Expand Down
11 changes: 6 additions & 5 deletions R/unichem_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
url <- httr2::url_parse(unichem_api)
url$path <- .buildURL(url$path, endpoint)

.debug(funContext, "URL: ", utils::capture.output(show(url)))
output <- httr2::url_build(url)

if (query_only) return(url)
.debug(funContext, "URL: ", output )

return(httr2::url_build(url))
if (query_only) return(url)
httr2::url_build(url)
}


Expand Down Expand Up @@ -60,7 +61,7 @@

base_url <- .build_unichem_query("compounds")

.debug(funContext, "Base URL: ", utils::capture.output(show(base_url)))
.debug(funContext, "Base URL: ", base_url)

body <- list(
type = type,
Expand All @@ -82,6 +83,6 @@
.build_request() |>
httr2::req_body_json(body)

.debug(funContext, "Request: ", utils::capture.output(show(request)))
.debug(funContext, "Request: ", request)
return(request)
}
10 changes: 10 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,13 @@ url: https://bhklab.github.io/AnnotationGx/
template:
bootstrap: 5

articles:
- title: Articles
navbar: ~
contents:
- Introduction
- Cellosaurus
- ChEMBL
- OncoTree
- PubChemAPI
- Unichem
16 changes: 8 additions & 8 deletions tests/testthat/test_unichem.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ library(AnnotationGx)
library(checkmate)

test_that("getUnichemSources returns a data.table with the correct columns", {
sources <- getUnichemSources()
sources <- getUnichemSources(all_columns = TRUE)

expected_columns <- c(
"Name", "NameLabel", "NameLong", "SourceID", "CompoundCount",
Expand All @@ -24,21 +24,21 @@ test_that("getUnichemSources returns a data.table with the correct columns", {
})


test_that("queryUnichem returns the expected results", {
test_that("queryUnichemCompound returns the expected results", {
# Test case 1
result1 <- queryUnichem(type = "sourceID", compound = "444795", sourceID = 22)
result1 <- queryUnichemCompound(type = "sourceID", compound = "444795", sourceID = 22)
expect_true(is.list(result1))
expect_true("External_Mappings" %in% names(result1))
expect_true("UniChem_Mappings" %in% names(result1))

# Test case 2
expect_error(queryUnichem(type = "inchikey", compound = "InchiKey123"))
expect_error(queryUnichemCompound(type = "inchikey", compound = "InchiKey123"))

})

test_that("queryUnichem returns the expected results 2", {
test_that("queryUnichemCompound returns the expected results 2", {
# Test case 1
result1 <- queryUnichem(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = T)
result1 <- queryUnichemCompound(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = T)

expect_true(is.list(result1))

Expand All @@ -52,7 +52,7 @@ test_that("queryUnichem returns the expected results 2", {
subset.of=c("inchi", "sources", "standardInchiKey", "uci")
)

result2 <- queryUnichem(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = F)
result2 <- queryUnichemCompound(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = F)

expect_true(is.list(result2))

Expand All @@ -70,4 +70,4 @@ test_that("queryUnichem returns the expected results 2", {
)


})
})
2 changes: 1 addition & 1 deletion vignettes/Introduction.Rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: "1. Introduction to AnnotationGx"
title: "Introduction to AnnotationGx"
author:
- name: Jermiah Joseph
affiliation:
Expand Down
113 changes: 113 additions & 0 deletions vignettes/Unichem.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
---
title: "Querying Unichem Database"
author:
- name: Jermiah Joseph, Shahzada Muhammad Shameel Farooq, and Christopher Eeles
output:
BiocStyle::html_document:
self_contained: yes
toc: true
toc_float: true
toc_depth: 2
code_folding: show
date: "`r doc_date()`"
package: "`r pkg_ver('AnnotationGx')`"
vignette: >
%\VignetteIndexEntry{Querying Unichem Database}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

<style type="text/css">
.main-container {
max-width: 1800px;
margin-left: auto;
margin-right: auto;
}
</style>

```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
)
```


# Introduction to the Unichem API

The UniChem database provides a publicly available REST API for
programmatic retrieval of mappings from standardized structural compound
identifiers to unique compound IDs across a range of large online
cheminformatic databases such as PubChem, ChEMBL, DrugBank and many more.
The service accepts POST requests to two different end-points:
`/compound` and `/connectivity`. Both endpoints accept query parameters via the
POST body in JSON format. The `/compound` API returns exact matches for the
queried compound, while the `/connectivity` API uses layers of the International
Chemical Identifier (InChI) of the query compound to return exact matches as
well as structurally related compounds such as isomers, salts, ionizations
and more.
[@UniChemBeta; @chambersUniChemUnifiedChemical2013]

The functions in `AnnotationGx` have been designed to allow package users to
easily query UniChem resources without any pre-existing knowledge of
HTTP requests or the API specifications. In doing so we hope to provide an
R native interface for mapping between various cheminformatic databases,
accessible to anyone familar with using R functions!

```{r load_pkg_example}
library(AnnotationGx)
```


# Available Databases

To see a table of database identifiers available via UniChem, you can call
the `getUniChemSources` function.
By default, just the database shortname ("Name") and UniChem's ID for it ("SourceID") columns
are returned.
To return all columns, pass the `all_columns = TRUE` argument

```{r get_sources_short_echo}
getUnichemSources()
```

When mapping using the `queryUnichemCompound` function, these are the sources that can be used from,
and the databases to which the compound mappings will be returned.

# Querying UniChem Compound API

The `queryUnichemCompound` function allows you to query the UniChem Compound API
to retrieve mappings for a given compound identifier. The function takes two mandatory arguments.
The first is the `compound` argument which is the compound identifier to be queried.
The second is the `type` argument which is the type of compound identifier to search for.
Options are "uci", "inchi", "inchikey", and "sourceID".
The `sourceID` argument is optional and is only required if the `type` argument is "sourceID".

The function returns a list of:

1. "External_Mappings" `data.table` containing the mapping to other Databases with the following headings:
1. "compoundID" `character` The compound identifier
2. "Name" `character` The name of the database
3. "NameLong" `character` The long name of the database
4. "SourceID" `character` The UniChem Source ID
5. "sourceURL" `character` The URL of the source
2. "UniChem_Mappings" `list` of the following six mappings:
1. "UCI" `character` The UniChem Identifier
2. "InchiKey" `character` The InChIKey
3. "Inchi" `character` The InChI
4. "formula" `character` The molecular formula
5. "connections" `character` connection representation "1-6(10)13-8-5-3-2-4-7(8)9(11)12"
6. "hAtoms" `character` hydrogen atom connections "2-5H,1H3,(H,11,12)"


#### Example Searching using `uci` (UniChem Identifier)
Note: This type of query requires you to know the UniChem Identifier for the compound.

```{r uci query}
queryUnichemCompound(compound = "161671", type = "uci")
```


0 comments on commit db6facc

Please sign in to comment.