diff --git a/R/unichem.R b/R/unichem.R index 168f41d..2e75a34 100644 --- a/R/unichem.R +++ b/R/unichem.R @@ -1,8 +1,11 @@ # Unichem API documentation: https://www.ebi.ac.uk/unichem/info/webservices - #' Get the list of sources in UniChem. +#' +#' @param all_columns `boolean` Whether to return all columns. Defaults to FALSE. +#' + #' #' Returns a `data.table` with the following columns: #' - `CompoundCount` (integer): Total of compounds provided by that source @@ -23,7 +26,7 @@ #' @return A data.table with the list of sources in UniChem. #' #' @export -getUnichemSources <- function() { +getUnichemSources <- function(all_columns = FALSE) { funContext <- .funContext("AnnotationGx::getUnichemSources") response <- .build_unichem_query("sources") |> @@ -58,7 +61,12 @@ getUnichemSources <- function() { "UpdateComments" ) - sources_dt[, new_order, with = FALSE] + + sources_dt <- sources_dt[, new_order, with = FALSE] + + if(all_columns) return(sources_dt) + + sources_dt[, c("Name", "SourceID")] } @@ -66,8 +74,8 @@ getUnichemSources <- function() { #' #' This function queries the UniChem API for a compound based on the provided parameters. #' -#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID". #' @param compound `character` or `integer` The compound identifier to search for. +#' @param type `character` The type of compound identifier to search for. Valid types are "uci", "inchi", "inchikey", and "sourceID". #' @param sourceID `integer` The source ID to search for if the type is "sourceID". Defaults to NULL. #' @param request_only `boolean` Whether to return the request only. Defaults to FALSE. #' @param raw `boolean` Whether to return the raw response. Defaults to FALSE. @@ -76,11 +84,11 @@ getUnichemSources <- function() { #' @return A list with the external mappings and the UniChem mappings. #' #' @examples -#' queryUnichem(type = "sourceID", compound = "444795", sourceID = 22) +#' queryUnichemCompound(type = "sourceID", compound = "444795", sourceID = 22) #' #' @export -queryUnichem <- function( - type, compound, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ... +queryUnichemCompound <- function( + compound, type, sourceID = NA_integer_, request_only = FALSE, raw = FALSE, ... ){ checkmate::assert_string(type) checkmate::assert_atomic(compound) @@ -105,7 +113,7 @@ queryUnichem <- function( mapped_sources_dt <- .asDT(response$compounds$sources) old_names <- c("compoundId", "shortName", "longName", "id", "url") - new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourcURL") + new_names <- c("compoundID", "Name", "NameLong", "sourceID", "sourceURL") setnames(mapped_sources_dt, old = old_names, new = new_names) External_Mappings <- mapped_sources_dt[, new_names, with = FALSE] diff --git a/R/unichem_helpers.R b/R/unichem_helpers.R index b733915..524ba0e 100644 --- a/R/unichem_helpers.R +++ b/R/unichem_helpers.R @@ -25,11 +25,12 @@ url <- httr2::url_parse(unichem_api) url$path <- .buildURL(url$path, endpoint) - .debug(funContext, "URL: ", utils::capture.output(show(url))) + output <- httr2::url_build(url) - if (query_only) return(url) + .debug(funContext, "URL: ", output ) - return(httr2::url_build(url)) + if (query_only) return(url) + httr2::url_build(url) } @@ -60,7 +61,7 @@ base_url <- .build_unichem_query("compounds") - .debug(funContext, "Base URL: ", utils::capture.output(show(base_url))) + .debug(funContext, "Base URL: ", base_url) body <- list( type = type, @@ -82,6 +83,6 @@ .build_request() |> httr2::req_body_json(body) - .debug(funContext, "Request: ", utils::capture.output(show(request))) + .debug(funContext, "Request: ", request) return(request) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 50f063e..1b5fe40 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -2,3 +2,13 @@ url: https://bhklab.github.io/AnnotationGx/ template: bootstrap: 5 +articles: +- title: Articles + navbar: ~ + contents: + - Introduction + - Cellosaurus + - ChEMBL + - OncoTree + - PubChemAPI + - Unichem diff --git a/tests/testthat/test_unichem.R b/tests/testthat/test_unichem.R index dfba62a..1e45984 100644 --- a/tests/testthat/test_unichem.R +++ b/tests/testthat/test_unichem.R @@ -3,7 +3,7 @@ library(AnnotationGx) library(checkmate) test_that("getUnichemSources returns a data.table with the correct columns", { - sources <- getUnichemSources() + sources <- getUnichemSources(all_columns = TRUE) expected_columns <- c( "Name", "NameLabel", "NameLong", "SourceID", "CompoundCount", @@ -24,21 +24,21 @@ test_that("getUnichemSources returns a data.table with the correct columns", { }) -test_that("queryUnichem returns the expected results", { +test_that("queryUnichemCompound returns the expected results", { # Test case 1 - result1 <- queryUnichem(type = "sourceID", compound = "444795", sourceID = 22) + result1 <- queryUnichemCompound(type = "sourceID", compound = "444795", sourceID = 22) expect_true(is.list(result1)) expect_true("External_Mappings" %in% names(result1)) expect_true("UniChem_Mappings" %in% names(result1)) # Test case 2 - expect_error(queryUnichem(type = "inchikey", compound = "InchiKey123")) + expect_error(queryUnichemCompound(type = "inchikey", compound = "InchiKey123")) }) -test_that("queryUnichem returns the expected results 2", { +test_that("queryUnichemCompound returns the expected results 2", { # Test case 1 - result1 <- queryUnichem(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = T) + result1 <- queryUnichemCompound(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = T) expect_true(is.list(result1)) @@ -52,7 +52,7 @@ test_that("queryUnichem returns the expected results 2", { subset.of=c("inchi", "sources", "standardInchiKey", "uci") ) - result2 <- queryUnichem(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = F) + result2 <- queryUnichemCompound(type = "inchikey", compound = "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", raw = F) expect_true(is.list(result2)) @@ -70,4 +70,4 @@ test_that("queryUnichem returns the expected results 2", { ) -}) \ No newline at end of file +}) diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd index a59ca9d..a50e873 100644 --- a/vignettes/Introduction.Rmd +++ b/vignettes/Introduction.Rmd @@ -1,5 +1,5 @@ --- -title: "1. Introduction to AnnotationGx" +title: "Introduction to AnnotationGx" author: - name: Jermiah Joseph affiliation: diff --git a/vignettes/Unichem.Rmd b/vignettes/Unichem.Rmd new file mode 100644 index 0000000..94d9fe9 --- /dev/null +++ b/vignettes/Unichem.Rmd @@ -0,0 +1,113 @@ +--- +title: "Querying Unichem Database" +author: + - name: Jermiah Joseph, Shahzada Muhammad Shameel Farooq, and Christopher Eeles +output: + BiocStyle::html_document: + self_contained: yes + toc: true + toc_float: true + toc_depth: 2 + code_folding: show +date: "`r doc_date()`" +package: "`r pkg_ver('AnnotationGx')`" +vignette: > + %\VignetteIndexEntry{Querying Unichem Database} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html +) +``` + + +# Introduction to the Unichem API + +The UniChem database provides a publicly available REST API for +programmatic retrieval of mappings from standardized structural compound +identifiers to unique compound IDs across a range of large online +cheminformatic databases such as PubChem, ChEMBL, DrugBank and many more. +The service accepts POST requests to two different end-points: +`/compound` and `/connectivity`. Both endpoints accept query parameters via the +POST body in JSON format. The `/compound` API returns exact matches for the +queried compound, while the `/connectivity` API uses layers of the International +Chemical Identifier (InChI) of the query compound to return exact matches as +well as structurally related compounds such as isomers, salts, ionizations +and more. +[@UniChemBeta; @chambersUniChemUnifiedChemical2013] + +The functions in `AnnotationGx` have been designed to allow package users to +easily query UniChem resources without any pre-existing knowledge of +HTTP requests or the API specifications. In doing so we hope to provide an +R native interface for mapping between various cheminformatic databases, +accessible to anyone familar with using R functions! + +```{r load_pkg_example} +library(AnnotationGx) +``` + + +# Available Databases + +To see a table of database identifiers available via UniChem, you can call +the `getUniChemSources` function. +By default, just the database shortname ("Name") and UniChem's ID for it ("SourceID") columns +are returned. +To return all columns, pass the `all_columns = TRUE` argument + +```{r get_sources_short_echo} +getUnichemSources() +``` + +When mapping using the `queryUnichemCompound` function, these are the sources that can be used from, +and the databases to which the compound mappings will be returned. + +# Querying UniChem Compound API + +The `queryUnichemCompound` function allows you to query the UniChem Compound API +to retrieve mappings for a given compound identifier. The function takes two mandatory arguments. +The first is the `compound` argument which is the compound identifier to be queried. +The second is the `type` argument which is the type of compound identifier to search for. +Options are "uci", "inchi", "inchikey", and "sourceID". +The `sourceID` argument is optional and is only required if the `type` argument is "sourceID". + +The function returns a list of: + +1. "External_Mappings" `data.table` containing the mapping to other Databases with the following headings: + 1. "compoundID" `character` The compound identifier + 2. "Name" `character` The name of the database + 3. "NameLong" `character` The long name of the database + 4. "SourceID" `character` The UniChem Source ID + 5. "sourceURL" `character` The URL of the source +2. "UniChem_Mappings" `list` of the following six mappings: + 1. "UCI" `character` The UniChem Identifier + 2. "InchiKey" `character` The InChIKey + 3. "Inchi" `character` The InChI + 4. "formula" `character` The molecular formula + 5. "connections" `character` connection representation "1-6(10)13-8-5-3-2-4-7(8)9(11)12" + 6. "hAtoms" `character` hydrogen atom connections "2-5H,1H3,(H,11,12)" + + +#### Example Searching using `uci` (UniChem Identifier) +Note: This type of query requires you to know the UniChem Identifier for the compound. + +```{r uci query} + +queryUnichemCompound(compound = "161671", type = "uci") + +``` + +