From 77a99f3d9b72f87c880fcaceb06c3369e18559cb Mon Sep 17 00:00:00 2001
From: Jermiah <jermiahjoseph@gmail.com>
Date: Wed, 13 Dec 2023 20:56:03 +0000
Subject: [PATCH] fix bugs in parseJSON, add multiCID functions

---
 NAMESPACE                         |  1 +
 R/getPubChem-helpers.R            |  2 +-
 R/getPubChem.R                    | 97 +++++++++++++++++--------------
 man/dot-getPubChemAnnotationDT.Rd | 11 ++++
 man/downloadAndExtract.Rd         |  2 +-
 man/getPubChemAnnotation.Rd       |  5 +-
 man/getPubChemAnnotations.Rd      | 27 +++++++++
 7 files changed, 96 insertions(+), 49 deletions(-)
 create mode 100644 man/dot-getPubChemAnnotationDT.Rd
 create mode 100644 man/getPubChemAnnotations.Rd

diff --git a/NAMESPACE b/NAMESPACE
index c5d2a4d..197e657 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -35,6 +35,7 @@ export(getGencodeGRangesAnnotated)
 export(getGuideToPharm)
 export(getInfoFromCelllineInput)
 export(getPubChemAnnotation)
+export(getPubChemAnnotations)
 export(getPubChemCompound)
 export(getPubChemFromNSC)
 export(getPubChemSubstance)
diff --git a/R/getPubChem-helpers.R b/R/getPubChem-helpers.R
index ac514c1..24ee64c 100644
--- a/R/getPubChem-helpers.R
+++ b/R/getPubChem-helpers.R
@@ -23,7 +23,7 @@
         print(paste0("Throttling at ", percentage, "%. Sleeping for 60 seconds."))
         Sys.sleep(60)
     }else{
-        Sys.sleep(5)
+        Sys.sleep(max(as.numeric(percentages)))
     }   
 
     return(as.integer(percentage) > 15)
diff --git a/R/getPubChem.R b/R/getPubChem.R
index 948273b..b59fd28 100644
--- a/R/getPubChem.R
+++ b/R/getPubChem.R
@@ -290,13 +290,24 @@ queryPubChem <- function(id, domain='compound', namespace='cid', operation=NA,
 #' @export
 parseJSON <- function(response, ..., encoding='UTF-8', query_only=FALSE) {
     if (isTRUE(query_only)) return(response)
+    # Extract the body of `response` as text, decoded with the caller-supplied
+    # `encoding`, so that the JSON parsing below works on plain text.
+    response <- content(response, encoding=encoding, as='text', type='JSON')
+
+    # With no text to parse, hand the NULL/NA marker straight back.
+    if (is.null(response)) return(NULL)
+    if (is.na(response)) return(NA)
+
     tryCatch({
-        fromJSON(content(response, ..., as='text', type='JSON',
-            encoding=encoding))
+        # NOTE(review): `...` now goes to fromJSON(), not content() -- confirm
+        # the documentation of `...` matches.
+        fromJSON(response, ...)
     },
     error=function(e) {
-        fromJSON(content(response, ..., type='JSON', encoding=encoding))
+        # Text that cannot be parsed as JSON is reported as NA instead of
+        # propagating the error.
+        NA
     })
 }
 
 #' Query the PubChem REST API, with the result automatically converted from
@@ -768,7 +779,6 @@ getPubChemAnnotation <- function(
     compound,
     annotationType = 'ChEMBL ID',
     url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound',
-    output = 'JSON', 
     timeout_s = 29,
     retries = 3,
     quiet = TRUE,
@@ -786,8 +796,8 @@ getPubChemAnnotation <- function(
         # TODO:: add a check to see if the compound is a valid CID or SID
         # TODO:: allow for variaitons of headers due to spelling errors
         # Temporary:
-        if(header == "DILI") queryURL <- paste0(.buildURL(url, compound, output), '?heading=', "Drug Induced Liver Injury")
-        else queryURL <- paste0(.buildURL(url, compound, output), '?heading=', header)
+        if(header == "DILI") queryURL <- paste0(.buildURL(url, compound, 'JSON'), '?heading=', "Drug Induced Liver Injury")
+        else queryURL <- paste0(.buildURL(url, compound, 'JSON'), '?heading=', header)
 
         tryCatch({
             result <- RETRY('GET', URLencode(queryURL), times = retries, quiet = quiet)
@@ -798,58 +808,57 @@ getPubChemAnnotation <- function(
 
         .checkThrottlingStatus(result, throttleMessage = throttleMessage)
         result <- parseJSON(result)
-        # switch(header,
-        #     'ATC Code'=return(.parseATCannotations(annotationDT)),
-        #     'Drug Induced Liver Injury'=return(.parseDILIannotations(annotationDT)),
-        #     'NSC Number'=return(.parseNSCannotations(annotationDT)),
-        #     'CTD Chemical-Gene Interactions'=return(.parseCTDannotations(annotationDT)),
-        #     'Names and Synonyms'=return(.parseNamesAndSynonyms(annotationDT)),
-        #     'Synonyms and Identifiers'=return(.parseSynonymsAndIdentifiers(annotationDT)),
-        #     'CAS'=return(.parseCASannotations(annotationDT)),
-        #     tryCatch({
-        #         parseFUN(annotationDT)
-        #     },
-        #     error=function(e) {
-        #         .warning(funContext, 'The parseFUN function failed: ', e,
-        #             '. Returning unparsed results instead. Please test the parseFUN
-        #             on the returned data.')
-        #         return(annotationDT)
-        #     })
-        # )
-
-
-        if (header == 'ChEMBL ID') {
-            result <- .parseCHEMBLresponse(result)
-        }else if (header == 'NSC Number'){
-            result <- .parseNSCresponse(result)
-        }else if (header == 'DILI' || header =='Drug Induced Liver Injury'){
-            result <- .parseDILIresponse(result)
-        }else if (header == 'CAS'){
-            result <- .parseCASresponse(result)
-        }else if (header == 'ATC Code'){
-            result <- .parseATCresponse(result)
-        }
-        
-        # Using switch instead of if statements
         result <- switch(
             header,
             'ChEMBL ID'     = .parseCHEMBLresponse(result),
             'NSC Number'    = .parseNSCresponse(result),
             'DILI'          = .parseDILIresponse(result),
             'CAS'           = .parseCASresponse(result),
-            'ATC Code'      = .parseATCresponse(result)
-        )
-
+            'ATC Code'      = .parseATCresponse(result),
+            'Drug Induced Liver Injury' = .parseDILIresponse(result))  # long-form name of 'DILI'
         if (is.null(result)) result <- list(compound, "N/A")
         else result <- list(compound,result)
+
         names(result) <- c("cid", header)
         return(result)
     }
- 
+
+
+#' Retrieve PubChem annotations for a given compound
+#'
+#' This function retrieves PubChem annotations for a given compound using the specified annotations.
+#'
+#' @param compound The compound for which PubChem annotations are to be retrieved.
+#' @param annotations A character vector specifying the annotations to retrieve.
+#' @param ... Additional arguments to be passed to getPubChemAnnotation().
+#'
+#' @return A merged data table containing the PubChem annotations for the specified compound.
+#'
+#' @examples
+#' getPubChemAnnotations(
+#'      compound = "36314", 
+#'      annotations= c('ChEMBL ID', 'NSC Number', 'Drug Induced Liver Injury'))
+#'
+#' @export
+getPubChemAnnotations <- function(compound, annotations, ...){
+    # One data.table per requested annotation, each carrying a "cid" column to merge on.
+    annotationDTs <- lapply(annotations, .getPubChemAnnotationDT, compound = compound, ...)
+    Reduce(function(x, y) merge(x, y, by = "cid", all.x = TRUE), annotationDTs)
+}
+
+
+#' Function that returns a DT of getPubChemAnnotation results 
+.getPubChemAnnotationDT <- function(compound, annotationType, ...){
+    # Fetch one annotation, then coerce the returned list into a data.table.
+    data.table::as.data.table(getPubChemAnnotation(compound, annotationType, ...))
+}
+
+
+
 #' Function that parses the results of the PubChem PUG-VIEW API for the CHEMBL ID header
 .parseCHEMBLresponse <- function(result){
     result <- result$Record$Reference$SourceID
-    result <- gsub("::Compound", "", result)
+    result <- gsub("Compound::", "", result)  # drop every literal "Compound::" marker
     return(result)
 }
 
diff --git a/man/dot-getPubChemAnnotationDT.Rd b/man/dot-getPubChemAnnotationDT.Rd
new file mode 100644
index 0000000..9a28910
--- /dev/null
+++ b/man/dot-getPubChemAnnotationDT.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/getPubChem.R
+\name{.getPubChemAnnotationDT}
+\alias{.getPubChemAnnotationDT}
+\title{Function that returns a DT of getPubChemAnnotation results}
+\usage{
+.getPubChemAnnotationDT(compound, annotationType, ...)
+}
+\description{
+Function that returns a DT of getPubChemAnnotation results
+}
diff --git a/man/downloadAndExtract.Rd b/man/downloadAndExtract.Rd
index 66fa13f..7d0735d 100644
--- a/man/downloadAndExtract.Rd
+++ b/man/downloadAndExtract.Rd
@@ -24,5 +24,5 @@ the specified \code{extract_fun} for more details.}
 Download a compressed file from a remote URL and extract it.
 }
 \seealso{
-\link[utils:unzip]{utils::unzip}, \link[utils:untar]{utils::untar}, \link[R.utils:gunzip]{R.utils::gunzip}, \link[R.utils:bunzip2]{R.utils::bunzip2}
+\link[utils:unzip]{utils::unzip}, \link[utils:untar]{utils::untar}, \link[R.utils:compressFile]{R.utils::gunzip}, \link[R.utils:compressFile]{R.utils::bunzip2}
 }
diff --git a/man/getPubChemAnnotation.Rd b/man/getPubChemAnnotation.Rd
index 443d922..c43f552 100644
--- a/man/getPubChemAnnotation.Rd
+++ b/man/getPubChemAnnotation.Rd
@@ -8,7 +8,6 @@ getPubChemAnnotation(
   compound,
   annotationType = "ChEMBL ID",
   url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound",
-  output = "JSON",
   timeout_s = 29,
   retries = 3,
   quiet = TRUE,
@@ -20,8 +19,6 @@ getPubChemAnnotation(
 
 \item{url}{\code{character(1)} The URL to perform API queries on. default = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound'}
 
-\item{output}{\code{character(1)} The output format. Defaults to 'JSON'.}
-
 \item{timeout_s}{\code{numeric(1)} The number of seconds to wait before timing out. Default is 29.}
 
 \item{retries}{\code{numeric(1)} The number of times to retry a failed query. Default is 3.}
@@ -31,6 +28,8 @@ getPubChemAnnotation(
 \item{throttleMessage}{\code{logical(1)} Should a message be printed when the query is throttled? Default is FALSE.}
 
 \item{header}{\code{character(1)} A valid header name for the PUG VIEW annotations}
+
+\item{output}{\code{character(1)} The output format. Defaults to 'JSON'.}
 }
 \description{
 queries the PubChem PUG-VIEW API to get a single annotation using a CID for a header
diff --git a/man/getPubChemAnnotations.Rd b/man/getPubChemAnnotations.Rd
new file mode 100644
index 0000000..675d46e
--- /dev/null
+++ b/man/getPubChemAnnotations.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/getPubChem.R
+\name{getPubChemAnnotations}
+\alias{getPubChemAnnotations}
+\title{Retrieve PubChem annotations for a given compound}
+\usage{
+getPubChemAnnotations(compound, annotations, ...)
+}
+\arguments{
+\item{compound}{The compound for which PubChem annotations are to be retrieved.}
+
+\item{annotations}{A character vector specifying the annotations to retrieve.}
+
+\item{...}{Additional arguments to be passed to getPubChemAnnotation().}
+}
+\value{
+A merged data table containing the PubChem annotations for the specified compound.
+}
+\description{
+This function retrieves PubChem annotations for a given compound using the specified annotations.
+}
+\examples{
+getPubChemAnnotations(
+     compound = "36314", 
+     annotations= c('ChEMBL ID', 'NSC Number', 'Drug Induced Liver Injury'))
+
+}