Skip to content

Commit

Permalink
fix bugs in parseJSON, add multiCID functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Jermiah committed Dec 13, 2023
1 parent 68ab615 commit 77a99f3
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 49 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export(getGencodeGRangesAnnotated)
export(getGuideToPharm)
export(getInfoFromCelllineInput)
export(getPubChemAnnotation)
export(getPubChemAnnotations)
export(getPubChemCompound)
export(getPubChemFromNSC)
export(getPubChemSubstance)
Expand Down
2 changes: 1 addition & 1 deletion R/getPubChem-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
print(paste0("Throttling at ", percentage, "%. Sleeping for 60 seconds."))
Sys.sleep(60)
}else{
Sys.sleep(5)
Sys.sleep(max(as.numeric(percentages)))
}

return(as.integer(percentage) > 15)
Expand Down
97 changes: 53 additions & 44 deletions R/getPubChem.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,24 @@ queryPubChem <- function(id, domain='compound', namespace='cid', operation=NA,
#' @export
parseJSON <- function(response, ..., encoding='UTF-8', query_only=FALSE) {
    # Convert the body of an API response into an R object.
    #
    # response:   the object handed to `content()` (presumably an httr
    #             response -- confirm against callers), or NULL / NA when the
    #             upstream request produced nothing.
    # ...:        forwarded to `fromJSON()`.
    # encoding:   text encoding used when extracting the response body.
    # query_only: when TRUE the input is returned untouched.
    #
    # Returns the parsed object, NULL when there is no response/body, or NA
    # when the body is missing or cannot be parsed as JSON.
    if (isTRUE(query_only)) return(response)

    # Guard before touching the response so that a failed upstream request
    # (NULL or a scalar NA) is passed through instead of raising an error.
    if (is.null(response)) return(NULL)
    if (is.atomic(response) && length(response) == 1L && is.na(response)) {
        return(NA)
    }

    # Extract the body as text from the response that was passed in,
    # honouring the caller-supplied encoding.
    body <- content(response, as='text', type='JSON', encoding=encoding)

    if (is.null(body)) return(NULL)
    if (length(body) != 1L || is.na(body)) return(NA)

    # Malformed JSON is deliberately reported as NA rather than as an error so
    # that callers can keep going with the remaining queries.
    tryCatch(
        fromJSON(body, ...),
        error=function(e) NA
    )
}

#' Query the PubChem REST API, with the result automatically converted from
Expand Down Expand Up @@ -768,7 +779,6 @@ getPubChemAnnotation <- function(
compound,
annotationType = 'ChEMBL ID',
url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound',
output = 'JSON',
timeout_s = 29,
retries = 3,
quiet = TRUE,
Expand All @@ -786,8 +796,8 @@ getPubChemAnnotation <- function(
# TODO:: add a check to see if the compound is a valid CID or SID
# TODO:: allow for variations of headers due to spelling errors
# Temporary:
if(header == "DILI") queryURL <- paste0(.buildURL(url, compound, output), '?heading=', "Drug Induced Liver Injury")
else queryURL <- paste0(.buildURL(url, compound, output), '?heading=', header)
if(header == "DILI") queryURL <- paste0(.buildURL(url, compound, 'JSON'), '?heading=', "Drug Induced Liver Injury")
else queryURL <- paste0(.buildURL(url, compound, 'JSON'), '?heading=', header)

tryCatch({
result <- RETRY('GET', URLencode(queryURL), times = retries, quiet = quiet)
Expand All @@ -798,58 +808,57 @@ getPubChemAnnotation <- function(

.checkThrottlingStatus(result, throttleMessage = throttleMessage)
result <- parseJSON(result)
# switch(header,
# 'ATC Code'=return(.parseATCannotations(annotationDT)),
# 'Drug Induced Liver Injury'=return(.parseDILIannotations(annotationDT)),
# 'NSC Number'=return(.parseNSCannotations(annotationDT)),
# 'CTD Chemical-Gene Interactions'=return(.parseCTDannotations(annotationDT)),
# 'Names and Synonyms'=return(.parseNamesAndSynonyms(annotationDT)),
# 'Synonyms and Identifiers'=return(.parseSynonymsAndIdentifiers(annotationDT)),
# 'CAS'=return(.parseCASannotations(annotationDT)),
# tryCatch({
# parseFUN(annotationDT)
# },
# error=function(e) {
# .warning(funContext, 'The parseFUN function failed: ', e,
# '. Returning unparsed results instead. Please test the parseFUN
# on the returned data.')
# return(annotationDT)
# })
# )


if (header == 'ChEMBL ID') {
result <- .parseCHEMBLresponse(result)
}else if (header == 'NSC Number'){
result <- .parseNSCresponse(result)
}else if (header == 'DILI' || header =='Drug Induced Liver Injury'){
result <- .parseDILIresponse(result)
}else if (header == 'CAS'){
result <- .parseCASresponse(result)
}else if (header == 'ATC Code'){
result <- .parseATCresponse(result)
}

# Using switch instead of if statements
result <- switch(
header,
'ChEMBL ID' = .parseCHEMBLresponse(result),
'NSC Number' = .parseNSCresponse(result),
'DILI' = .parseDILIresponse(result),
'CAS' = .parseCASresponse(result),
'ATC Code' = .parseATCresponse(result)
)

'ATC Code' = .parseATCresponse(result))

if (is.null(result)) result <- list(compound, "N/A")
else result <- list(compound,result)

names(result) <- c("cid", header)
return(result)
}



#' Retrieve several PubChem annotations for one compound
#'
#' Fetches each requested annotation separately and joins the per-annotation
#' tables on the `cid` column, keeping every row of the left-hand table at
#' each join step.
#'
#' @param compound The compound for which PubChem annotations are to be retrieved.
#' @param annotations A character vector naming the annotations to retrieve.
#' @param ... Additional arguments forwarded to getPubChemAnnotation().
#'
#' @return The per-annotation tables merged on `cid`.
#'
#' @examples
#' getPubChemAnnotations(
#' compound = "36314",
#' annotations= c('ChEMBL ID', 'NSC Number', 'Drug Induced Liver Injury'))
#'
#' @export
getPubChemAnnotations <- function(compound, annotations, ...){
    # One table per requested annotation, labelled by annotation name.
    annotation_tables <- lapply(
        annotations,
        .getPubChemAnnotationDT,
        compound = compound,
        ...
    )
    names(annotation_tables) <- annotations

    # Left join on the shared compound identifier column.
    join_on_cid <- function(merged, nxt) {
        merge(merged, nxt, by = "cid", all.x = TRUE)
    }
    Reduce(join_on_cid, annotation_tables)
}


#' Fetch one PubChem annotation via getPubChemAnnotation() and return it as a
#' data.table
.getPubChemAnnotationDT <- function(compound, annotationType, ...){
    # Coerce the getPubChemAnnotation() result straight to a data.table.
    data.table::as.data.table(
        getPubChemAnnotation(compound, annotationType, ...)
    )
}



#' Extract the ChEMBL source IDs from a parsed PUG-VIEW response and strip the
#' "::Compound" / "Compound::" markers from them
.parseCHEMBLresponse <- function(result){
    source_ids <- result$Record$Reference$SourceID

    # Remove the markers one after the other, in this order.
    for (marker in c("::Compound", "Compound::")) {
        source_ids <- gsub(marker, "", source_ids)
    }
    source_ids
}

Expand Down
11 changes: 11 additions & 0 deletions man/dot-getPubChemAnnotationDT.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/downloadAndExtract.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions man/getPubChemAnnotation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/getPubChemAnnotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 77a99f3

Please sign in to comment.