Skip to content

Commit

Permalink
New functionality: term usages (#776)
Browse files Browse the repository at this point in the history
Adding UsageInterface

Adding a QuickGO adapter
  • Loading branch information
cmungall authored Jun 5, 2024
1 parent d56db0e commit 84bd709
Show file tree
Hide file tree
Showing 25 changed files with 744 additions and 30 deletions.
83 changes: 80 additions & 3 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
from oaklib.interfaces.summary_statistics_interface import SummaryStatisticsInterface
from oaklib.interfaces.taxon_constraint_interface import TaxonConstraintInterface
from oaklib.interfaces.text_annotator_interface import TextAnnotatorInterface
from oaklib.interfaces.usages_interface import UsagesInterface
from oaklib.io.heatmap_writer import HeatmapWriter
from oaklib.io.html_writer import HTMLWriter
from oaklib.io.obograph_writer import write_graph
Expand Down Expand Up @@ -1439,7 +1440,6 @@ def annotate(
@output_type_option
# TODO: the main output option uses a filelike object
@click.option("-o", "--output", help="Path to output file")
# @output_option
def viz(
terms,
predicates,
Expand Down Expand Up @@ -4181,6 +4181,77 @@ def apply_taxon_constraints(
writer.emit(st)


@main.command()
@output_option
@autolabel_option
@output_type_option
@click.option(
    "--used-by-prefix",
    "-P",
    multiple=True,
    help="Prefix forwarded to the adapter as used_by_prefixes. Can be given multiple times.",
)
@click.argument("terms", nargs=-1)
def usages(
    terms,
    autolabel: bool,
    output_type: str,
    output: str,
    used_by_prefix: List,
    **kwargs,
):
    """
    List usages of a term or set of terms.

    Usages of neuron in GO:

        runoak -i sqlite:obo:go usages CL:0000540

    Association/annotations sources can also be used:

        runoak -i quickgo: usages GO:0031969

    Note this query may be slow - you can restrict to a species:

        runoak -i quickgo:NCBITaxon:9606 usages GO:0031969

    (this should return no results, as there should be no human proteins annotated
    to chloroplast membrane)

    Using amigo:

        runoak -i amigo: usages GO:0031969

    Using ubergraph:

        runoak -i ubergraph: usages CL:0000540

    This will include usages over multiple ontologies

    You can run queries over multiple sources (an AggregatorImplementation):

        runoak -i sqlite:obo:go -a ubergraph: -a amigo: -a quickgo: usages GO:0031969

    If the terms contain an "@" separator, the terms before it are the terms to
    look up, and the terms after it are passed to the adapter as the used_by
    argument.
    """
    impl = settings.impl
    # Check the adapter's capability first, so an unsupported adapter fails
    # before any writer/output is configured.
    if not isinstance(impl, UsagesInterface):
        raise NotImplementedError(
            f"Cannot execute this using {settings.impl} of type {type(settings.impl)}"
        )
    writer = _get_writer(output_type, impl, StreamingCsvWriter)
    writer.autolabel = autolabel
    writer.output = output
    used_by = None
    if "@" in terms:
        # Split the argument list at the first "@": query terms on the left,
        # raw used_by values on the right (these are not expanded as queries).
        ix = terms.index("@")
        curies = list(query_terms_iterator(terms[:ix], impl))
        used_by = terms[ix + 1 :]
    else:
        curies = list(query_terms_iterator(terms, impl))
    for usage in impl.usages(curies, used_by=used_by, used_by_prefixes=used_by_prefix, **kwargs):
        writer.emit(usage)


@main.command()
@output_option
@predicates_option
Expand Down Expand Up @@ -4662,6 +4733,7 @@ def apply_labels(group):
@output_option
@click.option(
"--ontology-only/--no-ontology-only",
"-T",
default=False,
show_default=True,
help="If true, perform a pseudo-enrichment analysis treating each term as an association to itself.",
Expand Down Expand Up @@ -4750,14 +4822,20 @@ def enrichment(
actual_association_predicates = _process_predicates_arg(association_predicates)
if sample_file:
subjects = list(curies_from_file(sample_file, adapter=impl, allow_labels=allow_labels))
curies = list(query_terms_iterator(terms, impl))
else:
if "@" in terms:
if not ontology_only:
raise ValueError("Cannot use @ with --no-ontology-only")
ix = terms.index("@")
logging.info(f"Splitting terms into two, position = {ix}")
subjects = list(query_terms_iterator(terms[0:ix], impl))
terms = terms[ix + 1 :]
curies = list(query_terms_iterator(terms[ix + 1 :], impl))
logging.info(f"Num Subjects={len(subjects)} (using {len(curies)} terms)")
else:
subjects = list(query_terms_iterator(terms, impl))
curies = None
logging.info(f"Num Subjects={len(subjects)} (using all terms)")
if not subjects:
raise ValueError("No terms or upload provided")
background = (
Expand All @@ -4778,7 +4856,6 @@ def enrichment(
writer = _get_writer(output_type, impl, StreamingYamlWriter)
writer.autolabel = autolabel
writer.output = output
curies = list(query_terms_iterator(terms, impl))
results = impl.enriched_classes(
subjects,
predicates=actual_association_predicates,
Expand Down
21 changes: 21 additions & 0 deletions src/oaklib/conf/obograph-style.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@
"penwidth": 2,
"label": ""
},
"RO:0002333": {
"arrowhead": "box",
"color": "green",
"penwidth": 2,
"label": ""
},
"RO:0004009": {
"arrowhead": "box",
"color": "green",
Expand Down Expand Up @@ -97,6 +103,21 @@
"color": "red",
"label": ""
},
"RO:0002233": {
"arrowhead": "diamond",
"color": "blue",
"label": "→⊚"
},
"RO:0002234": {
"arrowhead": "diamond",
"color": "blue",
"label": "⊚→"
},
"RO:0002400": {
"arrowhead": "diamond",
"color": "blue",
"label": "⇨⊚"
},
"RO:0002220": {
"color": "gray",
"label": "A"
Expand Down
6 changes: 6 additions & 0 deletions src/oaklib/datamodels/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,19 @@
LOCATED_IN = "RO:0001025"
DEVELOPS_FROM = "RO:0002202"
HAS_PART = "BFO:0000051"
OCCURS_IN = "BFO:0000066"
ONLY_IN_TAXON = "RO:0002160"
NEVER_IN_TAXON = "RO:0002161"
IN_TAXON = "RO:0002162"
PRESENT_IN_TAXON = "RO:0002175"
NEGATIVELY_REGULATES = "RO:0002212"
POSITIVELY_REGULATES = "RO:0002213"
REGULATES = "RO:0002211"
ENABLES = "RO:0002327"
ENABLED_BY = "RO:0002333"
HAS_DIRECT_INPUT = "RO:0002400"
HAS_INPUT = "RO:0002233"
HAS_OUTPUT = "RO:0002234"

BIOLOGICAL_PROCESS = "GO:0008150"
CELLULAR_COMPONENT = "GO:0005575"
Expand Down
4 changes: 3 additions & 1 deletion src/oaklib/implementations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
from oaklib.implementations.agrkb.agrkb_implementation import AGRKBImplementation
from oaklib.implementations.amigo.amigo_implementation import AmiGOImplementation
from oaklib.implementations.cx.cx_implementation import CXImplementation
from oaklib.implementations.eutils.pubmed_implementation import PubMedImplementation
from oaklib.implementations.funowl.funowl_implementation import FunOwlImplementation
from oaklib.implementations.gilda import GildaImplementation
from oaklib.implementations.kgx.kgx_implementation import KGXImplementation
from oaklib.implementations.llm_implementation import LLMImplementation
from oaklib.implementations.monarch.monarch_implementation import MonarchImplementation
from oaklib.implementations.ncbi.ncbi_gene_implementation import NCBIGeneImplementation
from oaklib.implementations.ncbi.pubmed_implementation import PubMedImplementation
from oaklib.implementations.ols import (
BaseOlsImplementation,
OlsImplementation,
Expand All @@ -44,6 +44,7 @@
PantherDBImplementation,
)
from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
from oaklib.implementations.quickgo.quickgo_implementation import QuickGOImplementation
from oaklib.implementations.semsimian.semsimian_implementation import (
SemSimianImplementation,
)
Expand Down Expand Up @@ -86,6 +87,7 @@
"NCBIGeneImplementation",
"OntobeeImplementation",
"ProntoImplementation",
"QuickGOImplementation",
"SimpleOboImplementation",
"SqlImplementation",
"UbergraphImplementation",
Expand Down
39 changes: 34 additions & 5 deletions src/oaklib/implementations/aggregator/aggregator_implementation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from collections import defaultdict
from dataclasses import dataclass
from io import TextIOWrapper
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Type

from sssom_schema import Mapping

Expand All @@ -12,6 +12,7 @@
ValidationConfiguration,
ValidationResult,
)
from oaklib.interfaces.association_provider_interface import AssociationProviderInterface
from oaklib.interfaces.basic_ontology_interface import (
ALIAS_MAP,
DEFINITION,
Expand All @@ -25,19 +26,22 @@
from oaklib.interfaces.relation_graph_interface import RelationGraphInterface
from oaklib.interfaces.search_interface import SearchInterface
from oaklib.interfaces.text_annotator_interface import TEXT, TextAnnotatorInterface
from oaklib.interfaces.usages_interface import UsagesInterface
from oaklib.interfaces.validator_interface import ValidatorInterface
from oaklib.types import CURIE, SUBSET_CURIE


@dataclass
class AggregatorImplementation(
AssociationProviderInterface,
ValidatorInterface,
RdfInterface,
RelationGraphInterface,
OboGraphInterface,
SearchInterface,
MappingProviderInterface,
TextAnnotatorInterface,
UsagesInterface,
):
"""
An OAK adapter that wraps multiple implementations and integrates results together.
Expand Down Expand Up @@ -78,10 +82,20 @@ class AggregatorImplementation(

implementations: List[BasicOntologyInterface] = None

def _delegate_iterator(self, func: Callable) -> Iterator:
@property
def implementation_name(self):
impl_names = []
for i in self.implementations:
for v in func(i):
yield v
impl_names.append(i.implementation_name)
return "-".join(impl_names)

def _delegate_iterator(
self, func: Callable, interface: Optional[Type[BasicOntologyInterface]] = None
) -> Iterator:
for i in self.implementations:
if interface is None or isinstance(i, interface):
for v in func(i):
yield v

def _delegate_simple_tuple_map(self, func: Callable, strict=False) -> Dict[Any, List[Any]]:
m = defaultdict(list)
Expand All @@ -107,11 +121,21 @@ def validate(self, configuration: ValidationConfiguration = None) -> Iterable[Va
def entities(self, **kwargs) -> Iterable[CURIE]:
    """Yield entities from every wrapped implementation, forwarding ``kwargs`` to each."""

    def _entities_of(impl):
        return impl.entities(**kwargs)

    return self._delegate_iterator(_entities_of)

def relationships(self, *args, **kwargs) -> Iterable[CURIE]:
    """Yield relationships from every wrapped implementation, forwarding all arguments."""

    def _relationships_of(impl):
        return impl.relationships(*args, **kwargs)

    return self._delegate_iterator(_relationships_of)

def simple_mappings_by_curie(self, curie: CURIE) -> Iterable[Tuple[PRED_CURIE, CURIE]]:
    """Yield the simple mappings for ``curie`` from every wrapped implementation."""

    def _mappings_of(impl):
        return impl.simple_mappings_by_curie(curie)

    return self._delegate_iterator(_mappings_of)

def get_sssom_mappings_by_curie(self, curie: CURIE) -> Iterable[Mapping]:
return self._delegate_iterator(lambda i: i.get_sssom_mappings_by_curie(curie))
return self._delegate_iterator(
lambda i: i.get_sssom_mappings_by_curie(curie), MappingProviderInterface
)

def sssom_mappings(self, *args, **kwargs) -> Iterable[Mapping]:
    """Yield SSSOM mappings from the wrapped implementations that provide mappings.

    Only implementations that are instances of ``MappingProviderInterface``
    are consulted; all arguments are forwarded unchanged.
    """

    def _mappings_of(impl):
        return impl.sssom_mappings(*args, **kwargs)

    return self._delegate_iterator(_mappings_of, MappingProviderInterface)

def label(self, curie: CURIE, **kwargs) -> str:
    """Look up the label for ``curie`` through ``_delegate_first``.

    Each wrapped implementation is asked via ``label(curie, **kwargs)``;
    which result wins is decided by ``_delegate_first`` (presumably the first
    usable one - confirm against that helper).
    """

    def _label_of(impl):
        return impl.label(curie, **kwargs)

    return self._delegate_first(_label_of)
Expand Down Expand Up @@ -151,6 +175,11 @@ def outgoing_relationship_map(self, curie: CURIE) -> RELATIONSHIP_MAP:
def incoming_relationship_map(self, curie: CURIE) -> RELATIONSHIP_MAP:
return self._delegate_simple_tuple_map(lambda i: i.incoming_relationship_map(curie))

def associations(self, *args, **kwargs) -> Iterable[CURIE]:
    """Yield associations from the wrapped implementations that provide associations.

    Only implementations that are instances of ``AssociationProviderInterface``
    are consulted; all arguments are forwarded unchanged.
    """

    def _associations_of(impl):
        return impl.associations(*args, **kwargs)

    return self._delegate_iterator(_associations_of, AssociationProviderInterface)

def annotate_text(
self, text: TEXT, configuration: Optional[TextAnnotationConfiguration] = None
) -> Iterable[TextAnnotation]:
Expand Down
2 changes: 2 additions & 0 deletions src/oaklib/implementations/amigo/amigo_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
]

from oaklib.interfaces.basic_ontology_interface import LANGUAGE_TAG, RELATIONSHIP
from oaklib.interfaces.usages_interface import UsagesInterface
from oaklib.types import CURIE, PRED_CURIE, SUBSET_CURIE
from oaklib.utilities.iterator_utils import chunk

Expand Down Expand Up @@ -130,6 +131,7 @@ def _normalize(curie: CURIE) -> CURIE:
class AmiGOImplementation(
AssociationProviderInterface,
SearchInterface,
UsagesInterface,
):
"""
Wraps AmiGO endpoint.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dataclasses import dataclass, field
from typing import ClassVar, Iterable, Iterator, Optional, Tuple

import requests_cache
from eutils import Client

__all__ = [
Expand All @@ -22,6 +23,8 @@

logger = logging.getLogger(__name__)

NCBI_REQUESTS_CACHE = ".ncbi_requests_cache"


@dataclass
class EUtilsImplementation(OboGraphInterface, ABC):
Expand All @@ -30,10 +33,20 @@ class EUtilsImplementation(OboGraphInterface, ABC):
"""

entrez_client: Client = field(default_factory=lambda: Client())
# 0.6.0 release in 2019 - consider switching to direct API calls?

database: ClassVar[Optional[str]] = None
entity_type: ClassVar[Optional[str]] = None

# alternative to entrez_client
_requests_session: requests_cache.CachedSession = None

@property
def requests_session(self):
    """Return the cached HTTP session, creating it on first access.

    The session is a ``requests_cache.CachedSession`` constructed with
    ``NCBI_REQUESTS_CACHE`` and stored on ``_requests_session``, so later
    accesses return the same object.
    """
    session = self._requests_session
    if session is None:
        session = requests_cache.CachedSession(NCBI_REQUESTS_CACHE)
        self._requests_session = session
    return session

def label(self, curie: CURIE, lang: Optional[LANGUAGE_TAG] = None) -> Optional[str]:
if lang is not None:
raise NotImplementedError("lang not implemented for eutils")
Expand Down
Loading

0 comments on commit 84bd709

Please sign in to comment.