From 379acdb57530436de1b42ae44a1b86fa6ba96fc4 Mon Sep 17 00:00:00 2001 From: cmungall Date: Mon, 9 May 2022 17:35:54 -0700 Subject: [PATCH] Adding agroportal implementation. Added ability to dynamically make sqlite from owl. Added query datamodel. Various documentation. Added LOV implementation --- Makefile | 3 + docs/best-practice.rst | 11 +- docs/faq.rst | 15 +- docs/glossary.rst | 2 +- docs/howtos/perform-lexical-matching.rst | 1 + .../roll-up-annotations-to-a-subset.rst | 0 docs/howtos/visualize-ontology-subgraphs.rst | 0 docs/interfaces/validator.rst | 17 ++ poetry.lock | 191 ++++++++++++++---- pyproject.toml | 3 +- src/oaklib/__init__.py | 13 +- src/oaklib/cli.py | 75 ++++++- src/oaklib/datamodels/search.py | 8 +- src/oaklib/datamodels/search_datamodel.py | 102 +++++++++- src/oaklib/datamodels/search_datamodel.yaml | 37 ++++ src/oaklib/datamodels/text_annotator.py | 9 +- src/oaklib/datamodels/text_annotator.yaml | 12 +- src/oaklib/datamodels/validation_datamodel.py | 144 ++++++++++++- .../datamodels/validation_datamodel.yaml | 50 ++++- src/oaklib/datamodels/vocabulary.py | 2 +- .../bioportal/agroportal_implementation.py | 33 +++ .../bioportal/bioportal_implementation.py | 54 +++-- .../sparql/abstract_sparql_implementation.py | 4 +- .../sparql/lov_implementation.py | 38 ++++ .../sqldb/sql_implementation.py | 18 +- .../interfaces/basic_ontology_interface.py | 17 ++ src/oaklib/interfaces/obograph_interface.py | 1 + src/oaklib/interfaces/validator_interface.py | 6 +- src/oaklib/selector.py | 28 ++- .../test_validation_datamodel.py | 32 +++ 30 files changed, 831 insertions(+), 95 deletions(-) create mode 100644 docs/howtos/perform-lexical-matching.rst create mode 100644 docs/howtos/roll-up-annotations-to-a-subset.rst create mode 100644 docs/howtos/visualize-ontology-subgraphs.rst create mode 100644 src/oaklib/implementations/bioportal/agroportal_implementation.py create mode 100644 src/oaklib/implementations/sparql/lov_implementation.py create mode 100644 
tests/test_datamodels/test_validation_datamodel.py diff --git a/Makefile b/Makefile index 6bbd71cc3..e7056e23a 100644 --- a/Makefile +++ b/Makefile @@ -51,3 +51,6 @@ stage-docs: tests/input/%.db: tests/input/%.owl $(RUN) semsql make $@ + +bin/runoak: + echo `poetry run which runoak` '"$$0"' > $@ && chmod +x $@ diff --git a/docs/best-practice.rst b/docs/best-practice.rst index c403cb2bd..bef21f571 100644 --- a/docs/best-practice.rst +++ b/docs/best-practice.rst @@ -69,4 +69,13 @@ The :ref:`.chunk` utility function will chunk iterator calls into sizeable amoun This is slightly more boilerplate code, and may not be necessary for an in-memory implementation like Pronto. However, this pattern could have considerable advantages for result sets that are potentially large. Even if the external server is -slow to return results, users will see batches or results rather than waiting on the external server to produce *all* results. \ No newline at end of file +slow to return results, users will see batches or results rather than waiting on the external server to produce *all* results. + +Command Line +------------ + +If you are extending the CLI module or writing a Python application that uses OAK: + +- Use click +- Follow CLIG guidelines +- Ensure that there are tests for the command line using test_click \ No newline at end of file diff --git a/docs/faq.rst b/docs/faq.rst index 5daa9ba8e..f0ab33b6d 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -32,10 +32,23 @@ Using OAK with... Bioportal todo +OLS + todo + +Command Line +------------ + +What do the codes "i" and "p" mean? + Many commands take a :code:`--predicates` option, the value is a comma separated list of CURIEs. + You can use "i" as a shortcut for is_a and "p" as a shortcut for part_of + Troubleshooting --------------- ... Why do I get a "Error: No such option: -i" message The :code:`--input` or :code:`-i` option must come *before* the subcommand name. 
This is because the input option is one of the few options that are shared across *all* subcommands. - For example, you should write :code:`runoak -i my-ont.owl lexmatch -o results.sssom.tsv` \ No newline at end of file + For example, you should write :code:`runoak -i my-ont.owl lexmatch -o results.sssom.tsv` + +... How do I get a bioportal API key + TODO \ No newline at end of file diff --git a/docs/glossary.rst b/docs/glossary.rst index fbdb361f0..674ced2b0 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -25,7 +25,7 @@ Glossary An |Ontology element| that formally represents something that can be instantiated. For example, the class "heart" Mapping - See SSSOM + See :ref:`SSSOM` Graph Formally... diff --git a/docs/howtos/perform-lexical-matching.rst b/docs/howtos/perform-lexical-matching.rst new file mode 100644 index 000000000..30404ce4c --- /dev/null +++ b/docs/howtos/perform-lexical-matching.rst @@ -0,0 +1 @@ +TODO \ No newline at end of file diff --git a/docs/howtos/roll-up-annotations-to-a-subset.rst b/docs/howtos/roll-up-annotations-to-a-subset.rst new file mode 100644 index 000000000..e69de29bb diff --git a/docs/howtos/visualize-ontology-subgraphs.rst b/docs/howtos/visualize-ontology-subgraphs.rst new file mode 100644 index 000000000..e69de29bb diff --git a/docs/interfaces/validator.rst b/docs/interfaces/validator.rst index 8c6b3a1bc..e2c740879 100644 --- a/docs/interfaces/validator.rst +++ b/docs/interfaces/validator.rst @@ -1,6 +1,23 @@ Validator Interface -------------------- +.. warning :: + + Currently the main validator methods are only implemented for :ref:`SqlDatabaseImplementation` + +The validate method is configured using a *metadata schema*. The default one used is: + +- `Ontology Metadata `_ + +This is specified using LinkML which provides an expressive way to state constraints on metadata elements, +such as :ref:`AnnotationProperty` assertions in ontologies. 
For example, this schema states that definition +is *recommended* (not required), and that it is single-valued. + +Different projects may wish to configure this - it is possible to pass in a different or modified schema + +For more details see `this howto guide `_ + + .. currentmodule:: oaklib.interfaces.validator_interface .. autoclass:: ValidatorInterface diff --git a/poetry.lock b/poetry.lock index 7fcca9300..fbc88b6df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -178,6 +178,32 @@ soupsieve = ">1.2" html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "bioregistry" +version = "0.4.111" +description = "Integrated registry of biological databases and nomenclatures" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +click = "*" +more-click = "*" +pydantic = "*" +pystow = ">=0.1.13" +requests = "*" +tqdm = "*" + +[package.extras] +align = ["pyyaml", "beautifulsoup4", "tabulate", "defusedxml"] +charts = ["matplotlib", "matplotlib-venn", "seaborn"] +docs = ["sphinx", "sphinx-rtd-theme", "sphinx-click", "sphinx-autodoc-typehints", "sphinx-automodapi", "autodoc-pydantic"] +export = ["pyyaml", "rdflib", "rdflib-jsonld"] +gha = ["more-itertools"] +health = ["click-default-group", "pandas"] +tests = ["coverage", "pytest", "more-itertools"] +web = ["pyyaml", "rdflib", "rdflib-jsonld", "flask", "flasgger", "bootstrap-flask (<=2.0.0)", "markdown"] + [[package]] name = "bleach" version = "5.0.0" @@ -322,6 +348,17 @@ wrapt = ">=1.10,<2" [package.extras] dev = ["tox", "bump2version (<1)", "sphinx (<2)", "importlib-metadata (<3)", "importlib-resources (<4)", "configparser (<5)", "sphinxcontrib-websupport (<2)", "zipp (<2)", "PyTest (<5)", "PyTest-Cov (<2.6)", "pytest", "pytest-cov"] +[[package]] +name = "deprecation" +version = "2.1.0" +description = "A library to handle automated deprecations" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +packaging = "*" + [[package]] name = "docutils" 
version = "0.17.1" @@ -682,7 +719,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "jsonschema" -version = "4.4.0" +version = "4.5.1" description = "An implementation of JSON Schema validation for Python" category = "main" optional = false @@ -714,7 +751,7 @@ qtconsole = "*" [[package]] name = "jupyter-client" -version = "7.3.0" +version = "7.3.1" description = "Jupyter protocol implementation and client libraries" category = "dev" optional = false @@ -782,6 +819,19 @@ category = "dev" optional = false python-versions = ">=3.6" +[[package]] +name = "lark" +version = "1.1.2" +description = "a modern parsing library" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +atomic_cache = ["atomicwrites"] +nearley = ["js2py"] +regex = ["regex"] + [[package]] name = "linkml" version = "1.2.10" @@ -838,7 +888,7 @@ linkml-runtime = ">=1.1.6" [[package]] name = "linkml-runtime" -version = "1.2.9" +version = "1.2.10" description = "Runtime environment for LinkML, the Linked open data modeling language" category = "main" optional = false @@ -928,9 +978,20 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "more-click" +version = "0.1.1" +description = "More click." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = "*" + [[package]] name = "more-itertools" -version = "8.12.0" +version = "8.13.0" description = "More routines for operating on iterables, beyond itertools" category = "dev" optional = false @@ -969,7 +1030,7 @@ testing = ["beautifulsoup4", "coverage", "docutils (>=0.17.0,<0.18.0)", "pytest [[package]] name = "nbclient" -version = "0.6.0" +version = "0.6.2" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." 
category = "dev" optional = false @@ -1020,7 +1081,7 @@ webpdf = ["pyppeteer (>=1,<1.1)"] [[package]] name = "nbformat" -version = "5.3.0" +version = "5.4.0" description = "The Jupyter Notebook format" category = "dev" optional = false @@ -1030,7 +1091,7 @@ python-versions = ">=3.7" fastjsonschema = "*" jsonschema = ">=2.6" jupyter-core = "*" -traitlets = ">=4.1" +traitlets = ">=5.1" [package.extras] test = ["check-manifest", "testpath", "pytest", "pre-commit"] @@ -1418,6 +1479,27 @@ pyjsg = ">=0.11.10" rdflib-shim = "*" shexjsg = ">=0.8.1" +[[package]] +name = "pystow" +version = "0.4.3" +description = "Easily pick a place to store data for your python package." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = "*" +requests = "*" +tqdm = "*" + +[package.extras] +aws = ["boto3"] +docs = ["sphinx", "sphinx-rtd-theme", "sphinx-click", "sphinx-autodoc-typehints", "sphinx-automodapi"] +pandas = ["pandas"] +rdf = ["rdflib"] +tests = ["coverage", "pytest", "requests-file"] +xml = ["lxml"] + [[package]] name = "pytest" version = "5.4.3" @@ -1519,17 +1601,17 @@ test = ["flaky", "pytest", "pytest-qt"] [[package]] name = "qtpy" -version = "2.0.1" +version = "2.1.0" description = "Provides an abstraction layer on top of the various Qt bindings (PyQt5/6 and PySide2/6)." 
category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.dependencies] packaging = "*" [package.extras] -test = ["pytest (>=6.0.0)", "pytest-cov (>=3.0.0)", "pytest-qt"] +test = ["pytest (>=6,!=7.0.0,!=7.0.1)", "pytest-cov (>=3.0.0)", "pytest-qt"] [[package]] name = "ratelimit" @@ -1666,7 +1748,7 @@ numpy = ">=1.16.5" [[package]] name = "semsql" -version = "0.1.2" +version = "0.1.3" description = "" category = "main" optional = false @@ -1954,14 +2036,16 @@ url = ["furl (>=0.4.1)"] [[package]] name = "sssom" -version = "0.3.9" +version = "0.3.10" description = "Operations on SSSOM mapping tables" category = "main" optional = false python-versions = ">=3.7" [package.dependencies] +bioregistry = "*" click = "*" +deprecation = "*" linkml-runtime = ">=1.1.12" networkx = "*" numpy = "*" @@ -2046,6 +2130,23 @@ category = "dev" optional = false python-versions = ">= 3.5" +[[package]] +name = "tqdm" +version = "4.64.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "traitlets" version = "5.1.1" @@ -2080,7 +2181,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "validators" -version = "0.18.2" +version = "0.19.0" description = "Python Data Validation for Humans™." 
category = "main" optional = false @@ -2088,7 +2189,6 @@ python-versions = ">=3.4" [package.dependencies] decorator = ">=3.4.0" -six = ">=1.4.0" [package.extras] test = ["pytest (>=2.2.3)", "flake8 (>=2.4.0)", "isort (>=4.2.2)"] @@ -2169,7 +2269,7 @@ docs = [] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "58a5529a88a6878817b818b6f7380dad70c6379d32259361557ea35e012d046c" +content-hash = "5b2b36c42c13b481d58a663fd77101d8c0a5fc09418c8a0f2f08950743907eb2" [metadata.files] aiohttp = [ @@ -2324,6 +2424,10 @@ beautifulsoup4 = [ {file = "beautifulsoup4-4.11.1-py3-none-any.whl", hash = "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30"}, {file = "beautifulsoup4-4.11.1.tar.gz", hash = "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"}, ] +bioregistry = [ + {file = "bioregistry-0.4.111-py3-none-any.whl", hash = "sha256:af474f4d96a86e9322b550dfcb030dfff6ccbf0e3f6d85642f9626284624d491"}, + {file = "bioregistry-0.4.111.tar.gz", hash = "sha256:f5bf844c5fbd47fa1d33127f2c008139c8d3cd1d92023b641615943148e44085"}, +] bleach = [ {file = "bleach-5.0.0-py3-none-any.whl", hash = "sha256:08a1fe86d253b5c88c92cc3d810fd8048a16d15762e1e5b74d502256e5926aa1"}, {file = "bleach-5.0.0.tar.gz", hash = "sha256:c6d6cc054bdc9c83b48b8083e236e5f00f238428666d2ce2e083eaa5fd568565"}, @@ -2482,6 +2586,10 @@ deprecated = [ {file = "Deprecated-1.2.13-py2.py3-none-any.whl", hash = "sha256:64756e3e14c8c5eea9795d93c524551432a0be75629f8f29e67ab8caf076c76d"}, {file = "Deprecated-1.2.13.tar.gz", hash = "sha256:43ac5335da90c31c24ba028af536a91d41d53f9e6901ddb021bcc572ce44e38d"}, ] +deprecation = [ + {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, + {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"}, +] docutils = [ {file = "docutils-0.17.1-py2.py3-none-any.whl", hash = 
"sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"}, {file = "docutils-0.17.1.tar.gz", hash = "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125"}, @@ -2725,8 +2833,8 @@ jsonpointer = [ {file = "jsonpointer-2.3.tar.gz", hash = "sha256:97cba51526c829282218feb99dab1b1e6bdf8efd1c43dc9d57be093c0d69c99a"}, ] jsonschema = [ - {file = "jsonschema-4.4.0-py3-none-any.whl", hash = "sha256:77281a1f71684953ee8b3d488371b162419767973789272434bbc3f29d9c8823"}, - {file = "jsonschema-4.4.0.tar.gz", hash = "sha256:636694eb41b3535ed608fe04129f26542b59ed99808b4f688aa32dcf55317a83"}, + {file = "jsonschema-4.5.1-py3-none-any.whl", hash = "sha256:71b5e39324422543546572954ce71c67728922c104902cb7ce252e522235b33f"}, + {file = "jsonschema-4.5.1.tar.gz", hash = "sha256:7c6d882619340c3347a1bf7315e147e6d3dae439033ae6383d6acb908c101dfc"}, ] jupyter = [ {file = "jupyter-1.0.0-py2.py3-none-any.whl", hash = "sha256:5b290f93b98ffbc21c0c7e749f054b3267782166d72fa5e3ed1ed4eaf34a2b78"}, @@ -2734,8 +2842,8 @@ jupyter = [ {file = "jupyter-1.0.0.zip", hash = "sha256:3e1f86076bbb7c8c207829390305a2b1fe836d471ed54be66a3b8c41e7f46cc7"}, ] jupyter-client = [ - {file = "jupyter_client-7.3.0-py3-none-any.whl", hash = "sha256:671dd2d90d03f41716b09627a4eb06bb37875f92bf6563cc2ce4fe71c61c5cda"}, - {file = "jupyter_client-7.3.0.tar.gz", hash = "sha256:3bcc8e08a294d0fa9406e48cfe17e11ef0efdb7c504fe8cc335128e3ef8f3dac"}, + {file = "jupyter_client-7.3.1-py3-none-any.whl", hash = "sha256:404abe552540aff3527e66e16beb114b6b4ff58479d51a301f4eb9701e4f52ef"}, + {file = "jupyter_client-7.3.1.tar.gz", hash = "sha256:05d4ff6a0ade25138c6bb0fbeac7ddc26b5fe835e7dd816b64b4a45b931bdc0b"}, ] jupyter-console = [ {file = "jupyter_console-6.4.3-py3-none-any.whl", hash = "sha256:e630bcb682c0088dda45688ad7c2424d4a825c8acf494cb036ced03ed0424841"}, @@ -2753,6 +2861,10 @@ jupyterlab-widgets = [ {file = "jupyterlab_widgets-1.1.0-py3-none-any.whl", hash = 
"sha256:c2a9bd3789f120f64d73268c066ed3b000c56bc1dda217be5cdc43e7b4ebad3f"}, {file = "jupyterlab_widgets-1.1.0.tar.gz", hash = "sha256:d5f41bc1713795385f718d44dcba47e1e1473c6289f28a95aa6b2c0782ee372a"}, ] +lark = [ + {file = "lark-1.1.2-py2.py3-none-any.whl", hash = "sha256:c1ab213fc5e2d273fe2d91da218ccc8b5b92d065b17faa5e743499cb16594b7d"}, + {file = "lark-1.1.2.tar.gz", hash = "sha256:7a8d0c07d663da9391d7faee1bf1d7df4998c47ca43a593cbef5c7566acd057a"}, +] linkml = [ {file = "linkml-1.2.10-py3-none-any.whl", hash = "sha256:35634789d46406ebe103a70715ccb6e05a4ffc70f31d3597b5b25bdf8baee08f"}, {file = "linkml-1.2.10.tar.gz", hash = "sha256:cfefac344adea6d47ed50cdc6787d1f8be8687019fb0df6050b9f1a5e7714904"}, @@ -2762,8 +2874,8 @@ linkml-dataops = [ {file = "linkml_dataops-0.1.0.tar.gz", hash = "sha256:4550eab65e78b70dc3b9c651724a94ac2b1d1edb2fbe576465f1d6951a54ed04"}, ] linkml-runtime = [ - {file = "linkml_runtime-1.2.9-py3-none-any.whl", hash = "sha256:99cee1eff5b34cd2d1da0b9bf8cd91b9f0c2dcc0b4a9a7e44c242f4a0043d91a"}, - {file = "linkml_runtime-1.2.9.tar.gz", hash = "sha256:17fc4ad38e1e421101b2e934eca7f9d8b57f301038efe8e3fa669146b7c9a85f"}, + {file = "linkml_runtime-1.2.10-py3-none-any.whl", hash = "sha256:e39918ca0fb5b1e616f920c4e400b286b2f8a08d442454d624a6dd81b3006b8f"}, + {file = "linkml_runtime-1.2.10.tar.gz", hash = "sha256:6028f9b80fb356b52b9b72d6a38bde074e7fc65b4a97a862f0f1c26428065d3e"}, ] markdown-it-py = [ {file = "markdown-it-py-2.1.0.tar.gz", hash = "sha256:cf7e59fed14b5ae17c0006eff14a2d9a00ed5f3a846148153899a0224e2c07da"}, @@ -2827,9 +2939,13 @@ mistune = [ {file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"}, {file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"}, ] +more-click = [ + {file = "more_click-0.1.1-py3-none-any.whl", hash = "sha256:ff68c7e874fd409ce501903be3177363499aa9c2662607a3b66568f766dea527"}, + {file = 
"more_click-0.1.1.tar.gz", hash = "sha256:277c64767a6a9c6625ec6bc3e1241012867f6953b2295b2a1e8eeddec586eb53"}, +] more-itertools = [ - {file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"}, - {file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"}, + {file = "more-itertools-8.13.0.tar.gz", hash = "sha256:a42901a0a5b169d925f6f217cd5a190e32ef54360905b9c39ee7db5313bfec0f"}, + {file = "more_itertools-8.13.0-py3-none-any.whl", hash = "sha256:c5122bffc5f104d37c1626b8615b511f3427aa5389b94d61e5ef8236bfbc3ddb"}, ] multidict = [ {file = "multidict-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b9e95a740109c6047602f4db4da9949e6c5945cefbad34a1299775ddc9a62e2"}, @@ -2897,16 +3013,16 @@ myst-parser = [ {file = "myst_parser-0.17.2-py3-none-any.whl", hash = "sha256:1635ce3c18965a528d6de980f989ff64d6a1effb482e1f611b1bfb79e38f3d98"}, ] nbclient = [ - {file = "nbclient-0.6.0-py3-none-any.whl", hash = "sha256:2eed35fc954716cdf0a01ea8cbdd9f9316761479008570059e2f5de29e139423"}, - {file = "nbclient-0.6.0.tar.gz", hash = "sha256:3f89a403c6badf24d2855a455b69a80985b3b27e04111243fdb6a88a28d27031"}, + {file = "nbclient-0.6.2-py3-none-any.whl", hash = "sha256:0d575e1e2622ce433b0bb793fec7d8a77aacc2fd21831aec9be3bd48aaee126b"}, + {file = "nbclient-0.6.2.tar.gz", hash = "sha256:8b47553f1ced077cd7c4537fd5d701d46f7681f24b28275e5cc1d347e7c9b46b"}, ] nbconvert = [ {file = "nbconvert-6.5.0-py3-none-any.whl", hash = "sha256:c56dd0b8978a1811a5654f74c727ff16ca87dd5a43abd435a1c49b840fcd8360"}, {file = "nbconvert-6.5.0.tar.gz", hash = "sha256:223e46e27abe8596b8aed54301fadbba433b7ffea8196a68fd7b1ff509eee99d"}, ] nbformat = [ - {file = "nbformat-5.3.0-py3-none-any.whl", hash = "sha256:38856d97de49e8292e2d5d8f595e9d26f02abfd87e075d450af4511870b40538"}, - {file = "nbformat-5.3.0.tar.gz", hash = 
"sha256:fcc5ab8cb74e20b19570b5be809e2dba9b82836fd2761a89066ad43394ba29f5"}, + {file = "nbformat-5.4.0-py3-none-any.whl", hash = "sha256:0d6072aaec95dddc39735c144ee8bbc6589c383fb462e4058abc855348152dad"}, + {file = "nbformat-5.4.0.tar.gz", hash = "sha256:44ba5ca6acb80c5d5a500f1e5b83ede8cbe364d5a495c4c8cf60aaf1ba656501"}, ] nest-asyncio = [ {file = "nest_asyncio-1.5.5-py3-none-any.whl", hash = "sha256:b98e3ec1b246135e4642eceffa5a6c23a3ab12c82ff816a92c612d68205813b2"}, @@ -3154,6 +3270,10 @@ pyshexc = [ {file = "PyShExC-0.9.1-py2.py3-none-any.whl", hash = "sha256:efc55ed5cb2453e9df569b03e282505e96bb06597934288f3b23dd980ef10028"}, {file = "PyShExC-0.9.1.tar.gz", hash = "sha256:35a9975d4b9afeb20ef710fb6680871756381d0c39fbb5470b3b506581a304d3"}, ] +pystow = [ + {file = "pystow-0.4.3-py3-none-any.whl", hash = "sha256:a6315cd54304fbaf32f64de910b9d4e98bd65d970c6cf4da601d3dc3784c03e3"}, + {file = "pystow-0.4.3.tar.gz", hash = "sha256:4c206bc80cfb9de4c6a2de1c2d92a21da11ae57df862da459e56ba524ba514e3"}, +] pytest = [ {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, @@ -3278,8 +3398,8 @@ qtconsole = [ {file = "qtconsole-5.3.0.tar.gz", hash = "sha256:8e3520fdc75e46abc4cc6cffeca16fa2652754109b8ae839fa28e27d1eba5625"}, ] qtpy = [ - {file = "QtPy-2.0.1-py3-none-any.whl", hash = "sha256:d93f2c98e97387fcc9d623d509772af5b6c15ab9d8f9f4c5dfbad9a73ad34812"}, - {file = "QtPy-2.0.1.tar.gz", hash = "sha256:adfd073ffbd2de81dc7aaa0b983499ef5c59c96adcfdcc9dea60d42ca885eb8f"}, + {file = "QtPy-2.1.0-py3-none-any.whl", hash = "sha256:aee0586081f943029312becece9f63977b0a9e3788f77a6ac8cc74802bb173d6"}, + {file = "QtPy-2.1.0.tar.gz", hash = "sha256:ca8cd4217175186344299ee4c0f7e7adcf362c70852ba35b255a534077025c06"}, ] ratelimit = [ {file = "ratelimit-2.2.1.tar.gz", hash = 
"sha256:af8a9b64b821529aca09ebaf6d8d279100d766f19e90b5059ac6a718ca6dee42"}, @@ -3391,8 +3511,8 @@ scipy = [ {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"}, ] semsql = [ - {file = "semsql-0.1.2-py3-none-any.whl", hash = "sha256:dec8e52fc93206baac7e814835703908d117d0a3c7e2df7b192df223d18fdd19"}, - {file = "semsql-0.1.2.tar.gz", hash = "sha256:c6646f76684eb4e149238a4747637caca24ceaccfa9db3994bdc44dce7973feb"}, + {file = "semsql-0.1.3-py3-none-any.whl", hash = "sha256:d94f7d95f9ce545bf99b712ea2952d535ca1d1c43391b0af1fe9bfccd3178d00"}, + {file = "semsql-0.1.3.tar.gz", hash = "sha256:274129e95d72181600424a84b0bddd9aaf11f8be50096d9da49fb79ddba2aeb8"}, ] send2trash = [ {file = "Send2Trash-1.8.0-py3-none-any.whl", hash = "sha256:f20eaadfdb517eaca5ce077640cb261c7d2698385a6a0f072a4a5447fd49fa08"}, @@ -3505,7 +3625,7 @@ sqlalchemy-utils = [ {file = "SQLAlchemy_Utils-0.38.2-py3-none-any.whl", hash = "sha256:622235b1598f97300e4d08820ab024f5219c9a6309937a8b908093f487b4ba54"}, ] sssom = [ - {file = "sssom-0.3.9.tar.gz", hash = "sha256:0aa6fe405dcfc4a637bef821311dd3bbde0112c33e2b37d72979c770d00998c4"}, + {file = "sssom-0.3.10.tar.gz", hash = "sha256:4a2c6d2d37a9d0b989355647d66dd34b12a410c084517e08af47251008731380"}, ] stack-data = [ {file = "stack_data-0.2.0-py3-none-any.whl", hash = "sha256:999762f9c3132308789affa03e9271bbbe947bf78311851f4d485d8402ed858e"}, @@ -3566,6 +3686,10 @@ tornado = [ {file = "tornado-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4"}, {file = "tornado-6.1.tar.gz", hash = "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791"}, ] +tqdm = [ + {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"}, + {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"}, +] traitlets = [ {file = 
"traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"}, {file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"}, @@ -3579,8 +3703,7 @@ urllib3 = [ {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, ] validators = [ - {file = "validators-0.18.2-py3-none-any.whl", hash = "sha256:0143dcca8a386498edaf5780cbd5960da1a4c85e0719f3ee5c9b41249c4fefbd"}, - {file = "validators-0.18.2.tar.gz", hash = "sha256:37cd9a9213278538ad09b5b9f9134266e7c226ab1fede1d500e29e0a8fbb9ea6"}, + {file = "validators-0.19.0.tar.gz", hash = "sha256:dec45f4381f042f1e705cfa74949505b77f1e27e8b05409096fee8152c839cbe"}, ] watchdog = [ {file = "watchdog-2.1.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:177bae28ca723bc00846466016d34f8c1d6a621383b6caca86745918d55c7383"}, diff --git a/pyproject.toml b/pyproject.toml index b3b2282c0..686b0ab58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,8 @@ networkx = "^2.7.1" sssom = "^0.3.8" ratelimit = "^2.2.1" appdirs = "^1.4.4" -semsql = "^0.1.2" +semsql = "^0.1.3" +lark = "^1.1.2" [tool.poetry.dev-dependencies] pytest = "^5.2" diff --git a/src/oaklib/__init__.py b/src/oaklib/__init__.py index b6aa58a1f..acfbb8c83 100644 --- a/src/oaklib/__init__.py +++ b/src/oaklib/__init__.py @@ -1,14 +1,11 @@ """ -Datamodels ----------- - -..note :: - - this package includes multiple alternative overlapping datamodels; - in general a maximum of one datamodel is required for any one use case. 
- +Oaklib +------ """ __version__ = '0.1.0' from oaklib.interfaces import BasicOntologyInterface +from oaklib.resource import OntologyResource + +schemes = {} diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py index 777839f88..1f3c872a1 100644 --- a/src/oaklib/cli.py +++ b/src/oaklib/cli.py @@ -249,8 +249,12 @@ def list_subset(subset, output: str): @main.command() @click.argument("words", nargs=-1) +@click.option('--text-file', + type=click.File(mode="r"), + help="Text file to annotate") @output_option -def annotate(words, output: str): +@output_type_option +def annotate(words, output: str, text_file: TextIO, output_type: str): """ Annotate a piece of text using a Named Entity Recognition annotation @@ -264,10 +268,26 @@ def annotate(words, output: str): For more on text annotation, see https://incatools.github.io/ontology-access-kit/interfaces/text-annotator.html """ impl = settings.impl - text = ' '.join(words) if isinstance(impl, TextAnnotatorInterface): - for ann in impl.annotate_text(text): - print(yaml_dumper.dumps(ann)) + if output_type is None or output_type == 'yaml': + writer = StreamingYamlWriter(output) + elif output_type == 'csv': + writer = StreamingCsvWriter(output) + else: + raise ValueError(f'unknown writer: {output_type}') + if words and text_file: + raise ValueError(f'Specify EITHER text-file OR a list of words as arguments') + if text_file: + for line in text_file.readlines(): + line = line.strip() + for ann in impl.annotate_text(line): + # TODO: better way to represent this + ann.subject_source = line + writer.emit(ann) + else: + text = ' '.join(words) + for ann in impl.annotate_text(text): + writer.emit(ann) else: raise NotImplementedError(f'Cannot execute this using {impl} of type {type(impl)}') @@ -678,6 +698,32 @@ def relationships(terms, output: str): else: raise NotImplementedError(f'Cannot execute this using {impl} of type {type(impl)}') + +@main.command() +@output_type_option +@output_option +def all_relationships(output: TextIO, 
output_type: str): + """ + Show all relationships for all terms + + Example: + runoak -i hp.db all-relationships + + """ + impl = settings.impl + if output_type is None or output_type == 'yaml': + writer = StreamingYamlWriter(output) + elif output_type == 'csv': + writer = StreamingCsvWriter(output) + else: + raise ValueError(f'No such format: {output_type}') + if isinstance(impl, OboGraphInterface): + for s, p, o in impl.all_relationships(): + writer.emit(dict(subject=s, predicate=p, object=o)) + else: + raise NotImplementedError(f'Cannot execute this using {impl} of type {type(impl)}') + + @main.command() @output_option def terms(output: str): @@ -695,6 +741,27 @@ def terms(output: str): else: raise NotImplementedError(f'Cannot execute this using {impl} of type {type(impl)}') + +@main.command() +@output_option +@predicates_option +def roots(output: str, predicates: str): + """ + List all root in the ontology + + Example: + + runoak -i db/cob.db terms + """ + impl = settings.impl + if isinstance(impl, OboGraphInterface): + actual_predicates = _process_predicates_arg(predicates) + for curie in impl.roots(actual_predicates): + print(f'{curie} ! 
{impl.get_label_by_curie(curie)}') + else: + raise NotImplementedError(f'Cannot execute this using {impl} of type {type(impl)}') + + @main.command() @output_option @output_type_option diff --git a/src/oaklib/datamodels/search.py b/src/oaklib/datamodels/search.py index 0d40edeb2..f27c512ae 100644 --- a/src/oaklib/datamodels/search.py +++ b/src/oaklib/datamodels/search.py @@ -1,5 +1,7 @@ from dataclasses import dataclass from typing import List +from lark import Lark, Transformer + from deprecated.classic import deprecated from oaklib.datamodels.search_datamodel import SearchBaseConfiguration, SearchProperty, SearchTermSyntax @@ -85,4 +87,8 @@ def use_label_only(self) -> "SearchConfiguration": self.include_definition = False self.include_aliases = False self.properties = [SearchProperty.LABEL] - return self \ No newline at end of file + return self + +query_grammar = r""" +query: disj | conj +""" \ No newline at end of file diff --git a/src/oaklib/datamodels/search_datamodel.py b/src/oaklib/datamodels/search_datamodel.py index e9be9b5d5..4a08a80a3 100644 --- a/src/oaklib/datamodels/search_datamodel.py +++ b/src/oaklib/datamodels/search_datamodel.py @@ -1,5 +1,5 @@ # Auto generated from search_datamodel.yaml by pythongen.py version: 0.9.0 -# Generation date: 2022-05-04T09:54:15 +# Generation date: 2022-05-07T21:49:17 # Schema: search-datamodel # # id: https://w3id.org/linkml/search_datamodel @@ -26,8 +26,8 @@ from linkml_runtime.utils.enumerations import EnumDefinitionImpl from rdflib import Namespace, URIRef from linkml_runtime.utils.curienamespace import CurieNamespace -from linkml_runtime.linkml_model.types import Boolean, Integer, String -from linkml_runtime.utils.metamodelcore import Bool +from linkml_runtime.linkml_model.types import Boolean, Integer, String, Uriorcurie +from linkml_runtime.utils.metamodelcore import Bool, URIorCURIE metamodel_version = "1.7.0" version = None @@ -139,6 +139,60 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): 
super().__post_init__(**kwargs) +@dataclass +class BooleanQuery(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = SEARCH.BooleanQuery + class_class_curie: ClassVar[str] = "search:BooleanQuery" + class_name: ClassVar[str] = "BooleanQuery" + class_model_uri: ClassVar[URIRef] = SEARCH.BooleanQuery + + operator: Optional[Union[str, "BooleanOperator"]] = None + operands: Optional[Union[Union[dict, "BooleanQuery"], List[Union[dict, "BooleanQuery"]]]] = empty_list() + atom: Optional[Union[dict, "AtomicQuery"]] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.operator is not None and not isinstance(self.operator, BooleanOperator): + self.operator = BooleanOperator(self.operator) + + if not isinstance(self.operands, list): + self.operands = [self.operands] if self.operands is not None else [] + self.operands = [v if isinstance(v, BooleanQuery) else BooleanQuery(**as_dict(v)) for v in self.operands] + + if self.atom is not None and not isinstance(self.atom, AtomicQuery): + self.atom = AtomicQuery(**as_dict(self.atom)) + + super().__post_init__(**kwargs) + + +@dataclass +class AtomicQuery(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = SEARCH.AtomicQuery + class_class_curie: ClassVar[str] = "search:AtomicQuery" + class_name: ClassVar[str] = "AtomicQuery" + class_model_uri: ClassVar[URIRef] = SEARCH.AtomicQuery + + graph_function: Optional[Union[str, "GraphFunction"]] = None + graph_predicates: Optional[Union[Union[str, URIorCURIE], List[Union[str, URIorCURIE]]]] = empty_list() + search_term: Optional[Union[dict, SearchBaseConfiguration]] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.graph_function is not None and not isinstance(self.graph_function, GraphFunction): + self.graph_function = GraphFunction(self.graph_function) + + if not isinstance(self.graph_predicates, list): + self.graph_predicates = 
[self.graph_predicates] if self.graph_predicates is not None else [] + self.graph_predicates = [v if isinstance(v, URIorCURIE) else URIorCURIE(v) for v in self.graph_predicates] + + if self.search_term is not None and not isinstance(self.search_term, SearchBaseConfiguration): + self.search_term = SearchBaseConfiguration(**as_dict(self.search_term)) + + super().__post_init__(**kwargs) + + @dataclass class SearchResult(YAMLRoot): """ @@ -266,6 +320,30 @@ class SearchProperty(EnumDefinitionImpl): description="A property that can be searched on", ) +class BooleanOperator(EnumDefinitionImpl): + + AND = PermissibleValue(text="AND") + OR = PermissibleValue(text="OR") + NOT = PermissibleValue(text="NOT") + XOR = PermissibleValue(text="XOR") + + _defn = EnumDefinition( + name="BooleanOperator", + ) + +class GraphFunction(EnumDefinitionImpl): + + DESCENDANT_OF = PermissibleValue(text="DESCENDANT_OF") + ANCESTOR_OF = PermissibleValue(text="ANCESTOR_OF") + PROPER_DESCENDANT_OF = PermissibleValue(text="PROPER_DESCENDANT_OF") + PROPER_ANCESTOR_OF = PermissibleValue(text="PROPER_ANCESTOR_OF") + PARENT_OF = PermissibleValue(text="PARENT_OF") + CHILD_OF = PermissibleValue(text="CHILD_OF") + + _defn = EnumDefinition( + name="GraphFunction", + ) + # Slots class slots: pass @@ -312,6 +390,24 @@ class slots: slots.searchBaseConfiguration__categories = Slot(uri=SEARCH.categories, name="searchBaseConfiguration__categories", curie=SEARCH.curie('categories'), model_uri=SEARCH.searchBaseConfiguration__categories, domain=None, range=Optional[Union[str, List[str]]]) +slots.booleanQuery__operator = Slot(uri=SEARCH.operator, name="booleanQuery__operator", curie=SEARCH.curie('operator'), + model_uri=SEARCH.booleanQuery__operator, domain=None, range=Optional[Union[str, "BooleanOperator"]]) + +slots.booleanQuery__operands = Slot(uri=SEARCH.operands, name="booleanQuery__operands", curie=SEARCH.curie('operands'), + model_uri=SEARCH.booleanQuery__operands, domain=None, 
range=Optional[Union[Union[dict, BooleanQuery], List[Union[dict, BooleanQuery]]]]) + +slots.booleanQuery__atom = Slot(uri=SEARCH.atom, name="booleanQuery__atom", curie=SEARCH.curie('atom'), + model_uri=SEARCH.booleanQuery__atom, domain=None, range=Optional[Union[dict, AtomicQuery]]) + +slots.atomicQuery__graph_function = Slot(uri=SEARCH.graph_function, name="atomicQuery__graph_function", curie=SEARCH.curie('graph_function'), + model_uri=SEARCH.atomicQuery__graph_function, domain=None, range=Optional[Union[str, "GraphFunction"]]) + +slots.atomicQuery__graph_predicates = Slot(uri=SEARCH.graph_predicates, name="atomicQuery__graph_predicates", curie=SEARCH.curie('graph_predicates'), + model_uri=SEARCH.atomicQuery__graph_predicates, domain=None, range=Optional[Union[Union[str, URIorCURIE], List[Union[str, URIorCURIE]]]]) + +slots.atomicQuery__search_term = Slot(uri=SEARCH.search_term, name="atomicQuery__search_term", curie=SEARCH.curie('search_term'), + model_uri=SEARCH.atomicQuery__search_term, domain=None, range=Optional[Union[dict, SearchBaseConfiguration]]) + slots.searchResult__rank = Slot(uri=SEARCH.rank, name="searchResult__rank", curie=SEARCH.curie('rank'), model_uri=SEARCH.searchResult__rank, domain=None, range=Optional[int]) diff --git a/src/oaklib/datamodels/search_datamodel.yaml b/src/oaklib/datamodels/search_datamodel.yaml index d29f3edb6..95987363d 100644 --- a/src/oaklib/datamodels/search_datamodel.yaml +++ b/src/oaklib/datamodels/search_datamodel.yaml @@ -93,6 +93,26 @@ classes: categories: multivalued: true + BooleanQuery: + attributes: + operator: + range: BooleanOperator + operands: + range: BooleanQuery + multivalued: true + atom: + range: AtomicQuery + + AtomicQuery: + attributes: + graph_function: + range: GraphFunction + graph_predicates: + range: uriorcurie + multivalued: true + search_term: + range: SearchBaseConfiguration + SearchResult: description: An individual search result attributes: @@ -169,3 +189,20 @@ enums: description: Any 
informative text attached to the entity including comments, definitions, descriptions, examples ANYTHING: meaning: rdf:Property + + BooleanOperator: + permissible_values: + AND: + OR: + NOT: + XOR: + + GraphFunction: + permissible_values: + DESCENDANT_OF: + ANCESTOR_OF: + PROPER_DESCENDANT_OF: + PROPER_ANCESTOR_OF: + PARENT_OF: + CHILD_OF: + diff --git a/src/oaklib/datamodels/text_annotator.py b/src/oaklib/datamodels/text_annotator.py index 11fbbde2d..cb26077e2 100644 --- a/src/oaklib/datamodels/text_annotator.py +++ b/src/oaklib/datamodels/text_annotator.py @@ -1,5 +1,5 @@ # Auto generated from text_annotator.yaml by pythongen.py version: 0.9.0 -# Generation date: 2022-04-05T17:21:12 +# Generation date: 2022-05-06T18:20:18 # Schema: text-annotator # # id: https://w3id.org/linkml/text_annotator @@ -168,6 +168,7 @@ class TextAnnotation(YAMLRoot): confidence: Optional[float] = None match_string: Optional[str] = None is_longest_match: Optional[Union[bool, Bool]] = None + matches_whole_text: Optional[Union[bool, Bool]] = None match_type: Optional[str] = None info: Optional[str] = None subject_start: Optional[Union[int, Position]] = None @@ -198,6 +199,9 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if self.is_longest_match is not None and not isinstance(self.is_longest_match, Bool): self.is_longest_match = Bool(self.is_longest_match) + if self.matches_whole_text is not None and not isinstance(self.matches_whole_text, Bool): + self.matches_whole_text = Bool(self.matches_whole_text) + if self.match_type is not None and not isinstance(self.match_type, str): self.match_type = str(self.match_type) @@ -302,6 +306,9 @@ class slots: slots.textAnnotation__is_longest_match = Slot(uri=ANN.is_longest_match, name="textAnnotation__is_longest_match", curie=ANN.curie('is_longest_match'), model_uri=ANN.textAnnotation__is_longest_match, domain=None, range=Optional[Union[bool, Bool]]) +slots.textAnnotation__matches_whole_text = Slot(uri=ANN.matches_whole_text, 
name="textAnnotation__matches_whole_text", curie=ANN.curie('matches_whole_text'), + model_uri=ANN.textAnnotation__matches_whole_text, domain=None, range=Optional[Union[bool, Bool]]) + slots.textAnnotation__match_type = Slot(uri=ANN.match_type, name="textAnnotation__match_type", curie=ANN.curie('match_type'), model_uri=ANN.textAnnotation__match_type, domain=None, range=Optional[str]) diff --git a/src/oaklib/datamodels/text_annotator.yaml b/src/oaklib/datamodels/text_annotator.yaml index 7383564e8..2164e96c9 100644 --- a/src/oaklib/datamodels/text_annotator.yaml +++ b/src/oaklib/datamodels/text_annotator.yaml @@ -1,8 +1,12 @@ id: https://w3id.org/linkml/text_annotator title: Text Annotator Datamodel name: text-annotator -description: A datamodel for representing the results of textual named entity recognition annotation results +description: >- + A datamodel for representing the results of textual named entity recognition annotation results. + This draws upon both SSSOM and https://www.w3.org/TR/annotation-model/ license: https://creativecommons.org/publicdomain/zero/1.0/ +see_also: + - https://github.com/mapping-commons/sssom/issues/155 prefixes: linkml: https://w3id.org/linkml/ @@ -14,6 +18,7 @@ prefixes: prov: http://www.w3.org/ns/prov# sssom: http://w3id.org/sssom/ bpa: https://bioportal.bioontology.org/annotator/ + oa: http://www.w3.org/ns/oa# default_prefix: ann default_range: string @@ -77,11 +82,14 @@ classes: - bpa:text subject_source: slot_uri: sssom:subject_source + exact_mappings: + - oa:hasBody subject_text_id: range: TextualElement TextAnnotation: description: An individual text annotation + class_uri: oa:Annotation mixins: - HasSpan attributes: @@ -104,6 +112,8 @@ classes: slot_uri: sssom:match_string is_longest_match: range: boolean + matches_whole_text: + range: boolean match_type: info: diff --git a/src/oaklib/datamodels/validation_datamodel.py b/src/oaklib/datamodels/validation_datamodel.py index 817e81e1a..f0e27d52b 100644 --- 
a/src/oaklib/datamodels/validation_datamodel.py +++ b/src/oaklib/datamodels/validation_datamodel.py @@ -1,5 +1,5 @@ # Auto generated from validation_datamodel.yaml by pythongen.py version: 0.9.0 -# Generation date: 2022-04-11T17:33:29 +# Generation date: 2022-05-07T21:26:12 # Schema: validaton-results # # id: https://w3id.org/linkml/validation_results @@ -22,8 +22,8 @@ from linkml_runtime.utils.enumerations import EnumDefinitionImpl from rdflib import Namespace, URIRef from linkml_runtime.utils.curienamespace import CurieNamespace -from linkml_runtime.linkml_model.types import Integer, Nodeidentifier, String -from linkml_runtime.utils.metamodelcore import NodeIdentifier +from linkml_runtime.linkml_model.types import Boolean, Integer, Nodeidentifier, String +from linkml_runtime.utils.metamodelcore import Bool, NodeIdentifier metamodel_version = "1.7.0" version = None @@ -54,6 +54,9 @@ class TypeSeverityKeyValueType(NodeIdentifier): @dataclass class ValidationConfiguration(YAMLRoot): + """ + Configuration parameters for execution of a validation report + """ _inherited_slots: ClassVar[List[str]] = [] class_class_uri: ClassVar[URIRef] = REPORTING.ValidationConfiguration @@ -77,6 +80,31 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): super().__post_init__(**kwargs) +@dataclass +class RepairConfiguration(YAMLRoot): + """ + Configuration parameters for execution of validation repairs + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = REPORTING.RepairConfiguration + class_class_curie: ClassVar[str] = "reporting:RepairConfiguration" + class_name: ClassVar[str] = "RepairConfiguration" + class_model_uri: ClassVar[URIRef] = REPORTING.RepairConfiguration + + validation_configuration: Optional[Union[dict, ValidationConfiguration]] = None + dry_run: Optional[Union[bool, Bool]] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.validation_configuration is not None and not 
isinstance(self.validation_configuration, ValidationConfiguration): + self.validation_configuration = ValidationConfiguration(**as_dict(self.validation_configuration)) + + if self.dry_run is not None and not isinstance(self.dry_run, Bool): + self.dry_run = Bool(self.dry_run) + + super().__post_init__(**kwargs) + + @dataclass class TypeSeverityKeyValue(YAMLRoot): _inherited_slots: ClassVar[List[str]] = [] @@ -102,9 +130,31 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @dataclass -class ValidationReport(YAMLRoot): +class Report(YAMLRoot): """ - A report object + A report object that is a holder to multiple report results + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = REPORTING.Report + class_class_curie: ClassVar[str] = "reporting:Report" + class_name: ClassVar[str] = "Report" + class_model_uri: ClassVar[URIRef] = REPORTING.Report + + results: Optional[Union[Union[dict, "Result"], List[Union[dict, "Result"]]]] = empty_list() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if not isinstance(self.results, list): + self.results = [self.results] if self.results is not None else [] + self.results = [v if isinstance(v, Result) else Result(**as_dict(v)) for v in self.results] + + super().__post_init__(**kwargs) + + +@dataclass +class ValidationReport(Report): + """ + A holder for multiple validation results """ _inherited_slots: ClassVar[List[str]] = [] @@ -124,7 +174,38 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @dataclass -class ValidationResult(YAMLRoot): +class RepairReport(Report): + """ + A repair object + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = REPORTING.RepairReport + class_class_curie: ClassVar[str] = "reporting:RepairReport" + class_name: ClassVar[str] = "RepairReport" + class_model_uri: ClassVar[URIRef] = REPORTING.RepairReport + + results: Optional[Union[Union[dict, "RepairOperation"], 
List[Union[dict, "RepairOperation"]]]] = empty_list() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if not isinstance(self.results, list): + self.results = [self.results] if self.results is not None else [] + self.results = [v if isinstance(v, RepairOperation) else RepairOperation(**as_dict(v)) for v in self.results] + + super().__post_init__(**kwargs) + + +class Result(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = REPORTING.Result + class_class_curie: ClassVar[str] = "reporting:Result" + class_name: ClassVar[str] = "Result" + class_model_uri: ClassVar[URIRef] = REPORTING.Result + + +@dataclass +class ValidationResult(Result): """ An individual result arising from validation of a data instance using a particular rule """ @@ -176,6 +257,32 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): super().__post_init__(**kwargs) +@dataclass +class RepairOperation(Result): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = REPORTING.RepairOperation + class_class_curie: ClassVar[str] = "reporting:RepairOperation" + class_name: ClassVar[str] = "RepairOperation" + class_model_uri: ClassVar[URIRef] = REPORTING.RepairOperation + + repairs: Optional[Union[dict, ValidationResult]] = None + modified: Optional[Union[bool, Bool]] = None + successful: Optional[Union[bool, Bool]] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.repairs is not None and not isinstance(self.repairs, ValidationResult): + self.repairs = ValidationResult(**as_dict(self.repairs)) + + if self.modified is not None and not isinstance(self.modified, Bool): + self.modified = Bool(self.modified) + + if self.successful is not None and not isinstance(self.successful, Bool): + self.successful = Bool(self.successful) + + super().__post_init__(**kwargs) + + @dataclass class ExternalReferenceValidationResult(ValidationResult): """ @@ -278,6 +385,9 @@ class 
slots: slots.info = Slot(uri=SH.resultMessage, name="info", curie=SH.curie('resultMessage'), model_uri=REPORTING.info, domain=None, range=Optional[str]) +slots.results = Slot(uri=SH.result, name="results", curie=SH.curie('result'), + model_uri=REPORTING.results, domain=None, range=Optional[Union[Union[dict, Result], List[Union[dict, Result]]]]) + slots.validationConfiguration__max_number_results_per_type = Slot(uri=REPORTING.max_number_results_per_type, name="validationConfiguration__max_number_results_per_type", curie=REPORTING.curie('max_number_results_per_type'), model_uri=REPORTING.validationConfiguration__max_number_results_per_type, domain=None, range=Optional[int]) @@ -287,14 +397,26 @@ class slots: slots.validationConfiguration__schema_path = Slot(uri=REPORTING.schema_path, name="validationConfiguration__schema_path", curie=REPORTING.curie('schema_path'), model_uri=REPORTING.validationConfiguration__schema_path, domain=None, range=Optional[str]) +slots.repairConfiguration__validation_configuration = Slot(uri=REPORTING.validation_configuration, name="repairConfiguration__validation_configuration", curie=REPORTING.curie('validation_configuration'), + model_uri=REPORTING.repairConfiguration__validation_configuration, domain=None, range=Optional[Union[dict, ValidationConfiguration]]) + +slots.repairConfiguration__dry_run = Slot(uri=REPORTING.dry_run, name="repairConfiguration__dry_run", curie=REPORTING.curie('dry_run'), + model_uri=REPORTING.repairConfiguration__dry_run, domain=None, range=Optional[Union[bool, Bool]]) + slots.typeSeverityKeyValue__type = Slot(uri=REPORTING.type, name="typeSeverityKeyValue__type", curie=REPORTING.curie('type'), model_uri=REPORTING.typeSeverityKeyValue__type, domain=None, range=URIRef) slots.typeSeverityKeyValue__severity = Slot(uri=REPORTING.severity, name="typeSeverityKeyValue__severity", curie=REPORTING.curie('severity'), model_uri=REPORTING.typeSeverityKeyValue__severity, domain=None, range=Optional[Union[str, 
"SeverityOptions"]]) -slots.validationReport__results = Slot(uri=SH.result, name="validationReport__results", curie=SH.curie('result'), - model_uri=REPORTING.validationReport__results, domain=None, range=Optional[Union[Union[dict, ValidationResult], List[Union[dict, ValidationResult]]]]) +slots.repairOperation__repairs = Slot(uri=REPORTING.repairs, name="repairOperation__repairs", curie=REPORTING.curie('repairs'), + model_uri=REPORTING.repairOperation__repairs, domain=None, range=Optional[Union[dict, ValidationResult]]) + +slots.repairOperation__modified = Slot(uri=REPORTING.modified, name="repairOperation__modified", curie=REPORTING.curie('modified'), + model_uri=REPORTING.repairOperation__modified, domain=None, range=Optional[Union[bool, Bool]]) + +slots.repairOperation__successful = Slot(uri=REPORTING.successful, name="repairOperation__successful", curie=REPORTING.curie('successful'), + model_uri=REPORTING.repairOperation__successful, domain=None, range=Optional[Union[bool, Bool]]) slots.externalReferenceValidationResult__url = Slot(uri=REPORTING.url, name="externalReferenceValidationResult__url", curie=REPORTING.curie('url'), model_uri=REPORTING.externalReferenceValidationResult__url, domain=None, range=Optional[str]) @@ -307,3 +429,9 @@ class slots: slots.externalReferenceValidationResult__http_response_code = Slot(uri=REPORTING.http_response_code, name="externalReferenceValidationResult__http_response_code", curie=REPORTING.curie('http_response_code'), model_uri=REPORTING.externalReferenceValidationResult__http_response_code, domain=None, range=Optional[int]) + +slots.ValidationReport_results = Slot(uri=SH.result, name="ValidationReport_results", curie=SH.curie('result'), + model_uri=REPORTING.ValidationReport_results, domain=ValidationReport, range=Optional[Union[Union[dict, "ValidationResult"], List[Union[dict, "ValidationResult"]]]]) + +slots.RepairReport_results = Slot(uri=SH.result, name="RepairReport_results", curie=SH.curie('result'), + 
model_uri=REPORTING.RepairReport_results, domain=RepairReport, range=Optional[Union[Union[dict, "RepairOperation"], List[Union[dict, "RepairOperation"]]]]) diff --git a/src/oaklib/datamodels/validation_datamodel.yaml b/src/oaklib/datamodels/validation_datamodel.yaml index 598a8b432..a498ce2d9 100644 --- a/src/oaklib/datamodels/validation_datamodel.yaml +++ b/src/oaklib/datamodels/validation_datamodel.yaml @@ -35,6 +35,7 @@ imports: #================================== classes: ValidationConfiguration: + description: Configuration parameters for execution of a validation report attributes: max_number_results_per_type: range: integer @@ -48,6 +49,15 @@ classes: range: string description: allows overriding the default OMO schema + RepairConfiguration: + description: Configuration parameters for execution of validation repairs + attributes: + validation_configuration: + range: ValidationConfiguration + dry_run: + range: boolean + + TypeSeverityKeyValue: attributes: type: @@ -56,18 +66,34 @@ classes: severity: range: severity_options + Report: + abstract: true + description: A report object that is a holder to multiple report results + slots: + - results + ValidationReport: + is_a: Report class_uri: sh:ValidationReport - description: A report object - attributes: + description: A holder for multiple validation results + slot_usage: results: - slot_uri: sh:result range: ValidationResult - multivalued: true todos: - add prov object + RepairReport: + is_a: Report + description: A repair object + slot_usage: + results: + range: RepairOperation + + Result: + abstract: true + ValidationResult: + is_a: Result class_uri: sh:ValidationResult description: An individual result arising from validation of a data instance using a particular rule slots: @@ -81,6 +107,18 @@ classes: - source - info + RepairOperation: + is_a: Result + todos: + - integrate with kgcl data model, to be able to describe changes + attributes: + repairs: + range: ValidationResult + modified: + range: boolean + 
successful: + range: boolean + ExternalReferenceValidationResult: is_a: ValidationResult description: A validation result where the check is to determine if a link to an external resource is still valid @@ -126,6 +164,10 @@ slots: info: range: string slot_uri: sh:resultMessage + results: + slot_uri: sh:result + range: Result + multivalued: true #================================== diff --git a/src/oaklib/datamodels/vocabulary.py b/src/oaklib/datamodels/vocabulary.py index d6080af79..b50da9afd 100644 --- a/src/oaklib/datamodels/vocabulary.py +++ b/src/oaklib/datamodels/vocabulary.py @@ -4,7 +4,7 @@ WIKIDATA = CurieNamespace('wikidata', 'http://www.wikidata.org/entity/') WDP = CurieNamespace('wdp', 'http://www.wikidata.org/prop/direct/') -NAMESPACES = [omd.SKOS, omd.RDF, omd.RDFS, omd.OIO, WIKIDATA, WDP] +NAMESPACES = [omd.OWL, omd.SKOS, omd.RDF, omd.RDFS, omd.OIO, WIKIDATA, WDP] DEFAULT_PREFIX_MAP = {ns.prefix: str(ns) for ns in NAMESPACES} APP_NAME = 'ontology-access-kit' diff --git a/src/oaklib/implementations/bioportal/agroportal_implementation.py b/src/oaklib/implementations/bioportal/agroportal_implementation.py new file mode 100644 index 000000000..5ef50d636 --- /dev/null +++ b/src/oaklib/implementations/bioportal/agroportal_implementation.py @@ -0,0 +1,33 @@ +import logging +from dataclasses import dataclass, field +from typing import Any, Dict, Iterable, Iterator, List, Tuple, Union +from urllib.parse import quote + +import requests +from oaklib.datamodels.text_annotator import TextAnnotation +from oaklib.implementations import BioportalImplementation +from oaklib.interfaces.basic_ontology_interface import PREFIX_MAP +from oaklib.interfaces.mapping_provider_interface import MappingProviderInterface +from oaklib.interfaces.search_interface import SearchInterface +from oaklib.datamodels.search import SearchConfiguration +from oaklib.interfaces.text_annotator_interface import TextAnnotatorInterface +from oaklib.types import CURIE, URI +from 
oaklib.utilities.apikey_manager import get_apikey_value +from oaklib.utilities.rate_limiter import check_limit +from sssom import Mapping +from sssom.sssom_datamodel import MatchTypeEnum + + +@dataclass +class AgroportalImplementation(BioportalImplementation, TextAnnotatorInterface, SearchInterface, MappingProviderInterface): + """ + Implementation over agroportal endpoint + + """ + + @property + def _base_url(self) -> str: + return "http://data.agroportal.lirmm.fr/" + + def load_bioportal_api_key(self, path: str = None) -> None: + self.bioportal_api_key = '1de0a270-29c5-4dda-b043-7c3580628cd5' diff --git a/src/oaklib/implementations/bioportal/bioportal_implementation.py b/src/oaklib/implementations/bioportal/bioportal_implementation.py index 5ed5b30bb..2729f482f 100644 --- a/src/oaklib/implementations/bioportal/bioportal_implementation.py +++ b/src/oaklib/implementations/bioportal/bioportal_implementation.py @@ -46,6 +46,10 @@ def get_prefix_map(self) -> PREFIX_MAP: # TODO return {} + @property + def _base_url(self) -> str: + return REST_URL + def load_bioportal_api_key(self, path: str = None) -> None: self.bioportal_api_key = get_apikey_value('bioportal') @@ -79,7 +83,9 @@ def annotate_text(self, text: str) -> Iterator[TextAnnotation]: params = {'include': include_str, 'require_exact_match': require_exact_match, 'text': text} - r = self._bioportal_get(REST_URL + '/annotator', params=params) + if self.resource and self.resource.slug: + params['ontologies'] = self.resource.slug.upper() + r = self._bioportal_get(self._base_url + '/annotator', params=params) return self.json_to_results(r.json(), text) def json_to_results(self, json_list: List[Any], text: str) -> Iterator[TextAnnotation]: @@ -88,21 +94,29 @@ def json_to_results(self, json_list: List[Any], text: str) -> Iterator[TextAnnot for obj in json_list: ac_obj = obj['annotatedClass'] for x in obj['annotations']: - ann = TextAnnotation(subject_start=x['from'], - subject_end=x['to'], - subject_label=x['text'], - 
object_id=self.uri_to_curie(ac_obj['@id']), - object_label=ac_obj['prefLabel'], - object_source=ac_obj['links']['ontology'], - match_type=x['matchType'], - #info=str(obj) - ) - uid = ann.subject_start, ann.subject_end, ann.object_id - if uid in seen: - logging.debug(f'Skipping duplicative annotation to {ann.object_source}') - continue - seen[uid] = True - yield ann + try: + ann = TextAnnotation(subject_start=x['from'], + subject_end=x['to'], + subject_label=x['text'], + object_id=self.uri_to_curie(ac_obj['@id']), + object_label=ac_obj['prefLabel'], + object_source=ac_obj['links']['ontology'], + match_type=x['matchType'], + #info=str(obj) + ) + if len(text) == ann.subject_end: + ann.matches_whole_text = True + uid = ann.subject_start, ann.subject_end, ann.object_id + if uid in seen: + logging.debug(f'Skipping duplicative annotation to {ann.object_source}') + continue + seen[uid] = True + yield ann + except KeyError: + # TODO: we should never catch exceptions in this way; + # this is temporary until we figure out why sometimes BP payloads + # lack some keys such as prefLabel + logging.error(f'Missing keys in annotation: {x} in {obj} when parsing {text}') # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Implements: SearchInterface @@ -110,8 +124,9 @@ def json_to_results(self, json_list: List[Any], text: str) -> Iterator[TextAnnot def basic_search(self, search_term: str, config: SearchConfiguration = SearchConfiguration()) -> Iterable[CURIE]: - r = self._bioportal_get(REST_URL + '/search', params={'q': search_term, 'include': ['prefLabel']}) + r = self._bioportal_get(self._base_url + '/search', params={'q': search_term, 'include': ['prefLabel']}) obj = r.json() + logging.debug(f'Search obj={obj}') collection = obj['collection'] while len(collection) > 0: result = collection[0] @@ -123,7 +138,6 @@ def basic_search(self, search_term: str, config: SearchConfiguration = SearchCon collection = collection[1:] if len(collection) == 0: next_page = obj['links']['nextPage'] - 
#print(f'NEXT={next_page}') if next_page: check_limit() r = requests.get(next_page, headers=self._headers()) @@ -140,7 +154,7 @@ def get_sssom_mappings_by_curie(self, id: Union[CURIE, URI]) -> Iterable[Mapping # This may return lots of duplicate mappings # See: https://github.com/ncbo/ontologies_linked_data/issues/117 quoted_class_uri = quote(class_uri, safe='') - req_url = f'{REST_URL}/ontologies/{ontology}/classes/{quoted_class_uri}/mappings' + req_url = f'{self._base_url}/ontologies/{ontology}/classes/{quoted_class_uri}/mappings' logging.debug(req_url) response = self._bioportal_get(req_url, params={'display_context': 'false'}) if (response.status_code != requests.codes.ok): @@ -176,7 +190,7 @@ def add_uri_to_ontology_mapping(self, ont_class: Dict[str, Any]) -> None: def ancestors(self, uri: URI) -> Iterable[URI]: ontology, uri = self._get_ontology_and_uri_from_id(uri) quoted_uri = quote(uri, safe='') - request_url = f'{REST_URL}/ontologies/{ontology}/classes/{quoted_uri}/ancestors' + request_url = f'{self._base_url}/ontologies/{ontology}/classes/{quoted_uri}/ancestors' logging.debug(request_url) response = self._bioportal_get(request_url, params={'display_context': 'false'}) if (response.status_code != requests.codes.ok): diff --git a/src/oaklib/implementations/sparql/abstract_sparql_implementation.py b/src/oaklib/implementations/sparql/abstract_sparql_implementation.py index d94bcd917..958fa7a7d 100644 --- a/src/oaklib/implementations/sparql/abstract_sparql_implementation.py +++ b/src/oaklib/implementations/sparql/abstract_sparql_implementation.py @@ -174,7 +174,9 @@ def list_of_named_graphs(self) -> List[URI]: self._list_of_named_graphs = [row['g']['value'] for row in ret["results"]["bindings"]] return self._list_of_named_graphs - def _query(self, query: Union[str, SparqlQuery], prefixes: PREFIX_MAP = {}): + def _query(self, query: Union[str, SparqlQuery], prefixes: PREFIX_MAP = None): + if prefixes is None: + prefixes = DEFAULT_PREFIX_MAP ng = 
self.named_graph if isinstance(query, SparqlQuery) and ng: if query.graph is not None: diff --git a/src/oaklib/implementations/sparql/lov_implementation.py b/src/oaklib/implementations/sparql/lov_implementation.py new file mode 100644 index 000000000..8ac574fba --- /dev/null +++ b/src/oaklib/implementations/sparql/lov_implementation.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass +from typing import Optional + +from oaklib.implementations.sparql.abstract_sparql_implementation import AbstractSparqlImplementation +from oaklib.interfaces.mapping_provider_interface import MappingProviderInterface +from oaklib.interfaces.obograph_interface import OboGraphInterface +from oaklib.interfaces.search_interface import SearchInterface + + +@dataclass +class LovImplementation(AbstractSparqlImplementation, SearchInterface, MappingProviderInterface, OboGraphInterface): + """ + Wraps the LOV SPARQL endpoint + + See https://lov.linkeddata.es/dataset/lov/sparql + + """ + + def _default_url(self) -> str: + return "https://lov.linkeddata.es/dataset/lov/sparql" + + @property + def named_graph(self) -> Optional[str]: + if self.resource.slug is None: + return None + else: + return self.resource.slug + + + + + + + + + + + diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py index 4c0d1cdfe..e9f4fd07a 100644 --- a/src/oaklib/implementations/sqldb/sql_implementation.py +++ b/src/oaklib/implementations/sqldb/sql_implementation.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from typing import List, Any, Iterable, Optional, Type, Dict, Union, Tuple, Iterator +import semsql.builder.builder as semsql_builder import sssom from linkml_runtime import SchemaView from linkml_runtime.utils.introspection import package_schemaview @@ -70,7 +71,18 @@ class SqlImplementation(RelationGraphInterface, OboGraphInterface, ValidatorInte def __post_init__(self): if self.engine is None: - self.engine = create_engine(str(self.resource.slug)) + locator =
str(self.resource.slug) + if locator.endswith('.owl'): + # this is currently an "Easter Egg" feature. It allows you to specify a locator + # such as sqlite:/path/to/my.owl + # then semsql will be invoked to build a sqlite db from this. + # the same sqlite db will be reused until the timestamp of the owl file changes. + # the catch is that EITHER the user must have BOTH rdftab and relation-graph installed, OR + # they should be running through ODK docker + locator = locator.replace('.owl', '.db').replace('sqlite:', '') + semsql_builder.make(locator) + locator = f'sqlite:///{locator}' + self.engine = create_engine(locator) @property def session(self): @@ -96,6 +108,10 @@ def all_entity_curies(self) -> Iterable[CURIE]: for row in self.engine.execute(s): yield row['id'] + def all_relationships(self) -> Iterable[RELATIONSHIP]: + for row in self.session.query(Edge): + yield row.subject, row.predicate, row.object + def get_label_by_curie(self, curie: CURIE) -> Optional[str]: s = text('SELECT value FROM rdfs_label_statement WHERE subject = :curie') for row in self.engine.execute(s, curie=curie): diff --git a/src/oaklib/interfaces/basic_ontology_interface.py b/src/oaklib/interfaces/basic_ontology_interface.py index 19bd14da4..b8f81b993 100644 --- a/src/oaklib/interfaces/basic_ontology_interface.py +++ b/src/oaklib/interfaces/basic_ontology_interface.py @@ -152,6 +152,23 @@ def all_relationships(self) -> Iterable[RELATIONSHIP]: for filler in fillers: yield curie, pred, filler + + def roots(self, predicates: List[PRED_CURIE] = None) -> Iterable[CURIE]: + """ + All root nodes, where root is defined as any node that is not the subject of + a relationship with one of the specified predicates + + :param predicates: predicates to consider; if None, a relationship with any predicate disqualifies a node + :return: iterator over the CURIEs of all root nodes + """ + candidates = set(list(self.all_entity_curies())) + for subject, pred, _ in self.all_relationships(): + if subject in candidates: + if predicates is None or pred in predicates: + candidates.remove(subject) + for term in candidates: + yield term + def
all_subset_curies(self) -> Iterable[SUBSET_CURIE]: """ returns iterator over all known subset CURIEs diff --git a/src/oaklib/interfaces/obograph_interface.py b/src/oaklib/interfaces/obograph_interface.py index 502b626f0..dc3ab3203 100644 --- a/src/oaklib/interfaces/obograph_interface.py +++ b/src/oaklib/interfaces/obograph_interface.py @@ -190,6 +190,7 @@ def subgraph(self, start_curies: Union[CURIE, List[CURIE]], predicates: List[PRE g = self._merge_graphs([up_graph, down_graph]) return g + def relationships_to_graph(self, relationships: Iterable[RELATIONSHIP]) -> Graph: """ Generates an OboGraph from a list of relationships diff --git a/src/oaklib/interfaces/validator_interface.py b/src/oaklib/interfaces/validator_interface.py index 535c4a971..2a4e4dd8d 100644 --- a/src/oaklib/interfaces/validator_interface.py +++ b/src/oaklib/interfaces/validator_interface.py @@ -4,7 +4,8 @@ from oaklib.interfaces.basic_ontology_interface import BasicOntologyInterface from oaklib.types import CURIE -from oaklib.datamodels.validation_datamodel import ValidationResult, ValidationConfiguration +from oaklib.datamodels.validation_datamodel import ValidationResult, ValidationConfiguration, RepairOperation, \ + RepairConfiguration class ValidatorInterface(BasicOntologyInterface, ABC): @@ -53,6 +54,9 @@ def validate(self, configuration: ValidationConfiguration = None) -> Iterable[Va """ raise NotImplementedError + def repair(self, configuration: RepairConfiguration = None) -> Iterable[RepairOperation]: + raise NotImplementedError + def check_external_references(self): raise NotImplementedError diff --git a/src/oaklib/selector.py b/src/oaklib/selector.py index 875ace6c2..f9adbcdd2 100644 --- a/src/oaklib/selector.py +++ b/src/oaklib/selector.py @@ -1,12 +1,16 @@ +import importlib import logging +import pkgutil from dataclasses import dataclass from pathlib import Path -from typing import Type, Union +from typing import Type, Union, Optional +from 
def discover_plugins(prefix: str = 'oakext_') -> dict:
    """
    Imports every importable top-level module whose name starts with a prefix

    A module that fails to import is logged and left out of the result, so that a
    single broken extension cannot stop this module from being imported.

    :param prefix: module-name prefix that marks a module as a plugin
    :return: mapping of module name to the imported module object
    """
    plugins = {}
    for _, name, _ in pkgutil.iter_modules():
        if not name.startswith(prefix):
            continue
        try:
            plugins[name] = importlib.import_module(name)
        except Exception:
            # plugin code is outside this package; whatever it raises on import
            # is reported and skipped rather than propagated
            logging.warning(f'Could not import plugin {name}', exc_info=True)
    return plugins


# module name -> imported module, for every module found with the default prefix
discovered_plugins = discover_plugins()
class TestValidationDatamodel(unittest.TestCase):
    """
    Tests for the validation datamodel: building report objects and introspecting the schema
    """

    def test_create(self):
        """
        Tests the creation of an example instance of reports
        """
        result = vdm.ValidationResult(severity=vdm.SeverityOptions.ERROR)
        operation = vdm.RepairOperation(repairs=result, modified=True)
        repair_report = vdm.RepairReport(results=[operation])
        yaml_dumper.dump(repair_report, output_path('repair_report.vdm.yaml'))
        validation_report = vdm.ValidationReport(results=[result])
        yaml_dumper.dump(validation_report, output_path('validation_report.vdm.yaml'))

    def test_introspect(self):
        """
        Tests ability to introspect the schema and examine the schema elements
        """
        sv = package_schemaview(vdm.__name__)
        # assertIn still runs under python -O and reports the container on failure
        self.assertIn('severity', sv.all_slots())
        self.assertIn('ValidationResult', sv.all_classes())