Skip to content

Commit

Permalink
Merge pull request #34 from ternaustralia/edmond/ld-viewer
Browse files Browse the repository at this point in the history
Refactor and reorganise API and project structure - enable endpoint to act like lodview.it
  • Loading branch information
edmondchuc authored Sep 1, 2022
2 parents d3c0526 + 393ecfc commit 3bf32f5
Show file tree
Hide file tree
Showing 44 changed files with 1,632 additions and 361 deletions.
4 changes: 4 additions & 0 deletions src/linkeddata_api/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import logging

from flask import Flask, redirect, url_for

Expand All @@ -21,6 +22,9 @@ def create_app(config=None) -> Flask:
app = Flask("linkeddata_api")
app.config["VERSION"] = version

if app.config["ENV"] == "development":
logging.basicConfig(level=logging.INFO)

###################################################
# custom json encoder
###################################################
Expand Down
2 changes: 2 additions & 0 deletions src/linkeddata_api/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import exceptions
from . import sparql
22 changes: 22 additions & 0 deletions src/linkeddata_api/data/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class RequestError(Exception):
    """Error for an HTTP request that came back with a failing status.

    The SPARQL request helpers in this package raise it with the response
    body as the message. The message is stored on ``description`` and is also
    handed to ``Exception`` so ``str(error)`` yields the same text.
    """

    def __init__(self, description: str) -> None:
        self.description = description
        super().__init__(description)


class SPARQLResultJSONError(Exception):
    """Error for a SPARQL JSON result that lacks the expected keys.

    The message is stored on ``description`` and is also handed to
    ``Exception`` so ``str(error)`` yields the same text.
    """

    def __init__(self, description: str) -> None:
        self.description = description
        super().__init__(description)


class SPARQLNotFoundError(Exception):
    """SPARQL "not found" error.

    NOTE(review): judging by the name, this signals that a SPARQL lookup
    found nothing; confirm against the callers that raise it.

    The message is stored on ``description`` and is also handed to
    ``Exception`` so ``str(error)`` yields the same text.
    """

    def __init__(self, description: str) -> None:
        self.description = description
        super().__init__(description)
63 changes: 63 additions & 0 deletions src/linkeddata_api/data/sparql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import requests

from . import exceptions


def post(
    query: str, sparql_endpoint: str, accept: str = "application/sparql-results+json"
) -> requests.Response:
    """Make a SPARQL POST request.

    The query is sent unmodified as the request body with the
    ``application/sparql-query`` content type.

    If the response is JSON, use `response.json()` to get the Python dict.

    :param query: SPARQL query
    :param sparql_endpoint: SPARQL endpoint to query
    :param accept: The mimetype of the response value
    :return: Response object
    :raises exceptions.RequestError: The endpoint answered with a client or
        server error status (4xx/5xx). The response body is used as the
        error description.
    """
    headers = {
        "accept": accept,
        "content-type": "application/sparql-query",
    }

    # Encode explicitly. A ``str`` body is otherwise encoded by the HTTP layer
    # with its own default codec (ISO-8859-1 in ``http.client``), which fails
    # for queries containing characters outside Latin-1.
    body = query.encode("utf-8") if isinstance(query, str) else query

    response = requests.post(url=sparql_endpoint, headers=headers, data=body)

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        raise exceptions.RequestError(err.response.text) from err

    # TODO: raise empty response error here.

    return response


def get(
    query: str, sparql_endpoint: str, accept: str = "application/sparql-results+json"
) -> requests.Response:
    """Make a SPARQL GET request.

    The query travels in the ``query`` URL parameter.

    If the response is JSON, use `response.json()` to get the Python dict.

    :param query: SPARQL query
    :param sparql_endpoint: SPARQL endpoint to query
    :param accept: The mimetype of the response value
    :return: Response object
    :raises exceptions.RequestError: An error occurred and the response status code is not in the 200 range.
    """
    response = requests.get(
        url=sparql_endpoint,
        headers={"accept": accept},
        params={"query": query},
    )

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        # Surface the endpoint's own error text to the caller.
        message = http_error.response.text
        raise exceptions.RequestError(message) from http_error

    # TODO: raise empty response error here.

    return response
8 changes: 8 additions & 0 deletions src/linkeddata_api/domain/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from . import schema
from . import viewer
from . import rdf
from . import namespaces
from . import label
from . import internal_resource
from . import curie
from . import pydantic_jsonify
79 changes: 79 additions & 0 deletions src/linkeddata_api/domain/curie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import logging

import requests

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# URIs that don't have curies in external service.
# Filled by `get` when the external service answers with an HTTP error; each
# URI is stored as both key and value so later lookups return it unchanged.
not_found = {}

# Predefined prefixes. New prefixes get added at runtime.
# Maps a namespace (base) URI to its prefix. `get` adds an entry whenever the
# external service resolves a base URI.
prefixes = {
"http://purl.org/dc/terms/": "dcterms",
"http://www.w3.org/2004/02/skos/core#": "skos",
"http://www.w3.org/2000/01/rdf-schema#": "rdfs",
"https://schema.org/": "schema",
"https://w3id.org/tern/ontologies/tern/": "tern",
"http://www.w3.org/2002/07/owl#": "owl",
"http://www.w3.org/2001/XMLSchema#": "xsd",
}

# Don't find curies for these - speeds up request processing.
# Entries are URI prefixes matched with `str.startswith` in `uri_in_skips`;
# `get` returns a matching URI as-is without calling the external service.
# TODO: these may no longer be needed since we don't fetch for subjects or objects of an RDF statement anymore.
skips = [
"https://linked.data.gov.au/def/nrm",
"https://linked.data.gov.au/def/test/dawe-cv",
"http://linked.data.gov.au/dataset",
"https://linked.data.gov.au/dataset",
]


def uri_in_skips(uri: str) -> bool:
    """Return True when ``uri`` starts with any entry of the module-level ``skips`` list."""
    return any(uri.startswith(skipped_prefix) for skipped_prefix in skips)


def get(uri: str, timeout: float = 10.0) -> str:
    """Get curie

    1. Check if it exists in prefixes.
    2. Check if it exists in cache.
    3. Make an expensive request to an external service. Cache the result.

    If all steps fail to find a curie, return the uri as-is. This includes
    the external service being unreachable, timing out, answering with an
    HTTP error, or returning a body without a usable ``value`` field.

    :param uri: The URI to shorten.
    :param timeout: Seconds to wait for the external prefix service before
        giving up and returning the uri unchanged.
    :return: The curie (``prefix:localname``), or ``uri`` itself when no
        curie could be determined.
    """
    # The local name is whatever follows the last "#" and then the last "/".
    localname = uri.split("#")[-1].split("/")[-1]

    for namespace, known_prefix in prefixes.items():
        if uri.startswith(namespace):
            return f"{known_prefix}:{localname}"

    if uri in not_found:
        return not_found.get(uri)
    if uri_in_skips(uri):
        return uri

    logger.info("Fetching curie from external service - %s", uri)
    r_index = uri.rfind(localname)
    base_uri = uri[:r_index]

    try:
        response = requests.post(
            "https://prefix.zazuko.com/api/v1/shrink",
            params={"q": base_uri},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # The service answered but could not shrink this URI; remember that so
        # we do not ask again.
        not_found[uri] = uri
        return uri
    except requests.exceptions.RequestException as err:
        # Connection problems and timeouts may be transient, so the outcome is
        # not cached; a later call can retry.
        logger.warning("Curie fetch failed for %s - %s", uri, err)
        return uri

    try:
        # The service's value ends with one trailing character that is not
        # part of the prefix (presumably the ":" separator); strip it.
        prefix = response.json()["value"][:-1]
    except (ValueError, KeyError, TypeError) as err:
        # Body was not JSON or did not have the expected shape.
        logger.warning("Unexpected curie response for %s - %s", uri, err)
        not_found[uri] = uri
        return uri

    prefixes[base_uri] = prefix
    curie = f"{prefix}:{localname}"
    logger.info("Curie fetch completed for %s, found %s", uri, curie)

    return curie
47 changes: 47 additions & 0 deletions src/linkeddata_api/domain/internal_resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from jinja2 import Template

from linkeddata_api import data


def _get_from_list_query(uris: list[str]) -> str:
    """Render the SPARQL query that flags each URI as internal or not.

    Every URI is placed in a ``VALUES`` block, and ``?internal`` is bound to
    whether at least one triple with that URI as subject exists.
    """
    query_template = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT distinct ?uri ?internal
WHERE {
VALUES (?uri) {
{% for uri in uris %}
(<{{ uri }}>)
{% endfor %}
}
bind(exists{ ?uri ?p ?o } as ?internal)
}
"""
    return Template(query_template).render(uris=uris)


def get_from_list(
    uris: list[str],
    sparql_endpoint: str,
) -> dict[str, bool]:
    """Return a dict of uri keys and "is internal" boolean values.

    A URI is internal when the SPARQL endpoint holds at least one triple
    with that URI as its subject.

    :param uris: URIs to check.
    :param sparql_endpoint: SPARQL endpoint to query.
    :return: Mapping of each returned URI to True (internal) or False.
        Empty when ``uris`` is empty.
    :raises data.exceptions.SPARQLResultJSONError: The result JSON does not
        have the expected ``results``/``bindings``/``uri``/``internal`` keys.
    """
    # Nothing to look up, so skip the network round trip.
    if not uris:
        return {}

    query = _get_from_list_query(uris)

    result = data.sparql.post(query, sparql_endpoint).json()

    try:
        # The value is compared against the literal text "true".
        return {
            str(row["uri"]["value"]): str(row["internal"]["value"]) == "true"
            for row in result["results"]["bindings"]
        }
    except KeyError as err:
        raise data.exceptions.SPARQLResultJSONError(
            f"Unexpected SPARQL result set.\n{result}\n{err}"
        ) from err
98 changes: 98 additions & 0 deletions src/linkeddata_api/domain/label.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from typing import Union

from jinja2 import Template

from linkeddata_api import data


def get(
    uri: str,
    sparql_endpoint: str,
) -> Union[str, None]:
    """
    Look up the skos:prefLabel of ``uri`` at the SPARQL endpoint.

    Returns the label from the first result row, or None if no label found.
    Raises data.exceptions.SPARQLResultJSONError when the result JSON does
    not have the expected keys.
    """
    # NOTE(review): ``uri`` is interpolated into the query text as-is;
    # confirm callers only pass trusted or validated URIs.
    query = f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?label
WHERE {{
VALUES (?labelProperty) {{
(skos:prefLabel)
}}
<{uri}> ?labelProperty ?label .
}}
"""

    response = data.sparql.post(query, sparql_endpoint)
    result = response.json()

    try:
        # Lazily walk the rows; only the first one is ever consumed.
        label_values = (
            binding["label"]["value"] for binding in result["results"]["bindings"]
        )
        return next(label_values, None)
    except KeyError as err:
        raise data.exceptions.SPARQLResultJSONError(
            f"Unexpected SPARQL result set.\n{result}\n{err}"
        ) from err


def _get_from_list_query(uris: list[str]) -> str:
    """Render the SPARQL query that fetches one skos:prefLabel per URI.

    Labels are looked up at the queried endpoint and, through a SERVICE
    clause, in TERN's controlled vocabularies repository.
    """
    # TODO: Only TERN's controlled vocabularies are federated at the moment.
    #       Other repositories could be added with further SERVICE clauses.
    query_template = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?uri (SAMPLE(?_label) AS ?label)
WHERE {
VALUES (?uri) {
{% for uri in uris %}
(<{{ uri }}>)
{% endfor %}
}
{
?uri skos:prefLabel ?_label .
}
UNION {
# Also try and fetch label from TERN's controlled vocabularies.
SERVICE <https://graphdb.tern.org.au/repositories/tern_vocabs_core> {
?uri skos:prefLabel ?_label .
}
}
}
GROUP BY ?uri
"""
    return Template(query_template).render(uris=uris)


def get_from_list(
    uris: list[str],
    sparql_endpoint: str,
) -> dict[str, str]:
    """Returns a dict of uri keys and label values.

    In addition to the SPARQL endpoint provided, it also fetches labels
    from TERN's controlled vocabularies via a federated SPARQL query.

    :param uris: URIs to fetch labels for.
    :param sparql_endpoint: SPARQL endpoint to query.
    :return: Mapping of URI to label for every URI that has one. Empty when
        ``uris`` is empty or nothing matched.
    :raises data.exceptions.SPARQLResultJSONError: The result JSON does not
        have the expected ``results``/``bindings``/``uri``/``label`` keys.
    """
    # Nothing to look up, so skip the (federated) network round trip.
    if not uris:
        return {}

    query = _get_from_list_query(uris)

    result = data.sparql.post(query, sparql_endpoint).json()

    try:
        rows = result["results"]["bindings"]

        # A single empty binding is treated as "no labels found" (presumably
        # what the endpoint returns when the aggregate query matches nothing).
        # Checking it here keeps every key lookup inside the ``try`` so a
        # malformed result always surfaces as SPARQLResultJSONError.
        if rows == [{}]:
            return {}

        return {str(row["uri"]["value"]): str(row["label"]["value"]) for row in rows}
    except KeyError as err:
        raise data.exceptions.SPARQLResultJSONError(
            f"Unexpected SPARQL result set.\n{result}\n{err}"
        ) from err
3 changes: 3 additions & 0 deletions src/linkeddata_api/domain/namespaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from rdflib import Namespace

# RDFLib namespace for terms under the TERN ontology base IRI.
TERN = Namespace("https://w3id.org/tern/ontologies/tern/")
File renamed without changes.
8 changes: 8 additions & 0 deletions src/linkeddata_api/domain/rdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from rdflib import Graph


def create_graph() -> Graph:
    """Return a fresh RDFLib Graph with the project's preferred prefix bindings.

    Currently only the ``tern`` prefix is bound.
    """
    tern_namespace = "https://w3id.org/tern/ontologies/tern/"

    new_graph = Graph()
    new_graph.bind("tern", tern_namespace)
    return new_graph
Loading

0 comments on commit 3bf32f5

Please sign in to comment.