Skip to content

Commit

Permalink
Merge pull request #31 from ternaustralia/edmond/vocab-viewer
Browse files Browse the repository at this point in the history
Speed improvement - get labels and internal resource statuses from a list of URIs in one request
  • Loading branch information
edmondchuc authored Jul 12, 2022
2 parents 2de8caf + bbe2abb commit 77eae8f
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG ALPINE_VERSION=3.13
ARG ALPINE_VERSION=3.16
ARG LINKEDDATA_API_VERSION

# BUILD and install code
Expand Down
1 change: 1 addition & 0 deletions src/linkeddata_api/vocab_viewer/nrm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from . import sparql
from . import label
from . import curie
from . import internal_resource
3 changes: 1 addition & 2 deletions src/linkeddata_api/vocab_viewer/nrm/curie.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"http://purl.org/dc/terms/": "dcterms",
"http://www.w3.org/2004/02/skos/core#": "skos",
"http://www.w3.org/2000/01/rdf-schema#": "rdfs",
"https://schema.org/": "sdo",
"https://schema.org/": "schema",
"https://w3id.org/tern/ontologies/tern/": "tern",
"http://www.w3.org/2002/07/owl#": "owl",
"http://www.w3.org/2001/XMLSchema#": "xsd",
Expand Down Expand Up @@ -47,7 +47,6 @@ def get(uri: str):
if uri in not_found:
return not_found.get(uri)
if uri_in_skips(uri):
print("in skip")
return uri

localname = uri.split("#")[-1].split("/")[-1]
Expand Down
48 changes: 48 additions & 0 deletions src/linkeddata_api/vocab_viewer/nrm/internal_resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from jinja2 import Template

from linkeddata_api.vocab_viewer import nrm


def _get_from_list_query(uris: list[str]) -> str:
template = Template(
"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT distinct ?uri ?internal
WHERE {
VALUES (?uri) {
(<http://example.com>)
{% for uri in uris %}
(<{{ uri }}>)
{% endfor %}
}
bind(exists{ ?uri ?p ?o } as ?internal)
}
"""
)
return template.render(uris=uris)


def get_from_list(
    uris: list[str],
    sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core",
) -> dict[str, bool]:
    """Return, for each URI, whether it is an internal resource of the endpoint.

    All URIs are checked with a single SPARQL request instead of one request
    per URI.

    Args:
        uris: URIs to check.
        sparql_endpoint: SPARQL endpoint the query is posted to.

    Returns:
        A dict with one entry per row of the response, keyed by the row's
        ``uri`` value. The value is True when the row's ``internal`` binding
        is the string "true", otherwise False.

    Raises:
        nrm.exceptions.SPARQLResultJSONError: If the response is missing one
            of the expected keys.
    """
    query = _get_from_list_query(uris)

    result = nrm.sparql.post(query, sparql_endpoint)

    try:
        # The endpoint reports the boolean as text, so compare against "true".
        return {
            str(row["uri"]["value"]): str(row["internal"]["value"]) == "true"
            for row in result["results"]["bindings"]
        }
    except KeyError as err:
        raise nrm.exceptions.SPARQLResultJSONError(
            f"Unexpected SPARQL result set.\n{result}\n{err}"
        ) from err
63 changes: 63 additions & 0 deletions src/linkeddata_api/vocab_viewer/nrm/label.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Union

from jinja2 import Template

from linkeddata_api.vocab_viewer import nrm


Expand Down Expand Up @@ -31,3 +33,64 @@ def get(
raise nrm.exceptions.SPARQLResultJSONError(
f"Unexpected SPARQL result set.\n{result}\n{err}"
) from err


def _get_from_list_query(uris: list[str]) -> str:
template = Template(
"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?uri (SAMPLE(?_label) AS ?label)
WHERE {
VALUES (?uri) {
{% for uri in uris %}
(<{{ uri }}>)
{% endfor %}
}
{
?uri skos:prefLabel ?_label .
}
UNION {
# Also try and fetch label from TERN's controlled vocabularies.
SERVICE <repository:tern_vocabs_core> {
?uri skos:prefLabel ?_label .
}
}
}
GROUP BY ?uri
"""
)
return template.render(uris=uris)


def get_from_list(
    uris: list[str],
    sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core",
) -> dict[str, str]:
    """Returns a dict of uri keys and label values.

    In addition to the SPARQL endpoint provided, it also fetches labels
    from TERN's controlled vocabularies via a federated SPARQL query.

    Args:
        uris: URIs to fetch labels for.
        sparql_endpoint: SPARQL endpoint the query is posted to.

    Returns:
        A dict mapping each URI present in the response to its label. URIs
        with no label are absent from the dict. An empty ``uris`` list yields
        an empty dict without contacting the endpoint.

    Raises:
        nrm.exceptions.SPARQLResultJSONError: If the response is missing one
            of the expected keys.
    """
    # Nothing to look up, so skip the network round trip entirely.
    if not uris:
        return {}

    query = _get_from_list_query(uris)

    result = nrm.sparql.post(query, sparql_endpoint)

    try:
        rows = result["results"]["bindings"]

        # A response holding one empty binding is treated as "no labels
        # found". It is checked up front so the error handler below never has
        # to index into ``result`` again, which could itself raise KeyError
        # and mask the intended SPARQLResultJSONError.
        if rows == [{}]:
            return {}

        return {str(row["uri"]["value"]): str(row["label"]["value"]) for row in rows}
    except KeyError as err:
        raise nrm.exceptions.SPARQLResultJSONError(
            f"Unexpected SPARQL result set.\n{result}\n{err}"
        ) from err
68 changes: 51 additions & 17 deletions src/linkeddata_api/vocab_viewer/nrm/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def get(
uri: str,
profile: str = None,
profile: str = None, # TODO: Add presentation handling for different kinds of data
sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core",
) -> nrm.schema.Resource:
query = f"""
Expand All @@ -19,41 +19,77 @@ def get(

try:
uri = uri
label = nrm.label.get(uri, sparql_endpoint)
types = []
properties = []
# TODO: Do a lookup with sparql to check if URI is internal.

uri_values = filter(
lambda x: x["o"]["type"] == "uri", result["results"]["bindings"]
)
uri_values = [value["o"]["value"] for value in uri_values]
uri_values.append(uri)

uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint)

label = nrm.label.get(uri, sparql_endpoint) or uri

uri_internal_index = nrm.internal_resource.get_from_list(uri_values)

if not uri_internal_index.get(uri):
raise nrm.exceptions.SPARQLNotFoundError(
f"Resource with URI {uri} not found."
)

for row in result["results"]["bindings"]:
if row["p"]["value"] == str(RDF.type):
type_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get(
row["o"]["value"]
)
types.append(
nrm.schema.URI(
label="rdf:type",
label=type_label,
value=row["o"]["value"],
internal=False, # TODO
internal=uri_internal_index.get(row["o"]["value"], False),
)
)
else:
predicate_label = nrm.curie.get(row["p"]["value"])
predicate = nrm.schema.URI(
label=predicate_label,
value=row["p"]["value"],
internal=False, # TODO
internal=uri_internal_index.get(row["p"]["value"], False),
)
if row["o"]["type"] == "uri":
curie = nrm.label.get(
row["o"]["value"], sparql_endpoint
object_label = uri_label_index.get(
row["o"]["value"]
) or nrm.curie.get(row["o"]["value"])
item = nrm.schema.URI(
label=curie,
label=object_label,
value=row["o"]["value"],
internal=False, # TODO
internal=uri_internal_index.get(row["o"]["value"], False),
)
elif row["o"]["type"] == "literal":
item = nrm.schema.Literal(value=row["o"]["value"])
datatype = row["o"].get("datatype", "")
if datatype:
datatype = nrm.schema.URI(
label=datatype,
value=datatype,
internal=uri_internal_index.get(datatype, False),
)
else:
datatype = None

item = nrm.schema.Literal(
value=row["o"]["value"],
datatype=datatype,
language=row["o"].get("xml:lang", ""),
)
elif row["o"]["type"] == "bnode":
# TODO: Handle blank nodes.
pass
else:
raise ValueError(
f"Expected type to be uri or literal but got {row['o']['value']}"
) # TODO
f"Expected type to be uri or literal but got {row['o']['type']}"
)
found = False
for p in properties:
if p.predicate.value == predicate.value:
Expand All @@ -68,11 +104,9 @@ def get(
return nrm.schema.Resource(
uri=uri, label=label, types=types, properties=properties
)
except nrm.exceptions.SPARQLNotFoundError as err:
raise err
except Exception as err:
if result == {"head": {"vars": ["p", "o"]}, "results": {"bindings": []}}:
raise nrm.exceptions.SPARQLNotFoundError(
f"Resource with URI {uri} not found."
)
raise nrm.exceptions.SPARQLResultJSONError(
f"Unexpected SPARQL result.\n{result}\n{err}"
) from err
2 changes: 1 addition & 1 deletion src/linkeddata_api/vocab_viewer/nrm/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class URI(BaseModel):
class Literal(BaseModel):
type: str = "literal"
value: str
datatype: URI = ""
datatype: URI = None
language: str = ""


Expand Down

0 comments on commit 77eae8f

Please sign in to comment.