From a4dc71eff7b5832b9f8965b24705592c46dfb249 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 11 Jul 2022 04:37:40 +0000 Subject: [PATCH 01/11] Remove print --- src/linkeddata_api/vocab_viewer/nrm/curie.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/curie.py b/src/linkeddata_api/vocab_viewer/nrm/curie.py index 2c316d8..bb9ceaa 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/curie.py +++ b/src/linkeddata_api/vocab_viewer/nrm/curie.py @@ -47,7 +47,6 @@ def get(uri: str): if uri in not_found: return not_found.get(uri) if uri_in_skips(uri): - print("in skip") return uri localname = uri.split("#")[-1].split("/")[-1] From 0bbe5af1c0e3bdd729eb92f3c72aa6de847a0e89 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 11 Jul 2022 04:38:38 +0000 Subject: [PATCH 02/11] Add get label and get internal resource status from list of URIs --- .../vocab_viewer/nrm/__init__.py | 1 + .../vocab_viewer/nrm/internal_resource.py | 48 +++++++++++++++++++ src/linkeddata_api/vocab_viewer/nrm/label.py | 47 ++++++++++++++++++ .../vocab_viewer/nrm/resource.py | 29 +++++++---- 4 files changed, 115 insertions(+), 10 deletions(-) create mode 100644 src/linkeddata_api/vocab_viewer/nrm/internal_resource.py diff --git a/src/linkeddata_api/vocab_viewer/nrm/__init__.py b/src/linkeddata_api/vocab_viewer/nrm/__init__.py index f8c3b07..c9d19de 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/__init__.py +++ b/src/linkeddata_api/vocab_viewer/nrm/__init__.py @@ -4,3 +4,4 @@ from . import sparql from . import label from . import curie +from . import internal_resource diff --git a/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py b/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py new file mode 100644 index 0000000..424eb18 --- /dev/null +++ b/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py @@ -0,0 +1,48 @@ +from jinja2 import Template + +from linkeddata_api.vocab_viewer import nrm + + +def _get_from_list_query(uris: list[str]) -> str: + template = Template( + """ + PREFIX skos: + SELECT distinct ?uri ?internal + WHERE { + VALUES (?uri) { + () + {% for uri in uris %} + (<{{ uri }}>) + {% endfor %} + } + + bind(exists{ ?uri ?p ?o } as ?internal) + } + """ + ) + return template.render(uris=uris) + + +def get_from_list( + uris: list[str], + sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", +) -> dict[str, str]: + query = _get_from_list_query(uris) + + result = nrm.sparql.post(query, sparql_endpoint) + + return_results = {} + + try: + rows = result["results"]["bindings"] + for row in rows: + uri = str(row["uri"]["value"]) + internal = str(row["internal"]["value"]) + return_results[uri] = True if internal == "true" else False + + except KeyError as err: + raise nrm.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result set.\n{result}\n{err}" + ) from err + + return return_results diff --git a/src/linkeddata_api/vocab_viewer/nrm/label.py b/src/linkeddata_api/vocab_viewer/nrm/label.py index bae1cff..821a29a 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/label.py +++ b/src/linkeddata_api/vocab_viewer/nrm/label.py @@ -1,5 +1,7 @@ from typing import Union +from jinja2 import Template + from linkeddata_api.vocab_viewer import nrm @@ -31,3 +33,48 @@ def get( raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result set.\n{result}\n{err}" ) from err + + +def _get_from_list_query(uris: list[str]) -> str: + template = Template( + """ + PREFIX skos: + SELECT DISTINCT ?uri ?label + WHERE { + VALUES (?uri) { + {% for uri in uris %} + (<{{ uri }}>) + {% endfor %} + } + + ?uri skos:prefLabel ?label . + } + """ + ) + return template.render(uris=uris) + + +def get_from_list( + uris: list[str], + sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", +) -> dict[str, str]: + """Returns a dict of uri keys and label values.""" + query = _get_from_list_query(uris) + + result = nrm.sparql.post(query, sparql_endpoint) + + labels = {} + + try: + rows = result["results"]["bindings"] + for row in rows: + uri = str(row["uri"]["value"]) + label = str(row["label"]["value"]) + labels[uri] = label + + except KeyError as err: + raise nrm.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result set.\n{result}\n{err}" + ) from err + + return labels diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource.py index 9c065d3..b37d461 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource.py @@ -19,17 +19,26 @@ def get( try: uri = uri - label = nrm.label.get(uri, sparql_endpoint) types = [] properties = [] - # TODO: Do a lookup with sparql to check if URI is internal. + + uri_values = filter( + lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] + ) + uri_values = [value["o"]["value"] for value in uri_values] + uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint) + + label = nrm.label.get(uri, sparql_endpoint) or uri + + uri_internal_index = nrm.internal_resource.get_from_list(uri_values) + for row in result["results"]["bindings"]: if row["p"]["value"] == str(RDF.type): types.append( nrm.schema.URI( label="rdf:type", value=row["o"]["value"], - internal=False, # TODO + internal=uri_internal_index.get(row["o"]["value"], False), ) ) else: @@ -37,23 +46,23 @@ def get( predicate = nrm.schema.URI( label=predicate_label, value=row["p"]["value"], - internal=False, # TODO + internal=uri_internal_index.get(row["p"]["value"], False), ) if row["o"]["type"] == "uri": - curie = nrm.label.get( - row["o"]["value"], sparql_endpoint - ) or nrm.curie.get(row["o"]["value"]) + curie = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + row["o"]["value"] + ) item = nrm.schema.URI( label=curie, value=row["o"]["value"], - internal=False, # TODO + internal=uri_internal_index.get(row["o"]["value"], False), ) elif row["o"]["type"] == "literal": item = nrm.schema.Literal(value=row["o"]["value"]) else: raise ValueError( f"Expected type to be uri or literal but got {row['o']['value']}" - ) # TODO + ) found = False for p in properties: if p.predicate.value == predicate.value: @@ -72,7 +81,7 @@ def get( if result == {"head": {"vars": ["p", "o"]}, "results": {"bindings": []}}: raise nrm.exceptions.SPARQLNotFoundError( f"Resource with URI {uri} not found." - ) + ) from err raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result.\n{result}\n{err}" ) from err From 4176266116410ffdea62fcbee5554520875ba51d Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 11 Jul 2022 04:41:47 +0000 Subject: [PATCH 03/11] Add todo --- src/linkeddata_api/vocab_viewer/nrm/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource.py index b37d461..24f6ed0 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource.py @@ -5,7 +5,7 @@ def get( uri: str, - profile: str = None, + profile: str = None, # TODO: Add presentation handling for different kinds of data sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", ) -> nrm.schema.Resource: query = f""" From 364f5e1038b7c3b8819e1c1a858e2b5937fb7bc4 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 11 Jul 2022 05:40:48 +0000 Subject: [PATCH 04/11] Rename variable --- src/linkeddata_api/vocab_viewer/nrm/resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource.py index 24f6ed0..3f9a8bb 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource.py @@ -49,11 +49,11 @@ def get( internal=uri_internal_index.get(row["p"]["value"], False), ) if row["o"]["type"] == "uri": - curie = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + object_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( row["o"]["value"] ) item = nrm.schema.URI( - label=curie, + label=object_label, value=row["o"]["value"], internal=uri_internal_index.get(row["o"]["value"], False), ) From 3e82efab52d83f294b25d01010ee87d32a3af9ef Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 11 Jul 2022 05:48:53 +0000 Subject: [PATCH 05/11] Perform federated SPARQL query to also fetch labels from TERN's controlled vocabularies. --- src/linkeddata_api/vocab_viewer/nrm/label.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/label.py b/src/linkeddata_api/vocab_viewer/nrm/label.py index 821a29a..37c0563 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/label.py +++ b/src/linkeddata_api/vocab_viewer/nrm/label.py @@ -39,7 +39,7 @@ def _get_from_list_query(uris: list[str]) -> str: template = Template( """ PREFIX skos: - SELECT DISTINCT ?uri ?label + SELECT DISTINCT ?uri (SAMPLE(?_label) AS ?label) WHERE { VALUES (?uri) { {% for uri in uris %} @@ -47,8 +47,17 @@ def _get_from_list_query(uris: list[str]) -> str: {% endfor %} } - ?uri skos:prefLabel ?label . + { + ?uri skos:prefLabel ?_label . + } + UNION { + # Also try and fetch label from TERN's controlled vocabularies. + SERVICE { + ?uri skos:prefLabel ?_label . + } + } } + GROUP BY ?uri """ ) return template.render(uris=uris) @@ -58,7 +67,11 @@ def get_from_list( uris: list[str], sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", ) -> dict[str, str]: - """Returns a dict of uri keys and label values.""" + """Returns a dict of uri keys and label values. + + In addition to the SPARQL endpoint provided, it also fetches labels + from TERN's controlled vocabularies via a federated SPARQL query. + """ query = _get_from_list_query(uris) result = nrm.sparql.post(query, sparql_endpoint) From 33a81ac836359b55de229f4b7c36b34fcca7e0ed Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 11 Jul 2022 06:09:57 +0000 Subject: [PATCH 06/11] Use schema instead of sdo as schema.org prefix --- src/linkeddata_api/vocab_viewer/nrm/curie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/curie.py b/src/linkeddata_api/vocab_viewer/nrm/curie.py index bb9ceaa..614d90f 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/curie.py +++ b/src/linkeddata_api/vocab_viewer/nrm/curie.py @@ -8,7 +8,7 @@ "http://purl.org/dc/terms/": "dcterms", "http://www.w3.org/2004/02/skos/core#": "skos", "http://www.w3.org/2000/01/rdf-schema#": "rdfs", - "https://schema.org/": "sdo", + "https://schema.org/": "schema", "https://w3id.org/tern/ontologies/tern/": "tern", "http://www.w3.org/2002/07/owl#": "owl", "http://www.w3.org/2001/XMLSchema#": "xsd", From b59ba9de4052cdafb86c44776118d6f53cf81cc3 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 12 Jul 2022 01:24:38 +0000 Subject: [PATCH 07/11] Better handling of resource not found. Adds resource uri to list of uri values. --- src/linkeddata_api/vocab_viewer/nrm/resource.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource.py index 3f9a8bb..efec855 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource.py @@ -26,12 +26,19 @@ def get( lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] ) uri_values = [value["o"]["value"] for value in uri_values] + uri_values.append(uri) + uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint) label = nrm.label.get(uri, sparql_endpoint) or uri uri_internal_index = nrm.internal_resource.get_from_list(uri_values) + if not uri_internal_index.get(uri): + raise nrm.exceptions.SPARQLNotFoundError( + f"Resource with URI {uri} not found." + ) + for row in result["results"]["bindings"]: if row["p"]["value"] == str(RDF.type): types.append( @@ -49,9 +56,9 @@ def get( internal=uri_internal_index.get(row["p"]["value"], False), ) if row["o"]["type"] == "uri": - object_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + object_label = uri_label_index.get( row["o"]["value"] - ) + ) or nrm.curie.get(row["o"]["value"]) item = nrm.schema.URI( label=object_label, value=row["o"]["value"], @@ -77,11 +84,9 @@ def get( return nrm.schema.Resource( uri=uri, label=label, types=types, properties=properties ) + except nrm.exceptions.SPARQLNotFoundError as err: + raise err except Exception as err: - if result == {"head": {"vars": ["p", "o"]}, "results": {"bindings": []}}: - raise nrm.exceptions.SPARQLNotFoundError( - f"Resource with URI {uri} not found." - ) from err raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result.\n{result}\n{err}" ) from err From 7928fee9b1b41a1d45785bf77c25b0726f6eb60f Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 12 Jul 2022 01:25:05 +0000 Subject: [PATCH 08/11] Fix handling of no labels found --- src/linkeddata_api/vocab_viewer/nrm/label.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/linkeddata_api/vocab_viewer/nrm/label.py b/src/linkeddata_api/vocab_viewer/nrm/label.py index 37c0563..10ccf8d 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/label.py +++ b/src/linkeddata_api/vocab_viewer/nrm/label.py @@ -86,6 +86,9 @@ def get_from_list( labels[uri] = label except KeyError as err: + if result["results"]["bindings"] == [{}]: + return {} + raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result set.\n{result}\n{err}" ) from err From 7f9c4cc70dc62b1e4c5b59a7f120c2956efc8ebd Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 12 Jul 2022 02:40:33 +0000 Subject: [PATCH 09/11] Use alpine version 3.16 for dockerfile, which comes with python 3.10 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index cc0f9be..61a9607 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG ALPINE_VERSION=3.13 +ARG ALPINE_VERSION=3.16 ARG LINKEDDATA_API_VERSION # BUILD and install code From ed9add54ef1f8ced3d93c46703075201f7ccf57e Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 12 Jul 2022 03:30:19 +0000 Subject: [PATCH 10/11] Set literal's datatype default value as None --- src/linkeddata_api/vocab_viewer/nrm/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/schema.py b/src/linkeddata_api/vocab_viewer/nrm/schema.py index a81477d..b790648 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/schema.py +++ b/src/linkeddata_api/vocab_viewer/nrm/schema.py @@ -21,7 +21,7 @@ class URI(BaseModel): class Literal(BaseModel): type: str = "literal" value: str - datatype: URI = "" + datatype: URI = None language: str = "" From bbe2abb2daec94be2f283e1e9879642eb5fe2bf1 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 12 Jul 2022 03:30:51 +0000 Subject: [PATCH 11/11] Fix datatype logic. Fix rdf:type value's label. --- .../vocab_viewer/nrm/resource.py | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource.py index efec855..ccefe93 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource.py @@ -41,9 +41,12 @@ def get( for row in result["results"]["bindings"]: if row["p"]["value"] == str(RDF.type): + type_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + row["o"]["value"] + ) types.append( nrm.schema.URI( - label="rdf:type", + label=type_label, value=row["o"]["value"], internal=uri_internal_index.get(row["o"]["value"], False), ) @@ -65,10 +68,27 @@ def get( internal=uri_internal_index.get(row["o"]["value"], False), ) elif row["o"]["type"] == "literal": - item = nrm.schema.Literal(value=row["o"]["value"]) + datatype = row["o"].get("datatype", "") + if datatype: + datatype = nrm.schema.URI( + label=datatype, + value=datatype, + internal=uri_internal_index.get(datatype, False), + ) + else: + datatype = None + + item = nrm.schema.Literal( + value=row["o"]["value"], + datatype=datatype, + language=row["o"].get("xml:lang", ""), + ) + elif row["o"]["type"] == "bnode": + # TODO: Handle blank nodes. + pass else: raise ValueError( - f"Expected type to be uri or literal but got {row['o']['value']}" + f"Expected type to be uri or literal but got {row['o']['type']}" ) found = False for p in properties: