From b2b42a9612f177f09040b0b65b7fad038f289dae Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Thu, 25 Aug 2022 00:46:27 +0000 Subject: [PATCH 01/16] Make resource logic more modular. Prep for adding incoming_properties to model --- .../vocab_viewer/nrm/__init__.py | 1 + .../nrm/{resource.py => resource/__init__.py} | 196 ++++++++---------- .../vocab_viewer/nrm/resource/exists_uri.py | 8 + .../vocab_viewer/nrm/resource/profiles.py | 51 +++++ .../nrm/resource/sort_property_objects.py | 13 ++ src/linkeddata_api/vocab_viewer/nrm/schema.py | 2 + 6 files changed, 163 insertions(+), 108 deletions(-) rename src/linkeddata_api/vocab_viewer/nrm/{resource.py => resource/__init__.py} (60%) create mode 100644 src/linkeddata_api/vocab_viewer/nrm/resource/exists_uri.py create mode 100644 src/linkeddata_api/vocab_viewer/nrm/resource/profiles.py create mode 100644 src/linkeddata_api/vocab_viewer/nrm/resource/sort_property_objects.py diff --git a/src/linkeddata_api/vocab_viewer/nrm/__init__.py b/src/linkeddata_api/vocab_viewer/nrm/__init__.py index b220b10..4885a17 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/__init__.py +++ b/src/linkeddata_api/vocab_viewer/nrm/__init__.py @@ -6,3 +6,4 @@ from . import curie from . import internal_resource from . import namespaces +from . import schema diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py similarity index 60% rename from src/linkeddata_api/vocab_viewer/nrm/resource.py rename to src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py index 0976b41..a828cba 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py @@ -1,64 +1,14 @@ -from rdflib import RDF, RDFS, SKOS, SDO, DCTERMS +from rdflib import RDF from linkeddata_api.vocab_viewer import nrm -from linkeddata_api.vocab_viewer.nrm.namespaces import TERN +from linkeddata_api.vocab_viewer.nrm.resource.exists_uri import exists_uri +from linkeddata_api.vocab_viewer.nrm.resource.profiles import method_profile +from linkeddata_api.vocab_viewer.nrm.resource.sort_property_objects import ( + sort_property_objects, +) -def _exists_uri(target_uri: str, uris: list[nrm.schema.URI]) -> bool: - for uri in uris: - if uri.value == target_uri: - return True - return False - - -def _add_and_remove_property( - predicate_uri: str, - old_list: list[nrm.schema.PredicateObjects], - new_list: list[nrm.schema.PredicateObjects], -) -> None: - """Add and remove the PredicateObjects object if matched by predicate_uri in - the referenced lists, 'old_list' and 'new_list' - - Returns a copy of the PredicateObjects object. - """ - predicate_object = None - for property_ in old_list: - if property_.predicate.value == predicate_uri: - new_list.append(property_) - predicate_object = property_ - old_list.remove(property_) - return predicate_object - - -def _method_profile( - properties: list[nrm.schema.PredicateObjects], -) -> list[nrm.schema.PredicateObjects]: - new_properties = [] - - _add_and_remove_property(str(RDFS.isDefinedBy), properties, new_properties) - - # Omit skos:prefLabel - _add_and_remove_property(str(SKOS.prefLabel), properties, new_properties) - new_properties.pop() - - _add_and_remove_property(str(TERN), properties, new_properties) - _add_and_remove_property(str(SDO.url), properties, new_properties) - _add_and_remove_property(str(SKOS.memberList), properties, new_properties) - _add_and_remove_property(str(TERN.scope), properties, new_properties) - _add_and_remove_property(str(SKOS.definition), properties, new_properties) - _add_and_remove_property(str(TERN.purpose), properties, new_properties) - # TODO: Change to different property due to issue with RVA - _add_and_remove_property(str(DCTERMS.description), properties, new_properties) - _add_and_remove_property(str(TERN.equipment), properties, new_properties) - _add_and_remove_property(str(TERN.instructions), properties, new_properties) - _add_and_remove_property(str(SKOS.note), properties, new_properties) - _add_and_remove_property(str(DCTERMS.source), properties, new_properties) - _add_and_remove_property(str(TERN.appendix), properties, new_properties) - - return new_properties + properties - - -def _get_rdf_list_item_uris(uri: str, rows: list, sparql_endpoint: str) -> list[str]: +def _get_uris_from_rdf_list(uri: str, rows: list, sparql_endpoint: str) -> list[str]: new_uris = [] for row in rows: if row["o"]["type"] == "bnode" and row["listItem"]["value"] == "true": @@ -85,6 +35,70 @@ def _get_rdf_list_item_uris(uri: str, rows: list, sparql_endpoint: str) -> list[ return new_uris +def _get_uri_values_and_list_items(result, uri, sparql_endpoint): + uri_values = filter( + lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] + ) + + uri_values = [value["o"]["value"] for value in uri_values] + uri_values.append(uri) + + # Replace value of blank node list head with items. + list_items = _get_uris_from_rdf_list( + uri, result["results"]["bindings"], sparql_endpoint + ) + + for row in list_items: + uri_values.append(row["o"]["value"]) + + return uri_values, list_items + + +def _add_rows_for_rdf_list_items(result, uri, sparql_endpoint): + """Add rdf:List items as new rows to the SPARQL result object + + :param result: The SPARQL result dict object + :param uri: URI of the resource + :param sparql_endpoint: SPARQL endpoint to fetch the list items from + :return: An updated SPARQL result dict object + """ + _, list_items = _get_uri_values_and_list_items(result, uri, sparql_endpoint) + + # Add additional rows to the `result` representing the RDF List items. + for i, list_item in enumerate(list_items): + list_item.update( + { + "listItem": { + "datatype": "http://www.w3.org/2001/XMLSchema#boolean", + "type": "literal", + "value": "true", + }, + "listItemNumber": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": str(i), + }, + } + ) + result["results"]["bindings"].append(list_item) + + return result + + +def _get_uri_label_index(result, uri, sparql_endpoint): + uri_values, _ = _get_uri_values_and_list_items(result, uri, sparql_endpoint) + uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint) + return uri_label_index + + +def _get_uri_internal_index(result, uri, sparql_endpoint): + uri_values, _ = _get_uri_values_and_list_items(result, uri, sparql_endpoint) + uri_internal_index = nrm.internal_resource.get_from_list( + uri_values, sparql_endpoint + ) + return uri_internal_index + + def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: query = f""" SELECT ?p ?o ?listItem ?listItemNumber @@ -100,47 +114,18 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: result = nrm.sparql.post(query, sparql_endpoint) try: - uri = uri - types = [] - properties = [] - - uri_values = filter( - lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] - ) - - uri_values = [value["o"]["value"] for value in uri_values] - uri_values.append(uri) + types: list[nrm.schema.URI] = [] + properties: list[nrm.schema.PredicateObjects] = [] - # Replace value of blank node list head with items. - list_items = _get_rdf_list_item_uris( - uri, result["results"]["bindings"], sparql_endpoint - ) + result = _add_rows_for_rdf_list_items(result, uri, sparql_endpoint) - for row in list_items: - uri_values.append(row["o"]["value"]) - - for i, list_item in enumerate(list_items): - list_item.update( - { - "listItem": { - "datatype": "http://www.w3.org/2001/XMLSchema#boolean", - "type": "literal", - "value": "true", - }, - "listItemNumber": { - "datatype": "http://www.w3.org/2001/XMLSchema#integer", - "type": "literal", - "value": str(i), - }, - } - ) - result["results"]["bindings"].append(list_item) - - uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint) + # An index of URIs with label values. + uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) label = nrm.label.get(uri, sparql_endpoint) or uri - uri_internal_index = nrm.internal_resource.get_from_list(uri_values, sparql_endpoint) + # An index of all the URIs linked to and from this resource that are available internally. + uri_internal_index = _get_uri_internal_index(result, uri, sparql_endpoint) if not uri_internal_index.get(uri): raise nrm.exceptions.SPARQLNotFoundError( @@ -241,15 +226,20 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: property_.objects.sort(key=sort_property_objects) profile = "" - if _exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): + if exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): profile = "https://w3id.org/tern/ontologies/tern/MethodCollection" - properties = _method_profile(properties) - elif _exists_uri("https://w3id.org/tern/ontologies/tern/Method", types): + properties = method_profile(properties) + elif exists_uri("https://w3id.org/tern/ontologies/tern/Method", types): profile = "https://w3id.org/tern/ontologies/tern/Method" - properties = _method_profile(properties) + properties = method_profile(properties) return nrm.schema.Resource( - uri=uri, profile=profile, label=label, types=types, properties=properties + uri=uri, + profile=profile, + label=label, + types=types, + properties=properties, + incoming_properties=[], ) except nrm.exceptions.SPARQLNotFoundError as err: raise err @@ -257,13 +247,3 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result.\n{result}\n{err}" ) from err - - -def sort_property_objects(x): - if x.list_item: - return x.list_item_number - else: - if x.type == "uri": - return x.label - else: - return x.value diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource/exists_uri.py b/src/linkeddata_api/vocab_viewer/nrm/resource/exists_uri.py new file mode 100644 index 0000000..eae0d57 --- /dev/null +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/exists_uri.py @@ -0,0 +1,8 @@ +from linkeddata_api.vocab_viewer import nrm + + +def exists_uri(target_uri: str, uris: list[nrm.schema.URI]) -> bool: + for uri in uris: + if uri.value == target_uri: + return True + return False diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource/profiles.py b/src/linkeddata_api/vocab_viewer/nrm/resource/profiles.py new file mode 100644 index 0000000..b5e72d2 --- /dev/null +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/profiles.py @@ -0,0 +1,51 @@ +from rdflib import RDFS, SKOS, SDO, DCTERMS + +from linkeddata_api.vocab_viewer import nrm +from linkeddata_api.vocab_viewer.nrm.namespaces import TERN + + +def _add_and_remove_property( + predicate_uri: str, + old_list: list[nrm.schema.PredicateObjects], + new_list: list[nrm.schema.PredicateObjects], +) -> None: + """Add and remove the PredicateObjects object if matched by predicate_uri in + the referenced lists, 'old_list' and 'new_list' + + Returns a copy of the PredicateObjects object. + """ + predicate_object = None + for property_ in old_list: + if property_.predicate.value == predicate_uri: + new_list.append(property_) + predicate_object = property_ + old_list.remove(property_) + return predicate_object + + +def method_profile( + properties: list[nrm.schema.PredicateObjects], +) -> list[nrm.schema.PredicateObjects]: + new_properties = [] + + _add_and_remove_property(str(RDFS.isDefinedBy), properties, new_properties) + + # Omit skos:prefLabel + _add_and_remove_property(str(SKOS.prefLabel), properties, new_properties) + new_properties.pop() + + _add_and_remove_property(str(TERN), properties, new_properties) + _add_and_remove_property(str(SDO.url), properties, new_properties) + _add_and_remove_property(str(SKOS.memberList), properties, new_properties) + _add_and_remove_property(str(TERN.scope), properties, new_properties) + _add_and_remove_property(str(SKOS.definition), properties, new_properties) + _add_and_remove_property(str(TERN.purpose), properties, new_properties) + # TODO: Change to different property due to issue with RVA + _add_and_remove_property(str(DCTERMS.description), properties, new_properties) + _add_and_remove_property(str(TERN.equipment), properties, new_properties) + _add_and_remove_property(str(TERN.instructions), properties, new_properties) + _add_and_remove_property(str(SKOS.note), properties, new_properties) + _add_and_remove_property(str(DCTERMS.source), properties, new_properties) + _add_and_remove_property(str(TERN.appendix), properties, new_properties) + + return new_properties + properties diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource/sort_property_objects.py b/src/linkeddata_api/vocab_viewer/nrm/resource/sort_property_objects.py new file mode 100644 index 0000000..c010858 --- /dev/null +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/sort_property_objects.py @@ -0,0 +1,13 @@ +from typing import Union + +from linkeddata_api.vocab_viewer import nrm + + +def sort_property_objects(item: list[Union[nrm.schema.URI, nrm.schema.Literal]]): + if item.list_item: + return item.list_item_number + else: + if item.type == "uri": + return item.label + else: + return item.value diff --git a/src/linkeddata_api/vocab_viewer/nrm/schema.py b/src/linkeddata_api/vocab_viewer/nrm/schema.py index b9fc700..9bf43d4 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/schema.py +++ b/src/linkeddata_api/vocab_viewer/nrm/schema.py @@ -12,6 +12,7 @@ class Item(BaseModel): class RDFListItemMixin(BaseModel): + """An item in an RDF List""" list_item: bool = False list_item_number: int | None = None @@ -48,3 +49,4 @@ class Resource(BaseModel): label: str types: list[URI] properties: list[PredicateObjects] + incoming_properties: list[PredicateObjects] From f3d0d29c9421ae5ab5556740d956a8f02d3b5dd3 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Thu, 25 Aug 2022 01:58:33 +0000 Subject: [PATCH 02/16] Resource incoming_properties working --- .../vocab_viewer/nrm/resource/__init__.py | 289 +++++++++++------- src/linkeddata_api/vocab_viewer/nrm/schema.py | 8 +- 2 files changed, 184 insertions(+), 113 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py index a828cba..40bd164 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py @@ -35,7 +35,9 @@ def _get_uris_from_rdf_list(uri: str, rows: list, sparql_endpoint: str) -> list[ return new_uris -def _get_uri_values_and_list_items(result, uri, sparql_endpoint): +def _get_uri_values_and_list_items( + result: dict, uri: str, sparql_endpoint: str +) -> tuple[list[str], list[str]]: uri_values = filter( lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] ) @@ -54,7 +56,7 @@ def _get_uri_values_and_list_items(result, uri, sparql_endpoint): return uri_values, list_items -def _add_rows_for_rdf_list_items(result, uri, sparql_endpoint): +def _add_rows_for_rdf_list_items(result: dict, uri: str, sparql_endpoint: str) -> dict: """Add rdf:List items as new rows to the SPARQL result object :param result: The SPARQL result dict object @@ -85,13 +87,17 @@ def _add_rows_for_rdf_list_items(result, uri, sparql_endpoint): return result -def _get_uri_label_index(result, uri, sparql_endpoint): +def _get_uri_label_index( + result: dict, uri: str, sparql_endpoint: str +) -> dict[str, str]: uri_values, _ = _get_uri_values_and_list_items(result, uri, sparql_endpoint) uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint) return uri_label_index -def _get_uri_internal_index(result, uri, sparql_endpoint): +def _get_uri_internal_index( + result: dict, uri: str, sparql_endpoint: str +) -> dict[str, str]: uri_values, _ = _get_uri_values_and_list_items(result, uri, sparql_endpoint) uri_internal_index = nrm.internal_resource.get_from_list( uri_values, sparql_endpoint @@ -114,116 +120,10 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: result = nrm.sparql.post(query, sparql_endpoint) try: - types: list[nrm.schema.URI] = [] - properties: list[nrm.schema.PredicateObjects] = [] result = _add_rows_for_rdf_list_items(result, uri, sparql_endpoint) - - # An index of URIs with label values. - uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) - label = nrm.label.get(uri, sparql_endpoint) or uri - - # An index of all the URIs linked to and from this resource that are available internally. - uri_internal_index = _get_uri_internal_index(result, uri, sparql_endpoint) - - if not uri_internal_index.get(uri): - raise nrm.exceptions.SPARQLNotFoundError( - f"Resource with URI {uri} not found." - ) - - for row in result["results"]["bindings"]: - if row["p"]["value"] == str(RDF.type): - type_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( - row["o"]["value"] - ) - types.append( - nrm.schema.URI( - label=type_label, - value=row["o"]["value"], - internal=uri_internal_index.get(row["o"]["value"], False), - ) - ) - else: - predicate_label = nrm.curie.get(row["p"]["value"]) - predicate = nrm.schema.URI( - label=predicate_label, - value=row["p"]["value"], - internal=uri_internal_index.get(row["p"]["value"], False), - list_item=True if row["listItem"]["value"] == "true" else False, - list_item_number=int(row["listItemNumber"]["value"]) - if row["listItem"]["value"] == "true" - else None, - ) - if row["o"]["type"] == "uri": - object_label = uri_label_index.get( - row["o"]["value"] - ) or nrm.curie.get(row["o"]["value"]) - item = nrm.schema.URI( - label=object_label, - value=row["o"]["value"], - internal=uri_internal_index.get(row["o"]["value"], False), - list_item=True if row["listItem"]["value"] == "true" else False, - list_item_number=row["listItemNumber"]["value"] - if row["listItem"]["value"] == "true" - else None, - ) - elif row["o"]["type"] == "literal": - datatype = row["o"].get("datatype", "") - if datatype: - datatype = nrm.schema.URI( - label=datatype, - value=datatype, - internal=uri_internal_index.get(datatype, False), - list_item=True - if row["listItem"]["value"] == "true" - else False, - list_item_number=row["listItemNumber"]["value"] - if row["listItem"]["value"] == "true" - else None, - ) - else: - datatype = None - - item = nrm.schema.Literal( - value=row["o"]["value"], - datatype=datatype, - language=row["o"].get("xml:lang", ""), - list_item=True if row["listItem"]["value"] == "true" else False, - list_item_number=row["listItemNumber"]["value"] - if row["listItem"]["value"] == "true" - else None, - ) - elif row["o"]["type"] == "bnode": - # TODO: Handle blank nodes. - pass - else: - raise ValueError( - f"Expected type to be uri or literal but got {row['o']['type']}" - ) - found = False - for p in properties: - if p.predicate.value == predicate.value: - found = True - p.objects.append(item) - - if not found: - properties.append( - nrm.schema.PredicateObjects(predicate=predicate, objects=[item]) - ) - - # Duplicates may occur due to processing RDF lists. - # Remove duplicates, if any. - for property_ in properties: - if property_.predicate.list_item: - for obj in property_.objects: - if not obj.list_item: - property_.objects.remove(obj) - - # Sort all property objects by label. - properties.sort(key=lambda x: x.predicate.label) - for property_ in properties: - property_.objects.sort(key=sort_property_objects) + types, properties = _get_types_and_properties(result, uri, sparql_endpoint) profile = "" if exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): @@ -233,13 +133,15 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: profile = "https://w3id.org/tern/ontologies/tern/Method" properties = method_profile(properties) + incoming_properties = _get_incoming_properties(uri, sparql_endpoint) + return nrm.schema.Resource( uri=uri, profile=profile, label=label, types=types, properties=properties, - incoming_properties=[], + incoming_properties=incoming_properties, ) except nrm.exceptions.SPARQLNotFoundError as err: raise err @@ -247,3 +149,166 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result.\n{result}\n{err}" ) from err + + +def _get_incoming_properties(uri: str, sparql_endpoint: str): + query = f""" + SELECT ?p ?o ?listItem ?listItemNumber + WHERE {{ + ?o ?p <{uri}> . + + # This is not required for `incoming_properties` + # but we need to set the values for compatibility with `properties`. + BIND(EXISTS{{?o rdf:rest ?rest}} as ?listItem) + BIND(0 AS ?listItemNumber) + }} + """ + + result = nrm.sparql.post( + query, + sparql_endpoint, + ) + + uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) + uri_internal_index = _get_uri_internal_index(result, uri, sparql_endpoint) + + incoming_properties = [] + + for row in result["results"]["bindings"]: + subject_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + row["o"]["value"] + ) + item = nrm.schema.URI( + label=subject_label, + value=row["o"]["value"], + internal=uri_internal_index.get(row["o"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + predicate_label = nrm.curie.get(row["p"]["value"]) + predicate = nrm.schema.URI( + label=predicate_label, + value=row["p"]["value"], + internal=uri_internal_index.get(row["p"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=int(row["listItemNumber"]["value"]) + if row["listItem"]["value"] == "true" + else None, + ) + incoming_properties.append( + nrm.schema.SubjectPredicates(subject=item, predicate=predicate) + ) + + return incoming_properties + + +def _get_types_and_properties( + result: dict, uri: str, sparql_endpoint: str +) -> tuple[list[nrm.schema.URI], list[nrm.schema.PredicateObjects]]: + + types: list[nrm.schema.URI] = [] + properties: list[nrm.schema.PredicateObjects] = [] + + # An index of URIs with label values. + uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) + + # An index of all the URIs linked to and from this resource that are available internally. + uri_internal_index = _get_uri_internal_index(result, uri, sparql_endpoint) + + if not uri_internal_index.get(uri): + raise nrm.exceptions.SPARQLNotFoundError(f"Resource with URI {uri} not found.") + + for row in result["results"]["bindings"]: + if row["p"]["value"] == str(RDF.type): + type_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + row["o"]["value"] + ) + types.append( + nrm.schema.URI( + label=type_label, + value=row["o"]["value"], + internal=uri_internal_index.get(row["o"]["value"], False), + ) + ) + else: + predicate_label = nrm.curie.get(row["p"]["value"]) + predicate = nrm.schema.URI( + label=predicate_label, + value=row["p"]["value"], + internal=uri_internal_index.get(row["p"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=int(row["listItemNumber"]["value"]) + if row["listItem"]["value"] == "true" + else None, + ) + if row["o"]["type"] == "uri": + object_label = uri_label_index.get(row["o"]["value"]) or nrm.curie.get( + row["o"]["value"] + ) + item = nrm.schema.URI( + label=object_label, + value=row["o"]["value"], + internal=uri_internal_index.get(row["o"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + elif row["o"]["type"] == "literal": + datatype = row["o"].get("datatype", "") + if datatype: + datatype = nrm.schema.URI( + label=datatype, + value=datatype, + internal=uri_internal_index.get(datatype, False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + else: + datatype = None + + item = nrm.schema.Literal( + value=row["o"]["value"], + datatype=datatype, + language=row["o"].get("xml:lang", ""), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + elif row["o"]["type"] == "bnode": + # TODO: Handle blank nodes. + pass + else: + raise ValueError( + f"Expected type to be uri or literal but got {row['o']['type']}" + ) + found = False + for p in properties: + if p.predicate.value == predicate.value: + found = True + p.objects.append(item) + + if not found: + properties.append( + nrm.schema.PredicateObjects(predicate=predicate, objects=[item]) + ) + + # Duplicates may occur due to processing RDF lists. + # Remove duplicates, if any. + for property_ in properties: + if property_.predicate.list_item: + for obj in property_.objects: + if not obj.list_item: + property_.objects.remove(obj) + + # Sort all property objects by label. + properties.sort(key=lambda x: x.predicate.label) + for property_ in properties: + property_.objects.sort(key=sort_property_objects) + + return types, properties diff --git a/src/linkeddata_api/vocab_viewer/nrm/schema.py b/src/linkeddata_api/vocab_viewer/nrm/schema.py index 9bf43d4..aef75b2 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/schema.py +++ b/src/linkeddata_api/vocab_viewer/nrm/schema.py @@ -13,6 +13,7 @@ class Item(BaseModel): class RDFListItemMixin(BaseModel): """An item in an RDF List""" + list_item: bool = False list_item_number: int | None = None @@ -38,6 +39,11 @@ def __hash__(self): return hash(self.value + datatype + self.language) +class SubjectPredicates(BaseModel): + subject: URI + predicate: URI + + class PredicateObjects(BaseModel): predicate: URI objects: list[Union[URI, Literal]] @@ -49,4 +55,4 @@ class Resource(BaseModel): label: str types: list[URI] properties: list[PredicateObjects] - incoming_properties: list[PredicateObjects] + incoming_properties: list[SubjectPredicates] From bb96a45277bbf0ca16f5f4b7503c0a4e20b18005 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Thu, 25 Aug 2022 04:09:11 +0000 Subject: [PATCH 03/16] SubjectPredicate has a list of subjects for the same predicate --- .../vocab_viewer/nrm/resource/__init__.py | 15 ++++++++++++--- src/linkeddata_api/vocab_viewer/nrm/schema.py | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py index 40bd164..cc9103a 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py @@ -197,9 +197,17 @@ def _get_incoming_properties(uri: str, sparql_endpoint: str): if row["listItem"]["value"] == "true" else None, ) - incoming_properties.append( - nrm.schema.SubjectPredicates(subject=item, predicate=predicate) - ) + + found = False + for p in incoming_properties: + if p.predicate.value == predicate.value: + found = True + p.subjects.append(item) + + if not found: + incoming_properties.append( + nrm.schema.SubjectPredicates(predicate=predicate, subjects=[item]) + ) return incoming_properties @@ -287,6 +295,7 @@ def _get_types_and_properties( raise ValueError( f"Expected type to be uri or literal but got {row['o']['type']}" ) + found = False for p in properties: if p.predicate.value == predicate.value: diff --git a/src/linkeddata_api/vocab_viewer/nrm/schema.py b/src/linkeddata_api/vocab_viewer/nrm/schema.py index aef75b2..19fd668 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/schema.py +++ b/src/linkeddata_api/vocab_viewer/nrm/schema.py @@ -40,8 +40,8 @@ def __hash__(self): class SubjectPredicates(BaseModel): - subject: URI predicate: URI + subjects: list[URI] class PredicateObjects(BaseModel): From 720b595f499755c8bbacbca15b2142388ec89ee6 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 29 Aug 2022 03:23:19 +0000 Subject: [PATCH 04/16] Remove unused import requests --- src/linkeddata_api/vocab_viewer/nrm/vocabs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/linkeddata_api/vocab_viewer/nrm/vocabs.py b/src/linkeddata_api/vocab_viewer/nrm/vocabs.py index 5764798..e4b13b3 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/vocabs.py +++ b/src/linkeddata_api/vocab_viewer/nrm/vocabs.py @@ -1,7 +1,5 @@ from typing import Optional -import requests - from linkeddata_api.vocab_viewer import nrm from . import schema From 9c3f6c9ea23457b9ddfcd5a535a381d580be15c5 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 29 Aug 2022 03:36:59 +0000 Subject: [PATCH 05/16] Refactor /resource to /ld_viewer/resource and use SPARQL endpoint instead of RDF4J repository --- src/linkeddata_api/views/api_v1/__init__.py | 2 +- .../views/api_v1/ld_viewer/__init__.py | 1 + .../{resource/__init__.py => ld_viewer/resource.py} | 9 +++------ src/linkeddata_api/views/api_v1/openapi.yaml | 12 ++++++------ 4 files changed, 11 insertions(+), 13 deletions(-) create mode 100644 src/linkeddata_api/views/api_v1/ld_viewer/__init__.py rename src/linkeddata_api/views/api_v1/{resource/__init__.py => ld_viewer/resource.py} (88%) diff --git a/src/linkeddata_api/views/api_v1/__init__.py b/src/linkeddata_api/views/api_v1/__init__.py index 38be24e..21bd89a 100644 --- a/src/linkeddata_api/views/api_v1/__init__.py +++ b/src/linkeddata_api/views/api_v1/__init__.py @@ -5,4 +5,4 @@ from . import vocab_viewer from . import version_info from . import rdf_tools -from . import resource +from . import ld_viewer diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py b/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py new file mode 100644 index 0000000..364a06e --- /dev/null +++ b/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py @@ -0,0 +1 @@ +from . import resource diff --git a/src/linkeddata_api/views/api_v1/resource/__init__.py b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py similarity index 88% rename from src/linkeddata_api/views/api_v1/resource/__init__.py rename to src/linkeddata_api/views/api_v1/ld_viewer/resource.py index 1005f85..f934d61 100644 --- a/src/linkeddata_api/views/api_v1/resource/__init__.py +++ b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py @@ -7,14 +7,11 @@ from linkeddata_api.views.api_v1.blueprint import bp from linkeddata_api import rdf -# TODO: Move this somewhere else. -GRAPHDB_URL = "https://graphdb.tern.org.au/repositories/" - -@bp.get("/resource") +@bp.get("/ld_viewer/resource") @openapi.validate(validate_request=False, validate_response=False) def get_resource(): - repository_id = request.args.get("repository_id") + sparql_endpoint = request.args.get("sparql_endpoint") uri = request.args.get("uri") format_ = request.headers.get("accept") # TODO: Support 'format' query arg? It would make it easier to configure persistent redirect services. @@ -27,7 +24,7 @@ def get_resource(): ) response = requests.get( - GRAPHDB_URL + repository_id, + sparql_endpoint, headers={"accept": format_}, params={"query": f"DESCRIBE <{uri}>"}, ) diff --git a/src/linkeddata_api/views/api_v1/openapi.yaml b/src/linkeddata_api/views/api_v1/openapi.yaml index 5dfb9a4..32fda45 100644 --- a/src/linkeddata_api/views/api_v1/openapi.yaml +++ b/src/linkeddata_api/views/api_v1/openapi.yaml @@ -108,23 +108,23 @@ paths: schema: type: string - /resource: + /ld_viewer/resource: get: tags: - - General + - Linked Data viewer summary: Get RDF resource description: Get an RDF resource by its URI in an RDF4J repository. parameters: - in: query - name: repository_id + name: sparql_endpoint schema: type: string required: true - description: The RDF4J repository. + description: The SPARQL endpoint for querying\. examples: nrm_vocabs: - summary: NRM vocabs repository - value: dawe_vocabs_core + summary: NRM vocabs SPARQL endpoint + value: https://graphdb.tern.org.au/repositories/dawe_vocabs_core - in: query name: uri schema: From 0798d61c4eb7fe18200b00332e7de213429a9838 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Mon, 29 Aug 2022 04:41:50 +0000 Subject: [PATCH 06/16] Add data layer for resource --- src/linkeddata_api/data/__init__.py | 2 + src/linkeddata_api/data/exceptions.py | 22 +++++++ src/linkeddata_api/data/sparql.py | 61 +++++++++++++++++++ .../vocab_viewer/nrm/exceptions.py | 3 +- 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 src/linkeddata_api/data/__init__.py create mode 100644 src/linkeddata_api/data/exceptions.py create mode 100644 src/linkeddata_api/data/sparql.py diff --git a/src/linkeddata_api/data/__init__.py b/src/linkeddata_api/data/__init__.py new file mode 100644 index 0000000..7efc0b8 --- /dev/null +++ b/src/linkeddata_api/data/__init__.py @@ -0,0 +1,2 @@ +from . import exceptions +from . import sparql diff --git a/src/linkeddata_api/data/exceptions.py b/src/linkeddata_api/data/exceptions.py new file mode 100644 index 0000000..56d6c70 --- /dev/null +++ b/src/linkeddata_api/data/exceptions.py @@ -0,0 +1,22 @@ +class RequestError(Exception): + """Request Exception""" + + def __init__(self, description: str) -> None: + super().__init__(description) + self.description = description + + +class SPARQLResultJSONError(Exception): + """SPARQL Result JSON Error""" + + def __init__(self, description: str) -> None: + super().__init__(description) + self.description = description + + +class SPARQLNotFoundError(Exception): + """SPARQL Not Found Error""" + + def __init__(self, description: str) -> None: + super().__init__(description) + self.description = description diff --git a/src/linkeddata_api/data/sparql.py b/src/linkeddata_api/data/sparql.py new file mode 100644 index 0000000..dce99e2 --- /dev/null +++ b/src/linkeddata_api/data/sparql.py @@ -0,0 +1,61 @@ +import requests + +from . import exceptions + + +def post( + query: str, sparql_endpoint: str, accept: str = "application/sparql-results+json" +) -> requests.Response: + """Make a SPARQL POST request + + If the response is JSON, use `response.json()` to get the Python dict. + + :param query: SPARQL query + :param sparql_endpoint: SPARQL endpoint to query + :param accept: The mimetype of the response value + :raises exceptions.RequestError: An error occurred and the response status code is not in the 200 range. + """ + headers = { + "accept": accept, + "content-type": "application/sparql-query", + } + + response = requests.post(url=sparql_endpoint, headers=headers, data=query) + + try: + response.raise_for_status() + except requests.exceptions.HTTPError as err: + raise exceptions.RequestError(err.response.text) from err + + # TODO: raise empty response error here. + + return response + + +def get( + query: str, sparql_endpoint: str, accept: str = "application/sparql-results+json" +) -> requests.Response: + """Make a SPARQL GET request + + If the response is JSON, use `response.json()` to get the Python dict. + + :param query: SPARQL query + :param sparql_endpoint: SPARQL endpoint to query + :param accept: The mimetype of the response value + :raises exceptions.RequestError: An error occurred and the response status code is not in the 200 range. + """ + headers = { + "accept": accept, + } + params = {"query": query} + + response = requests.get(url=sparql_endpoint, headers=headers, params=params) + + try: + response.raise_for_status() + except requests.exceptions.HTTPError as err: + raise exceptions.RequestError(err.response.text) from err + + # TODO: raise empty response error here. + + return response diff --git a/src/linkeddata_api/vocab_viewer/nrm/exceptions.py b/src/linkeddata_api/vocab_viewer/nrm/exceptions.py index 6628db2..56d6c70 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/exceptions.py +++ b/src/linkeddata_api/vocab_viewer/nrm/exceptions.py @@ -16,6 +16,7 @@ def __init__(self, description: str) -> None: class SPARQLNotFoundError(Exception): """SPARQL Not Found Error""" + def __init__(self, description: str) -> None: super().__init__(description) - self.description = description \ No newline at end of file + self.description = description From 8c74a87ab3221e88d180809c86b93f80bfce10fe Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 01:59:47 +0000 Subject: [PATCH 07/16] Refactor code by moving business logic from /vocab_viewer/nrm/resource to /ld_viewer/resource --- src/linkeddata_api/data/sparql.py | 2 + src/linkeddata_api/domain/__init__.py | 7 + src/linkeddata_api/domain/curie.py | 68 ++++ .../domain/internal_resource.py | 47 +++ src/linkeddata_api/domain/label.py | 98 ++++++ .../domain/ld_viewer/__init__.py | 1 + .../domain/ld_viewer/resource/__init__.py | 64 ++++ .../ld_viewer/resource/json/__init__.py | 327 ++++++++++++++++++ .../ld_viewer/resource/json/exists_uri.py | 8 + .../ld_viewer/resource/json/profiles.py | 51 +++ .../resource/json/sort_property_objects.py | 13 + src/linkeddata_api/domain/namespaces.py | 3 + src/linkeddata_api/domain/rdf.py | 8 + src/linkeddata_api/domain/schema.py | 58 ++++ .../views/api_v1/ld_viewer/resource.py | 48 ++- src/linkeddata_api/views/api_v1/openapi.yaml | 24 +- .../vocab_viewer/nrm/resource/__init__.py | 3 +- 17 files changed, 800 insertions(+), 30 deletions(-) create mode 100644 src/linkeddata_api/domain/__init__.py create mode 100644 src/linkeddata_api/domain/curie.py create mode 100644 src/linkeddata_api/domain/internal_resource.py create mode 100644 src/linkeddata_api/domain/label.py create mode 100644 src/linkeddata_api/domain/ld_viewer/__init__.py create mode 100644 src/linkeddata_api/domain/ld_viewer/resource/__init__.py create mode 100644 src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py create mode 100644 src/linkeddata_api/domain/ld_viewer/resource/json/exists_uri.py create mode 100644 src/linkeddata_api/domain/ld_viewer/resource/json/profiles.py create mode 100644 src/linkeddata_api/domain/ld_viewer/resource/json/sort_property_objects.py create mode 100644 src/linkeddata_api/domain/namespaces.py create mode 100644 src/linkeddata_api/domain/rdf.py create mode 100644 src/linkeddata_api/domain/schema.py diff --git a/src/linkeddata_api/data/sparql.py b/src/linkeddata_api/data/sparql.py index dce99e2..825e926 100644 --- a/src/linkeddata_api/data/sparql.py +++ b/src/linkeddata_api/data/sparql.py @@ -13,6 +13,7 @@ def post( :param query: SPARQL query :param sparql_endpoint: SPARQL endpoint to query :param accept: The mimetype of the response value + :return: Response object :raises exceptions.RequestError: An error occurred and the response status code is not in the 200 range. """ headers = { @@ -42,6 +43,7 @@ def get( :param query: SPARQL query :param sparql_endpoint: SPARQL endpoint to query :param accept: The mimetype of the response value + :return: Response object :raises exceptions.RequestError: An error occurred and the response status code is not in the 200 range. """ headers = { diff --git a/src/linkeddata_api/domain/__init__.py b/src/linkeddata_api/domain/__init__.py new file mode 100644 index 0000000..e148dd9 --- /dev/null +++ b/src/linkeddata_api/domain/__init__.py @@ -0,0 +1,7 @@ +from . import schema +from . import ld_viewer +from . import rdf +from . import namespaces +from . import label +from . import internal_resource +from . import curie diff --git a/src/linkeddata_api/domain/curie.py b/src/linkeddata_api/domain/curie.py new file mode 100644 index 0000000..614d90f --- /dev/null +++ b/src/linkeddata_api/domain/curie.py @@ -0,0 +1,68 @@ +import requests + +# URIs that don't have curies in external service. +not_found = {} + +# Predefined prefixes. New prefixes get added at runtime. +prefixes = { + "http://purl.org/dc/terms/": "dcterms", + "http://www.w3.org/2004/02/skos/core#": "skos", + "http://www.w3.org/2000/01/rdf-schema#": "rdfs", + "https://schema.org/": "schema", + "https://w3id.org/tern/ontologies/tern/": "tern", + "http://www.w3.org/2002/07/owl#": "owl", + "http://www.w3.org/2001/XMLSchema#": "xsd", +} + +# Don't find curies for these. +skips = [ + "https://linked.data.gov.au/def/nrm", + "https://linked.data.gov.au/def/test/dawe-cv", +] + + +def uri_in_skips(uri: str) -> bool: + for skip in skips: + if uri.startswith(skip): + return True + return False + + +def get(uri: str): + """Get curie + + 1. Check if it exists in prefixes. + 2. Check if it exists in cache. + 3. Make an expensive request to an external service. Cache the result. + + If all steps fail to find a curie, return the uri as-is. + """ + + for key, val in prefixes.items(): + if uri.startswith(key): + localname = uri.split("#")[-1].split("/")[-1] + curie = f"{val}:{localname}" + return curie + + if uri in not_found: + return not_found.get(uri) + if uri_in_skips(uri): + return uri + + localname = uri.split("#")[-1].split("/")[-1] + r_index = uri.rfind(localname) + base_uri = uri[:r_index] + + response = requests.post( + "https://prefix.zazuko.com/api/v1/shrink", params={"q": base_uri} + ) + + try: + response.raise_for_status() + except requests.exceptions.HTTPError: + not_found[uri] = uri + return uri + + prefix = response.json()["value"][:-1] + prefixes[base_uri] = prefix + return f"{prefix}:{localname}" diff --git a/src/linkeddata_api/domain/internal_resource.py b/src/linkeddata_api/domain/internal_resource.py new file mode 100644 index 0000000..1f2ff77 --- /dev/null +++ b/src/linkeddata_api/domain/internal_resource.py @@ -0,0 +1,47 @@ +from jinja2 import Template + +from linkeddata_api import data + + +def _get_from_list_query(uris: list[str]) -> str: + template = Template( + """ + PREFIX skos: + SELECT distinct ?uri ?internal + WHERE { + VALUES (?uri) { + {% for uri in uris %} + (<{{ uri }}>) + {% endfor %} + } + + bind(exists{ ?uri ?p ?o } as ?internal) + } + """ + ) + return template.render(uris=uris) + + +def get_from_list( + uris: list[str], + sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", +) -> dict[str, str]: + query = _get_from_list_query(uris) + + result = data.sparql.post(query, sparql_endpoint).json() + + return_results = {} + + try: + rows = result["results"]["bindings"] + for row in rows: + uri = str(row["uri"]["value"]) + internal = str(row["internal"]["value"]) + return_results[uri] = True if internal == "true" else False + + except KeyError as err: + raise data.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result set.\n{result}\n{err}" + ) from err + + return return_results diff --git a/src/linkeddata_api/domain/label.py b/src/linkeddata_api/domain/label.py new file mode 100644 index 0000000..cbac23b --- /dev/null +++ b/src/linkeddata_api/domain/label.py @@ -0,0 +1,98 @@ +from typing import Union + +from jinja2 import Template + +from linkeddata_api import data + + +def get( + uri: str, + sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", +) -> Union[str, None]: + """ + Returns a label or None if no label found. + """ + query = f""" + PREFIX skos: + SELECT DISTINCT ?label + WHERE {{ + VALUES (?labelProperty) {{ + (skos:prefLabel) + }} + <{uri}> ?labelProperty ?label . + }} + """ + + result = data.sparql.post(query, sparql_endpoint).json() + + try: + rows = result["results"]["bindings"] + for row in rows: + return row["label"]["value"] + except KeyError as err: + raise data.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result set.\n{result}\n{err}" + ) from err + + +def _get_from_list_query(uris: list[str]) -> str: + # TODO: Currently, we try and fetch from TERN's controlled vocabularies. + # We may want to also fetch with a SERVICE query from other repositories in the future. + template = Template( + """ + PREFIX skos: + SELECT DISTINCT ?uri (SAMPLE(?_label) AS ?label) + WHERE { + VALUES (?uri) { + {% for uri in uris %} + (<{{ uri }}>) + {% endfor %} + } + + { + ?uri skos:prefLabel ?_label . + } + UNION { + # Also try and fetch label from TERN's controlled vocabularies. + SERVICE { + ?uri skos:prefLabel ?_label . + } + } + } + GROUP BY ?uri + """ + ) + return template.render(uris=uris) + + +def get_from_list( + uris: list[str], + sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", +) -> dict[str, str]: + """Returns a dict of uri keys and label values. + + In addition to the SPARQL endpoint provided, it also fetches labels + from TERN's controlled vocabularies via a federated SPARQL query. + """ + query = _get_from_list_query(uris) + + result = data.sparql.post(query, sparql_endpoint).json() + + labels = {} + + try: + rows = result["results"]["bindings"] + for row in rows: + uri = str(row["uri"]["value"]) + label = str(row["label"]["value"]) + labels[uri] = label + + except KeyError as err: + if result["results"]["bindings"] == [{}]: + return {} + + raise data.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result set.\n{result}\n{err}" + ) from err + + return labels diff --git a/src/linkeddata_api/domain/ld_viewer/__init__.py b/src/linkeddata_api/domain/ld_viewer/__init__.py new file mode 100644 index 0000000..364a06e --- /dev/null +++ b/src/linkeddata_api/domain/ld_viewer/__init__.py @@ -0,0 +1 @@ +from . import resource diff --git a/src/linkeddata_api/domain/ld_viewer/resource/__init__.py b/src/linkeddata_api/domain/ld_viewer/resource/__init__.py new file mode 100644 index 0000000..0f5c7c8 --- /dev/null +++ b/src/linkeddata_api/domain/ld_viewer/resource/__init__.py @@ -0,0 +1,64 @@ +from rdflib import URIRef + +from linkeddata_api import data, domain +from linkeddata_api.data.exceptions import ( + RequestError, + SPARQLNotFoundError, + SPARQLResultJSONError, +) +from . import json + + +def _handle_json_response(uri: str, sparql_endpoint: str) -> domain.schema.Resource: + try: + result = json.get(uri, sparql_endpoint) + except (RequestError, SPARQLNotFoundError, SPARQLResultJSONError) as err: + raise err + + return result.json() + + +def _handle_rdf_response( + uri: str, sparql_endpoint: str, format_: str, include_incoming_relationships: bool +) -> str: + try: + response = data.sparql.post( + f"DESCRIBE <{uri}>", sparql_endpoint, accept=format_ + ) + except RequestError as err: + raise err + + graph = domain.rdf.create_graph() + + graph.parse(data=response.text, format=format_) + + if len(graph) == 0: + return "Resource not found", 404 + + if not include_incoming_relationships: + graph.remove((None, None, URIRef(uri))) + + result = graph.serialize(format=format_) + return result + + +def get( + uri: str, sparql_endpoint: str, format_: str, include_incoming_relationships: bool +) -> str: + """Get an RDF resource + + :param uri: URI of resource + :param sparql_endpoint: SPARQL endpoint to query + :param format_: Response format one of ["text/turtle", "application/n-triples", "application/ld+json", "application/json] + :param include_incoming_relationships: Some RDF stores include statements of incoming relationships in DESCRIBE queries. If this is False, it will filter the incoming statements out of the response value + :return: Response value + :raises RequestError: An error occurred in the data layer + """ + + if format_ == "application/json": + result = _handle_json_response(uri, sparql_endpoint) + else: + result = _handle_rdf_response( + uri, sparql_endpoint, format_, include_incoming_relationships + ) + return result diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py b/src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py new file mode 100644 index 0000000..2778814 --- /dev/null +++ b/src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py @@ -0,0 +1,327 @@ +from rdflib import RDF + +from linkeddata_api import data, domain +from linkeddata_api.domain.ld_viewer.resource.json.exists_uri import exists_uri +from linkeddata_api.domain.ld_viewer.resource.json.profiles import method_profile +from linkeddata_api.domain.ld_viewer.resource.json.sort_property_objects import ( + sort_property_objects, +) + + +def _get_uris_from_rdf_list(uri: str, rows: list, sparql_endpoint: str) -> list[str]: + new_uris = [] + for row in rows: + if row["o"]["type"] == "bnode" and row["listItem"]["value"] == "true": + # TODO: error handling - move empty result exception to nrm.sparql.post/nrm.sparql.get + query = f""" + PREFIX skos: + PREFIX rdf: + SELECT DISTINCT ?p ?o + where {{ + BIND(<{row["p"]["value"]}> AS ?p) + <{uri}> ?p ?list . + ?list rdf:rest* ?rest . + ?rest rdf:first ?o . + }} + """ + result = data.sparql.post( + query, + sparql_endpoint, + ) + + for result_row in result["results"]["bindings"]: + new_uris.append(result_row) + + return new_uris + + +def _get_uri_values_and_list_items( + result: dict, uri: str, sparql_endpoint: str +) -> tuple[list[str], list[str]]: + uri_values = filter( + lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] + ) + + uri_values = [value["o"]["value"] for value in uri_values] + uri_values.append(uri) + + # Replace value of blank node list head with items. + list_items = _get_uris_from_rdf_list( + uri, result["results"]["bindings"], sparql_endpoint + ) + + for row in list_items: + uri_values.append(row["o"]["value"]) + + return uri_values, list_items + + +def _add_rows_for_rdf_list_items(result: dict, uri: str, sparql_endpoint: str) -> dict: + """Add rdf:List items as new rows to the SPARQL result object + + :param result: The SPARQL result dict object + :param uri: URI of the resource + :param sparql_endpoint: SPARQL endpoint to fetch the list items from + :return: An updated SPARQL result dict object + """ + _, list_items = _get_uri_values_and_list_items(result, uri, sparql_endpoint) + + # Add additional rows to the `result` representing the RDF List items. + for i, list_item in enumerate(list_items): + list_item.update( + { + "listItem": { + "datatype": "http://www.w3.org/2001/XMLSchema#boolean", + "type": "literal", + "value": "true", + }, + "listItemNumber": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": str(i), + }, + } + ) + result["results"]["bindings"].append(list_item) + + return result + + +def _get_uri_label_index( + result: dict, uri: str, sparql_endpoint: str +) -> dict[str, str]: + uri_values, _ = _get_uri_values_and_list_items(result, uri, sparql_endpoint) + uri_label_index = domain.label.get_from_list(uri_values, sparql_endpoint) + return uri_label_index + + +def _get_uri_internal_index( + result: dict, uri: str, sparql_endpoint: str +) -> dict[str, str]: + uri_values, _ = _get_uri_values_and_list_items(result, uri, sparql_endpoint) + uri_internal_index = domain.internal_resource.get_from_list( + uri_values, sparql_endpoint + ) + return uri_internal_index + + +def get(uri: str, sparql_endpoint: str) -> domain.schema.Resource: + query = f""" + SELECT ?p ?o ?listItem ?listItemNumber + WHERE {{ + <{uri}> ?p ?o . + BIND(EXISTS{{?o rdf:rest ?rest}} as ?listItem) + + # This gets set later with the listItemNumber value. + BIND(0 AS ?listItemNumber) + }} + """ + + result = data.sparql.post(query, sparql_endpoint).json() + + try: + result = _add_rows_for_rdf_list_items(result, uri, sparql_endpoint) + label = domain.label.get(uri, sparql_endpoint) or uri + types, properties = _get_types_and_properties(result, uri, sparql_endpoint) + + profile = "" + if exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): + profile = "https://w3id.org/tern/ontologies/tern/MethodCollection" + properties = method_profile(properties) + elif exists_uri("https://w3id.org/tern/ontologies/tern/Method", types): + profile = "https://w3id.org/tern/ontologies/tern/Method" + properties = method_profile(properties) + + incoming_properties = _get_incoming_properties(uri, sparql_endpoint) + + return domain.schema.Resource( + uri=uri, + profile=profile, + label=label, + types=types, + properties=properties, + # incoming_properties=incoming_properties, + incoming_properties=[], # TODO: + ) + except data.exceptions.SPARQLNotFoundError as err: + raise err + except Exception as err: + import traceback + import sys + + print(traceback.format_exc()) + raise data.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result.\n{result}\n{err}" + ) from err + + +def _get_incoming_properties(uri: str, sparql_endpoint: str): + query = f""" + SELECT ?p ?o ?listItem ?listItemNumber + WHERE {{ + ?o ?p <{uri}> . + + # This is not required for `incoming_properties` + # but we need to set the values for compatibility with `properties`. + BIND(EXISTS{{?o rdf:rest ?rest}} as ?listItem) + BIND(0 AS ?listItemNumber) + }} + """ + + result = data.sparql.post( + query, + sparql_endpoint, + ).json() + + uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) + uri_internal_index = _get_uri_internal_index(result, uri, sparql_endpoint) + + incoming_properties = [] + + for row in result["results"]["bindings"]: + subject_label = uri_label_index.get(row["o"]["value"]) or domain.curie.get( + row["o"]["value"] + ) + item = domain.schema.URI( + label=subject_label, + value=row["o"]["value"], + internal=uri_internal_index.get(row["o"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + predicate_label = domain.curie.get(row["p"]["value"]) + predicate = domain.schema.URI( + label=predicate_label, + value=row["p"]["value"], + internal=uri_internal_index.get(row["p"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=int(row["listItemNumber"]["value"]) + if row["listItem"]["value"] == "true" + else None, + ) + + found = False + for p in incoming_properties: + if p.predicate.value == predicate.value: + found = True + p.subjects.append(item) + + if not found: + incoming_properties.append( + domain.schema.SubjectPredicates(predicate=predicate, subjects=[item]) + ) + + return incoming_properties + + +def _get_types_and_properties( + result: dict, uri: str, sparql_endpoint: str +) -> tuple[list[domain.schema.URI], list[domain.schema.PredicateObjects]]: + + types: list[domain.schema.URI] = [] + properties: list[domain.schema.PredicateObjects] = [] + + # An index of URIs with label values. + uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) + + # An index of all the URIs linked to and from this resource that are available internally. + uri_internal_index = _get_uri_internal_index(result, uri, sparql_endpoint) + + if not uri_internal_index.get(uri): + raise data.exceptions.SPARQLNotFoundError(f"Resource with URI {uri} not found.") + + for row in result["results"]["bindings"]: + if row["p"]["value"] == str(RDF.type): + type_label = uri_label_index.get(row["o"]["value"]) or domain.curie.get( + row["o"]["value"] + ) + types.append( + domain.schema.URI( + label=type_label, + value=row["o"]["value"], + internal=uri_internal_index.get(row["o"]["value"], False), + ) + ) + else: + predicate_label = domain.curie.get(row["p"]["value"]) + predicate = domain.schema.URI( + label=predicate_label, + value=row["p"]["value"], + internal=uri_internal_index.get(row["p"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=int(row["listItemNumber"]["value"]) + if row["listItem"]["value"] == "true" + else None, + ) + if row["o"]["type"] == "uri": + object_label = uri_label_index.get( + row["o"]["value"] + ) or domain.curie.get(row["o"]["value"]) + item = domain.schema.URI( + label=object_label, + value=row["o"]["value"], + internal=uri_internal_index.get(row["o"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + elif row["o"]["type"] == "literal": + datatype = row["o"].get("datatype", "") + if datatype: + datatype = domain.schema.URI( + label=datatype, + value=datatype, + internal=uri_internal_index.get(datatype, False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + else: + datatype = None + + item = domain.schema.Literal( + value=row["o"]["value"], + datatype=datatype, + language=row["o"].get("xml:lang", ""), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, + ) + elif row["o"]["type"] == "bnode": + # TODO: Handle blank nodes. + pass + else: + raise ValueError( + f"Expected type to be uri or literal but got {row['o']['type']}" + ) + + found = False + for p in properties: + if p.predicate.value == predicate.value: + found = True + p.objects.append(item) + + if not found: + properties.append( + domain.schema.PredicateObjects(predicate=predicate, objects=[item]) + ) + + # Duplicates may occur due to processing RDF lists. + # Remove duplicates, if any. + for property_ in properties: + if property_.predicate.list_item: + for obj in property_.objects: + if not obj.list_item: + property_.objects.remove(obj) + + # Sort all property objects by label. + properties.sort(key=lambda x: x.predicate.label) + for property_ in properties: + property_.objects.sort(key=sort_property_objects) + + return types, properties diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/exists_uri.py b/src/linkeddata_api/domain/ld_viewer/resource/json/exists_uri.py new file mode 100644 index 0000000..62c6d96 --- /dev/null +++ b/src/linkeddata_api/domain/ld_viewer/resource/json/exists_uri.py @@ -0,0 +1,8 @@ +from linkeddata_api import domain + + +def exists_uri(target_uri: str, uris: list[domain.schema.URI]) -> bool: + for uri in uris: + if uri.value == target_uri: + return True + return False diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/profiles.py b/src/linkeddata_api/domain/ld_viewer/resource/json/profiles.py new file mode 100644 index 0000000..64c530a --- /dev/null +++ b/src/linkeddata_api/domain/ld_viewer/resource/json/profiles.py @@ -0,0 +1,51 @@ +from rdflib import RDFS, SKOS, SDO, DCTERMS + +from linkeddata_api import domain +from linkeddata_api.domain.namespaces import TERN + + +def _add_and_remove_property( + predicate_uri: str, + old_list: list[domain.schema.PredicateObjects], + new_list: list[domain.schema.PredicateObjects], +) -> None: + """Add and remove the PredicateObjects object if matched by predicate_uri in + the referenced lists, 'old_list' and 'new_list' + + Returns a copy of the PredicateObjects object. + """ + predicate_object = None + for property_ in old_list: + if property_.predicate.value == predicate_uri: + new_list.append(property_) + predicate_object = property_ + old_list.remove(property_) + return predicate_object + + +def method_profile( + properties: list[domain.schema.PredicateObjects], +) -> list[domain.schema.PredicateObjects]: + new_properties = [] + + _add_and_remove_property(str(RDFS.isDefinedBy), properties, new_properties) + + # Omit skos:prefLabel + _add_and_remove_property(str(SKOS.prefLabel), properties, new_properties) + new_properties.pop() + + _add_and_remove_property(str(TERN), properties, new_properties) + _add_and_remove_property(str(SDO.url), properties, new_properties) + _add_and_remove_property(str(SKOS.memberList), properties, new_properties) + _add_and_remove_property(str(TERN.scope), properties, new_properties) + _add_and_remove_property(str(SKOS.definition), properties, new_properties) + _add_and_remove_property(str(TERN.purpose), properties, new_properties) + # TODO: Change to different property due to issue with RVA + _add_and_remove_property(str(DCTERMS.description), properties, new_properties) + _add_and_remove_property(str(TERN.equipment), properties, new_properties) + _add_and_remove_property(str(TERN.instructions), properties, new_properties) + _add_and_remove_property(str(SKOS.note), properties, new_properties) + _add_and_remove_property(str(DCTERMS.source), properties, new_properties) + _add_and_remove_property(str(TERN.appendix), properties, new_properties) + + return new_properties + properties diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/sort_property_objects.py b/src/linkeddata_api/domain/ld_viewer/resource/json/sort_property_objects.py new file mode 100644 index 0000000..7bd28e8 --- /dev/null +++ b/src/linkeddata_api/domain/ld_viewer/resource/json/sort_property_objects.py @@ -0,0 +1,13 @@ +from typing import Union + +from linkeddata_api import domain + + +def sort_property_objects(item: list[Union[domain.schema.URI, domain.schema.Literal]]): + if item.list_item: + return item.list_item_number + else: + if item.type == "uri": + return item.label + else: + return item.value diff --git a/src/linkeddata_api/domain/namespaces.py b/src/linkeddata_api/domain/namespaces.py new file mode 100644 index 0000000..84e5eca --- /dev/null +++ b/src/linkeddata_api/domain/namespaces.py @@ -0,0 +1,3 @@ +from rdflib import Namespace + +TERN = Namespace("https://w3id.org/tern/ontologies/tern/") diff --git a/src/linkeddata_api/domain/rdf.py b/src/linkeddata_api/domain/rdf.py new file mode 100644 index 0000000..62113ac --- /dev/null +++ b/src/linkeddata_api/domain/rdf.py @@ -0,0 +1,8 @@ +from rdflib import Graph + + +def create_graph() -> Graph: + """Create a new RDFLib Graph object with opinionated namespace prefix bindings.""" + graph = Graph() + graph.bind("tern", "https://w3id.org/tern/ontologies/tern/") + return graph diff --git a/src/linkeddata_api/domain/schema.py b/src/linkeddata_api/domain/schema.py new file mode 100644 index 0000000..19fd668 --- /dev/null +++ b/src/linkeddata_api/domain/schema.py @@ -0,0 +1,58 @@ +from typing import Union + +from pydantic import BaseModel + + +class Item(BaseModel): + id: str + label: str + description: str = None + created: str = None + modified: str = None + + +class RDFListItemMixin(BaseModel): + """An item in an RDF List""" + + list_item: bool = False + list_item_number: int | None = None + + +class URI(RDFListItemMixin): + type: str = "uri" + label: str + value: str + internal: bool + + def __hash__(self): + return hash(self.value) + + +class Literal(RDFListItemMixin): + type: str = "literal" + value: str + datatype: URI = None + language: str = "" + + def __hash__(self): + datatype = self.datatype.value if self.datatype else "" + return hash(self.value + datatype + self.language) + + +class SubjectPredicates(BaseModel): + predicate: URI + subjects: list[URI] + + +class PredicateObjects(BaseModel): + predicate: URI + objects: list[Union[URI, Literal]] + + +class Resource(BaseModel): + uri: str + profile: str = "" + label: str + types: list[URI] + properties: list[PredicateObjects] + incoming_properties: list[SubjectPredicates] diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py index f934d61..e6b832a 100644 --- a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py +++ b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py @@ -1,11 +1,14 @@ -import requests from flask import request, Response from werkzeug.exceptions import HTTPException from flask_tern import openapi -from rdflib import URIRef from linkeddata_api.views.api_v1.blueprint import bp -from linkeddata_api import rdf +from linkeddata_api import domain +from linkeddata_api.domain.ld_viewer.resource import ( + RequestError, + SPARQLNotFoundError, + SPARQLResultJSONError, +) @bp.get("/ld_viewer/resource") @@ -13,9 +16,8 @@ def get_resource(): sparql_endpoint = request.args.get("sparql_endpoint") uri = request.args.get("uri") - format_ = request.headers.get("accept") - # TODO: Support 'format' query arg? It would make it easier to configure persistent redirect services. - # TODO: Curently we don't support multiple format types. + format_ = request.args.get("format") or request.headers.get("accept") + # TODO: Curently we don't support multiple format types in accept headers. if not format_ or "," in format_: format_ = "text/turtle" include_incoming_relationships = request.args.get("include_incoming_relationships") @@ -23,29 +25,21 @@ def get_resource(): True if include_incoming_relationships == "true" else False ) - response = requests.get( - sparql_endpoint, - headers={"accept": format_}, - params={"query": f"DESCRIBE <{uri}>"}, - ) - try: - response.raise_for_status() - except requests.exceptions.HTTPError as err: + result = domain.ld_viewer.resource.get( + uri, sparql_endpoint, format_, include_incoming_relationships + ) + except SPARQLNotFoundError as err: + raise HTTPException(err.description, Response(err.description, 404)) from err + except (RequestError, SPARQLResultJSONError) as err: raise HTTPException( - description=err.response.text, - response=Response(err.response.text, status=502), + description=err.description, + response=Response(err.description, status=502), + ) from err + except Exception as err: + raise HTTPException( + description=str(err), + response=Response(str(err), mimetype="text/plain", status=500), ) from err - graph = rdf.create_graph() - - graph.parse(data=response.text, format=format_) - - if len(graph) == 0: - return "Resource not found", 404 - - if not include_incoming_relationships: - graph.remove((None, None, URIRef(uri))) - - result = graph.serialize(format=format_) return Response(result, mimetype=format_) diff --git a/src/linkeddata_api/views/api_v1/openapi.yaml b/src/linkeddata_api/views/api_v1/openapi.yaml index 32fda45..fd3d8fc 100644 --- a/src/linkeddata_api/views/api_v1/openapi.yaml +++ b/src/linkeddata_api/views/api_v1/openapi.yaml @@ -120,7 +120,7 @@ paths: schema: type: string required: true - description: The SPARQL endpoint for querying\. + description: The SPARQL endpoint for querying. examples: nrm_vocabs: summary: NRM vocabs SPARQL endpoint @@ -135,11 +135,25 @@ paths: nrm_index: summary: NRM vocab URI value: https://linked.data.gov.au/def/nrm + - in: query + name: format + schema: + type: string + description: The format of the response value. This takes precedence over the request accept header. + examples: + text/turtle: + value: text/turtle + application/n-triples: + value: application/n-triples + application/json: + value: application/json + application/ld+json: + value: application/ld+json - in: query name: include_incoming_relationships schema: type: string - description: Include incoming relationships + description: Include incoming relationships. This defaults to `false` if the `format` query parameter is `application/json`. examples: true: value: true @@ -161,6 +175,12 @@ paths: text/html: schema: type: string + "500": + description: Internal server error + content: + text/plain: + schema: + type: string "502": description: Error communicating with the database. content: diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py index cc9103a..e45da64 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource/__init__.py @@ -141,7 +141,8 @@ def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: label=label, types=types, properties=properties, - incoming_properties=incoming_properties, + # incoming_properties=incoming_properties, + incoming_properties=[] # TODO: ) except nrm.exceptions.SPARQLNotFoundError as err: raise err From d1f8529ac300dd348edded2275f8858a4c985f3d Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 02:04:27 +0000 Subject: [PATCH 08/16] Use /viewer/resource instead of /ld_viewer/resource --- src/linkeddata_api/views/api_v1/ld_viewer/resource.py | 2 +- src/linkeddata_api/views/api_v1/openapi.yaml | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py index e6b832a..0cf9e15 100644 --- a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py +++ b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py @@ -11,7 +11,7 @@ ) -@bp.get("/ld_viewer/resource") +@bp.get("/viewer/resource") @openapi.validate(validate_request=False, validate_response=False) def get_resource(): sparql_endpoint = request.args.get("sparql_endpoint") diff --git a/src/linkeddata_api/views/api_v1/openapi.yaml b/src/linkeddata_api/views/api_v1/openapi.yaml index fd3d8fc..3454a17 100644 --- a/src/linkeddata_api/views/api_v1/openapi.yaml +++ b/src/linkeddata_api/views/api_v1/openapi.yaml @@ -108,7 +108,7 @@ paths: schema: type: string - /ld_viewer/resource: + /viewer/resource: get: tags: - Linked Data viewer @@ -135,6 +135,9 @@ paths: nrm_index: summary: NRM vocab URI value: https://linked.data.gov.au/def/nrm + nrm_feature_types: + summary: NRM feature types collection + value: https://linked.data.gov.au/def/test/dawe-cv/31a9f83d-9c8b-4d68-8dd7-d1b7a9a4197b - in: query name: format schema: From e346dc3bafc67f053d586fd4999861dbb0fad384 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 03:55:27 +0000 Subject: [PATCH 09/16] Refactor pydantic_jsonify. Rename ld_viewer to viewer. Add entrypoint endpoint. Add JSON resource handler --- src/linkeddata_api/domain/__init__.py | 3 +- .../domain/ld_viewer/__init__.py | 1 - .../{ => domain}/pydantic_jsonify.py | 0 src/linkeddata_api/domain/viewer/__init__.py | 2 + .../domain/viewer/entrypoints/__init__.py | 2 + .../domain/viewer/entrypoints/exceptions.py | 5 + .../domain/viewer/entrypoints/nrm.py | 68 ++++++++++++++ .../resource/__init__.py | 0 .../resource/json/__init__.py | 10 +- .../resource/json/exists_uri.py | 0 .../resource/json/profiles.py | 0 .../resource/json/sort_property_objects.py | 0 .../views/api_v1/ld_viewer/__init__.py | 1 + .../views/api_v1/ld_viewer/entrypoint.py | 34 +++++++ .../views/api_v1/ld_viewer/resource.py | 2 +- .../ontology_viewer/classes/flat/__init__.py | 2 +- src/linkeddata_api/views/api_v1/openapi.yaml | 92 +++++++++++++++++-- .../api_v1/rdf_tools/convert/__init__.py | 2 +- .../views/api_v1/vocab_viewer/nrm/resource.py | 2 +- .../views/api_v1/vocab_viewer/nrm/vocabs.py | 2 +- 20 files changed, 205 insertions(+), 23 deletions(-) delete mode 100644 src/linkeddata_api/domain/ld_viewer/__init__.py rename src/linkeddata_api/{ => domain}/pydantic_jsonify.py (100%) create mode 100644 src/linkeddata_api/domain/viewer/__init__.py create mode 100644 src/linkeddata_api/domain/viewer/entrypoints/__init__.py create mode 100644 src/linkeddata_api/domain/viewer/entrypoints/exceptions.py create mode 100644 src/linkeddata_api/domain/viewer/entrypoints/nrm.py rename src/linkeddata_api/domain/{ld_viewer => viewer}/resource/__init__.py (100%) rename src/linkeddata_api/domain/{ld_viewer => viewer}/resource/json/__init__.py (97%) rename src/linkeddata_api/domain/{ld_viewer => viewer}/resource/json/exists_uri.py (100%) rename src/linkeddata_api/domain/{ld_viewer => viewer}/resource/json/profiles.py (100%) rename src/linkeddata_api/domain/{ld_viewer => viewer}/resource/json/sort_property_objects.py (100%) create mode 100644 src/linkeddata_api/views/api_v1/ld_viewer/entrypoint.py diff --git a/src/linkeddata_api/domain/__init__.py b/src/linkeddata_api/domain/__init__.py index e148dd9..1f062ce 100644 --- a/src/linkeddata_api/domain/__init__.py +++ b/src/linkeddata_api/domain/__init__.py @@ -1,7 +1,8 @@ from . import schema -from . import ld_viewer +from . import viewer from . import rdf from . import namespaces from . import label from . import internal_resource from . import curie +from . import pydantic_jsonify diff --git a/src/linkeddata_api/domain/ld_viewer/__init__.py b/src/linkeddata_api/domain/ld_viewer/__init__.py deleted file mode 100644 index 364a06e..0000000 --- a/src/linkeddata_api/domain/ld_viewer/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . import resource diff --git a/src/linkeddata_api/pydantic_jsonify.py b/src/linkeddata_api/domain/pydantic_jsonify.py similarity index 100% rename from src/linkeddata_api/pydantic_jsonify.py rename to src/linkeddata_api/domain/pydantic_jsonify.py diff --git a/src/linkeddata_api/domain/viewer/__init__.py b/src/linkeddata_api/domain/viewer/__init__.py new file mode 100644 index 0000000..96d5933 --- /dev/null +++ b/src/linkeddata_api/domain/viewer/__init__.py @@ -0,0 +1,2 @@ +from . import resource +from . import entrypoints diff --git a/src/linkeddata_api/domain/viewer/entrypoints/__init__.py b/src/linkeddata_api/domain/viewer/entrypoints/__init__.py new file mode 100644 index 0000000..0bc2814 --- /dev/null +++ b/src/linkeddata_api/domain/viewer/entrypoints/__init__.py @@ -0,0 +1,2 @@ +from . import exceptions +from . import nrm diff --git a/src/linkeddata_api/domain/viewer/entrypoints/exceptions.py b/src/linkeddata_api/domain/viewer/entrypoints/exceptions.py new file mode 100644 index 0000000..5dec25c --- /dev/null +++ b/src/linkeddata_api/domain/viewer/entrypoints/exceptions.py @@ -0,0 +1,5 @@ +from linkeddata_api.data.exceptions import RequestError, SPARQLResultJSONError + + +class ViewerIDNotFoundError(Exception): + """This is raised when an unrecognised viewer ID is provided""" diff --git a/src/linkeddata_api/domain/viewer/entrypoints/nrm.py b/src/linkeddata_api/domain/viewer/entrypoints/nrm.py new file mode 100644 index 0000000..fcdfbd4 --- /dev/null +++ b/src/linkeddata_api/domain/viewer/entrypoints/nrm.py @@ -0,0 +1,68 @@ +from typing import Optional + +from linkeddata_api import data +from linkeddata_api.domain import schema + + +def get_optional_value(row: dict, key: str) -> Optional[str]: + return row.get(key)["value"] if row.get(key) else None + + +def get( + sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", +) -> schema.Item: + """Get + + Raises RequestError and SPARQLResultJSONError + """ + + query = """ + PREFIX skos: + PREFIX dcterms: + PREFIX reg: + SELECT + ?uri + (SAMPLE(?_label) as ?label) + (SAMPLE(?_description) as ?description) + (SAMPLE(?_created) as ?created) + (SAMPLE(?_modified) as ?modified) + FROM + FROM + WHERE { + dcterms:hasPart ?uri . + VALUES (?vocabularyType) { + (skos:ConceptScheme) + (skos:Collection) + } + ?uri a ?vocabularyType ; + skos:prefLabel ?_label . + + OPTIONAL { ?uri dcterms:description ?_description } + OPTIONAL { ?uri dcterms:created ?_created } + OPTIONAL { ?uri dcterms:modified ?_modified } + } + GROUP by ?uri + ORDER by ?label + """ + + result = data.sparql.post(query, sparql_endpoint).json() + + vocabs = [] + + try: + for row in result["results"]["bindings"]: + vocabs.append( + schema.Item( + id=str(row["uri"]["value"]), + label=str(row["label"]["value"]), + description=get_optional_value(row, "description"), + created=get_optional_value(row, "created"), + modified=get_optional_value(row, "modified"), + ) + ) + except KeyError as err: + raise data.exceptions.SPARQLResultJSONError( + f"Unexpected SPARQL result set.\n{result}\n{err}" + ) from err + + return vocabs diff --git a/src/linkeddata_api/domain/ld_viewer/resource/__init__.py b/src/linkeddata_api/domain/viewer/resource/__init__.py similarity index 100% rename from src/linkeddata_api/domain/ld_viewer/resource/__init__.py rename to src/linkeddata_api/domain/viewer/resource/__init__.py diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py b/src/linkeddata_api/domain/viewer/resource/json/__init__.py similarity index 97% rename from src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py rename to src/linkeddata_api/domain/viewer/resource/json/__init__.py index 2778814..21b70b6 100644 --- a/src/linkeddata_api/domain/ld_viewer/resource/json/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/json/__init__.py @@ -1,9 +1,9 @@ from rdflib import RDF from linkeddata_api import data, domain -from linkeddata_api.domain.ld_viewer.resource.json.exists_uri import exists_uri -from linkeddata_api.domain.ld_viewer.resource.json.profiles import method_profile -from linkeddata_api.domain.ld_viewer.resource.json.sort_property_objects import ( +from linkeddata_api.domain.viewer.resource.json.exists_uri import exists_uri +from linkeddata_api.domain.viewer.resource.json.profiles import method_profile +from linkeddata_api.domain.viewer.resource.json.sort_property_objects import ( sort_property_objects, ) @@ -146,10 +146,6 @@ def get(uri: str, sparql_endpoint: str) -> domain.schema.Resource: except data.exceptions.SPARQLNotFoundError as err: raise err except Exception as err: - import traceback - import sys - - print(traceback.format_exc()) raise data.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result.\n{result}\n{err}" ) from err diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/exists_uri.py b/src/linkeddata_api/domain/viewer/resource/json/exists_uri.py similarity index 100% rename from src/linkeddata_api/domain/ld_viewer/resource/json/exists_uri.py rename to src/linkeddata_api/domain/viewer/resource/json/exists_uri.py diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/profiles.py b/src/linkeddata_api/domain/viewer/resource/json/profiles.py similarity index 100% rename from src/linkeddata_api/domain/ld_viewer/resource/json/profiles.py rename to src/linkeddata_api/domain/viewer/resource/json/profiles.py diff --git a/src/linkeddata_api/domain/ld_viewer/resource/json/sort_property_objects.py b/src/linkeddata_api/domain/viewer/resource/json/sort_property_objects.py similarity index 100% rename from src/linkeddata_api/domain/ld_viewer/resource/json/sort_property_objects.py rename to src/linkeddata_api/domain/viewer/resource/json/sort_property_objects.py diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py b/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py index 364a06e..f7801e3 100644 --- a/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py +++ b/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py @@ -1 +1,2 @@ from . import resource +from . import entrypoint diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/entrypoint.py b/src/linkeddata_api/views/api_v1/ld_viewer/entrypoint.py new file mode 100644 index 0000000..579d6d4 --- /dev/null +++ b/src/linkeddata_api/views/api_v1/ld_viewer/entrypoint.py @@ -0,0 +1,34 @@ +from flask import Response +from werkzeug.exceptions import HTTPException +from flask_tern import openapi + +from linkeddata_api.views.api_v1.blueprint import bp +from linkeddata_api import domain +from linkeddata_api.domain.viewer.entrypoints.exceptions import ( + RequestError, + SPARQLResultJSONError, + ViewerIDNotFoundError, +) +from linkeddata_api.domain.pydantic_jsonify import jsonify + + +mapping = {"nrm": domain.viewer.entrypoints.nrm.get} + + +@bp.get("/viewer/entrypoint/") +@openapi.validate(validate_request=False, validate_response=False) +def get_entrypoint(viewer_id: str): + try: + func = mapping.get(viewer_id) + if func is None: + raise ViewerIDNotFoundError(f"Key '{viewer_id}' not found") + + items = func() + except ViewerIDNotFoundError as err: + raise HTTPException(str(err), Response(str(err), 404)) from err + except (RequestError, SPARQLResultJSONError) as err: + raise HTTPException(err.description, Response(err.description, 502)) from err + except Exception as err: + raise HTTPException(str(err), Response(str(err), 500)) from err + + return jsonify(items, headers={"cache-control": "max-age=600, s-maxage=3600"}) diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py index 0cf9e15..b952e64 100644 --- a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py +++ b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py @@ -4,7 +4,7 @@ from linkeddata_api.views.api_v1.blueprint import bp from linkeddata_api import domain -from linkeddata_api.domain.ld_viewer.resource import ( +from linkeddata_api.domain.viewer.resource import ( RequestError, SPARQLNotFoundError, SPARQLResultJSONError, diff --git a/src/linkeddata_api/views/api_v1/ontology_viewer/classes/flat/__init__.py b/src/linkeddata_api/views/api_v1/ontology_viewer/classes/flat/__init__.py index 039386b..d8e5812 100644 --- a/src/linkeddata_api/views/api_v1/ontology_viewer/classes/flat/__init__.py +++ b/src/linkeddata_api/views/api_v1/ontology_viewer/classes/flat/__init__.py @@ -2,7 +2,7 @@ from flask_tern import openapi from flask_tern.logging import create_audit_event, log_audit -from linkeddata_api.pydantic_jsonify import jsonify +from linkeddata_api.domain.pydantic_jsonify import jsonify from linkeddata_api.views.api_v1.blueprint import bp from . import crud diff --git a/src/linkeddata_api/views/api_v1/openapi.yaml b/src/linkeddata_api/views/api_v1/openapi.yaml index 3454a17..6e6cb09 100644 --- a/src/linkeddata_api/views/api_v1/openapi.yaml +++ b/src/linkeddata_api/views/api_v1/openapi.yaml @@ -1,4 +1,4 @@ -openapi: "3.0.0" +openapi: "3.0.3" info: title: TERN's Linked Data Services API @@ -38,6 +38,46 @@ components: in: header # can be "header", "query" or "cookie" name: Authorization schemas: + EntrypointItemList: + title: EntrypointItemList + type: array + items: + title: Entrypoint items + type: object + properties: + id: + title: IRI of item + type: string + label: + title: Label of item + type: string + description: + title: The description of the item + type: string + created: + title: The date when the resource was created + type: string + modified: + title: The date when the resource was modified + type: string + Resource: + title: Resource + type: object + properties: + uri: + type: string + label: + type: string + types: + type: array + items: + $ref: "#/components/schemas/URI" + profile: + type: string + properties: + type: array + items: + $ref: "#/components/schemas/PredicateObjects" ClassItem: title: ClassItem type: object @@ -60,6 +100,12 @@ components: type: string internal: type: boolean + list_item: + type: boolean + list_item_number: + oneOf: + - type: number + nullable: true Literal: title: Literal type: object @@ -107,7 +153,29 @@ paths: plain/text: schema: type: string - + /viewer/entrypoint/{viewer_id}: + get: + tags: + - Linked Data viewer + summary: Get the viewer's entrypoint data + parameters: + - in: path + name: viewer_id + schema: + type: string + required: true + examples: + nrm: + value: nrm + responses: + "200": + description: A list of entrypoint items + content: + application/json: + schema: + $ref: "#/components/schemas/EntrypointItemList" + "502": + description: Gateway error /viewer/resource: get: tags: @@ -172,6 +240,12 @@ paths: text/turtle: schema: type: string + application/ld+json: + schema: + type: string + application/json: + schema: + $ref: "#/components/schemas/Resource" "404": description: Resource of URI not found. content: @@ -377,13 +451,13 @@ paths: type: string example: "@context": - "name": "http://schema.org/name" - "image": - "@id": "http://schema.org/image" - "@type": "@id" - "homepage": - "@id": "http://schema.org/url" - "@type": "@id" + "name": "http://schema.org/name" + "image": + "@id": "http://schema.org/image" + "@type": "@id" + "homepage": + "@id": "http://schema.org/url" + "@type": "@id" "name": "Manu Sporny" "homepage": "http://manu.sporny.org/" "image": "http://manu.sporny.org/images/manu.png" \ No newline at end of file diff --git a/src/linkeddata_api/views/api_v1/rdf_tools/convert/__init__.py b/src/linkeddata_api/views/api_v1/rdf_tools/convert/__init__.py index 8d8d206..1e0cbb6 100644 --- a/src/linkeddata_api/views/api_v1/rdf_tools/convert/__init__.py +++ b/src/linkeddata_api/views/api_v1/rdf_tools/convert/__init__.py @@ -4,7 +4,7 @@ from flask_tern import openapi from flask_tern.logging import create_audit_event, log_audit -from linkeddata_api.pydantic_jsonify import jsonify +from linkeddata_api.domain.pydantic_jsonify import jsonify from linkeddata_api.views.api_v1.blueprint import bp from linkeddata_api import rdf diff --git a/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py b/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py index 998abbf..79c263c 100644 --- a/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py @@ -4,7 +4,7 @@ from werkzeug.exceptions import HTTPException from werkzeug.wrappers import Response -from linkeddata_api.pydantic_jsonify import jsonify +from linkeddata_api.domain.pydantic_jsonify import jsonify from linkeddata_api.views.api_v1.blueprint import bp from linkeddata_api.vocab_viewer import nrm diff --git a/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/vocabs.py b/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/vocabs.py index ae2f771..f93435f 100644 --- a/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/vocabs.py +++ b/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/vocabs.py @@ -3,7 +3,7 @@ from werkzeug.exceptions import HTTPException from werkzeug.wrappers import Response -from linkeddata_api.pydantic_jsonify import jsonify +from linkeddata_api.domain.pydantic_jsonify import jsonify from linkeddata_api.views.api_v1.blueprint import bp from linkeddata_api.vocab_viewer import nrm From 9e7bed9a9b9e4ffc5cfd82db8669e42158655aa9 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 03:57:16 +0000 Subject: [PATCH 10/16] Fix import path --- src/linkeddata_api/views/api_v1/ld_viewer/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py index b952e64..53e054b 100644 --- a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py +++ b/src/linkeddata_api/views/api_v1/ld_viewer/resource.py @@ -26,7 +26,7 @@ def get_resource(): ) try: - result = domain.ld_viewer.resource.get( + result = domain.viewer.resource.get( uri, sparql_endpoint, format_, include_incoming_relationships ) except SPARQLNotFoundError as err: From c5246fae7e19fb47bd4763870ce38ecb25b70adb Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 04:03:17 +0000 Subject: [PATCH 11/16] Fix by calling json() --- src/linkeddata_api/domain/viewer/resource/json/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkeddata_api/domain/viewer/resource/json/__init__.py b/src/linkeddata_api/domain/viewer/resource/json/__init__.py index 21b70b6..ed2eb9c 100644 --- a/src/linkeddata_api/domain/viewer/resource/json/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/json/__init__.py @@ -27,7 +27,7 @@ def _get_uris_from_rdf_list(uri: str, rows: list, sparql_endpoint: str) -> list[ result = data.sparql.post( query, sparql_endpoint, - ) + ).json() for result_row in result["results"]["bindings"]: new_uris.append(result_row) From c4a30c9d9fd9e19e363c1886309f54344850585c Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 04:03:52 +0000 Subject: [PATCH 12/16] In views, rename ld_viewer to viewer --- src/linkeddata_api/views/api_v1/__init__.py | 2 +- .../views/api_v1/{ld_viewer => viewer}/__init__.py | 0 .../views/api_v1/{ld_viewer => viewer}/entrypoint.py | 0 .../views/api_v1/{ld_viewer => viewer}/resource.py | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename src/linkeddata_api/views/api_v1/{ld_viewer => viewer}/__init__.py (100%) rename src/linkeddata_api/views/api_v1/{ld_viewer => viewer}/entrypoint.py (100%) rename src/linkeddata_api/views/api_v1/{ld_viewer => viewer}/resource.py (100%) diff --git a/src/linkeddata_api/views/api_v1/__init__.py b/src/linkeddata_api/views/api_v1/__init__.py index 21bd89a..d4f20e3 100644 --- a/src/linkeddata_api/views/api_v1/__init__.py +++ b/src/linkeddata_api/views/api_v1/__init__.py @@ -5,4 +5,4 @@ from . import vocab_viewer from . import version_info from . import rdf_tools -from . import ld_viewer +from . import viewer diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/__init__.py b/src/linkeddata_api/views/api_v1/viewer/__init__.py similarity index 100% rename from src/linkeddata_api/views/api_v1/ld_viewer/__init__.py rename to src/linkeddata_api/views/api_v1/viewer/__init__.py diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/entrypoint.py b/src/linkeddata_api/views/api_v1/viewer/entrypoint.py similarity index 100% rename from src/linkeddata_api/views/api_v1/ld_viewer/entrypoint.py rename to src/linkeddata_api/views/api_v1/viewer/entrypoint.py diff --git a/src/linkeddata_api/views/api_v1/ld_viewer/resource.py b/src/linkeddata_api/views/api_v1/viewer/resource.py similarity index 100% rename from src/linkeddata_api/views/api_v1/ld_viewer/resource.py rename to src/linkeddata_api/views/api_v1/viewer/resource.py From 183d7abdc8ebea3a666d4793d1cc0fba3debd595 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 05:30:56 +0000 Subject: [PATCH 13/16] Remove default sparql_endpoint argument value --- src/linkeddata_api/domain/internal_resource.py | 2 +- src/linkeddata_api/domain/label.py | 4 ++-- .../domain/viewer/entrypoints/nrm.py | 2 +- .../views/api_v1/viewer/entrypoint.py | 14 ++++++++++---- .../vocab_viewer/nrm/internal_resource.py | 2 +- src/linkeddata_api/vocab_viewer/nrm/label.py | 4 ++-- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/linkeddata_api/domain/internal_resource.py b/src/linkeddata_api/domain/internal_resource.py index 1f2ff77..270fa85 100644 --- a/src/linkeddata_api/domain/internal_resource.py +++ b/src/linkeddata_api/domain/internal_resource.py @@ -24,7 +24,7 @@ def _get_from_list_query(uris: list[str]) -> str: def get_from_list( uris: list[str], - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> dict[str, str]: query = _get_from_list_query(uris) diff --git a/src/linkeddata_api/domain/label.py b/src/linkeddata_api/domain/label.py index cbac23b..62a1af6 100644 --- a/src/linkeddata_api/domain/label.py +++ b/src/linkeddata_api/domain/label.py @@ -7,7 +7,7 @@ def get( uri: str, - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> Union[str, None]: """ Returns a label or None if no label found. @@ -67,7 +67,7 @@ def _get_from_list_query(uris: list[str]) -> str: def get_from_list( uris: list[str], - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> dict[str, str]: """Returns a dict of uri keys and label values. diff --git a/src/linkeddata_api/domain/viewer/entrypoints/nrm.py b/src/linkeddata_api/domain/viewer/entrypoints/nrm.py index fcdfbd4..b129276 100644 --- a/src/linkeddata_api/domain/viewer/entrypoints/nrm.py +++ b/src/linkeddata_api/domain/viewer/entrypoints/nrm.py @@ -9,7 +9,7 @@ def get_optional_value(row: dict, key: str) -> Optional[str]: def get( - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> schema.Item: """Get diff --git a/src/linkeddata_api/views/api_v1/viewer/entrypoint.py b/src/linkeddata_api/views/api_v1/viewer/entrypoint.py index 579d6d4..4569474 100644 --- a/src/linkeddata_api/views/api_v1/viewer/entrypoint.py +++ b/src/linkeddata_api/views/api_v1/viewer/entrypoint.py @@ -12,18 +12,24 @@ from linkeddata_api.domain.pydantic_jsonify import jsonify -mapping = {"nrm": domain.viewer.entrypoints.nrm.get} +mapping = { + "nrm": { + "func": domain.viewer.entrypoints.nrm.get, + "sparql_endpoint": "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + } +} @bp.get("/viewer/entrypoint/") @openapi.validate(validate_request=False, validate_response=False) def get_entrypoint(viewer_id: str): try: - func = mapping.get(viewer_id) - if func is None: + obj = mapping.get(viewer_id) + if obj is None: raise ViewerIDNotFoundError(f"Key '{viewer_id}' not found") - items = func() + sparql_endpoint = obj["sparql_endpoint"] + items = obj["func"](sparql_endpoint) except ViewerIDNotFoundError as err: raise HTTPException(str(err), Response(str(err), 404)) from err except (RequestError, SPARQLResultJSONError) as err: diff --git a/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py b/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py index 32c89eb..fc90a95 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/internal_resource.py @@ -24,7 +24,7 @@ def _get_from_list_query(uris: list[str]) -> str: def get_from_list( uris: list[str], - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> dict[str, str]: query = _get_from_list_query(uris) diff --git a/src/linkeddata_api/vocab_viewer/nrm/label.py b/src/linkeddata_api/vocab_viewer/nrm/label.py index b22c1a0..62ebe13 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/label.py +++ b/src/linkeddata_api/vocab_viewer/nrm/label.py @@ -7,7 +7,7 @@ def get( uri: str, - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> Union[str, None]: """ Returns a label or None if no label found. @@ -67,7 +67,7 @@ def _get_from_list_query(uris: list[str]) -> str: def get_from_list( uris: list[str], - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", + sparql_endpoint: str, ) -> dict[str, str]: """Returns a dict of uri keys and label values. From 4d3e92f2f9195f15c48fdbf98870474d3d111f7c Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 05:31:10 +0000 Subject: [PATCH 14/16] Comment out incoming_properties fetch --- src/linkeddata_api/domain/viewer/resource/json/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkeddata_api/domain/viewer/resource/json/__init__.py b/src/linkeddata_api/domain/viewer/resource/json/__init__.py index ed2eb9c..6b07ea6 100644 --- a/src/linkeddata_api/domain/viewer/resource/json/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/json/__init__.py @@ -132,7 +132,7 @@ def get(uri: str, sparql_endpoint: str) -> domain.schema.Resource: profile = "https://w3id.org/tern/ontologies/tern/Method" properties = method_profile(properties) - incoming_properties = _get_incoming_properties(uri, sparql_endpoint) + # incoming_properties = _get_incoming_properties(uri, sparql_endpoint) return domain.schema.Resource( uri=uri, From 3536adb44865112300204176b23d76dea8e16ea1 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 30 Aug 2022 23:58:39 +0000 Subject: [PATCH 15/16] Add logging with execution time. Disable fetching of curie for subjects and objects in an RDF statement, to improve speed. --- src/linkeddata_api/app.py | 4 +++ src/linkeddata_api/domain/curie.py | 15 ++++++++-- .../domain/viewer/resource/json/__init__.py | 29 ++++++++++++++---- src/linkeddata_api/log_time.py | 30 +++++++++++++++++++ .../views/api_v1/viewer/resource.py | 23 ++++++++++++++ 5 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 src/linkeddata_api/log_time.py diff --git a/src/linkeddata_api/app.py b/src/linkeddata_api/app.py index 5c1d1e2..cd203c9 100644 --- a/src/linkeddata_api/app.py +++ b/src/linkeddata_api/app.py @@ -1,4 +1,5 @@ import os +import logging from flask import Flask, redirect, url_for @@ -21,6 +22,9 @@ def create_app(config=None) -> Flask: app = Flask("linkeddata_api") app.config["VERSION"] = version + if app.config["ENV"] == "development": + logging.basicConfig(level=logging.INFO) + ################################################### # custom json encoder ################################################### diff --git a/src/linkeddata_api/domain/curie.py b/src/linkeddata_api/domain/curie.py index 614d90f..f36e8f7 100644 --- a/src/linkeddata_api/domain/curie.py +++ b/src/linkeddata_api/domain/curie.py @@ -1,5 +1,9 @@ +import logging + import requests +logger = logging.getLogger(__name__) + # URIs that don't have curies in external service. not_found = {} @@ -14,10 +18,13 @@ "http://www.w3.org/2001/XMLSchema#": "xsd", } -# Don't find curies for these. +# Don't find curies for these - speeds up request processing. +# TODO: these may no longer be needed since we don't fetch for subjects or objects of an RDF statement anymore. skips = [ "https://linked.data.gov.au/def/nrm", "https://linked.data.gov.au/def/test/dawe-cv", + "http://linked.data.gov.au/dataset", + "https://linked.data.gov.au/dataset", ] @@ -49,6 +56,7 @@ def get(uri: str): if uri_in_skips(uri): return uri + logger.info("Fetching curie from external service - %s", uri) localname = uri.split("#")[-1].split("/")[-1] r_index = uri.rfind(localname) base_uri = uri[:r_index] @@ -65,4 +73,7 @@ def get(uri: str): prefix = response.json()["value"][:-1] prefixes[base_uri] = prefix - return f"{prefix}:{localname}" + curie = f"{prefix}:{localname}" + logger.info("Curie fetch completed for %s, found %s", uri, curie) + + return curie diff --git a/src/linkeddata_api/domain/viewer/resource/json/__init__.py b/src/linkeddata_api/domain/viewer/resource/json/__init__.py index 6b07ea6..9d7c537 100644 --- a/src/linkeddata_api/domain/viewer/resource/json/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/json/__init__.py @@ -1,3 +1,5 @@ +import logging + from rdflib import RDF from linkeddata_api import data, domain @@ -6,8 +8,12 @@ from linkeddata_api.domain.viewer.resource.json.sort_property_objects import ( sort_property_objects, ) +from linkeddata_api.log_time import log_time + +logger = logging.getLogger(__name__) +@log_time def _get_uris_from_rdf_list(uri: str, rows: list, sparql_endpoint: str) -> list[str]: new_uris = [] for row in rows: @@ -35,6 +41,7 @@ def _get_uris_from_rdf_list(uri: str, rows: list, sparql_endpoint: str) -> list[ return new_uris +@log_time def _get_uri_values_and_list_items( result: dict, uri: str, sparql_endpoint: str ) -> tuple[list[str], list[str]]: @@ -56,6 +63,7 @@ def _get_uri_values_and_list_items( return uri_values, list_items +@log_time def _add_rows_for_rdf_list_items(result: dict, uri: str, sparql_endpoint: str) -> dict: """Add rdf:List items as new rows to the SPARQL result object @@ -87,6 +95,7 @@ def _add_rows_for_rdf_list_items(result: dict, uri: str, sparql_endpoint: str) - return result +@log_time def _get_uri_label_index( result: dict, uri: str, sparql_endpoint: str ) -> dict[str, str]: @@ -95,6 +104,7 @@ def _get_uri_label_index( return uri_label_index +@log_time def _get_uri_internal_index( result: dict, uri: str, sparql_endpoint: str ) -> dict[str, str]: @@ -105,6 +115,7 @@ def _get_uri_internal_index( return uri_internal_index +@log_time def get(uri: str, sparql_endpoint: str) -> domain.schema.Resource: query = f""" SELECT ?p ?o ?listItem ?listItemNumber @@ -151,6 +162,7 @@ def get(uri: str, sparql_endpoint: str) -> domain.schema.Resource: ) from err +@log_time def _get_incoming_properties(uri: str, sparql_endpoint: str): query = f""" SELECT ?p ?o ?listItem ?listItemNumber @@ -175,9 +187,10 @@ def _get_incoming_properties(uri: str, sparql_endpoint: str): incoming_properties = [] for row in result["results"]["bindings"]: - subject_label = uri_label_index.get(row["o"]["value"]) or domain.curie.get( - row["o"]["value"] - ) + # subject_label = uri_label_index.get(row["o"]["value"]) or domain.curie.get( + # row["o"]["value"] + # ) + subject_label = uri_label_index.get(row["o"]["value"]) or row["o"]["value"] item = domain.schema.URI( label=subject_label, value=row["o"]["value"], @@ -212,6 +225,7 @@ def _get_incoming_properties(uri: str, sparql_endpoint: str): return incoming_properties +@log_time def _get_types_and_properties( result: dict, uri: str, sparql_endpoint: str ) -> tuple[list[domain.schema.URI], list[domain.schema.PredicateObjects]]: @@ -252,9 +266,12 @@ def _get_types_and_properties( else None, ) if row["o"]["type"] == "uri": - object_label = uri_label_index.get( - row["o"]["value"] - ) or domain.curie.get(row["o"]["value"]) + # object_label = uri_label_index.get( + # row["o"]["value"] + # ) or domain.curie.get(row["o"]["value"]) + object_label = ( + uri_label_index.get(row["o"]["value"]) or row["o"]["value"] + ) item = domain.schema.URI( label=object_label, value=row["o"]["value"], diff --git a/src/linkeddata_api/log_time.py b/src/linkeddata_api/log_time.py new file mode 100644 index 0000000..7d74d3a --- /dev/null +++ b/src/linkeddata_api/log_time.py @@ -0,0 +1,30 @@ +import inspect +import logging +import time + +from functools import wraps + +logger = logging.getLogger(__name__) + + +def log_time(func): + """This decorator prints the execution time for the decorated function""" + + @wraps(func) + def wrapper(*args, **kwargs): + start = time.time() + result = func(*args, **kwargs) + end = time.time() + + callerframerecord = inspect.stack()[1] + frame = callerframerecord[0] + info = inspect.getframeinfo(frame) + + logger.debug( + "%s ran in %ss", + f"{info.filename}:{info.lineno}", + round(end - start, 2), + ) + return result + + return wrapper diff --git a/src/linkeddata_api/views/api_v1/viewer/resource.py b/src/linkeddata_api/views/api_v1/viewer/resource.py index 53e054b..47a88a5 100644 --- a/src/linkeddata_api/views/api_v1/viewer/resource.py +++ b/src/linkeddata_api/views/api_v1/viewer/resource.py @@ -1,3 +1,5 @@ +import logging + from flask import request, Response from werkzeug.exceptions import HTTPException from flask_tern import openapi @@ -10,6 +12,8 @@ SPARQLResultJSONError, ) +logger = logging.getLogger(__name__) + @bp.get("/viewer/resource") @openapi.validate(validate_request=False, validate_response=False) @@ -25,6 +29,25 @@ def get_resource(): True if include_incoming_relationships == "true" else False ) + logger.info( + """ +GET /viewer/resource + query parameters: + uri: + %s + sparql_endpoint: + %s + format: + %s + include_incoming_relationships: + %s + """, + uri, + sparql_endpoint, + format_, + include_incoming_relationships, + ) + try: result = domain.viewer.resource.get( uri, sparql_endpoint, format_, include_incoming_relationships From 393ecfce6f20f52356d0419c748b6e73590e780f Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Wed, 31 Aug 2022 05:45:36 +0000 Subject: [PATCH 16/16] Fix tests --- .../domain/viewer/resource/__init__.py | 4 ++-- .../views/api_v1/viewer/resource.py | 10 ++++++++- .../api_v1/resource/test_resource_describe.py | 22 +++++++++---------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/linkeddata_api/domain/viewer/resource/__init__.py b/src/linkeddata_api/domain/viewer/resource/__init__.py index 0f5c7c8..42dfece 100644 --- a/src/linkeddata_api/domain/viewer/resource/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/__init__.py @@ -9,7 +9,7 @@ from . import json -def _handle_json_response(uri: str, sparql_endpoint: str) -> domain.schema.Resource: +def _handle_json_response(uri: str, sparql_endpoint: str) -> str: try: result = json.get(uri, sparql_endpoint) except (RequestError, SPARQLNotFoundError, SPARQLResultJSONError) as err: @@ -33,7 +33,7 @@ def _handle_rdf_response( graph.parse(data=response.text, format=format_) if len(graph) == 0: - return "Resource not found", 404 + raise SPARQLNotFoundError(f"Resource with URI {uri} not found.") if not include_incoming_relationships: graph.remove((None, None, URIRef(uri))) diff --git a/src/linkeddata_api/views/api_v1/viewer/resource.py b/src/linkeddata_api/views/api_v1/viewer/resource.py index 47a88a5..0b30f72 100644 --- a/src/linkeddata_api/views/api_v1/viewer/resource.py +++ b/src/linkeddata_api/views/api_v1/viewer/resource.py @@ -29,6 +29,10 @@ def get_resource(): True if include_incoming_relationships == "true" else False ) + if uri is None or sparql_endpoint is None: + err_msg = "Required query parameters 'uri' or 'sparql_endpoint' was not provided." + raise HTTPException(err_msg, Response(err_msg, 404)) + logger.info( """ GET /viewer/resource @@ -65,4 +69,8 @@ def get_resource(): response=Response(str(err), mimetype="text/plain", status=500), ) from err - return Response(result, mimetype=format_) + return Response( + result, + mimetype=format_, + headers={"cache-control": "max-age=600, s-maxage=3600"}, + ) diff --git a/tests/api_v1/resource/test_resource_describe.py b/tests/api_v1/resource/test_resource_describe.py index b08a62b..98e7905 100644 --- a/tests/api_v1/resource/test_resource_describe.py +++ b/tests/api_v1/resource/test_resource_describe.py @@ -8,7 +8,7 @@ @pytest.fixture def url() -> str: - return "/api/v1.0/resource" + return "/api/v1.0/viewer/resource" value = """ @@ -56,7 +56,7 @@ def url() -> str: "text/turtle", "text/turtle", "https://linked.data.gov.au/def/nrm", - "dawe_vocabs_core", + "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", "false", value, 22, @@ -68,10 +68,10 @@ def url() -> str: "text/turtle", "text/html", "https://linked.data.gov.au/def/nrm/not-exist", - "dawe_vocabs_core", + "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", "false", "", - 22, + None, ), # RDF4J repository does not exist ( @@ -80,10 +80,10 @@ def url() -> str: "text/turtle", "text/html", "https://linked.data.gov.au/def/nrm", - "dawe_vocabs_core-not-exist", + "https://graphdb.tern.org.au/repositories/dawe_vocabs_core-not-exist", "false", "", - 22, + None, ), # Include incoming relationships ( @@ -92,10 +92,10 @@ def url() -> str: "text/turtle", "text/turtle", "https://linked.data.gov.au/def/nrm", - "dawe_vocabs_core", + "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", "true", value, - 23, + 3179, ), # No accepted format, default to text/turtle ( @@ -104,7 +104,7 @@ def url() -> str: "", "text/turtle", "https://linked.data.gov.au/def/nrm", - "dawe_vocabs_core", + "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", "false", value, 22, @@ -134,11 +134,11 @@ def test_describe( response: TestResponse = client.get( url, query_string={ - "repository_id": repository_id, + "sparql_endpoint": repository_id, "uri": uri, "include_incoming_relationships": include_incoming_relationships, + "format": accept_format, }, - headers={"accept": accept_format}, ) assert response.status_code == response_status_code assert expected_format in response.headers.get("content-type")