diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 3fae070..96c4c34 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -40,11 +40,16 @@ ], // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], + "forwardPorts": [5000], // Use 'postCreateCommand' to run commands after the container is created. "postCreateCommand": "pip install -e .", // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. - "remoteUser": "vscode" + "remoteUser": "vscode", + + // Network to connect to GraphDB on port 7200 in a separate devcontainer environment. + "runArgs": [ + "--network=dawe-rlp-vocabs_devcontainer_default" + ] } diff --git a/src/linkeddata_api/views/api_v1/openapi.yaml b/src/linkeddata_api/views/api_v1/openapi.yaml index 1e1820b..5dfb9a4 100644 --- a/src/linkeddata_api/views/api_v1/openapi.yaml +++ b/src/linkeddata_api/views/api_v1/openapi.yaml @@ -258,6 +258,16 @@ paths: nrm: summary: NRM index value: https://linked.data.gov.au/def/nrm + - in: query + name: sparql_endpoint + schema: + type: string + required: true + description: SPARQL endpoint + examples: + nrm: + summary: NRM SPARQL endpoint + value: https://graphdb.tern.org.au/repositories/dawe_vocabs_core responses: "200": description: A resource's description diff --git a/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py b/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py index a2a4333..998abbf 100644 --- a/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/views/api_v1/vocab_viewer/nrm/resource.py @@ -13,9 +13,10 @@ @openapi.validate(validate_response=False) def get_nrm_resource(): uri = request.args.get("uri") + sparql_endpoint = request.args.get("sparql_endpoint") try: - result = nrm.resource.get(uri) + result = nrm.resource.get(uri, sparql_endpoint) except nrm.exceptions.SPARQLNotFoundError as err: raise HTTPException(err.description, Response(err.description, 404)) from err except (nrm.exceptions.RequestError, nrm.exceptions.SPARQLResultJSONError) as err: diff --git a/src/linkeddata_api/vocab_viewer/nrm/__init__.py b/src/linkeddata_api/vocab_viewer/nrm/__init__.py index c9d19de..b220b10 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/__init__.py +++ b/src/linkeddata_api/vocab_viewer/nrm/__init__.py @@ -5,3 +5,4 @@ from . import label from . import curie from . import internal_resource +from . import namespaces diff --git a/src/linkeddata_api/vocab_viewer/nrm/label.py b/src/linkeddata_api/vocab_viewer/nrm/label.py index 10ccf8d..b22c1a0 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/label.py +++ b/src/linkeddata_api/vocab_viewer/nrm/label.py @@ -36,6 +36,8 @@ def get( def _get_from_list_query(uris: list[str]) -> str: + # TODO: Currently, we try and fetch from TERN's controlled vocabularies. + # We may want to also fetch with a SERVICE query from other repositories in the future. template = Template( """ PREFIX skos: @@ -52,7 +54,7 @@ def _get_from_list_query(uris: list[str]) -> str: } UNION { # Also try and fetch label from TERN's controlled vocabularies. - SERVICE { + SERVICE { ?uri skos:prefLabel ?_label . } } diff --git a/src/linkeddata_api/vocab_viewer/nrm/namespaces.py b/src/linkeddata_api/vocab_viewer/nrm/namespaces.py new file mode 100644 index 0000000..84e5eca --- /dev/null +++ b/src/linkeddata_api/vocab_viewer/nrm/namespaces.py @@ -0,0 +1,3 @@ +from rdflib import Namespace + +TERN = Namespace("https://w3id.org/tern/ontologies/tern/") diff --git a/src/linkeddata_api/vocab_viewer/nrm/resource.py b/src/linkeddata_api/vocab_viewer/nrm/resource.py index ccefe93..584ecc6 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/resource.py +++ b/src/linkeddata_api/vocab_viewer/nrm/resource.py @@ -1,17 +1,99 @@ -from rdflib import RDF +from rdflib import RDF, RDFS, SKOS, SDO, DCTERMS from linkeddata_api.vocab_viewer import nrm +from linkeddata_api.vocab_viewer.nrm.namespaces import TERN -def get( - uri: str, - profile: str = None, # TODO: Add presentation handling for different kinds of data - sparql_endpoint: str = "https://graphdb.tern.org.au/repositories/dawe_vocabs_core", -) -> nrm.schema.Resource: +def _exists_uri(target_uri: str, uris: list[nrm.schema.URI]) -> bool: + for uri in uris: + if uri.value == target_uri: + return True + return False + + +def _add_and_remove_property( + predicate_uri: str, + old_list: list[nrm.schema.PredicateObjects], + new_list: list[nrm.schema.PredicateObjects], +) -> None: + """Add and remove the PredicateObjects object if matched by predicate_uri in + the referenced lists, 'old_list' and 'new_list' + + Returns a copy of the PredicateObjects object. + """ + predicate_object = None + for property_ in old_list: + if property_.predicate.value == predicate_uri: + new_list.append(property_) + predicate_object = property_ + old_list.remove(property_) + return predicate_object + + +def _method_profile( + properties: list[nrm.schema.PredicateObjects], +) -> list[nrm.schema.PredicateObjects]: + new_properties = [] + + _add_and_remove_property(str(RDFS.isDefinedBy), properties, new_properties) + + # Omit skos:prefLabel + _add_and_remove_property(str(SKOS.prefLabel), properties, new_properties) + new_properties.pop() + + _add_and_remove_property(str(TERN), properties, new_properties) + _add_and_remove_property(str(SDO.url), properties, new_properties) + _add_and_remove_property(str(SKOS.memberList), properties, new_properties) + _add_and_remove_property(str(TERN.scope), properties, new_properties) + _add_and_remove_property(str(SKOS.definition), properties, new_properties) + _add_and_remove_property(str(TERN.purpose), properties, new_properties) + # TODO: Change to different property due to issue with RVA + _add_and_remove_property(str(DCTERMS.description), properties, new_properties) + _add_and_remove_property(str(TERN.equipment), properties, new_properties) + _add_and_remove_property(str(TERN.instructions), properties, new_properties) + _add_and_remove_property(str(SKOS.note), properties, new_properties) + _add_and_remove_property(str(DCTERMS.source), properties, new_properties) + _add_and_remove_property(str(TERN.appendix), properties, new_properties) + + return new_properties + properties + + +def _get_rdf_list_item_uris(uri: str, rows: list, sparql_endpoint: str) -> list[str]: + new_uris = [] + for row in rows: + if row["o"]["type"] == "bnode" and row["listItem"]["value"] == "true": + # TODO: error handling - move empty result exception to nrm.sparql.post/nrm.sparql.get + query = f""" + PREFIX skos: + PREFIX rdf: + SELECT DISTINCT ?p ?o + where {{ + BIND(<{row["p"]["value"]}> AS ?p) + <{uri}> ?p ?list . + ?list rdf:rest* ?rest . + ?rest rdf:first ?o . + }} + """ + result = nrm.sparql.post( + query, + sparql_endpoint, + ) + + for result_row in result["results"]["bindings"]: + new_uris.append(result_row) + + return new_uris + + +def get(uri: str, sparql_endpoint: str) -> nrm.schema.Resource: query = f""" - SELECT * + SELECT ?p ?o ?listItem ?listItemNumber WHERE {{ <{uri}> ?p ?o . + BIND(EXISTS{{?o rdf:rest ?rest}} as ?listItem) + + # This gets set later with the listItemNumber value. + BIND(0 AS ?listItemNumber) }} """ @@ -25,9 +107,35 @@ def get( uri_values = filter( lambda x: x["o"]["type"] == "uri", result["results"]["bindings"] ) + uri_values = [value["o"]["value"] for value in uri_values] uri_values.append(uri) + # Replace value of blank node list head with items. + list_items = _get_rdf_list_item_uris( + uri, result["results"]["bindings"], sparql_endpoint + ) + + for row in list_items: + uri_values.append(row["o"]["value"]) + + for i, list_item in enumerate(list_items): + list_item.update( + { + "listItem": { + "datatype": "http://www.w3.org/2001/XMLSchema#boolean", + "type": "literal", + "value": "true", + }, + "listItemNumber": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": str(i), + }, + } + ) + result["results"]["bindings"].append(list_item) + uri_label_index = nrm.label.get_from_list(uri_values, sparql_endpoint) label = nrm.label.get(uri, sparql_endpoint) or uri @@ -57,6 +165,10 @@ def get( label=predicate_label, value=row["p"]["value"], internal=uri_internal_index.get(row["p"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=int(row["listItemNumber"]["value"]) + if row["listItem"]["value"] == "true" + else None, ) if row["o"]["type"] == "uri": object_label = uri_label_index.get( @@ -66,6 +178,10 @@ def get( label=object_label, value=row["o"]["value"], internal=uri_internal_index.get(row["o"]["value"], False), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, ) elif row["o"]["type"] == "literal": datatype = row["o"].get("datatype", "") @@ -74,6 +190,12 @@ def get( label=datatype, value=datatype, internal=uri_internal_index.get(datatype, False), + list_item=True + if row["listItem"]["value"] == "true" + else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, ) else: datatype = None @@ -82,6 +204,10 @@ def get( value=row["o"]["value"], datatype=datatype, language=row["o"].get("xml:lang", ""), + list_item=True if row["listItem"]["value"] == "true" else False, + list_item_number=row["listItemNumber"]["value"] + if row["listItem"]["value"] == "true" + else None, ) elif row["o"]["type"] == "bnode": # TODO: Handle blank nodes. @@ -101,8 +227,29 @@ def get( nrm.schema.PredicateObjects(predicate=predicate, objects=[item]) ) + # Duplicates may occur due to processing RDF lists. + # Remove duplicates, if any. + for property_ in properties: + if property_.predicate.list_item: + for obj in property_.objects: + if not obj.list_item: + property_.objects.remove(obj) + + # Sort all property objects by label. + properties.sort(key=lambda x: x.predicate.label) + for property_ in properties: + property_.objects.sort(key=sort_property_objects) + + profile = "" + if _exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): + profile = "https://w3id.org/tern/ontologies/tern/MethodCollection" + properties = _method_profile(properties) + elif _exists_uri("https://w3id.org/tern/ontologies/tern/Method", types): + profile = "https://w3id.org/tern/ontologies/tern/Method" + properties = _method_profile(properties) + return nrm.schema.Resource( - uri=uri, label=label, types=types, properties=properties + uri=uri, profile=profile, label=label, types=types, properties=properties ) except nrm.exceptions.SPARQLNotFoundError as err: raise err @@ -110,3 +257,13 @@ def get( raise nrm.exceptions.SPARQLResultJSONError( f"Unexpected SPARQL result.\n{result}\n{err}" ) from err + + +def sort_property_objects(x): + if x.list_item: + return x.list_item_number + else: + if x.type == "uri": + return x.label + else: + return x.value diff --git a/src/linkeddata_api/vocab_viewer/nrm/schema.py b/src/linkeddata_api/vocab_viewer/nrm/schema.py index b790648..b9fc700 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/schema.py +++ b/src/linkeddata_api/vocab_viewer/nrm/schema.py @@ -11,19 +11,31 @@ class Item(BaseModel): modified: str = None -class URI(BaseModel): +class RDFListItemMixin(BaseModel): + list_item: bool = False + list_item_number: int | None = None + + +class URI(RDFListItemMixin): type: str = "uri" label: str value: str internal: bool + def __hash__(self): + return hash(self.value) -class Literal(BaseModel): + +class Literal(RDFListItemMixin): type: str = "literal" value: str datatype: URI = None language: str = "" + def __hash__(self): + datatype = self.datatype.value if self.datatype else "" + return hash(self.value + datatype + self.language) + class PredicateObjects(BaseModel): predicate: URI @@ -32,6 +44,7 @@ class PredicateObjects(BaseModel): class Resource(BaseModel): uri: str + profile: str = "" label: str types: list[URI] properties: list[PredicateObjects] diff --git a/src/linkeddata_api/vocab_viewer/nrm/sparql.py b/src/linkeddata_api/vocab_viewer/nrm/sparql.py index 0d53bdc..b390987 100644 --- a/src/linkeddata_api/vocab_viewer/nrm/sparql.py +++ b/src/linkeddata_api/vocab_viewer/nrm/sparql.py @@ -16,4 +16,24 @@ def post(query: str, sparql_endpoint: str) -> dict: except requests.exceptions.HTTPError as err: raise nrm.exceptions.RequestError(err.response.text) from err + # TODO: raise empty response error here. + + return response.json() + + +def get(query: str, sparql_endpoint: str) -> dict: + headers = { + "accept": "application/sparql-results+json", + } + params = {"query": query} + + response = requests.get(url=sparql_endpoint, headers=headers, params=params) + + try: + response.raise_for_status() + except requests.exceptions.HTTPError as err: + raise nrm.exceptions.RequestError(err.response.text) from err + + # TODO: raise empty response error here. + return response.json()