From 890df0f84920f5974bed31d223de136ec5b7084c Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Wed, 4 Jan 2023 07:31:07 +0000 Subject: [PATCH 1/2] Use sets instead of lists when performing checks --- src/linkeddata_api/domain/schema.py | 2 +- .../domain/viewer/resource/json/__init__.py | 57 +++++++++---------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/src/linkeddata_api/domain/schema.py b/src/linkeddata_api/domain/schema.py index 19fd668..770ca52 100644 --- a/src/linkeddata_api/domain/schema.py +++ b/src/linkeddata_api/domain/schema.py @@ -46,7 +46,7 @@ class SubjectPredicates(BaseModel): class PredicateObjects(BaseModel): predicate: URI - objects: list[Union[URI, Literal]] + objects: set[Union[URI, Literal]] class Resource(BaseModel): diff --git a/src/linkeddata_api/domain/viewer/resource/json/__init__.py b/src/linkeddata_api/domain/viewer/resource/json/__init__.py index a7c66bb..7ee8652 100644 --- a/src/linkeddata_api/domain/viewer/resource/json/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/json/__init__.py @@ -130,36 +130,29 @@ def get(uri: str, sparql_endpoint: str) -> domain.schema.Resource: result = data.sparql.post(query, sparql_endpoint).json() - try: - result = _add_rows_for_rdf_list_items(result, uri, sparql_endpoint) - label = domain.label.get(uri, sparql_endpoint) or uri - types, properties = _get_types_and_properties(result, uri, sparql_endpoint) - - profile = "" - if exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): - profile = "https://w3id.org/tern/ontologies/tern/MethodCollection" - properties = method_profile(properties) - elif exists_uri("https://w3id.org/tern/ontologies/tern/Method", types): - profile = "https://w3id.org/tern/ontologies/tern/Method" - properties = method_profile(properties) - - # incoming_properties = _get_incoming_properties(uri, sparql_endpoint) - - return domain.schema.Resource( - uri=uri, - profile=profile, - label=label, - types=types, - properties=properties, - # incoming_properties=incoming_properties, - incoming_properties=[], # TODO: - ) - except data.exceptions.SPARQLNotFoundError as err: - raise err - except Exception as err: - raise data.exceptions.SPARQLResultJSONError( - f"Unexpected SPARQL result.\n{result}\n{err}" - ) from err + result = _add_rows_for_rdf_list_items(result, uri, sparql_endpoint) + label = domain.label.get(uri, sparql_endpoint) or uri + types, properties = _get_types_and_properties(result, uri, sparql_endpoint) + + profile = "" + if exists_uri("https://w3id.org/tern/ontologies/tern/MethodCollection", types): + profile = "https://w3id.org/tern/ontologies/tern/MethodCollection" + properties = method_profile(properties) + elif exists_uri("https://w3id.org/tern/ontologies/tern/Method", types): + profile = "https://w3id.org/tern/ontologies/tern/Method" + properties = method_profile(properties) + + # incoming_properties = _get_incoming_properties(uri, sparql_endpoint) + + return domain.schema.Resource( + uri=uri, + profile=profile, + label=label, + types=types, + properties=properties, + # incoming_properties=incoming_properties, + incoming_properties=[], # TODO: + ) @log_time @@ -319,11 +312,12 @@ def _get_types_and_properties( ) found = False + for p in properties: if p.predicate.value == predicate.value: found = True if item not in p.objects: - p.objects.append(item) + p.objects.add(item) if not found: properties.append( @@ -341,6 +335,7 @@ def _get_types_and_properties( # Sort all property objects by label. properties.sort(key=lambda x: x.predicate.label) for property_ in properties: + property_.objects = list(property_.objects) property_.objects.sort(key=sort_property_objects) return types, properties From 151008b84b660599c8b087e85cd97d00673355bf Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Thu, 5 Jan 2023 00:58:47 +0000 Subject: [PATCH 2/2] Use dict and set for performance and reduce time complexity of lookups --- .../domain/viewer/resource/json/__init__.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/linkeddata_api/domain/viewer/resource/json/__init__.py b/src/linkeddata_api/domain/viewer/resource/json/__init__.py index 7ee8652..4c845cf 100644 --- a/src/linkeddata_api/domain/viewer/resource/json/__init__.py +++ b/src/linkeddata_api/domain/viewer/resource/json/__init__.py @@ -1,4 +1,6 @@ import logging +from typing import Union +from collections import defaultdict from rdflib import RDF @@ -224,7 +226,9 @@ def _get_types_and_properties( ) -> tuple[list[domain.schema.URI], list[domain.schema.PredicateObjects]]: types: list[domain.schema.URI] = [] - properties: list[domain.schema.PredicateObjects] = [] + properties: dict[ + str, set[Union[domain.schema.URI, domain.schema.Literal]] + ] = defaultdict(set) # An index of URIs with label values. uri_label_index = _get_uri_label_index(result, uri, sparql_endpoint) @@ -311,18 +315,14 @@ def _get_types_and_properties( f"Expected type to be uri or literal but got {row['o']['type']}" ) - found = False + # Use dict and set for performance + properties[predicate].add(item) - for p in properties: - if p.predicate.value == predicate.value: - found = True - if item not in p.objects: - p.objects.add(item) - - if not found: - properties.append( - domain.schema.PredicateObjects(predicate=predicate, objects=[item]) - ) + # Convert to a list of PredicateObjects + properties = [ + domain.schema.PredicateObjects(predicate=k, objects=v) + for k, v in properties.items() + ] # Duplicates may occur due to processing RDF lists. # Remove duplicates, if any.