Skip to content

Commit

Permalink
feature/mx-1673 add matched test data (#227)
Browse files Browse the repository at this point in the history
# PR Context
- prep for #226

# Added
- add two matched organizations to the test dummy data

# Changed
- rename `stable_target_id` to more appropriate `identifier` argument
for merged queries
  • Loading branch information
cutoffthetop authored Jan 9, 2025
1 parent 412cf26 commit 735c51d
Show file tree
Hide file tree
Showing 17 changed files with 500 additions and 209 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- add `extracted_or_rule_labels` to query builder globals
- add two matched organizations to the test dummy data

### Changed

- rename short and obscure cypher query variables to more expressive and verbose ones
- rename `stable_target_id` to more appropriate `identifier` argument for merged queries

### Deprecated

Expand Down
8 changes: 4 additions & 4 deletions mex/backend/graph/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def fetch_rule_items(
def fetch_merged_items(
self,
query_string: str | None,
stable_target_id: str | None,
identifier: str | None,
entity_type: Sequence[str] | None,
skip: int,
limit: int,
Expand All @@ -298,7 +298,7 @@ def fetch_merged_items(
Args:
query_string: Optional full text search query term
stable_target_id: Optional stable target ID filter
identifier: Optional merged item identifier filter
entity_type: Optional merged entity type filter
skip: How many items to skip for pagination
limit: How many items to return at most
Expand All @@ -309,12 +309,12 @@ def fetch_merged_items(
query_builder = QueryBuilder.get()
query = query_builder.fetch_merged_items(
filter_by_query_string=bool(query_string),
filter_by_stable_target_id=bool(stable_target_id),
filter_by_identifier=bool(identifier),
)
result = self.commit(
query,
query_string=query_string,
stable_target_id=stable_target_id,
identifier=identifier,
labels=entity_type or list(MERGED_MODEL_CLASSES_BY_NAME),
skip=skip,
limit=limit,
Expand Down
6 changes: 3 additions & 3 deletions mex/backend/graph/cypher/fetch_merged_items.cql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Globals:

Args:
filter_by_query_string: Whether the final query should accept a full text query string
filter_by_stable_target_id: Whether the final query should filter by stableTargetId
filter_by_identifier: Whether the final query should filter by the merged identifier

Returns:
total: Count of all items found by this query
Expand All @@ -31,8 +31,8 @@ CALL () {
<%- if filter_by_query_string %>
<<and_()>>elementId(hit) = elementId(extracted_or_rule_node)
<%- endif %>
<%- if filter_by_stable_target_id %>
<<and_()>>merged_node.identifier = $stable_target_id
<%- if filter_by_identifier %>
<<and_()>>merged_node.identifier = $identifier
<%- endif %>
<<and_()>>ANY(label IN labels(merged_node) WHERE label IN $labels)
WITH DISTINCT merged_node AS merged_node
Expand Down
10 changes: 5 additions & 5 deletions mex/backend/merged/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def merge_search_result_item(
@overload
def search_merged_items_in_graph(
query_string: str | None = None,
stable_target_id: str | None = None,
identifier: str | None = None,
entity_type: list[str] | None = None,
skip: int = 0,
limit: int = 100,
Expand All @@ -235,7 +235,7 @@ def search_merged_items_in_graph(
@overload
def search_merged_items_in_graph(
query_string: str | None = None,
stable_target_id: str | None = None,
identifier: str | None = None,
entity_type: list[str] | None = None,
skip: int = 0,
limit: int = 100,
Expand All @@ -245,7 +245,7 @@ def search_merged_items_in_graph(

def search_merged_items_in_graph( # noqa: PLR0913
query_string: str | None = None,
stable_target_id: str | None = None,
identifier: str | None = None,
entity_type: list[str] | None = None,
skip: int = 0,
limit: int = 100,
Expand All @@ -255,7 +255,7 @@ def search_merged_items_in_graph( # noqa: PLR0913
Args:
query_string: Full text search query term
stable_target_id: Optional stable target ID filter
identifier: Optional merged item identifier filter
entity_type: Optional entity type filter
skip: How many items to skip for pagination
limit: How many items to return at most
Expand All @@ -272,7 +272,7 @@ def search_merged_items_in_graph( # noqa: PLR0913
graph = GraphConnector.get()
result = graph.fetch_merged_items(
query_string=query_string,
stable_target_id=stable_target_id,
identifier=identifier,
entity_type=entity_type,
skip=skip,
limit=limit,
Expand Down
14 changes: 7 additions & 7 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies = [
"fastapi>=0.115,<1",
"httpx>=0.27,<1",
"jinja2>=3,<4",
"mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.45.0",
"mex-common @ git+https://github.com/robert-koch-institut/mex-common.git@0.46.0",
"neo4j>=5,<6",
"pydantic>=2,<3",
"starlette>=0.41,<1",
Expand Down
60 changes: 53 additions & 7 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pytest import MonkeyPatch

from mex.backend.graph.connector import GraphConnector
from mex.backend.identity.provider import GraphIdentityProvider
from mex.backend.main import app
from mex.backend.rules.helpers import create_and_get_rule_set
from mex.backend.settings import BackendSettings
Expand All @@ -22,6 +23,7 @@
AnyExtractedModel,
ExtractedActivity,
ExtractedContactPoint,
ExtractedOrganization,
ExtractedOrganizationalUnit,
ExtractedPrimarySource,
OrganizationalUnitRuleSetRequest,
Expand Down Expand Up @@ -182,7 +184,7 @@ def generate(cls: type[Identifier], seed: int | None = None) -> Identifier:
@pytest.fixture(autouse=True)
def set_identity_provider(is_integration_test: bool, monkeypatch: MonkeyPatch) -> None:
"""Ensure the identifier provider is set correctly for unit and int tests."""
# yuck, all this needs cleaning up after MX-1596
# TODO(ND): yuck, all this needs cleaning up after MX-1596
for settings in (BaseSettings.get(), BackendSettings.get()):
if is_integration_test:
monkeypatch.setitem(settings.model_config, "validate_assignment", False)
Expand Down Expand Up @@ -234,7 +236,9 @@ def get_graph() -> list[dict[str, Any]]:


@pytest.fixture
def dummy_data() -> dict[str, AnyExtractedModel]:
def dummy_data(
set_identity_provider: None, # noqa: ARG001
) -> dict[str, AnyExtractedModel]:
"""Create a set of interlinked dummy data."""
primary_source_1 = ExtractedPrimarySource(
hadPrimarySource=MEX_PRIMARY_SOURCE_STABLE_TARGET_ID,
Expand All @@ -255,16 +259,34 @@ def dummy_data() -> dict[str, AnyExtractedModel]:
hadPrimarySource=primary_source_1.stableTargetId,
identifierInPrimarySource="cp-2",
)
organization_1 = ExtractedOrganization(
hadPrimarySource=primary_source_1.stableTargetId,
identifierInPrimarySource="rki",
officialName=[
Text(value="RKI", language=TextLanguage.DE),
Text(value="Robert Koch Institut ist the best", language=TextLanguage.DE),
],
)
organization_2 = ExtractedOrganization(
hadPrimarySource=primary_source_2.stableTargetId,
identifierInPrimarySource="robert-koch-institute",
officialName=[
Text(value="RKI", language=TextLanguage.DE),
Text(value="Robert Koch Institute", language=TextLanguage.EN),
],
)
organizational_unit_1 = ExtractedOrganizationalUnit(
hadPrimarySource=primary_source_2.stableTargetId,
identifierInPrimarySource="ou-1",
name=[Text(value="Unit 1", language=TextLanguage.EN)],
unitOf=[organization_1.stableTargetId],
)
organizational_unit_2 = ExtractedOrganizationalUnit(
hadPrimarySource=primary_source_2.stableTargetId,
identifierInPrimarySource="ou-1.6",
name=[Text(value="Unit 1.6", language=TextLanguage.EN)],
parentUnit=organizational_unit_1.stableTargetId,
unitOf=[organization_1.stableTargetId],
)
activity_1 = ExtractedActivity(
abstract=[
Expand All @@ -289,18 +311,42 @@ def dummy_data() -> dict[str, AnyExtractedModel]:
"primary_source_2": primary_source_2,
"contact_point_1": contact_point_1,
"contact_point_2": contact_point_2,
"organization_1": organization_1,
"organization_2": organization_2,
"organizational_unit_1": organizational_unit_1,
"organizational_unit_2": organizational_unit_2,
"activity_1": activity_1,
}


def _match_organization_items(dummy_data: dict[str, AnyExtractedModel]) -> None:
    """Manually "match" the two dummy organizations into one merged item.

    Deletes the merged node belonging to `organization_2` and re-points the
    extracted node of `organization_2` at the merged node of `organization_1`,
    so that both extracted organizations share a single merged item.

    Args:
        dummy_data: Extracted dummy models keyed by fixture name; must contain
            `organization_1` and `organization_2`.
    """
    # TODO(ND): replace this crude item matching implementation (stopgap MX-1530)
    connector = GraphConnector.get()
    # remove the merged item for org2
    # (use query parameters instead of f-string interpolation, matching how
    # the connector passes parameters to `commit` elsewhere in this codebase)
    connector.commit(
        """\
MATCH(n) WHERE n.identifier = $identifier
DETACH DELETE n;""",
        identifier=str(dummy_data["organization_2"].stableTargetId),
    )
    # connect the extracted item for org2 with the merged item for org1
    connector.commit(
        """\
MATCH(n :ExtractedOrganization) WHERE n.identifier = $extracted_identifier
MATCH(m :MergedOrganization) WHERE m.identifier = $merged_identifier
MERGE (n)-[:stableTargetId {position:0}]->(m);""",
        extracted_identifier=str(dummy_data["organization_2"].identifier),
        merged_identifier=str(dummy_data["organization_1"].stableTargetId),
    )
    # clear the identity provider cache to refresh the `stableTargetId` property on org2
    provider = GraphIdentityProvider.get()
    provider._cached_assign.cache_clear()


@pytest.fixture
def load_dummy_data(
    dummy_data: dict[str, AnyExtractedModel],
) -> dict[str, AnyExtractedModel]:
    """Write the dummy models into the graph and match the organizations."""
    connector = GraphConnector.get()
    connector.ingest(list(dummy_data.values()))
    _match_organization_items(dummy_data)
    return dummy_data


Expand Down Expand Up @@ -329,19 +375,19 @@ def organizational_unit_rule_set_request(
@pytest.fixture
def load_dummy_rule_set(
organizational_unit_rule_set_request: OrganizationalUnitRuleSetRequest,
dummy_data: dict[str, AnyExtractedModel],
load_dummy_data: dict[str, AnyExtractedModel],
) -> OrganizationalUnitRuleSetResponse:
GraphConnector.get().ingest(
[
dummy_data["primary_source_2"],
dummy_data["organizational_unit_1"],
dummy_data["organizational_unit_2"],
load_dummy_data["primary_source_2"],
load_dummy_data["organizational_unit_1"],
load_dummy_data["organizational_unit_2"],
]
)
return cast(
OrganizationalUnitRuleSetResponse,
create_and_get_rule_set(
organizational_unit_rule_set_request,
stable_target_id=dummy_data["organizational_unit_2"].stableTargetId,
stable_target_id=load_dummy_data["organizational_unit_2"].stableTargetId,
),
)
30 changes: 15 additions & 15 deletions tests/extracted/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,23 +86,23 @@ def test_search_extracted_items_mocked(
"version": None,
}
],
"total": 8,
"total": 10,
},
),
(
"?limit=1&skip=6",
"?limit=1&skip=9",
{
"items": [
{
"$type": "ExtractedContactPoint",
"email": ["[email protected]"],
"hadPrimarySource": "bFQoRhcVH5DHUr",
"identifier": "bFQoRhcVH5DHUw",
"identifier": "bFQoRhcVH5DHUy",
"identifierInPrimarySource": "cp-1",
"stableTargetId": "bFQoRhcVH5DHUx",
"stableTargetId": "bFQoRhcVH5DHUz",
}
],
"total": 8,
"total": 10,
},
),
(
Expand All @@ -111,18 +111,18 @@ def test_search_extracted_items_mocked(
"items": [
{
"$type": "ExtractedContactPoint",
"email": ["info@contact-point.one"],
"email": ["help@contact-point.two"],
"hadPrimarySource": "bFQoRhcVH5DHUr",
"identifier": "bFQoRhcVH5DHUw",
"identifierInPrimarySource": "cp-1",
"stableTargetId": "bFQoRhcVH5DHUx",
"identifier": "bFQoRhcVH5DHUA",
"identifierInPrimarySource": "cp-2",
"stableTargetId": "bFQoRhcVH5DHUB",
},
{
"$type": "ExtractedContactPoint",
"email": ["help@contact-point.two"],
"email": ["info@contact-point.one"],
"hadPrimarySource": "bFQoRhcVH5DHUr",
"identifier": "bFQoRhcVH5DHUy",
"identifierInPrimarySource": "cp-2",
"identifierInPrimarySource": "cp-1",
"stableTargetId": "bFQoRhcVH5DHUz",
},
],
Expand Down Expand Up @@ -153,21 +153,21 @@ def test_search_extracted_items_mocked(
},
),
(
"?stableTargetId=bFQoRhcVH5DHUv",
"?stableTargetId=bFQoRhcVH5DHUx",
{
"items": [
{
"$type": "ExtractedOrganizationalUnit",
"alternativeName": [],
"email": [],
"hadPrimarySource": "bFQoRhcVH5DHUt",
"identifier": "bFQoRhcVH5DHUu",
"identifier": "bFQoRhcVH5DHUw",
"identifierInPrimarySource": "ou-1",
"name": [{"language": "en", "value": "Unit 1"}],
"parentUnit": None,
"shortName": [],
"stableTargetId": "bFQoRhcVH5DHUv",
"unitOf": [],
"stableTargetId": "bFQoRhcVH5DHUx",
"unitOf": ["bFQoRhcVH5DHUv"],
"website": [],
}
],
Expand Down
Loading

0 comments on commit 735c51d

Please sign in to comment.