diff --git a/CHANGELOG.md b/CHANGELOG.md
index a0c4fef..2ab5482 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Unit tests for the migrated tools
 - CRUDs.
 - BlueNaas CRUD tools
+- Cell types, resolving and utils tests
 - app unit tests
 - Tests of AgentsRoutine.
 - Unit tests for database
diff --git a/swarm_copy_tests/data/kg_cell_types_hierarchy_test.json b/swarm_copy_tests/data/kg_cell_types_hierarchy_test.json
index e7cb024..294e509 100644
--- a/swarm_copy_tests/data/kg_cell_types_hierarchy_test.json
+++ b/swarm_copy_tests/data/kg_cell_types_hierarchy_test.json
@@ -10,4 +10,4 @@
 {"@id": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/L23_PTPC", "@type": "Class", "label": "L23_PTPC", "subClassOf": ["https://bbp.epfl.ch/ontologies/core/bmo/HumanNeocortexMType", "https://neuroshapes.org/PyramidalNeuron", "https://bbp.epfl.ch/ontologies/core/bmo/NeuronMorphologicalType", "https://neuroshapes.org/MType", "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType"], "notation": "L2_MC", "atlasRelease": {"@id": "https://bbp.epfl.ch/neurosciencegraph/data/brainatlasrelease/c96c71a8-4c0d-4bc1-8a1a-141d9ed6693d", "@type": "BrainAtlasRelease", "_rev": 45}}
 ],
 "label": "Cell Types Ontology"
-}
\ No newline at end of file
+}
diff --git a/swarm_copy_tests/test_cell_types.py b/swarm_copy_tests/test_cell_types.py
new file mode 100644
index 0000000..aabd7e5
--- /dev/null
+++ b/swarm_copy_tests/test_cell_types.py
@@ -0,0 +1,191 @@
+"""Test cell types meta functions."""
+import logging
+from pathlib import Path
+
+import pytest
+
+from swarm_copy.cell_types import CellTypesMeta, get_celltypes_descendants
+
+CELL_TYPES_FILE = Path(__file__).parent / "data" / "kg_cell_types_hierarchy_test.json"
+
+
+@pytest.mark.parametrize(
+    "cell_type_id,expected_descendants",
+    [
+        (
+            "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType",
+            {
+                "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/L23_PTPC",
+                "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+                "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/GCL_GC",
+                "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType",
+            },
+        ),
+        (
+            "https://bbp.epfl.ch/ontologies/core/bmo/NeuronElectricalType",
+            {
+                "https://bbp.epfl.ch/ontologies/core/bmo/NeuronElectricalType",
+                "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+            },
+        ),
+        (
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+            {
+                "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+            },
+        ),
+    ],
+)
+def test_get_celltypes_descendants(cell_type_id, expected_descendants, tmp_path):
+    """Check descendants resolved from a saved CellTypesMeta config file."""
+    cell_types_meta = CellTypesMeta.from_json(CELL_TYPES_FILE)
+    save_file = tmp_path / "tmp_config_cell_types_meta.json"
+    cell_types_meta.save_config(save_file)
+
+    descendants = get_celltypes_descendants(cell_type_id, json_path=save_file)
+    assert expected_descendants == descendants
+
+
+class TestCellTypesMeta:
+    """Tests for building, querying and persisting CellTypesMeta."""
+    def test_from_json(self):
+        """Check names and descendants parsed from the test hierarchy file."""
+        ct_meta = CellTypesMeta.from_json(CELL_TYPES_FILE)
+        assert isinstance(ct_meta.name_, dict)
+        assert isinstance(ct_meta.descendants_ids, dict)
+
+        expected_names = {
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint": "cACint",
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/GCL_GC": "GCL_GC",
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/L23_PTPC": (
+                "L23_PTPC"
+            ),
+        }
+
+        assert ct_meta.name_ == expected_names
+        assert ct_meta.descendants_ids[
+            "https://bbp.epfl.ch/ontologies/core/mtypes/HippocampusMType"
+        ] == {"http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/GCL_GC"}
+        assert ct_meta.descendants_ids[
+            "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType"
+        ] == {
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/L23_PTPC",
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+            "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/GCL_GC",
+        }
+
+    def test_from_dict(self):
+        """Check that descendants are propagated transitively through subClassOf."""
+        test_dict = {
+            "defines": [
+                {
+                    "@id": "id1",
+                    "label": "cell1",
+                    "subClassOf": []
+                },
+                {
+                    "@id": "id2",
+                    "label": "cell2",
+                    "subClassOf": ["id1"]
+                },
+                {
+                    "@id": "id3",
+                    "subClassOf": ["id2"]
+                }
+            ]
+        }
+        cell_meta = CellTypesMeta.from_dict(test_dict)
+        assert isinstance(cell_meta, CellTypesMeta)
+        assert cell_meta.name_ == {"id1": "cell1", "id2": "cell2", "id3": None}
+        assert cell_meta.descendants_ids == {"id1": {"id2", "id3"}, "id2": {"id3"}}
+
+    def test_from_dict_missing_label(self):
+        """Check that entries without a label get a None name."""
+        test_dict = {
+            "defines": [
+                {
+                    "@id": "id1",
+                    "subClassOf": []
+                },
+                {
+                    "@id": "id2",
+                    "subClassOf": ["id1"]
+                }
+            ]
+        }
+        cell_meta = CellTypesMeta.from_dict(test_dict)
+        assert cell_meta.name_ == {"id1": None, "id2": None}
+        assert cell_meta.descendants_ids == {"id1": {"id2"}}
+
+    def test_from_dict_missing_subClassOf(self):
+        """Check that entries without subClassOf are still registered."""
+        test_dict = {
+            "defines": [
+                {
+                    "@id": "id1",
+                    "label": "cell1",
+                },
+                {
+                    "@id": "id2",
+                    "label": "cell2",
+                    "subClassOf": ["id1"]
+                }
+            ]
+        }
+        cell_meta = CellTypesMeta.from_dict(test_dict)
+        assert cell_meta.name_ == {"id1": "cell1", "id2": "cell2"}
+        assert cell_meta.descendants_ids == {"id1": {"id2"}}
+
+    @pytest.mark.parametrize(
+        "cell_type_id,expected_descendants",
+        [
+            (
+                "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType",
+                {
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/L23_PTPC",
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/GCL_GC",
+                    "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType",
+                },
+            ),
+            (
+                "https://bbp.epfl.ch/ontologies/core/bmo/NeuronElectricalType",
+                {
+                    "https://bbp.epfl.ch/ontologies/core/bmo/NeuronElectricalType",
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+                },
+            ),
+            (
+                [
+                    "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType",
+                    "https://bbp.epfl.ch/ontologies/core/bmo/NeuronElectricalType",
+                ],
+                {
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/L23_PTPC",
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes/GCL_GC",
+                    "https://bbp.epfl.ch/ontologies/core/bmo/BrainCellType",
+                    "https://bbp.epfl.ch/ontologies/core/bmo/NeuronElectricalType",
+                },
+            ),
+            (
+                "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+                {
+                    "http://bbp.epfl.ch/neurosciencegraph/ontologies/etypes/cACint",
+                },
+            ),
+        ],
+    )
+    def test_descendants(self, cell_type_id, expected_descendants):
+        """Check descendants for a single id and for a list of ids."""
+        ct_meta = CellTypesMeta.from_json(CELL_TYPES_FILE)
+        assert ct_meta.descendants(cell_type_id) == expected_descendants
+
+    def test_load_and_save_config(self, tmp_path):
+        """Check that a save/load round trip preserves names and descendants."""
+        ct_meta = CellTypesMeta.from_json(CELL_TYPES_FILE)
+        file_path = tmp_path / "ct_meta_tmp.json"
+        ct_meta.save_config(file_path)
+        ct_meta2 = CellTypesMeta.load_config(file_path)
+        assert ct_meta.name_ == ct_meta2.name_
+        assert ct_meta.descendants_ids == ct_meta2.descendants_ids
diff --git a/swarm_copy_tests/test_resolving.py b/swarm_copy_tests/test_resolving.py
new file mode 100644
index 0000000..1378416
--- /dev/null
+++ b/swarm_copy_tests/test_resolving.py
@@ -0,0 +1,329 @@
+"""Test resolving functions."""
+
+import pytest
+from httpx import AsyncClient
+
+from swarm_copy.resolving import (
+    es_resolve,
+    escape_punctuation,
+    resolve_query,
+    sparql_exact_resolve,
+    sparql_fuzzy_resolve,
+)
+
+
+@pytest.mark.asyncio
+async def test_sparql_exact_resolve(httpx_mock, get_resolve_query_output):
+    """Check exact SPARQL resolving for a brain region and a cell type."""
+    brain_region = "Thalamus"
+    url = "http://fakeurl.com"
+    mocked_response = get_resolve_query_output[0]
+    httpx_mock.add_response(
+        url=url,
+        json=mocked_response,
+    )
+    response = await sparql_exact_resolve(
+        query=brain_region,
+        resource_type="nsg:BrainRegion",
+        sparql_view_url=url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+    )
+    assert response == [
+        {
+            "label": "Thalamus",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/549",
+        }
+    ]
+
+    httpx_mock.reset()
+
+    mtype = "Interneuron"
+    mocked_response = get_resolve_query_output[1]
+    httpx_mock.add_response(
+        url=url,
+        json=mocked_response,
+    )
+    response = await sparql_exact_resolve(
+        query=mtype,
+        resource_type="bmo:BrainCellType",
+        sparql_view_url=url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+    )
+    assert response == [
+        {"label": "Interneuron", "id": "https://neuroshapes.org/Interneuron"}
+    ]
+
+
+@pytest.mark.asyncio
+async def test_sparql_fuzzy_resolve(httpx_mock, get_resolve_query_output):
+    """Check fuzzy SPARQL resolving for a brain region and a cell type."""
+    brain_region = "Field"
+    url = "http://fakeurl.com"
+    mocked_response = get_resolve_query_output[2]
+    httpx_mock.add_response(
+        url=url,
+        json=mocked_response,
+    )
+    response = await sparql_fuzzy_resolve(
+        query=brain_region,
+        resource_type="nsg:BrainRegion",
+        sparql_view_url=url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+    )
+    assert response == [
+        {
+            "label": "Field CA1",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/382",
+        },
+        {
+            "label": "Field CA2",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/423",
+        },
+        {
+            "label": "Field CA3",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/463",
+        },
+    ]
+    httpx_mock.reset()
+
+    mtype = "Interneu"
+    mocked_response = get_resolve_query_output[3]
+    httpx_mock.add_response(
+        url=url,
+        json=mocked_response,
+    )
+    response = await sparql_fuzzy_resolve(
+        query=mtype,
+        resource_type="bmo:BrainCellType",
+        sparql_view_url=url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+    )
+    assert response == [
+        {"label": "Interneuron", "id": "https://neuroshapes.org/Interneuron"},
+        {
+            "label": "Hippocampus CA3 Oriens Interneuron",
+            "id": "http://uri.interlex.org/base/ilx_0105044",
+        },
+        {
+            "label": "Spinal Cord Ventral Horn Interneuron IA",
+            "id": "http://uri.interlex.org/base/ilx_0110929",
+        },
+    ]
+
+
+@pytest.mark.asyncio
+async def test_es_resolve(httpx_mock, get_resolve_query_output):
+    """Check ES resolving for a brain region and a cell type."""
+    brain_region = "Auditory Cortex"
+    mocked_response = get_resolve_query_output[4]
+    httpx_mock.add_response(
+        url="http://goodurl.com",
+        json=mocked_response,
+    )
+    response = await es_resolve(
+        query=brain_region,
+        resource_type="nsg:BrainRegion",
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+        es_view_url="http://goodurl.com",
+    )
+    assert response == [
+        {
+            "label": "Cerebral cortex",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/688",
+        },
+        {
+            "label": "Cerebellar cortex",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/528",
+        },
+        {
+            "label": "Frontal pole, cerebral cortex",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/184",
+        },
+    ]
+    httpx_mock.reset()
+
+    mtype = "Ventral neuron"
+    mocked_response = get_resolve_query_output[5]
+    httpx_mock.add_response(
+        url="http://goodurl.com",
+        json=mocked_response,
+    )
+    response = await es_resolve(
+        query=mtype,
+        resource_type="bmo:BrainCellType",
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+        es_view_url="http://goodurl.com",
+    )
+    assert response == [
+        {
+            "label": "Ventral Tegmental Area Dopamine Neuron",
+            "id": "http://uri.interlex.org/base/ilx_0112352",
+        },
+        {
+            "label": "Spinal Cord Ventral Horn Motor Neuron Gamma",
+            "id": "http://uri.interlex.org/base/ilx_0110943",
+        },
+        {
+            "label": "Hypoglossal Nucleus Motor Neuron",
+            "id": "http://uri.interlex.org/base/ilx_0105169",
+        },
+    ]
+
+
+@pytest.mark.asyncio
+async def test_resolve_query(httpx_mock, get_resolve_query_output):
+    """Check resolve_query when the fuzzy, exact or ES lookup provides the match."""
+    url = "http://terribleurl.com"
+    class_view_url = "http://somewhatokurl.com"
+    # Mock exact match to fail
+    httpx_mock.add_response(
+        url=url,
+        json={
+            "head": {"vars": ["subject", "predicate", "object", "context"]},
+            "results": {"bindings": []},
+        },
+    )
+
+    # Hit fuzzy match
+    httpx_mock.add_response(
+        url=url,
+        json=get_resolve_query_output[2],
+    )
+
+    # Hit ES match
+    httpx_mock.add_response(
+        url=class_view_url,
+        json=get_resolve_query_output[4],
+    )
+    response = await resolve_query(
+        query="Field",
+        resource_type="nsg:BrainRegion",
+        sparql_view_url=url,
+        es_view_url=class_view_url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+    )
+    assert response == [
+        {
+            "label": "Field CA1",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/382",
+        },
+        {
+            "label": "Field CA2",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/423",
+        },
+        {
+            "label": "Field CA3",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/463",
+        },
+    ]
+    httpx_mock.reset()
+
+    # Exact match returns the Thalamus entry
+    httpx_mock.add_response(url=url, json=get_resolve_query_output[0])
+
+    # Fuzzy match returns no bindings
+    httpx_mock.add_response(
+        url=url,
+        json={
+            "head": {"vars": ["subject", "predicate", "object", "context"]},
+            "results": {"bindings": []},
+        },
+    )
+
+    # ES match returns no hits
+    httpx_mock.add_response(url=class_view_url, json={"hits": {"hits": []}})
+
+    response = await resolve_query(
+        query="Thalamus",
+        resource_type="nsg:BrainRegion",
+        sparql_view_url=url,
+        es_view_url=class_view_url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+    )
+    assert response == [
+        {
+            "label": "Thalamus",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/549",
+        }
+    ]
+    httpx_mock.reset()
+    # Exact match returns no bindings
+    httpx_mock.add_response(
+        url=url,
+        json={
+            "head": {"vars": ["subject", "predicate", "object", "context"]},
+            "results": {"bindings": []},
+        },
+    )
+
+    # Fuzzy match returns no bindings
+    httpx_mock.add_response(
+        url=url,
+        json={
+            "head": {"vars": ["subject", "predicate", "object", "context"]},
+            "results": {"bindings": []},
+        },
+    )
+
+    # Hit ES match
+    httpx_mock.add_response(
+        url=class_view_url,
+        json=get_resolve_query_output[4],
+    )
+    response = await resolve_query(
+        query="Auditory Cortex",
+        resource_type="nsg:BrainRegion",
+        sparql_view_url=url,
+        es_view_url=class_view_url,
+        token="greattokenpleasedontexpire",
+        httpx_client=AsyncClient(),
+        search_size=3,
+    )
+    assert response == [
+        {
+            "label": "Cerebral cortex",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/688",
+        },
+        {
+            "label": "Cerebellar cortex",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/528",
+        },
+        {
+            "label": "Frontal pole, cerebral cortex",
+            "id": "http://api.brain-map.org/api/v2/data/Structure/184",
+        },
+    ]
+
+
+@pytest.mark.parametrize(
+    "before,after",
+    [
+        ("this is a text", "this is a text"),
+        ("this is text with punctuation!", "this is text with punctuation\\\\!"),
+    ],
+)
+def test_escape_punctuation(before, after):
+    """Check that punctuation is escaped and plain text is left untouched."""
+    assert after == escape_punctuation(before)
+
+
+def test_failing_escape_punctuation():
+    """Check that non-string input raises a TypeError."""
+    text = 15  # this is not a string
+    with pytest.raises(TypeError) as e:
+        escape_punctuation(text)
+    assert e.value.args[0] == "Only accepting strings."
diff --git a/swarm_copy_tests/test_utils.py b/swarm_copy_tests/test_utils.py
new file mode 100644
index 0000000..43579d8
--- /dev/null
+++ b/swarm_copy_tests/test_utils.py
@@ -0,0 +1,526 @@
+"""Test utility functions."""
+
+import json
+from pathlib import Path
+
+import pytest
+from httpx import AsyncClient
+
+from swarm_copy.schemas import KGMetadata
+from swarm_copy.utils import (
+    RegionMeta,
+    get_descendants_id,
+    get_file_from_KG,
+    get_kg_data,
+    is_lnmc,
+    merge_chunk,
+    merge_fields,
+)
+
+
+def test_merge_fields_str():
+    """Check string concatenation and the KeyError on keys missing in target."""
+    target = {"key_1": "abc", "key_2": ""}
+    source = {"key_1": "def"}
+    merge_fields(target, source)
+    assert target == {"key_1": "abcdef", "key_2": ""}
+
+    source = {"key_1": "", "key_2": ""}
+    target = {"key_1": "value_1"}
+    with pytest.raises(KeyError):
+        merge_fields(target, source)
+
+
+def test_merge_fields_dict():
+    """Check that string values in nested dictionaries are merged too."""
+    target = {"key_1": "abc", "key_2": {"sub_key_1": "", "sub_key_2": "abc"}}
+    source = {"key_1": "def", "key_2": {"sub_key_1": "hello", "sub_key_2": "cba"}}
+    merge_fields(target, source)
+    assert target == {
+        "key_1": "abcdef",
+        "key_2": {"sub_key_1": "hello", "sub_key_2": "abccba"},
+    }
+
+
+def test_merge_chunk():
+    """Check that a delta is merged into the message, including tool calls."""
+    message = {
+        "content": "",
+        "sender": "test agent",
+        "role": "assistant",
+        "function_call": None,
+        "tool_calls": [
+            {
+                "function": {"arguments": "", "name": ""},
+                "id": "",
+                "type": "",
+            }
+        ],
+    }
+    delta = {
+        "content": "Great content",
+        "function_call": None,
+        "refusal": None,
+        "role": "assistant",
+        "tool_calls": [
+            {
+                "index": 0,
+                "id": "call_NDiPAjDW4oLef44xIptVSAZC",
+                "function": {"arguments": "Thalamus", "name": "resolve-entities-tool"},
+                "type": "function",
+            }
+        ],
+    }
+    merge_chunk(message, delta)
+    assert message == {
+        "content": "Great content",
+        "sender": "test agent",
+        "role": "assistant",
+        "function_call": None,
+        "tool_calls": [
+            {
+                "function": {"arguments": "Thalamus", "name": "resolve-entities-tool"},
+                "id": "call_NDiPAjDW4oLef44xIptVSAZC",
+                "type": "function",
+            }
+        ],
+    }
+
+
+@pytest.mark.parametrize(
+    "brain_region_id,expected_descendants",
+    [
+        ("brain-region-id/68", {"brain-region-id/68"}),
+        (
+            "another-brain-region-id/985",
+            {
+                "another-brain-region-id/320",
+                "another-brain-region-id/648",
+                "another-brain-region-id/844",
+                "another-brain-region-id/882",
+                "another-brain-region-id/943",
+                "another-brain-region-id/985",
+                "another-brain-region-id/3718675619",
+                "another-brain-region-id/1758306548",
+            },
+        ),
+        (
+            "another-brain-region-id/369",
+            {
+                "another-brain-region-id/450",
+                "another-brain-region-id/369",
+                "another-brain-region-id/1026",
+                "another-brain-region-id/854",
+                "another-brain-region-id/577",
+                "another-brain-region-id/625",
+                "another-brain-region-id/945",
+                "another-brain-region-id/1890964946",
+                "another-brain-region-id/3693772975",
+            },
+        ),
+        (
+            "another-brain-region-id/178",
+            {
+                "another-brain-region-id/316",
+                "another-brain-region-id/178",
+                "another-brain-region-id/300",
+                "another-brain-region-id/1043765183",
+            },
+        ),
+        ("brain-region-id/not-a-int", {"brain-region-id/not-a-int"}),
+    ],
+)
+def test_get_descendants(brain_region_id, expected_descendants, brain_region_json_path):
+    """Check the descendants returned for several brain region ids."""
+    descendants = get_descendants_id(brain_region_id, json_path=brain_region_json_path)
+    assert expected_descendants == descendants
+
+
+def test_get_descendants_errors(brain_region_json_path):
+    """Check that an unknown brain region id raises a KeyError."""
+    brain_region_id = "does-not-exits/1111111111"
+    with pytest.raises(KeyError):
+        get_descendants_id(brain_region_id, json_path=brain_region_json_path)
+
+
+def test_RegionMeta_from_KG_dict():
+    """Check names, parents and children built from the KG hierarchy."""
+    with open(
+        Path(__file__).parent / "data" / "KG_brain_regions_hierarchy_test.json"
+    ) as fh:
+        KG_hierarchy = json.load(fh)
+
+    RegionMeta_test = RegionMeta.from_KG_dict(KG_hierarchy)
+
+    # check names.
+    assert RegionMeta_test.name_[1] == "Tuberomammillary nucleus, ventral part"
+    assert (
+        RegionMeta_test.name_[2]
+        == "Superior colliculus, motor related, intermediate gray layer"
+    )
+    assert RegionMeta_test.name_[3] == "Primary Motor Cortex"
+
+    # check parents / children.
+    assert RegionMeta_test.parent_id[1] == 2
+    assert RegionMeta_test.parent_id[2] == 0
+    assert RegionMeta_test.parent_id[3] == 2
+    assert RegionMeta_test.children_ids[1] == []
+    assert RegionMeta_test.children_ids[2] == [1, 3]
+    assert RegionMeta_test.children_ids[3] == []
+
+
+def test_RegionMeta_save_load(tmp_path: Path):
+    """Check that a save/load round trip preserves the hierarchy."""
+    # load fake file from KG
+    with open(
+        Path(__file__).parent / "data" / "KG_brain_regions_hierarchy_test.json"
+    ) as fh:
+        KG_hierarchy = json.load(fh)
+
+    RegionMeta_test = RegionMeta.from_KG_dict(KG_hierarchy)
+
+    # save, then assert on the reloaded instance (not the one kept in memory).
+    json_file = tmp_path / "test.json"
+    RegionMeta_test.save_config(json_file)
+    RegionMeta_loaded = RegionMeta.load_config(json_file)
+
+    # check names.
+    assert RegionMeta_loaded.name_[1] == "Tuberomammillary nucleus, ventral part"
+    assert (
+        RegionMeta_loaded.name_[2]
+        == "Superior colliculus, motor related, intermediate gray layer"
+    )
+    assert RegionMeta_loaded.name_[3] == "Primary Motor Cortex"
+
+    # check parents / children.
+    assert RegionMeta_loaded.parent_id[1] == 2
+    assert RegionMeta_loaded.parent_id[2] == 0
+    assert RegionMeta_loaded.parent_id[3] == 2
+    assert RegionMeta_loaded.children_ids[1] == []
+    assert RegionMeta_loaded.children_ids[2] == [1, 3]
+    assert RegionMeta_loaded.children_ids[3] == []
+
+
+def test_RegionMeta_load_real_file(brain_region_json_path):
+    """Check a few known entries of the brain region file fixture."""
+    RegionMeta_test = RegionMeta.load_config(brain_region_json_path)
+
+    # check root.
+    assert RegionMeta_test.root_id == 997
+    assert RegionMeta_test.parent_id[997] == 0
+
+    # check some names / st_levels.
+    assert RegionMeta_test.name_[123] == "Koelliker-Fuse subnucleus"
+    assert RegionMeta_test.name_[78] == "middle cerebellar peduncle"
+    assert RegionMeta_test.st_level[55] == 10
+
+    # check some random parents / children.
+    assert RegionMeta_test.parent_id[12] == 165
+    assert RegionMeta_test.parent_id[78] == 752
+    assert RegionMeta_test.parent_id[700] == 88
+    assert RegionMeta_test.parent_id[900] == 840
+    assert RegionMeta_test.children_ids[12] == []
+    assert RegionMeta_test.children_ids[23] == []
+    assert RegionMeta_test.children_ids[670] == [2260827822, 3562104832]
+    assert RegionMeta_test.children_ids[31] == [1053, 179, 227, 39, 48, 572, 739]
+
+
+@pytest.mark.asyncio
+async def test_get_file_from_KG_errors(httpx_mock):
+    """Check the error raised for each failure mode of get_file_from_KG."""
+    file_url = "http://fake_url.com"
+    file_name = "fake_file"
+    view_url = "http://fake_url_view.com"
+    token = "fake_token"
+    client = AsyncClient()
+
+    # first response from KG is not a json
+    httpx_mock.add_response(url=view_url, text="not a json")
+
+    with pytest.raises(ValueError) as not_json:
+        await get_file_from_KG(
+            file_url=file_url,
+            file_name=file_name,
+            view_url=view_url,
+            token=token,
+            httpx_client=client,
+        )
+    assert not_json.value.args[0] == "url_response did not return a Json."
+
+    # no file url found in the KG
+    httpx_mock.add_response(
+        url=view_url, json={"head": {"vars": ["file_url"]}, "results": {"bindings": []}}
+    )
+
+    with pytest.raises(IndexError) as not_found:
+        await get_file_from_KG(
+            file_url=file_url,
+            file_name=file_name,
+            view_url=view_url,
+            token=token,
+            httpx_client=client,
+        )
+    assert not_found.value.args[0] == "No file url was found."
+
+    httpx_mock.reset()
+    # no file found corresponding to file_url
+    test_file_url = "http://test_url.com"
+    json_response = {
+        "head": {"vars": ["file_url"]},
+        "results": {
+            "bindings": [{"file_url": {"type": "uri", "value": test_file_url}}]
+        },
+    }
+
+    httpx_mock.add_response(url=view_url, json=json_response)
+    httpx_mock.add_response(url=test_file_url, status_code=401)
+
+    with pytest.raises(ValueError) as not_found:
+        await get_file_from_KG(
+            file_url=file_url,
+            file_name=file_name,
+            view_url=view_url,
+            token=token,
+            httpx_client=client,
+        )
+    assert not_found.value.args[0] == "Could not find the file, status code : 401"
+
+    # Problem finding the file url
+    httpx_mock.add_response(url=view_url, status_code=401)
+
+    with pytest.raises(ValueError) as not_found:
+        await get_file_from_KG(
+            file_url=file_url,
+            file_name=file_name,
+            view_url=view_url,
+            token=token,
+            httpx_client=client,
+        )
+    assert not_found.value.args[0] == "Could not find the file url, status code : 401"
+
+
+@pytest.mark.asyncio
+async def test_get_file_from_KG(httpx_mock):
+    """Check that the file content is returned when both requests succeed."""
+    file_url = "http://fake_url"
+    file_name = "fake_file"
+    view_url = "http://fake_url"
+    token = "fake_token"
+    test_file_url = "http://test_url"
+    client = AsyncClient()
+
+    json_response_url = {
+        "head": {"vars": ["file_url"]},
+        "results": {
+            "bindings": [{"file_url": {"type": "uri", "value": test_file_url}}]
+        },
+    }
+    with open(
+        Path(__file__).parent / "data" / "KG_brain_regions_hierarchy_test.json"
+    ) as fh:
+        json_response_file = json.load(fh)
+
+    httpx_mock.add_response(url=view_url, json=json_response_url)
+    httpx_mock.add_response(url=test_file_url, json=json_response_file)
+
+    response = await get_file_from_KG(
+        file_url=file_url,
+        file_name=file_name,
+        view_url=view_url,
+        token=token,
+        httpx_client=client,
+    )
+
+    assert response == json_response_file
+
+
+@pytest.mark.asyncio
+async def test_get_kg_data_errors(httpx_mock):
+    """Check the error raised for each failure mode of get_kg_data."""
+    url = "http://fake_url"
+    token = "fake_token"
+    client = AsyncClient()
+
+    # First failure: invalid object_id
+    with pytest.raises(ValueError) as invalid_object_id:
+        await get_kg_data(
+            object_id="invalid_object_id",
+            httpx_client=client,
+            url=url,
+            token=token,
+            preferred_format="preferred_format",
+        )
+
+    assert (
+        invalid_object_id.value.args[0]
+        == "The provided ID (invalid_object_id) is not valid."
+    )
+
+    # Second failure: Number of hits = 0
+    httpx_mock.add_response(url=url, json={"hits": {"hits": []}})
+
+    with pytest.raises(ValueError) as no_hits:
+        await get_kg_data(
+            object_id="https://object-id",
+            httpx_client=client,
+            url=url,
+            token=token,
+            preferred_format="preferred_format",
+        )
+
+    assert (
+        no_hits.value.args[0]
+        == "We did not find the object https://object-id you are asking"
+    )
+
+    # Third failure: Wrong object id
+    httpx_mock.add_response(
+        url=url, json={"hits": {"hits": [{"_source": {"@id": "wrong-object-id"}}]}}
+    )
+
+    with pytest.raises(ValueError) as wrong_object_id:
+        await get_kg_data(
+            object_id="https://object-id",
+            httpx_client=client,
+            url=url,
+            token=token,
+            preferred_format="preferred_format",
+        )
+
+    assert (
+        wrong_object_id.value.args[0]
+        == "We did not find the object https://object-id you are asking"
+    )
+
+
+@pytest.mark.httpx_mock(can_send_already_matched_responses=True)
+@pytest.mark.asyncio
+async def test_get_kg_data(httpx_mock):
+    """Check content and metadata with and without the preferred format."""
+    url = "http://fake_url"
+    token = "fake_token"
+    client = AsyncClient()
+    preferred_format = "txt"
+    object_id = "https://object-id"
+
+    response_json = {
+        "hits": {
+            "hits": [
+                {
+                    "_source": {
+                        "@id": object_id,
+                        "distribution": [
+                            {
+                                "encodingFormat": f"application/{preferred_format}",
+                                "contentUrl": "http://content-url-txt",
+                            }
+                        ],
+                        "contributors": [
+                            {
+                                "@id": "https://www.grid.ac/institutes/grid.5333.6",
+                            }
+                        ],
+                        "brainRegion": {
+                            "@id": "http://api.brain-map.org/api/v2/data/Structure/252",
+                            "idLabel": (
+                                "http://api.brain-map.org/api/v2/data/Structure/252|Dorsal"
+                                " auditory area, layer 5"
+                            ),
+                            "identifier": (
+                                "http://api.brain-map.org/api/v2/data/Structure/252"
+                            ),
+                            "label": "Dorsal auditory area, layer 5",
+                        },
+                    }
+                }
+            ]
+        }
+    }
+    httpx_mock.add_response(
+        url=url,
+        json=response_json,
+    )
+
+    httpx_mock.add_response(
+        url="http://content-url-txt",
+        content=b"this is the txt content",
+    )
+
+    # Response with preferred format
+    object_content, metadata = await get_kg_data(
+        object_id="https://object-id",
+        httpx_client=client,
+        url=url,
+        token=token,
+        preferred_format=preferred_format,
+    )
+
+    assert isinstance(object_content, bytes)
+    assert isinstance(metadata, KGMetadata)
+    assert metadata.file_extension == "txt"
+    assert metadata.is_lnmc is True
+
+    # Response without preferred format
+    object_content, metadata = await get_kg_data(
+        object_id="https://object-id",
+        httpx_client=client,
+        url=url,
+        token=token,
+        preferred_format="no_preferred_format_available",
+    )
+
+    assert isinstance(object_content, bytes)
+    assert isinstance(metadata, KGMetadata)
+    assert metadata.file_extension == "txt"
+    assert metadata.is_lnmc is True
+
+
+@pytest.mark.parametrize(
+    "contributors,expected_bool",
+    [
+        (
+            [
+                {
+                    "@id": "https://www.grid.ac/institutes/grid.5333.6",
+                    "@type": ["http://schema.org/Organization"],
+                    "label": "École Polytechnique Fédérale de Lausanne",
+                }
+            ],
+            True,
+        ),
+        (
+            [
+                {
+                    "@id": "https://bbp.epfl.ch/nexus/v1/realms/bbp/users/gevaert",
+                    "@type": ["http://schema.org/Person"],
+                    "affiliation": "École Polytechnique Fédérale de Lausanne",
+                }
+            ],
+            True,
+        ),
+        (
+            [
+                {},
+                {
+                    "@id": "https://bbp.epfl.ch/nexus/v1/realms/bbp/users/kanari",
+                    "@type": ["http://schema.org/Person"],
+                    "affiliation": "École Polytechnique Fédérale de Lausanne",
+                },
+            ],
+            True,
+        ),
+        (
+            [
+                {
+                    "@id": "wrong-id",
+                    "@type": ["http://schema.org/Person"],
+                    "affiliation": "Another school",
+                }
+            ],
+            False,
+        ),
+    ],
+)
+def test_is_lnmc(contributors, expected_bool):
+    """Check is_lnmc against several contributor lists."""
+    assert is_lnmc(contributors) is expected_bool