From d4655fe1e7d56d6cbe4d3c08829f51319a7bcffc Mon Sep 17 00:00:00 2001 From: domna Date: Thu, 18 Apr 2024 15:33:06 +0200 Subject: [PATCH 01/20] Add required under optional in group --- tests/data/dataconverter/NXtest.nxdl.xml | 8 ++++++++ tests/dataconverter/test_helpers.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/data/dataconverter/NXtest.nxdl.xml b/tests/data/dataconverter/NXtest.nxdl.xml index f4aa0aab4..79e0d0c1d 100644 --- a/tests/data/dataconverter/NXtest.nxdl.xml +++ b/tests/data/dataconverter/NXtest.nxdl.xml @@ -19,6 +19,14 @@ + + + A dummy entry for a required field. + + + A dummy entry for an optional field. + + A dummy entry for a float value. diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 48a67b12e..24bfd40ca 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -351,6 +351,20 @@ def fixture_filled_test_data(template, tmp_path): ), id="atleast-one-required-child-not-provided-optional-parent", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_child", + 5.0, + ), + ( + "The data entry, /ENTRY[my_entry]/optional_parent/optional_child, has an " + "optional parent, /ENTRY[entry]/optional_parent, with required children set" + ". Either provide no children for /ENTRY[entry]/optional_parent or provide " + "all required ones." + ), + id="required-field-not-provided-in-optional-group", + ), pytest.param( alter_dict( alter_dict( From 175534722b544f0e98bfa659272ed5d6487c2974 Mon Sep 17 00:00:00 2001 From: domna Date: Thu, 18 Apr 2024 15:45:27 +0200 Subject: [PATCH 02/20] rename to field --- tests/dataconverter/test_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 24bfd40ca..0d66100bf 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -354,7 +354,7 @@ def fixture_filled_test_data(template, tmp_path): pytest.param( alter_dict( TEMPLATE, - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_child", + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", 5.0, ), ( From debcf517e5daabbe0522333aa2edaa19e35c1fdb Mon Sep 17 00:00:00 2001 From: domna Date: Thu, 18 Apr 2024 17:09:17 +0200 Subject: [PATCH 03/20] Fix other tests --- pynxtools/dataconverter/readers/example/reader.py | 1 + tests/data/dataconverter/NXtest.nxdl.xml | 8 ++++---- tests/dataconverter/test_helpers.py | 11 ++--------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pynxtools/dataconverter/readers/example/reader.py b/pynxtools/dataconverter/readers/example/reader.py index 76294b3ac..44ff6ace6 100644 --- a/pynxtools/dataconverter/readers/example/reader.py +++ b/pynxtools/dataconverter/readers/example/reader.py @@ -59,6 +59,7 @@ def read( if ( k.startswith("/ENTRY[entry]/required_group") or k == "/ENTRY[entry]/optional_parent/req_group_in_opt_group" + or k.startswith("/ENTRY[entry]/OPTIONAL_group") ): continue diff --git a/tests/data/dataconverter/NXtest.nxdl.xml b/tests/data/dataconverter/NXtest.nxdl.xml index 79e0d0c1d..d9a049925 100644 --- a/tests/data/dataconverter/NXtest.nxdl.xml +++ b/tests/data/dataconverter/NXtest.nxdl.xml @@ -20,11 +20,11 @@ - - A dummy entry for a required field. + + A dummy entry to test optional parent check for required child. - - A dummy entry for an optional field. + + A dummy entry to test optional parent check for required child. diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 0d66100bf..dea2de986 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -353,16 +353,9 @@ def fixture_filled_test_data(template, tmp_path): ), pytest.param( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", - 5.0, - ), - ( - "The data entry, /ENTRY[my_entry]/optional_parent/optional_child, has an " - "optional parent, /ENTRY[entry]/optional_parent, with required children set" - ". Either provide no children for /ENTRY[entry]/optional_parent or provide " - "all required ones." + TEMPLATE, "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", 1 ), + (""), id="required-field-not-provided-in-optional-group", ), pytest.param( From d632535a5484c1ecf8e5fe47b2f3e56fde1d9607 Mon Sep 17 00:00:00 2001 From: domna Date: Thu, 18 Apr 2024 17:16:19 +0200 Subject: [PATCH 04/20] Check required field provided --- tests/dataconverter/test_helpers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index dea2de986..43e42d0d0 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -356,7 +356,14 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE, "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", 1 ), (""), - id="required-field-not-provided-in-optional-group", + id="required-field-not-provided-in-variadic-optional-group", + ), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field", 1 + ), + (""), + id="required-field-provided-in-variadic-optional-group", ), pytest.param( alter_dict( @@ -422,6 +429,7 @@ def test_validate_data_dict( "int-instead-of-chars", "link-dict-instead-of-bool", "opt-group-completely-removed", + "required-field-provided-in-variadic-optional-group", ): helpers.validate_data_dict(template, data_dict, nxdl_root) # Missing required fields caught by logger with warning From b4686fc80bfbad38fce21ed03c1b6227fa7f5e1d Mon Sep 17 00:00:00 2001 From: domna Date: Fri, 19 Apr 2024 16:35:43 +0200 Subject: [PATCH 05/20] Fix all_required_children_are_set --- pynxtools/dataconverter/helpers.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 2dfbb6c43..7373921f6 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -432,17 +432,17 @@ def is_node_required(nxdl_key, nxdl_root): def all_required_children_are_set(optional_parent_path, data, nxdl_root): """Walks over optional parent's children and makes sure all required ones are set""" - optional_parent_path = convert_data_converter_dict_to_nxdl_path( - optional_parent_path - ) for key in data: if key in data["lone_groups"]: continue nxdl_key = convert_data_converter_dict_to_nxdl_path(key) + name = nxdl_key[nxdl_key.rfind("/") + 1 :] + renamed_path = f"{optional_parent_path}/{name}" if ( - nxdl_key[0 : nxdl_key.rfind("/")] == optional_parent_path + nxdl_key[: nxdl_key.rfind("/")] + == convert_data_converter_dict_to_nxdl_path(optional_parent_path) and is_node_required(nxdl_key, nxdl_root) - and data[path_in_data_dict(nxdl_key, tuple(data.keys()))[1]] is None + and (renamed_path not in data or data[renamed_path] is None) ): return False @@ -460,12 +460,19 @@ def is_nxdl_path_a_child(nxdl_path: str, parent: str): def check_optionality_based_on_parent_group(path, nxdl_path, nxdl_root, data, template): """Checks whether field is part of an optional parent and then confirms its optionality""" + + def trim_path_to(parent: str, path: str): + count = len(parent.split("/")) + return "/".join(path.split("/")[:count]) + for optional_parent in template["optional_parents"]: optional_parent_nxdl = convert_data_converter_dict_to_nxdl_path(optional_parent) if is_nxdl_path_a_child( nxdl_path, optional_parent_nxdl - ) and not all_required_children_are_set(optional_parent, data, nxdl_root): - raise LookupError( + ) and not all_required_children_are_set( + trim_path_to(optional_parent, path), data, nxdl_root + ): + logger.warning( f"The data entry, {path}, has an optional parent, " f"{optional_parent}, with required children set. Either" f" provide no children for {optional_parent} or provide" From 10b1c442d97647bf082d143bfca80d587ecd9118 Mon Sep 17 00:00:00 2001 From: domna Date: Fri, 19 Apr 2024 17:28:00 +0200 Subject: [PATCH 06/20] Fix tests --- tests/dataconverter/test_helpers.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 297b82de1..c2b64af68 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -195,6 +195,7 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = ( "just chars" # pylint: disable=E1126 ) +TEMPLATE["required"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/definition"] = "NXtest" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/program_name"] = "Testing program" # pylint: disable=E1126 @@ -202,6 +203,7 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 ) +TEMPLATE["optional"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field"] = 1 TEMPLATE["optional"]["/ENTRY[my_entry]/required_group/description"] = ( "An example description" ) @@ -352,15 +354,24 @@ def fixture_filled_test_data(template, tmp_path): id="atleast-one-required-child-not-provided-optional-parent", ), pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", 1 + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field", + "required", + ), + ( + "The data entry, /ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field, has an " + "optional parent, /ENTRY[entry]/OPTIONAL_group[optional_group], with required children set" + ". Either provide no children for /ENTRY[entry]/OPTIONAL_group[optional_group] or provide " + "all required ones." ), - (""), id="required-field-not-provided-in-variadic-optional-group", ), pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field", 1 + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", + "required", ), (""), id="required-field-provided-in-variadic-optional-group", @@ -448,6 +459,7 @@ def test_validate_data_dict( elif request.node.callspec.id in ( "wrong-enum-choice", "atleast-one-required-child-not-provided-optional-parent", + "required-field-not-provided-in-variadic-optional-group", ): with caplog.at_level(logging.WARNING): helpers.validate_data_dict(template, data_dict, nxdl_root) From d4dc235b82bc5a885600488953aae4a8aaa52584 Mon Sep 17 00:00:00 2001 From: domna Date: Tue, 23 Apr 2024 18:24:39 +0200 Subject: [PATCH 07/20] Use if checks instead of try..except --- pynxtools/dataconverter/template.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pynxtools/dataconverter/template.py b/pynxtools/dataconverter/template.py index b01a6dd05..fd67b36d9 100644 --- a/pynxtools/dataconverter/template.py +++ b/pynxtools/dataconverter/template.py @@ -149,16 +149,13 @@ def __getitem__(self, k): if k in ("optional_parents", "lone_groups"): return getattr(self, k) if k.startswith("/"): - try: + if k in self.optional: return self.optional[k] - except KeyError: - try: - return self.recommended[k] - except KeyError: - try: - return self.required[k] - except KeyError: - return self.undocumented[k] + if k in self.recommended: + return self.recommended[k] + if k in self.required: + return self.required[k] + return self.undocumented[k] if k in ("required", "optional", "recommended", "undocumented"): return self.get_optionality(k) raise KeyError( From 1c6884829f4b0e595059f97431364b1a069629e5 Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 11:29:27 +0200 Subject: [PATCH 08/20] Add routine to check required fields for repeating groups --- .../ensure_all_required_fields_exist.py | 76 ++++++ pynxtools/dataconverter/helpers.py | 233 ++++++++++++++---- 2 files changed, 260 insertions(+), 49 deletions(-) create mode 100644 pynxtools/dataconverter/ensure_all_required_fields_exist.py diff --git a/pynxtools/dataconverter/ensure_all_required_fields_exist.py b/pynxtools/dataconverter/ensure_all_required_fields_exist.py new file mode 100644 index 000000000..3e2671d58 --- /dev/null +++ b/pynxtools/dataconverter/ensure_all_required_fields_exist.py @@ -0,0 +1,76 @@ +@lru_cache(maxsize=None) +def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> List[str]: + """Checks if there is an accepted variation of path in the dictionary & returns the path.""" + found_keys = [] + for key in data_keys: + if nxdl_path == convert_data_converter_dict_to_nxdl_path(key): + found_keys.append(key) + return found_keys + + +def ensure_all_required_fields_exist(template, data, nxdl_root): + """Checks whether all the required fields are in the returned data object.""" + check_basepaths = set() + for path in template["required"]: + entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) + if entry_name == "@units": + continue + nxdl_path = convert_data_converter_dict_to_nxdl_path(path) + renamed_paths = path_in_data_dict(nxdl_path, tuple(data.keys())) + + if len(renamed_paths) > 1: + check_basepaths.add(get_concept_basepath(nxdl_path)) + continue + + if not renamed_paths: + logger.warning( + f"The data entry corresponding to {path} is required " + f"and hasn't been supplied by the reader.", + ) + continue + + for renamed_path in renamed_paths: + renamed_path = path if renamed_path is None else renamed_path + if path in template["lone_groups"]: + opt_parent = check_for_optional_parent(path, nxdl_root) + if opt_parent != "<>": + if does_group_exist(opt_parent, data) and not does_group_exist( + renamed_path, data + ): + logger.warning( + f"The required group, {path}, hasn't been supplied" + f" while its optional parent, {opt_parent}, is supplied." + ) + continue + if not does_group_exist(renamed_path, data): + logger.warning(f"The required group, {path}, hasn't been supplied.") + continue + continue + if data[renamed_path] is None: + logger.warning( + f"The data entry corresponding to {renamed_path} is required " + f"and hasn't been supplied by the reader.", + ) + + for base_path in check_basepaths: + required_fields = get_required_fields_for(base_path) + paths = get_concept_variations(base_path) + + missing_fields = set() + partially_present_path = set() + for path in paths: + for required_field in required_fields: + if ( + f"{path}/{required_field}" not in data + or data[f"{path}/{required_field}"] is None + ): + missing_fields.add(f"{path}/{required_field}") + + if data[f"{path}/{required_field}"] is not None: + partially_present_path.add(f"{path}") + + for missing_field in missing_fields: + logger.warning( + f"The data entry corresponding to {missing_field} is required " + "and hasn't been supplied by the reader.", + ) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 2cc7230ff..bf8c62c3b 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -22,7 +22,7 @@ import re from datetime import datetime, timezone from functools import lru_cache -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Optional, Set, Tuple, Union import h5py import lxml.etree as ET @@ -30,6 +30,11 @@ from ase.data import chemical_symbols from pynxtools import get_nexus_version, get_nexus_version_hash +from pynxtools.dataconverter.template import Template +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + get_inherited_nodes, + get_nx_namefit, +) from pynxtools.nexus import nexus from pynxtools.nexus.nexus import NxdlAttributeNotFoundError @@ -141,14 +146,17 @@ def generate_template_from_nxdl( return suffix = "" - if "name" in root.attrib: + if "name" in root.attrib and not contains_uppercase(root.attrib["name"]): suffix = root.attrib["name"] - if any(map(str.isupper, suffix)): - suffix = f"{suffix}[{suffix.lower()}]" elif "type" in root.attrib: nexus_class = convert_nexus_to_caps(root.attrib["type"]) - hdf5name = f"[{convert_nexus_to_suggested_name(root.attrib['type'])}]" - suffix = f"{nexus_class}{hdf5name}" + name = root.attrib.get("name") + nx_type = root.attrib.get("type")[2:] # .removeprefix("NX") (python > 3.8) + suffix = ( + f"{name}[{name.lower()}]" + if name is not None + else f"{nexus_class}[{nx_type}]" + ) path = path + "/" + (f"@{suffix}" if tag == "attribute" else suffix) @@ -213,8 +221,17 @@ def convert_nexus_to_caps(nexus_name): return nexus_name[2:].upper() +def contains_uppercase(field_name: Optional[str]) -> bool: + """Helper function to check if a field name contains uppercase characters.""" + if field_name is None: + return False + return any(char.isupper() for char in field_name) + + def convert_nexus_to_suggested_name(nexus_name): """Helper function to suggest a name for a group from its NeXus class.""" + if contains_uppercase(nexus_name): + return nexus_name return nexus_name[2:] @@ -390,13 +407,34 @@ def is_valid_data_field(value, nxdl_type, path): return value +def is_matching_variation(nxdl_path: str, key: str) -> bool: + """ + Checks if the given key is a matching variation of the given NXDL path. + """ + hdf_tokens = [ + g1 + g2 + for (g1, g2) in re.findall( + r"\/[a-zA-Z0-9_]+\[([a-zA-Z0-9_]+)\]|\/([a-zA-Z0-9_]+)", key + ) + ] + nxdl_path_tokens = nxdl_path[1:].split("/") + if len(hdf_tokens) != len(nxdl_path_tokens): + return False + + for file_token, nxdl_token in zip(hdf_tokens, nxdl_path_tokens): + if get_nx_namefit(file_token, nxdl_token) < 0: + return False + return True + + @lru_cache(maxsize=None) -def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> Tuple[bool, str]: +def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> List[str]: """Checks if there is an accepted variation of path in the dictionary & returns the path.""" + found_keys = [] for key in data_keys: if nxdl_path == convert_data_converter_dict_to_nxdl_path(key): - return True, key - return False, None + found_keys.append(key) + return found_keys def check_for_optional_parent(path: str, nxdl_root: ET.Element) -> str: @@ -503,39 +541,115 @@ def does_group_exist(path_to_group, data): return False +def get_concept_basepath(path: str) -> str: + """Get the concept path from the path""" + path_list = path.split("/") + concept_path = [] + for p in path_list: + if re.search(r"[A-Z]", p): + concept_path.append(p) + return "/" + "/".join(concept_path) + + # pylint: disable=W1203 def ensure_all_required_fields_exist(template, data, nxdl_root): """Checks whether all the required fields are in the returned data object.""" + check_basepaths = set() for path in template["required"]: entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) if entry_name == "@units": continue nxdl_path = convert_data_converter_dict_to_nxdl_path(path) - is_path_in_data_dict, renamed_path = path_in_data_dict( - nxdl_path, tuple(data.keys()) - ) + renamed_paths = path_in_data_dict(nxdl_path, tuple(data.keys())) - renamed_path = path if renamed_path is None else renamed_path - if path in template["lone_groups"]: - opt_parent = check_for_optional_parent(path, nxdl_root) - if opt_parent != "<>": - if does_group_exist(opt_parent, data) and not does_group_exist( - renamed_path, data - ): - logger.warning( - f"The required group, {path}, hasn't been supplied" - f" while its optional parent, {opt_parent}, is supplied." - ) - continue - if not does_group_exist(renamed_path, data): - logger.warning(f"The required group, {path}, hasn't been supplied.") - continue + if len(renamed_paths) > 1: + check_basepaths.add(get_concept_basepath(nxdl_path)) continue - if not is_path_in_data_dict or data[renamed_path] is None: + + if not renamed_paths: logger.warning( f"The data entry corresponding to {path} is required " f"and hasn't been supplied by the reader.", ) + continue + + for renamed_path in renamed_paths: + renamed_path = path if renamed_path is None else renamed_path + if path in template["lone_groups"]: + opt_parent = check_for_optional_parent(path, nxdl_root) + if opt_parent != "<>": + if does_group_exist(opt_parent, data) and not does_group_exist( + renamed_path, data + ): + logger.warning( + f"The required group, {path}, hasn't been supplied" + f" while its optional parent, {opt_parent}, is supplied." + ) + continue + if not does_group_exist(renamed_path, data): + logger.warning(f"The required group, {path}, hasn't been supplied.") + continue + continue + if data[renamed_path] is None: + logger.warning( + f"The data entry corresponding to {renamed_path} is required " + f"and hasn't been supplied by the reader.", + ) + + @lru_cache(maxsize=None) + def get_required_fields_from(base_path: str) -> Set[str]: + required_fields = set() + for path in template["required"]: + if ( + get_concept_basepath(convert_data_converter_dict_to_nxdl_path(path)) + == base_path + ): + entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) + if entry_name == "@units": + required_fields.add(f"{path.rsplit('/', 2)[1]}/@units") + continue + required_fields.add( + get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) + ) + + return required_fields + + @lru_cache(maxsize=None) + def get_concept_variations(base_path: str) -> Set[str]: + paths = set() + for path in data: + if ( + get_concept_basepath(convert_data_converter_dict_to_nxdl_path(path)) + == base_path + ): + paths.add(get_concept_basepath(path)) + return paths + + @lru_cache(maxsize=None) + def are_all_entries_none(path: str) -> bool: + concept_path = get_concept_basepath(path) + for key in filter(lambda x: x.startswith(concept_path), data): + if data[key] is not None: + return False + return True + + for base_path in check_basepaths: + missing_fields = set() + for path in get_concept_variations(base_path): + for required_field in get_required_fields_from(base_path): + if ( + f"{path}/{required_field}" not in data + or data[f"{path}/{required_field}"] is None + ): + missing_fields.add(f"{path}/{required_field}") + continue + + for missing_field in missing_fields: + if not are_all_entries_none(missing_field): + logger.warning( + f"The data entry corresponding to {missing_field} is required " + "and hasn't been supplied by the reader.", + ) def try_undocumented(data, nxdl_root: ET.Element): @@ -547,14 +661,9 @@ def try_undocumented(data, nxdl_root: ET.Element): if entry_name == "@units": field_path = path.rsplit("/", 1)[0] - if field_path in data.get_documented() and path in data.undocumented: - field_requiredness = get_required_string( - nexus.get_node_at_nxdl_path( - nxdl_path=convert_data_converter_dict_to_nxdl_path(field_path), - elem=nxdl_root, - ) - ) - data[field_requiredness][path] = data.undocumented[path] + + # Remove units attribute if there is no associated field + if field_path not in data: del data.undocumented[path] continue @@ -564,11 +673,12 @@ def try_undocumented(data, nxdl_root: ET.Element): try: elem = nexus.get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) - data[get_required_string(elem)][path] = data.undocumented[path] + optionality = get_required_string(elem) + data[optionality][path] = data.undocumented[path] del data.undocumented[path] units = f"{path}/@units" if units in data.undocumented: - data[get_required_string(elem)][units] = data.undocumented[units] + data[optionality][units] = data.undocumented[units] del data.undocumented[units] except NxdlAttributeNotFoundError: pass @@ -578,9 +688,9 @@ def validate_data_dict(template, data, nxdl_root: ET.Element): """Checks whether all the required paths from the template are returned in data dict.""" assert nxdl_root is not None, "The NXDL file hasn't been loaded." - # nxdl_path_set helps to skip validation check on the same type of nxdl signiture - # This reduces huge amount of runing time - nxdl_path_to_elm: dict = {} + @lru_cache(maxsize=None) + def get_xml_node(nxdl_path: str) -> ET.Element: + return nexus.get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) # Make sure all required fields exist. ensure_all_required_fields_exist(template, data, nxdl_root) @@ -591,18 +701,43 @@ def validate_data_dict(template, data, nxdl_root: ET.Element): entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) nxdl_path = convert_data_converter_dict_to_nxdl_path(path) - if entry_name == "@units": - continue - if entry_name[0] == "@" and "@" in nxdl_path: index_of_at = nxdl_path.rindex("@") nxdl_path = nxdl_path[0:index_of_at] + nxdl_path[index_of_at + 1 :] - if nxdl_path in nxdl_path_to_elm: - elem = nxdl_path_to_elm[nxdl_path] - else: - elem = nexus.get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) - nxdl_path_to_elm[nxdl_path] = elem + if entry_name == "@units": + elempath = get_inherited_nodes(nxdl_path, None, nxdl_root)[1] + elem = elempath[-2] + field_path = path.rsplit("/", 1)[0] + if ( + field_path not in data.get_documented() + and "units" not in elem.attrib + ): + logger.warning( + "The unit, %s = %s, is being written but has no documentation.", + path, + data[path], + ) + continue + + # TODO: If we want we could also enable unit validation here + # field = nexus.get_node_at_nxdl_path( + # nxdl_path=convert_data_converter_dict_to_nxdl_path( + # # The part below is the backwards compatible version of + # # nxdl_path.removesuffix("/units") + # nxdl_path[:-6] if nxdl_path.endswith("/units") else nxdl_path + # ), + # elem=nxdl_root, + # ) + # nxdl_unit = field.attrib.get("units", "") + # if not is_valid_unit(data[path], nxdl_unit): + # raise ValueError( + # f"Invalid unit in {path}. {data[path]} " + # f"is not in unit category {nxdl_unit}" + # ) + continue + + elem = get_xml_node(nxdl_path) # Only check for validation in the NXDL if we did find the entry # otherwise we just pass it along From feb973e4dbb3cd86cc43a6fc4fad010d3621b9ec Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 11:33:35 +0200 Subject: [PATCH 09/20] Delete temporary file --- .../ensure_all_required_fields_exist.py | 76 ------------------- 1 file changed, 76 deletions(-) delete mode 100644 pynxtools/dataconverter/ensure_all_required_fields_exist.py diff --git a/pynxtools/dataconverter/ensure_all_required_fields_exist.py b/pynxtools/dataconverter/ensure_all_required_fields_exist.py deleted file mode 100644 index 3e2671d58..000000000 --- a/pynxtools/dataconverter/ensure_all_required_fields_exist.py +++ /dev/null @@ -1,76 +0,0 @@ -@lru_cache(maxsize=None) -def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> List[str]: - """Checks if there is an accepted variation of path in the dictionary & returns the path.""" - found_keys = [] - for key in data_keys: - if nxdl_path == convert_data_converter_dict_to_nxdl_path(key): - found_keys.append(key) - return found_keys - - -def ensure_all_required_fields_exist(template, data, nxdl_root): - """Checks whether all the required fields are in the returned data object.""" - check_basepaths = set() - for path in template["required"]: - entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) - if entry_name == "@units": - continue - nxdl_path = convert_data_converter_dict_to_nxdl_path(path) - renamed_paths = path_in_data_dict(nxdl_path, tuple(data.keys())) - - if len(renamed_paths) > 1: - check_basepaths.add(get_concept_basepath(nxdl_path)) - continue - - if not renamed_paths: - logger.warning( - f"The data entry corresponding to {path} is required " - f"and hasn't been supplied by the reader.", - ) - continue - - for renamed_path in renamed_paths: - renamed_path = path if renamed_path is None else renamed_path - if path in template["lone_groups"]: - opt_parent = check_for_optional_parent(path, nxdl_root) - if opt_parent != "<>": - if does_group_exist(opt_parent, data) and not does_group_exist( - renamed_path, data - ): - logger.warning( - f"The required group, {path}, hasn't been supplied" - f" while its optional parent, {opt_parent}, is supplied." - ) - continue - if not does_group_exist(renamed_path, data): - logger.warning(f"The required group, {path}, hasn't been supplied.") - continue - continue - if data[renamed_path] is None: - logger.warning( - f"The data entry corresponding to {renamed_path} is required " - f"and hasn't been supplied by the reader.", - ) - - for base_path in check_basepaths: - required_fields = get_required_fields_for(base_path) - paths = get_concept_variations(base_path) - - missing_fields = set() - partially_present_path = set() - for path in paths: - for required_field in required_fields: - if ( - f"{path}/{required_field}" not in data - or data[f"{path}/{required_field}"] is None - ): - missing_fields.add(f"{path}/{required_field}") - - if data[f"{path}/{required_field}"] is not None: - partially_present_path.add(f"{path}") - - for missing_field in missing_fields: - logger.warning( - f"The data entry corresponding to {missing_field} is required " - "and hasn't been supplied by the reader.", - ) From 17eb06157a2395975dbdc76d8f91d9347af138af Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 11:49:04 +0200 Subject: [PATCH 10/20] Fix path in data dict test --- tests/dataconverter/test_helpers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index c2b64af68..0ca404bb1 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -476,12 +476,10 @@ def test_validate_data_dict( [ pytest.param( "/ENTRY/definition/@version", - (True, "/ENTRY[entry]/definition/@version"), + ["/ENTRY[entry]/definition/@version"], id="path-exists-in-dict", ), - pytest.param( - "/RANDOM/does/not/@exist", (False, None), id="path-does-not-exist-in-dict" - ), + pytest.param("/RANDOM/does/not/@exist", [], id="path-does-not-exist-in-dict"), ], ) def test_path_in_data_dict(nxdl_path, expected, template): From d83a6b80d01ad8295f484de35018a3162e5813f1 Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 13:43:23 +0200 Subject: [PATCH 11/20] Fix tests --- pynxtools/dataconverter/helpers.py | 7 +------ tests/dataconverter/test_helpers.py | 5 +++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index bf8c62c3b..1cd6239d5 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -567,14 +567,9 @@ def ensure_all_required_fields_exist(template, data, nxdl_root): continue if not renamed_paths: - logger.warning( - f"The data entry corresponding to {path} is required " - f"and hasn't been supplied by the reader.", - ) - continue + renamed_paths = [path] for renamed_path in renamed_paths: - renamed_path = path if renamed_path is None else renamed_path if path in template["lone_groups"]: opt_parent = check_for_optional_parent(path, nxdl_root) if opt_parent != "<>": diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 0ca404bb1..3acd349fe 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -296,7 +296,7 @@ def fixture_filled_test_data(template, tmp_path): "required", ), ( - "The data entry corresponding to /ENTRY[entry]/NXODD_name[nxodd_name]/bool_value is" + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is" " required and hasn't been supplied by the reader." ), id="empty-required-field", @@ -403,7 +403,7 @@ def fixture_filled_test_data(template, tmp_path): remove_from_dict( TEMPLATE, "/ENTRY[my_entry]/required_group/description" ), - "/ENTRY[my_entry]/required_group", + "/ENTRY[entry]/required_group", None, ), "The required group, /ENTRY[entry]/required_group, hasn't been supplied.", @@ -455,6 +455,7 @@ def test_validate_data_dict( # logger records captured_logs = caplog.records helpers.validate_data_dict(template, data_dict, nxdl_root) + messages = [rec.message for rec in captured_logs] assert any(error_message in rec.message for rec in captured_logs) elif request.node.callspec.id in ( "wrong-enum-choice", From b3a0f1b6cd3e46a93e5e9469c809282733ab3d1d Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 14:07:32 +0200 Subject: [PATCH 12/20] Cleanup --- pynxtools/dataconverter/helpers.py | 120 ++++++++++++++--------------- 1 file changed, 59 insertions(+), 61 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 1cd6239d5..4f48637dd 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -151,11 +151,11 @@ def generate_template_from_nxdl( elif "type" in root.attrib: nexus_class = convert_nexus_to_caps(root.attrib["type"]) name = root.attrib.get("name") - nx_type = root.attrib.get("type")[2:] # .removeprefix("NX") (python > 3.8) + hdf_name = root.attrib.get("type")[2:] # .removeprefix("NX") (python > 3.8) suffix = ( f"{name}[{name.lower()}]" if name is not None - else f"{nexus_class}[{nx_type}]" + else f"{nexus_class}[{hdf_name}]" ) path = path + "/" + (f"@{suffix}" if tag == "attribute" else suffix) @@ -407,26 +407,6 @@ def is_valid_data_field(value, nxdl_type, path): return value -def is_matching_variation(nxdl_path: str, key: str) -> bool: - """ - Checks if the given key is a matching variation of the given NXDL path. - """ - hdf_tokens = [ - g1 + g2 - for (g1, g2) in re.findall( - r"\/[a-zA-Z0-9_]+\[([a-zA-Z0-9_]+)\]|\/([a-zA-Z0-9_]+)", key - ) - ] - nxdl_path_tokens = nxdl_path[1:].split("/") - if len(hdf_tokens) != len(nxdl_path_tokens): - return False - - for file_token, nxdl_token in zip(hdf_tokens, nxdl_path_tokens): - if get_nx_namefit(file_token, nxdl_token) < 0: - return False - return True - - @lru_cache(maxsize=None) def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> List[str]: """Checks if there is an accepted variation of path in the dictionary & returns the path.""" @@ -551,45 +531,21 @@ def get_concept_basepath(path: str) -> str: return "/" + "/".join(concept_path) -# pylint: disable=W1203 -def ensure_all_required_fields_exist(template, data, nxdl_root): - """Checks whether all the required fields are in the returned data object.""" - check_basepaths = set() - for path in template["required"]: - entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) - if entry_name == "@units": - continue - nxdl_path = convert_data_converter_dict_to_nxdl_path(path) - renamed_paths = path_in_data_dict(nxdl_path, tuple(data.keys())) - - if len(renamed_paths) > 1: - check_basepaths.add(get_concept_basepath(nxdl_path)) - continue - - if not renamed_paths: - renamed_paths = [path] - - for renamed_path in renamed_paths: - if path in template["lone_groups"]: - opt_parent = check_for_optional_parent(path, nxdl_root) - if opt_parent != "<>": - if does_group_exist(opt_parent, data) and not does_group_exist( - renamed_path, data - ): - logger.warning( - f"The required group, {path}, hasn't been supplied" - f" while its optional parent, {opt_parent}, is supplied." - ) - continue - if not does_group_exist(renamed_path, data): - logger.warning(f"The required group, {path}, hasn't been supplied.") - continue - continue - if data[renamed_path] is None: - logger.warning( - f"The data entry corresponding to {renamed_path} is required " - f"and hasn't been supplied by the reader.", - ) +def ensure_all_required_fields_exist_in_variadic_groups( + template: Template, data: Template, check_basepaths: Set[str] +): + """ + Checks whether all required fields (according to `template`) + in `data` are present in their respective + variadic groups given by `check_basepaths`. + + Args: + template (Template): The template to use as reference. + data (Template): The template containing the actual data + check_basepaths (Set[str]): + A set of basepaths of the form /ENTRY/MY_GROUP to check for missing fields. + All groups matching the basepath will be checked for missing fields. + """ @lru_cache(maxsize=None) def get_required_fields_from(base_path: str) -> Set[str]: @@ -647,6 +603,48 @@ def are_all_entries_none(path: str) -> bool: ) +def ensure_all_required_fields_exist(template, data, nxdl_root): + """Checks whether all the required fields are in the returned data object.""" + check_basepaths = set() + for path in template["required"]: + entry_name = get_name_from_data_dict_entry(path[path.rindex("/") + 1 :]) + if entry_name == "@units": + continue + nxdl_path = convert_data_converter_dict_to_nxdl_path(path) + renamed_paths = path_in_data_dict(nxdl_path, tuple(data.keys())) + + if len(renamed_paths) > 1: + check_basepaths.add(get_concept_basepath(nxdl_path)) + continue + + if not renamed_paths: + renamed_paths = [path] + + for renamed_path in renamed_paths: + if path in template["lone_groups"]: + opt_parent = check_for_optional_parent(path, nxdl_root) + if opt_parent != "<>": + if does_group_exist(opt_parent, data) and not does_group_exist( + renamed_path, data + ): + logger.warning( + f"The required group, {path}, hasn't been supplied" + f" while its optional parent, {opt_parent}, is supplied." + ) + continue + if not does_group_exist(renamed_path, data): + logger.warning(f"The required group, {path}, hasn't been supplied.") + continue + continue + if data[renamed_path] is None: + logger.warning( + f"The data entry corresponding to {renamed_path} is required " + f"and hasn't been supplied by the reader.", + ) + + ensure_all_required_fields_exist_in_variadic_groups(template, data, check_basepaths) + + def try_undocumented(data, nxdl_root: ET.Element): """Tries to move entries used that are from base classes but not in AppDef""" for path in list(data.undocumented): From 0c9a0f4abeb848a13835ffc2d81a471329d9f51c Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 14:11:38 +0200 Subject: [PATCH 13/20] Remove debugging line --- tests/dataconverter/test_helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 3acd349fe..5801aedad 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -455,7 +455,6 @@ def test_validate_data_dict( # logger records captured_logs = caplog.records helpers.validate_data_dict(template, data_dict, nxdl_root) - messages = [rec.message for rec in captured_logs] assert any(error_message in rec.message for rec in captured_logs) elif request.node.callspec.id in ( "wrong-enum-choice", From dcb4d9badf6a187880ee9c93dbb6c488d7d46e77 Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 15:46:58 +0200 Subject: [PATCH 14/20] Add collector class --- pynxtools/dataconverter/helpers.py | 154 +++++++++++++++++++++------- tests/dataconverter/test_helpers.py | 31 +++--- 2 files changed, 131 insertions(+), 54 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 4f48637dd..97bfb4c6a 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -21,6 +21,7 @@ import logging import re from datetime import datetime, timezone +from enum import Enum from functools import lru_cache from typing import Any, Callable, List, Optional, Set, Tuple, Union @@ -31,10 +32,7 @@ from pynxtools import get_nexus_version, get_nexus_version_hash from pynxtools.dataconverter.template import Template -from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( - get_inherited_nodes, - get_nx_namefit, -) +from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_inherited_nodes from pynxtools.nexus import nexus from pynxtools.nexus.nexus import NxdlAttributeNotFoundError @@ -42,6 +40,93 @@ logger.setLevel(logging.INFO) +class ValidationProblem(Enum): + UnitWithoutDocumentation = 1 + InvalidEnum = 2 + OptionalParentWithoutRequiredGroup = 3 + OptionalParentWithoutRequiredField = 4 + MissingRequiredGroup = 5 + MissingRequiredField = 6 + InvalidType = 7 + InvalidDatetime = 8 + IsNotPosInt = 9 + + +class Collector: + """A class to collect data and return it in a dictionary format.""" + + def __init__(self): + self.data = set() + + def insert_and_log( + self, path: str, log_type: ValidationProblem, value: Optional[Any], *args + ): + """Inserts a path into the data dictionary and logs the action.""" + if value is None: + value = "" + + if log_type == ValidationProblem.UnitWithoutDocumentation: + logger.warning( + f"The unit, {path} = {value}, " + "is being written but has no documentation" + ) + elif log_type == ValidationProblem.InvalidEnum: + logger.warning( + f"The value at {path} should be on of the " + f"following strings: {value}" + ) + elif log_type == ValidationProblem.OptionalParentWithoutRequiredGroup: + logger.warning( + f"The required group, {path}, hasn't been supplied" + f" while its optional parent, {value}, is supplied." + ) + elif log_type == ValidationProblem.OptionalParentWithoutRequiredField: + logger.warning( + f"The data entry, {path}, has an optional parent, " + f"{value}, with required children set. Either" + f" provide no children for {value} or provide" + f" all required ones." + ) + elif log_type == ValidationProblem.MissingRequiredGroup: + logger.warning(f"The required group, {path}, hasn't been supplied.") + elif log_type == ValidationProblem.MissingRequiredField: + logger.warning( + f"The data entry corresponding to {path} is required " + "and hasn't been supplied by the reader.", + ) + elif log_type == ValidationProblem.InvalidType: + logger.warning( + f"The value at {path} should be one of: {value}" + f", as defined in the NXDL as {args[0] if args else ''}." + ) + elif log_type == ValidationProblem.InvalidDatetime: + logger.warning( + f"The value at {path} = {value} should be a timezone aware ISO8601 " + "formatted str. For example, 2022-01-22T12:14:12.05018Z" + " or 2022-01-22T12:14:12.05018+00:00." + ) + elif log_type == ValidationProblem.IsNotPosInt: + logger.warning( + f"The value at {path} should be a positive int, but is {value}." + ) + self.data.add(path) + + def has_validation_problems(self): + """Returns True if there were any validation problems.""" + return len(self.data) > 0 + + def get(self): + """Returns the set of problematic paths.""" + return self.data + + def clear(self): + """Clears the collected data.""" + self.data = set() + + +collector = Collector() + + def is_a_lone_group(xml_element) -> bool: """Checks whether a given group XML element has no field or attributes mentioned""" if xml_element.get("type") == "NXentry": @@ -382,14 +467,13 @@ def is_valid_data_field(value, nxdl_type, path): if value is None: raise ValueError return accepted_types[0](value) - except ValueError as exc: - raise ValueError( - f"The value at {path} should be of Python type: {accepted_types}" - f", as defined in the NXDL as {nxdl_type}." - ) from exc + except ValueError: + collector.insert_and_log( + path, ValidationProblem.InvalidType, accepted_types, nxdl_type + ) if nxdl_type == "NX_POSINT" and not is_positive_int(value): - raise ValueError(f"The value at {path} should be a positive int.") + collector.insert_and_log(path, ValidationProblem.IsNotPosInt, value) if nxdl_type in ("ISO8601", "NX_DATE_TIME"): iso8601 = re.compile( @@ -398,11 +482,7 @@ def is_valid_data_field(value, nxdl_type, path): ) results = iso8601.search(value) if results is None: - raise ValueError( - f"The date at {path} should be a timezone aware ISO8601 " - f"formatted str. For example, 2022-01-22T12:14:12.05018Z" - f" or 2022-01-22T12:14:12.05018+00:00." - ) + collector.insert_and_log(path, ValidationProblem.InvalidDatetime, value) return value @@ -488,11 +568,10 @@ def trim_path_to(parent: str, path: str): ) and not all_required_children_are_set( trim_path_to(optional_parent, path), data, nxdl_root ): - logger.warning( - f"The data entry, {path}, has an optional parent, " - f"{optional_parent}, with required children set. Either" - f" provide no children for {optional_parent} or provide" - f" all required ones." + collector.insert_and_log( + path, + ValidationProblem.OptionalParentWithoutRequiredField, + optional_parent, ) @@ -597,9 +676,8 @@ def are_all_entries_none(path: str) -> bool: for missing_field in missing_fields: if not are_all_entries_none(missing_field): - logger.warning( - f"The data entry corresponding to {missing_field} is required " - "and hasn't been supplied by the reader.", + collector.insert_and_log( + missing_field, ValidationProblem.MissingRequiredField, None ) @@ -627,19 +705,21 @@ def ensure_all_required_fields_exist(template, data, nxdl_root): if does_group_exist(opt_parent, data) and not does_group_exist( renamed_path, data ): - logger.warning( - f"The required group, {path}, hasn't been supplied" - f" while its optional parent, {opt_parent}, is supplied." + collector.insert_and_log( + path, + ValidationProblem.OptionalParentWithoutRequiredGroup, + opt_parent, ) continue if not does_group_exist(renamed_path, data): - logger.warning(f"The required group, {path}, hasn't been supplied.") + collector.insert_and_log( + path, ValidationProblem.MissingRequiredGroup, None + ) continue continue if data[renamed_path] is None: - logger.warning( - f"The data entry corresponding to {renamed_path} is required " - f"and hasn't been supplied by the reader.", + collector.insert_and_log( + path, ValidationProblem.MissingRequiredField, None ) ensure_all_required_fields_exist_in_variadic_groups(template, data, check_basepaths) @@ -680,6 +760,7 @@ def try_undocumented(data, nxdl_root: ET.Element): def validate_data_dict(template, data, nxdl_root: ET.Element): """Checks whether all the required paths from the template are returned in data dict.""" assert nxdl_root is not None, "The NXDL file hasn't been loaded." + collector.clear() @lru_cache(maxsize=None) def get_xml_node(nxdl_path: str) -> ET.Element: @@ -706,10 +787,8 @@ def get_xml_node(nxdl_path: str) -> ET.Element: field_path not in data.get_documented() and "units" not in elem.attrib ): - logger.warning( - "The unit, %s = %s, is being written but has no documentation.", - path, - data[path], + collector.insert_and_log( + path, ValidationProblem.UnitWithoutDocumentation, data[path] ) continue @@ -755,12 +834,9 @@ def get_xml_node(nxdl_path: str) -> ET.Element: )[2] is_valid_enum, enums = is_value_valid_element_of_enum(data[path], elist) if not is_valid_enum: - logger.warning( - f"The value at {path} should be on of the " - f"following strings: {enums}" - ) + collector.insert_and_log(path, ValidationProblem.InvalidEnum, enums) - return True + return not collector.has_validation_problems() def remove_namespace_from_tag(tag): diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 5801aedad..6491eee1f 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -233,7 +233,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" - "t_value should be of Python type: (, , , )," " as defined in the NXDL as NX_INT." ), @@ -247,7 +247,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value sh" - "ould be of Python type: (, , , , ), as defined in the NXDL as NX_BOOLEAN." ), id="string-instead-of-int", @@ -267,7 +267,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " - "should be a positive int." + "should be a positive int, but is -1." ), id="negative-posint", ), @@ -296,7 +296,7 @@ def fixture_filled_test_data(template, tmp_path): "required", ), ( - "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is" + "The data entry corresponding to /ENTRY[entry]/NXODD_name[nxodd_name]/bool_value is" " required and hasn't been supplied by the reader." ), id="empty-required-field", @@ -325,7 +325,8 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", "2022-01-22T12:14:12.05018-00:00", ), - "The date at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value should be a timezone aware" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" + " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" "T12:14:12.05018+00:00.", id="UTC-with--00:00", @@ -452,23 +453,23 @@ def test_validate_data_dict( "missing-empty-yet-required-group2", ): assert "" == caplog.text - # logger records captured_logs = caplog.records helpers.validate_data_dict(template, data_dict, nxdl_root) + messages = [rec.message for rec in captured_logs] assert any(error_message in rec.message for rec in captured_logs) - elif request.node.callspec.id in ( - "wrong-enum-choice", - "atleast-one-required-child-not-provided-optional-parent", - "required-field-not-provided-in-variadic-optional-group", - ): + else: with caplog.at_level(logging.WARNING): helpers.validate_data_dict(template, data_dict, nxdl_root) assert error_message in caplog.text - else: - with pytest.raises(Exception) as execinfo: - helpers.validate_data_dict(template, data_dict, nxdl_root) - assert (error_message) == str(execinfo.value) + # else: + # with caplog.at_level(logging.WARNING): + # helpers.validate_data_dict(template, data_dict, nxdl_root) + + # assert caplog.text + # with pytest.raises(Exception) as execinfo: + # helpers.validate_data_dict(template, data_dict, nxdl_root) + # assert (error_message) == str(execinfo.value) @pytest.mark.parametrize( From 56bf3a68faed4e7beb5050b632c0419333aeb994 Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 15:52:15 +0200 Subject: [PATCH 15/20] Remove commented lines --- tests/dataconverter/test_helpers.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 6491eee1f..e0ed081fb 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -455,21 +455,12 @@ def test_validate_data_dict( assert "" == caplog.text captured_logs = caplog.records helpers.validate_data_dict(template, data_dict, nxdl_root) - messages = [rec.message for rec in captured_logs] assert any(error_message in rec.message for rec in captured_logs) else: with caplog.at_level(logging.WARNING): helpers.validate_data_dict(template, data_dict, nxdl_root) assert error_message in caplog.text - # else: - # with caplog.at_level(logging.WARNING): - # helpers.validate_data_dict(template, data_dict, nxdl_root) - - # assert caplog.text - # with pytest.raises(Exception) as execinfo: - # helpers.validate_data_dict(template, data_dict, nxdl_root) - # assert (error_message) == str(execinfo.value) @pytest.mark.parametrize( From a0ae25978504974eaf79f33bc57ad340463bb3b3 Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 15:54:24 +0200 Subject: [PATCH 16/20] Check validation return type and logging --- tests/dataconverter/test_helpers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index e0ed081fb..fc3ffadf7 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -443,7 +443,9 @@ def test_validate_data_dict( "opt-group-completely-removed", "required-field-provided-in-variadic-optional-group", ): - helpers.validate_data_dict(template, data_dict, nxdl_root) + with caplog.at_level(logging.WARNING): + assert helpers.validate_data_dict(template, data_dict, nxdl_root) + assert caplog.text == "" # Missing required fields caught by logger with warning elif request.node.callspec.id in ( "empty-required-field", @@ -454,11 +456,11 @@ def test_validate_data_dict( ): assert "" == caplog.text captured_logs = caplog.records - helpers.validate_data_dict(template, data_dict, nxdl_root) + assert not helpers.validate_data_dict(template, data_dict, nxdl_root) assert any(error_message in rec.message for rec in captured_logs) else: with caplog.at_level(logging.WARNING): - helpers.validate_data_dict(template, data_dict, nxdl_root) + assert not helpers.validate_data_dict(template, data_dict, nxdl_root) assert error_message in caplog.text From 46f122bce50fca8c87a249dcb92556cb731cdfe3 Mon Sep 17 00:00:00 2001 From: domna Date: Wed, 24 Apr 2024 17:36:06 +0200 Subject: [PATCH 17/20] Add tests for repeating groups --- pynxtools/dataconverter/helpers.py | 14 +++---- pynxtools/dataconverter/template.py | 2 +- tests/data/dataconverter/NXtest.nxdl.xml | 4 +- tests/dataconverter/test_helpers.py | 49 ++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 97bfb4c6a..4f05fa3a3 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -664,21 +664,17 @@ def are_all_entries_none(path: str) -> bool: return True for base_path in check_basepaths: - missing_fields = set() for path in get_concept_variations(base_path): for required_field in get_required_fields_from(base_path): if ( f"{path}/{required_field}" not in data or data[f"{path}/{required_field}"] is None ): - missing_fields.add(f"{path}/{required_field}") - continue - - for missing_field in missing_fields: - if not are_all_entries_none(missing_field): - collector.insert_and_log( - missing_field, ValidationProblem.MissingRequiredField, None - ) + missing_field = f"{path}/{required_field}" + if not are_all_entries_none(missing_field): + collector.insert_and_log( + missing_field, ValidationProblem.MissingRequiredField, None + ) def ensure_all_required_fields_exist(template, data, nxdl_root): diff --git a/pynxtools/dataconverter/template.py b/pynxtools/dataconverter/template.py index fd67b36d9..11f1c1110 100644 --- a/pynxtools/dataconverter/template.py +++ b/pynxtools/dataconverter/template.py @@ -155,7 +155,7 @@ def __getitem__(self, k): return self.recommended[k] if k in self.required: return self.required[k] - return self.undocumented[k] + return self.undocumented.get(k) if k in ("required", "optional", "recommended", "undocumented"): return self.get_optionality(k) raise KeyError( diff --git a/tests/data/dataconverter/NXtest.nxdl.xml b/tests/data/dataconverter/NXtest.nxdl.xml index d9a049925..8695a20c9 100644 --- a/tests/data/dataconverter/NXtest.nxdl.xml +++ b/tests/data/dataconverter/NXtest.nxdl.xml @@ -21,10 +21,10 @@ - A dummy entry to test optional parent check for required child. + A dummy entry to test optional parent check for a required child. - A dummy entry to test optional parent check for required child. + A dummy entry to test optional parent check for an optional child. diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index fc3ffadf7..8a4ed77cf 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -195,6 +195,27 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = ( "just chars" # pylint: disable=E1126 ) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value"] = True # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value"] = 2 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units"] = ( + "eV" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value"] = ( + np.array( + [1, 2, 3], # pylint: disable=E1126 + dtype=np.int8, + ) +) # pylint: disable=E1126 +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value/@units" +] = "kg" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value"] = ( + "just chars" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type"] = "2nd type" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value"] = ( + "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 +) TEMPLATE["required"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/definition"] = "NXtest" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126 @@ -295,6 +316,34 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", "required", ), + ( + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is" + " required and hasn't been supplied by the reader." + ), + id="empty-required-field", + ), + pytest.param( + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", + "required", + ), + ( + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value is" + " required and hasn't been supplied by the reader." + ), + id="empty-required-field", + ), + pytest.param( + remove_from_dict( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", + "required", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "required", + ), ( "The data entry corresponding to /ENTRY[entry]/NXODD_name[nxodd_name]/bool_value is" " required and hasn't been supplied by the reader." From 2b0144f3681d19922799b357dbdc241b466c75fe Mon Sep 17 00:00:00 2001 From: domna Date: Thu, 25 Apr 2024 07:47:24 +0200 Subject: [PATCH 18/20] Fix report of variadic groups set to all None --- pynxtools/dataconverter/helpers.py | 52 ++++++++++++++++++----------- tests/dataconverter/test_helpers.py | 46 ++++++++++++++++++++----- 2 files changed, 69 insertions(+), 29 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 4f05fa3a3..11e0a33bc 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -664,6 +664,7 @@ def are_all_entries_none(path: str) -> bool: return True for base_path in check_basepaths: + count = 0 for path in get_concept_variations(base_path): for required_field in get_required_fields_from(base_path): if ( @@ -671,11 +672,19 @@ def are_all_entries_none(path: str) -> bool: or data[f"{path}/{required_field}"] is None ): missing_field = f"{path}/{required_field}" + count += 1 if not are_all_entries_none(missing_field): + count -= 1 collector.insert_and_log( missing_field, ValidationProblem.MissingRequiredField, None ) + if count > 0: + # All entries in all variadic groups are None + collector.insert_and_log( + base_path, ValidationProblem.MissingRequiredGroup, None + ) + def ensure_all_required_fields_exist(template, data, nxdl_root): """Checks whether all the required fields are in the returned data object.""" @@ -692,31 +701,34 @@ def ensure_all_required_fields_exist(template, data, nxdl_root): continue if not renamed_paths: - renamed_paths = [path] - - for renamed_path in renamed_paths: - if path in template["lone_groups"]: - opt_parent = check_for_optional_parent(path, nxdl_root) - if opt_parent != "<>": - if does_group_exist(opt_parent, data) and not does_group_exist( - renamed_path, data - ): - collector.insert_and_log( - path, - ValidationProblem.OptionalParentWithoutRequiredGroup, - opt_parent, - ) - continue - if not does_group_exist(renamed_path, data): + renamed_path = path + else: + renamed_path = renamed_paths[0] + + if path in template["lone_groups"]: + opt_parent = check_for_optional_parent(path, nxdl_root) + if opt_parent != "<>": + if does_group_exist(opt_parent, data) and not does_group_exist( + renamed_path, data + ): collector.insert_and_log( - path, ValidationProblem.MissingRequiredGroup, None + renamed_path, + ValidationProblem.OptionalParentWithoutRequiredGroup, + opt_parent, ) - continue continue - if data[renamed_path] is None: + if not does_group_exist(renamed_path, data): collector.insert_and_log( - path, ValidationProblem.MissingRequiredField, None + convert_data_converter_dict_to_nxdl_path(path), + ValidationProblem.MissingRequiredGroup, + None, ) + continue + continue + if data[renamed_path] is None: + collector.insert_and_log( + renamed_path, ValidationProblem.MissingRequiredField, None + ) ensure_all_required_fields_exist_in_variadic_groups(template, data, check_basepaths) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 8a4ed77cf..cced1f606 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -20,6 +20,7 @@ import logging import os import xml.etree.ElementTree as ET +from typing import Optional import numpy as np import pytest @@ -51,14 +52,28 @@ def alter_dict(data_dict: Template, key: str, value: object): return None -def set_to_none_in_dict(data_dict: Template, key: str, optionality: str): +def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): """Helper function to forcefully set path to 'None'""" - if data_dict is not None: - internal_dict = Template(data_dict) - internal_dict[optionality][key] = None - return internal_dict + if data_dict is None: + return None + + internal_dict = Template(data_dict) + internal_dict[optionality][key] = None + return internal_dict - return None + +def set_whole_group_to_none( + data_dict: Optional[Template], key: str, optionality: str +) -> Optional[Template]: + """Set a whole path to None in the dict""" + if data_dict is None: + return None + + internal_dict = Template(data_dict) + for path in data_dict[optionality]: + if path.startswith(key): + internal_dict[optionality][path] = None + return internal_dict def remove_from_dict(data_dict: Template, key: str, optionality: str = "optional"): @@ -350,6 +365,19 @@ def fixture_filled_test_data(template, tmp_path): ), id="empty-required-field", ), + pytest.param( + set_whole_group_to_none( + set_whole_group_to_none( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name", + "required", + ), + "/ENTRY[my_entry]/NXODD_name", + "optional", + ), + ("The required group, /ENTRY/NXODD_name, hasn't been supplied."), + id="all-required-fields-set-to-none", + ), pytest.param( alter_dict( TEMPLATE, @@ -440,12 +468,12 @@ def fixture_filled_test_data(template, tmp_path): pytest.param(TEMPLATE, "", id="valid-data-dict"), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group/description"), - "The required group, /ENTRY[entry]/required_group, hasn't been supplied.", + "The required group, /ENTRY/required_group, hasn't been supplied.", id="missing-empty-yet-required-group", ), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group2/description"), - "The required group, /ENTRY[entry]/required_group2, hasn't been supplied.", + "The required group, /ENTRY/required_group2, hasn't been supplied.", id="missing-empty-yet-required-group2", ), pytest.param( @@ -456,7 +484,7 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[entry]/required_group", None, ), - "The required group, /ENTRY[entry]/required_group, hasn't been supplied.", + "The required group, /ENTRY/required_group, hasn't been supplied.", id="allow-required-and-empty-group", ), pytest.param( From 9b6276911bac901c77d29f346f91c5efb8f89562 Mon Sep 17 00:00:00 2001 From: domna Date: Fri, 26 Apr 2024 08:26:44 +0200 Subject: [PATCH 19/20] Fixes undocumented units and reporting of all none required groups --- pynxtools/dataconverter/helpers.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 11e0a33bc..0cae1400f 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -664,8 +664,9 @@ def are_all_entries_none(path: str) -> bool: return True for base_path in check_basepaths: - count = 0 + all_fields_are_none = True for path in get_concept_variations(base_path): + count = 0 for required_field in get_required_fields_from(base_path): if ( f"{path}/{required_field}" not in data @@ -679,7 +680,12 @@ def are_all_entries_none(path: str) -> bool: missing_field, ValidationProblem.MissingRequiredField, None ) - if count > 0: + if count == 0: + # There are either no required fields, all required fields are set, + # or the missing fields already have been reported. + all_fields_are_none = False + + if all_fields_are_none: # All entries in all variadic groups are None collector.insert_and_log( base_path, ValidationProblem.MissingRequiredGroup, None @@ -742,9 +748,14 @@ def try_undocumented(data, nxdl_root: ET.Element): if entry_name == "@units": field_path = path.rsplit("/", 1)[0] - - # Remove units attribute if there is no associated field - if field_path not in data: + if field_path in data.get_documented() and path in data.undocumented: + field_requiredness = get_required_string( + nexus.get_node_at_nxdl_path( + nxdl_path=convert_data_converter_dict_to_nxdl_path(field_path), + elem=nxdl_root, + ) + ) + data[field_requiredness][path] = data.undocumented[path] del data.undocumented[path] continue From 9327b29581486ac3ee693090896e7ae2348ba1c8 Mon Sep 17 00:00:00 2001 From: domna Date: Fri, 26 Apr 2024 08:37:37 +0200 Subject: [PATCH 20/20] Use dict paths everywhere --- pynxtools/dataconverter/helpers.py | 7 +++++-- tests/dataconverter/test_helpers.py | 10 ++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 0cae1400f..2e393d3df 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -687,8 +687,11 @@ def are_all_entries_none(path: str) -> bool: if all_fields_are_none: # All entries in all variadic groups are None + generic_dict_path = "/" + "/".join( + map(lambda path: f"{path}[{path.lower()}]", base_path.split("/")[1:]) + ) collector.insert_and_log( - base_path, ValidationProblem.MissingRequiredGroup, None + generic_dict_path, ValidationProblem.MissingRequiredGroup, None ) @@ -725,7 +728,7 @@ def ensure_all_required_fields_exist(template, data, nxdl_root): continue if not does_group_exist(renamed_path, data): collector.insert_and_log( - convert_data_converter_dict_to_nxdl_path(path), + path, ValidationProblem.MissingRequiredGroup, None, ) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index cced1f606..ccdcea79a 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -375,7 +375,9 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[my_entry]/NXODD_name", "optional", ), - ("The required group, /ENTRY/NXODD_name, hasn't been supplied."), + ( + "The required group, /ENTRY[entry]/NXODD_name[nxodd_name], hasn't been supplied." + ), id="all-required-fields-set-to-none", ), pytest.param( @@ -468,12 +470,12 @@ def fixture_filled_test_data(template, tmp_path): pytest.param(TEMPLATE, "", id="valid-data-dict"), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group/description"), - "The required group, /ENTRY/required_group, hasn't been supplied.", + "The required group, /ENTRY[entry]/required_group, hasn't been supplied.", id="missing-empty-yet-required-group", ), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group2/description"), - "The required group, /ENTRY/required_group2, hasn't been supplied.", + "The required group, /ENTRY[entry]/required_group2, hasn't been supplied.", id="missing-empty-yet-required-group2", ), pytest.param( @@ -484,7 +486,7 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[entry]/required_group", None, ), - "The required group, /ENTRY/required_group, hasn't been supplied.", + "The required group, /ENTRY[entry]/required_group, hasn't been supplied.", id="allow-required-and-empty-group", ), pytest.param(