From 1207c263fbcd1833882a2a9e74d87619388764af Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 12:57:48 +0100 Subject: [PATCH 01/10] Added a test to test_parser.py which tests extraction of single character expressions and -references from block_content --- tests/test_parser.py | 86 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tests/test_parser.py b/tests/test_parser.py index 0f9a0eb1..1771d487 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -599,6 +599,92 @@ def test_extract_expressions(self): assert list(dict.expressions.values())[8]["name"][:10] == "EXPRESSION" assert list(dict.expressions.values())[8]["expression"] == "$varName1[1][2]" + def test_extract_single_character_expressions(self): + # sourcery skip: avoid-builtin-shadow + # Prepare + dict = CppDict() + parser = CppParser() + text_block_in = ( + "This is a text block\n" + "with multiple lines. Within this text block, there are key value pairs where the value\n" + "is a string surrounded by double quotes and containing at least one reference to a variable starting with $.\n" + "Such strings are identified as expressions. 
Expressions will be evaluated by DictReader.\n" + "The following examples will be identified as expressions:\n" + " reference1 $a\n" + " reference2 $a[0]\n" + " reference3 $a[1][2]\n" + ' expression1 "$a"\n' + ' expression2 "$b + 4"\n' + ' expression3 "4 + $b"\n' + ' expression4 "$b + $c" and some blabla thereafter\n' + ' expression5 "$a + $b + $c" and some blabla thereafter\n' + ' expression6 "$b + $c + $a" and some blabla thereafter\n' + "The following example will NOT be identified as expression but as string literal:\n" + " string1 '$a is not an expression but a string literal because it is in single instead of double quotes'\n" + ' string2 "not an expression but a string literal as it does not contain a Dollar character"\n' + "_extract_expressions() will extract expressions and substitute them with a placeholder\n" + "in the form E X P R E S S I O N 0 0 0 0 0 0." + "The actual evaluation of an expression is not part of _extract_expressions(). The evaluation is done within ()." + ) + text_block_expected = ( + "This is a text block\n" + "with multiple lines. Within this text block, there are key value pairs where the value\n" + "is a string surrounded by double quotes and containing at least one reference to a variable starting with $.\n" + "Such strings are identified as expressions. 
Expressions will be evaluated by DictReader.\n" + "The following examples will be identified as expressions:\n" + " reference1 EXPRESSION000000\n" + " reference2 EXPRESSION000000\n" + " reference3 EXPRESSION000000\n" + " expression1 EXPRESSION000000\n" + " expression2 EXPRESSION000000\n" + " expression3 EXPRESSION000000\n" + " expression4 EXPRESSION000000 and some blabla thereafter\n" + " expression5 EXPRESSION000000 and some blabla thereafter\n" + " expression6 EXPRESSION000000 and some blabla thereafter\n" + "The following example will NOT be identified as expression but as string literal:\n" + " string1 STRINGLITERAL000000\n" + " string2 STRINGLITERAL000000\n" + "_extract_expressions() will extract expressions and substitute them with a placeholder\n" + "in the form E X P R E S S I O N 0 0 0 0 0 0." + "The actual evaluation of an expression is not part of _extract_expressions(). The evaluation is done within ()." + ) + dict.block_content = text_block_in + parser._extract_string_literals(dict) + # Execute + parser._extract_expressions(dict) + # Assert + text_block_out = re.sub(r"[0-9]{6}", "000000", dict.block_content) + assert text_block_out == text_block_expected + string_diff(text_block_out, text_block_expected) + assert len(dict.expressions) == 9 + + assert list(dict.expressions.values())[0]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[0]["expression"] == "$a" + + assert list(dict.expressions.values())[1]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[1]["expression"] == "$b + 4" + + assert list(dict.expressions.values())[2]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[2]["expression"] == "4 + $b" + + assert list(dict.expressions.values())[3]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[3]["expression"] == "$b + $c" + + assert list(dict.expressions.values())[4]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[4]["expression"] == "$a + $b + 
$c" + + assert list(dict.expressions.values())[5]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[5]["expression"] == "$b + $c + $a" + + assert list(dict.expressions.values())[6]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[6]["expression"] == "$a" + + assert list(dict.expressions.values())[7]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[7]["expression"] == "$a[0]" + + assert list(dict.expressions.values())[8]["name"][:10] == "EXPRESSION" + assert list(dict.expressions.values())[8]["expression"] == "$a[1][2]" + def test_separate_delimiters(self): # sourcery skip: avoid-builtin-shadow # sourcery skip: no-loop-in-tests From c1c51c3ced418b9a1e9b44eb80d7d39c7da2b702 Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 12:59:15 +0100 Subject: [PATCH 02/10] improved Parser classes in parser.py to correctly identify and resolve single character references --- src/dictIO/parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dictIO/parser.py b/src/dictIO/parser.py index 2e74b42c..74636aa8 100644 --- a/src/dictIO/parser.py +++ b/src/dictIO/parser.py @@ -699,7 +699,7 @@ def _extract_expressions(self, dict: CppDict): dict.expressions |= {index: {"expression": expression, "name": placeholder}} # Step 2: Find references in .block_content (single references to key'd entries that are NOT in double quotes). - search_pattern = r"\$\w[\w\[\]]+" + search_pattern = r"\$\w[\w\[\]]*" while match := re.search(search_pattern, dict.block_content, re.MULTILINE): reference = match[0] index = self.counter() @@ -1346,14 +1346,14 @@ def _extract_expression( # References are denoted using the '$' syntax familiar from shell programming. # Any key'd entries in a dict are considered variables and can be referenced. # If string does not contain minimum one reference, return. 
- search_pattern = r"\$\w[\w\[\]]+" + search_pattern = r"\$\w[\w\[\]]*" references = re.findall(search_pattern, string, re.MULTILINE) if not references: return string # Case 1: Reference # The string contains only a single plain reference (single reference to a key'd entry in the parsed dict). - search_pattern = r"^\s*(\$\w[\w\[\]]+){1}\s*$" + search_pattern = r"^\s*(\$\w[\w\[\]]*){1}\s*$" if match := re.search(search_pattern, string, re.MULTILINE): reference: str = match.groups()[0] # Replace the reference in string with a placeholder (EXPRESSION000000) and register it in parsed_dict: From e872746cea504dee9def4864e9b226ebf8cb88a6 Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 12:56:25 +0100 Subject: [PATCH 03/10] Added a test dict with single character variables and -references --- .../test_single_character_vars_dict | 21 +++++++++++++++++++ .../test_single_character_vars_paramDict | 10 +++++++++ 2 files changed, 31 insertions(+) create mode 100644 tests/test_dicts/test_single_character_vars_dict create mode 100644 tests/test_dicts/test_single_character_vars_paramDict diff --git a/tests/test_dicts/test_single_character_vars_dict b/tests/test_dicts/test_single_character_vars_dict new file mode 100644 index 00000000..71d8ba34 --- /dev/null +++ b/tests/test_dicts/test_single_character_vars_dict @@ -0,0 +1,21 @@ +/*---------------------------------*- C++ -*----------------------------------*\ +filetype dictionary; coding utf-8; version 0.1; local --; purpose --; +\*----------------------------------------------------------------------------*/ + +#include 'test_single_character_vars_paramDict' + +a 1.0; +b true; + +c +{ + d 4.0; + e false; + f $x; + g $y; + h $h; + i $i; +} + +j $z; +k $k; diff --git a/tests/test_dicts/test_single_character_vars_paramDict b/tests/test_dicts/test_single_character_vars_paramDict new file mode 100644 index 00000000..5fac978a --- /dev/null +++ b/tests/test_dicts/test_single_character_vars_paramDict @@ -0,0 +1,10 @@ 
+/*---------------------------------*- C++ -*----------------------------------*\ +filetype dictionary; coding utf-8; version 0.1; local --; purpose --; +\*----------------------------------------------------------------------------*/ + +x 6.0; +y true; +h 8.0; +i false; +z 10.0; +k 12.0; From 2e60106c29f5f3edad56259b94100f88d1af0039 Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 13:00:12 +0100 Subject: [PATCH 04/10] Added a test to test_dictReader that reads and parses the new test dict with single character expressions and -references --- tests/test_dictReader.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tests/test_dictReader.py b/tests/test_dictReader.py index 08d57074..12125cab 100644 --- a/tests/test_dictReader.py +++ b/tests/test_dictReader.py @@ -384,7 +384,7 @@ def test_compare_expressions_in_dict_format_with_expressions_in_json_format(): def _get_references_in_expressions(dict: CppDict) -> List[str]: references: List[str] = [] for item in dict.expressions.values(): - _refs: List[str] = re.findall(r"\$\w[\w\[\]]+", item["expression"]) + _refs: List[str] = re.findall(r"\$\w[\w\[\]]*", item["expression"]) references.extend(_refs) return references @@ -562,6 +562,34 @@ def test_reread_strings_dict(): assert reread_dict["subDict"]["string_07_sq_escdq_word"] == r"quote(\"string_07_sq_escdq_word\")" +def test_single_character_vars(): + # sourcery skip: avoid-builtin-shadow + # Prepare + source_file = Path("test_single_character_vars_dict") + # Execute + dict = DictReader.read(source_file, includes=False) + # Assert single character variables are properly parsed + assert dict["a"] == 1.0 + assert dict["b"] is True + assert dict["c"]["d"] == 4.0 + assert dict["c"]["e"] is False + + +def test_single_character_references(): + # sourcery skip: avoid-builtin-shadow + # Prepare + source_file = Path("test_single_character_vars_dict") + # Execute + dict = DictReader.read(source_file) + # Assert 
included dict has been merged and single character references been resolved + assert dict["c"]["f"] == 6.0 + assert dict["c"]["g"] is True + assert dict["c"]["h"] == 8.0 + assert dict["c"]["i"] is False + assert dict["j"] == 10.0 + assert dict["k"] == 12.0 + + class SetupHelper: @staticmethod def prepare_dict_until( From a8f25a6c0e2195d1bda93db9d5061056c1b80d1d Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 13:01:32 +0100 Subject: [PATCH 05/10] improved also formatter.py and dictReader.py. Replaced all occurrences of '\$\w[\w\[\]]+' with '\$\w[\w\[\]]*' (-> using * in regex instead of + -> That was the problem) --- src/dictIO/dictReader.py | 6 +++--- src/dictIO/formatter.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dictIO/dictReader.py b/src/dictIO/dictReader.py index 5df57dfd..959b0bc8 100644 --- a/src/dictIO/dictReader.py +++ b/src/dictIO/dictReader.py @@ -211,7 +211,7 @@ def _eval_expressions(dict: CppDict): # Collect all references contained in expressions _references: List[str] = [] for item in dict.expressions.values(): - _refs: List[str] = re.findall(r"\$\w[\w\[\]]+", item["expression"]) + _refs: List[str] = re.findall(r"\$\w[\w\[\]]*", item["expression"]) _references.extend(_refs) # Resolve references variables: Dict[str, Any] = dict.variables @@ -233,7 +233,7 @@ def _eval_expressions(dict: CppDict): for key, item in expressions_copy.items(): placeholder: str = item["name"] expression: str = item["expression"] - _refs: List[str] = re.findall(r"\$\w[\w\[\]]+", expression) + _refs: List[str] = re.findall(r"\$\w[\w\[\]]*", expression) for ref in _refs: if ref in references_resolved: expression = re.sub( @@ -265,7 +265,7 @@ def _eval_expressions(dict: CppDict): # At the end of each iteration, re-resolve all references based on the now updated variables table of dict _references = [] for item in dict.expressions.values(): - _refs = re.findall(r"\$\w[\w\[\]]+", item["expression"]) + _refs = re.findall(r"\$\w[\w\[\]]*", 
item["expression"]) _references.extend(_refs) variables = dict.variables references = {ref: __class__._resolve_reference(ref, variables) for ref in _references} diff --git a/src/dictIO/formatter.py b/src/dictIO/formatter.py index c2002c7b..79528d75 100644 --- a/src/dictIO/formatter.py +++ b/src/dictIO/formatter.py @@ -211,7 +211,7 @@ def format_string(self, arg: str) -> str: the formatted string """ if re.search(r"[$]", arg): - if re.search(r"^\$\w[\w\[\]]+$", arg): # reference + if re.search(r"^\$\w[\w\[\]]*$", arg): # reference return self.format_reference_string(arg) else: # expression return self.format_expression_string(arg) From 6e61c53aac85b9d173835d14fb5d416bcc1ad6f5 Mon Sep 17 00:00:00 2001 From: fralum Date: Tue, 9 Jan 2024 15:59:01 +0100 Subject: [PATCH 06/10] test for single char reference lists, one issue, apparently not related to single char branch --- tests/test_dictReader.py | 5 ++++- tests/test_dicts/test_single_character_vars_dict | 8 ++++++++ tests/test_dicts/test_single_character_vars_paramDict | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_dictReader.py b/tests/test_dictReader.py index 12125cab..661a98ed 100644 --- a/tests/test_dictReader.py +++ b/tests/test_dictReader.py @@ -588,7 +588,10 @@ def test_single_character_references(): assert dict["c"]["i"] is False assert dict["j"] == 10.0 assert dict["k"] == 12.0 - + assert dict["u"] == 3 + assert dict["v"] == "Alice" + assert dict["w"] == "paragliding contest" + #assert dict["ww"] == "AliceandBobfailtheparagliding contest" class SetupHelper: @staticmethod diff --git a/tests/test_dicts/test_single_character_vars_dict b/tests/test_dicts/test_single_character_vars_dict index 71d8ba34..15eaf544 100644 --- a/tests/test_dicts/test_single_character_vars_dict +++ b/tests/test_dicts/test_single_character_vars_dict @@ -7,6 +7,9 @@ filetype dictionary; coding utf-8; version 0.1; local --; purpose --; a 1.0; b true; +n (0 1 2 3); +m (Alice and Bob fail the "paragliding 
contest"); + c { d 4.0; @@ -19,3 +22,8 @@ c j $z; k $k; + +u $n[3]; +v $m[0]; +w $m[5]; +ww '$m[0] + $m[1] + $m[2] + $m[3] + $m[4] + $m[5]'; \ No newline at end of file diff --git a/tests/test_dicts/test_single_character_vars_paramDict b/tests/test_dicts/test_single_character_vars_paramDict index 5fac978a..1fa426f6 100644 --- a/tests/test_dicts/test_single_character_vars_paramDict +++ b/tests/test_dicts/test_single_character_vars_paramDict @@ -2,6 +2,8 @@ filetype dictionary; coding utf-8; version 0.1; local --; purpose --; \*----------------------------------------------------------------------------*/ +a -1.0; +b false; x 6.0; y true; h 8.0; From a6ea48fc2a5ca7d536fdeebfc7cca8783a7219b8 Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 17:05:15 +0100 Subject: [PATCH 07/10] solved the issue in test_single_character_vars_dict that led test_single_character_references() in test_dictReader.py to fail on line 594, the test with multiple string references concatenated. ("AliceandBobfailtheparagliding contest") --- tests/test_dictReader.py | 3 ++- tests/test_dicts/test_single_character_vars_dict | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_dictReader.py b/tests/test_dictReader.py index 661a98ed..1aed401c 100644 --- a/tests/test_dictReader.py +++ b/tests/test_dictReader.py @@ -591,7 +591,8 @@ def test_single_character_references(): assert dict["u"] == 3 assert dict["v"] == "Alice" assert dict["w"] == "paragliding contest" - #assert dict["ww"] == "AliceandBobfailtheparagliding contest" + assert dict["ww"] == "AliceandBobfailtheparagliding contest" + class SetupHelper: @staticmethod diff --git a/tests/test_dicts/test_single_character_vars_dict b/tests/test_dicts/test_single_character_vars_dict index 15eaf544..160f20bc 100644 --- a/tests/test_dicts/test_single_character_vars_dict +++ b/tests/test_dicts/test_single_character_vars_dict @@ -26,4 +26,4 @@ k $k; u $n[3]; v $m[0]; w $m[5]; -ww '$m[0] + $m[1] + $m[2] + $m[3] + $m[4] + 
$m[5]'; \ No newline at end of file +ww "$m[0]$m[1]$m[2]$m[3]$m[4]$m[5]"; From a55e5a628f05de02859c20070e76089891dcbba3 Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 13:20:19 +0100 Subject: [PATCH 08/10] updated CHANGELOG.md --- CHANGELOG.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9252eaee..e128d409 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,14 @@ The changelog format is based on [Keep a Changelog](https://keepachangelog.com/e * -/- +## [0.3.1] - 2024-01-09 + +### Solved + +* Solved a bug that led to single character references not being identified + (solves [#14](https://github.com/dnv-opensource/dictIO/issues/14)). + + ## [0.3.0] - 2024-01-08 ### Changed @@ -221,7 +229,8 @@ The changelog format is based on [Keep a Changelog](https://keepachangelog.com/e * Added support for Python 3.10 -[unreleased]: https://github.com/dnv-opensource/dictIO/compare/v0.3.0...HEAD +[unreleased]: https://github.com/dnv-opensource/dictIO/compare/v0.3.1...HEAD +[0.3.1]: https://github.com/dnv-opensource/dictIO/compare/v0.3.0...v0.3.1 [0.3.0]: https://github.com/dnv-opensource/dictIO/compare/v0.2.9...v0.3.0 [0.2.9]: https://github.com/dnv-opensource/dictIO/compare/v0.2.8...v0.2.9 [0.2.8]: https://github.com/dnv-opensource/dictIO/compare/v0.2.7...v0.2.8 From ada1d4b79c3f7036fe2d21975ae78aa7d4d08e04 Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 13:22:14 +0100 Subject: [PATCH 09/10] README and LICENSE: updated '2023' to '2024' --- LICENSE | 2 +- README.md | 4 ++-- docs/source/conf.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/LICENSE b/LICENSE index c8d9fe4c..90b0cc37 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 [DNV](https://www.dnv.com) [open source](https://github.com/dnv-opensource) +Copyright (c) 2024 [DNV](https://www.dnv.com) [open source](https://github.com/dnv-opensource) Permission is hereby granted, free of 
charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 277a9264..a1879100 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ _For a detailed documentation of the dict file format used by dictIO, see [File ## Meta -Copyright (c) 2023 [DNV](https://www.dnv.com) [open source](https://github.com/dnv-opensource) +Copyright (c) 2024 [DNV](https://www.dnv.com) [open source](https://github.com/dnv-opensource) Frank Lumpitzsch – [@LinkedIn](https://www.linkedin.com/in/frank-lumpitzsch-23013196/) – frank.lumpitzsch@dnv.com @@ -151,4 +151,4 @@ For your contribution, please make sure you follow the [STYLEGUIDE](STYLEGUIDE.m [dictIO_docs]: https://dnv-opensource.github.io/dictIO/README.html [ospx_docs]: https://dnv-opensource.github.io/ospx/README.html -[farn_docs]: https://dnv-opensource.github.io/farn/README.html \ No newline at end of file +[farn_docs]: https://dnv-opensource.github.io/farn/README.html diff --git a/docs/source/conf.py b/docs/source/conf.py index bce900cf..82979058 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- project = "dictIO" -copyright = "2023, DNV. Frank Lumpitzsch, Claas Rostock, Seung Hyeon Yoo" +copyright = "2024, DNV. 
Frank Lumpitzsch, Claas Rostock, Seung Hyeon Yoo" author = "Frank Lumpitzsch, Claas Rostock, Seung Hyeon Yoo" # The full version, including alpha/beta/rc tags From d1a3a7450534b3e2dc48866bdaa77f6c332bb82d Mon Sep 17 00:00:00 2001 From: Claas Date: Tue, 9 Jan 2024 13:22:56 +0100 Subject: [PATCH 10/10] bumped version number to 0.3.1 --- docs/source/conf.py | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 82979058..64a135c8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = "Frank Lumpitzsch, Claas Rostock, Seung Hyeon Yoo" # The full version, including alpha/beta/rc tags -release = "0.3.0" +release = "0.3.1" # -- General configuration --------------------------------------------------- diff --git a/setup.cfg b/setup.cfg index 3a7aedec..6f0ccb65 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = dictIO -version = 0.3.0 +version = 0.3.1 summary = Read, write and manipulate dictionary text files. description = Python package to read, write and manipulate dictionary text files. Supports dictIOs dict file format, as well as JSON, XML and OpenFOAM. long_description = file: README.md