Skip to content

Commit

Permalink
formatter.py and parser.py : refactored and cleaned up
Browse files Browse the repository at this point in the history
  • Loading branch information
ClaasRostock committed Nov 10, 2024
1 parent 0505d32 commit 3299dd5
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 116 deletions.
178 changes: 108 additions & 70 deletions src/dictIO/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import io
import logging
import re
from abc import abstractmethod
from collections.abc import Mapping, MutableMapping, MutableSequence
from copy import deepcopy
from copy import copy, deepcopy
from re import Pattern
from typing import TYPE_CHECKING, cast
from typing import TYPE_CHECKING, TypeVar, cast, overload
from xml.dom import minidom
from xml.etree.ElementTree import Element, SubElement, register_namespace, tostring

Expand All @@ -27,6 +28,12 @@
"XmlFormatter",
]


_KT = TypeVar("_KT", bound=TKey)
_VT = TypeVar("_VT", bound=TValue)
_MT = TypeVar("_MT", bound=MutableMapping[_KT, _VT]) # type: ignore[valid-type, reportGeneralTypeIssues]
_ST = TypeVar("_ST", bound=MutableSequence[_VT]) # type: ignore[valid-type, reportGeneralTypeIssues]

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -72,9 +79,10 @@ def get_formatter(cls, target_file: Path | None = None) -> Formatter:
# 2. If no target file is passed, return CppFormatter as default / fallback
return CppFormatter() # default

@abstractmethod
def to_string(
self,
arg: MutableMapping[TKey, TValue] | SDict[TKey, TValue], # noqa: ARG002
arg: MutableMapping[TKey, TValue] | SDict[TKey, TValue],
) -> str:
"""Create a string representation of the passed in dict.
Expand All @@ -90,45 +98,26 @@ def to_string(
str
string representation of the dict
"""
return ""

def format_dict(
self,
arg: MutableMapping[TKey, TValue] | MutableSequence[TValue] | TValue, # noqa: ARG002
) -> str:
"""Format a dict or list object.
Note: Override this method when implementing a specific Formatter.
Parameters
----------
arg : Union[MutableMapping[TKey, TValue], MutableSequence[TValue], TValue]
the dict or list to be formatted
Returns
-------
str
the formatted string representation of the passed in dict or list
"""
return ""
...

def format_value(
self,
arg: TSingleValue,
) -> str:
arg: TSingleValue | TValue,
) -> str | TValue:
"""Format a single value.
Formats a single value of type TSingleValue = str | int | float | bool | None
Parameters
----------
arg : TSingleValue
arg : TSingleValue | TValue
the value to be formatted
Returns
-------
str
the formatted string representation of the passed in value
str | TValue
the formatted string representation of the passed in value,
if value is of a single value type. Otherwise the value itself.
"""
# sourcery skip: assign-if-exp, reintroduce-else

Expand All @@ -140,9 +129,73 @@ def format_value(
return self.format_bool(arg)
if isinstance(arg, int):
return self.format_int(arg)
if isinstance(arg, float): # pyright: ignore[reportUnnecessaryIsInstance]
if isinstance(arg, float):
return self.format_float(arg)
return str(arg)

# If arg is not of a single value type, return it as is.
return arg

@overload
def format_values(
self,
arg: _MT,
) -> _MT:
pass

@overload
def format_values(
self,
arg: _ST,
) -> _ST:
pass

def format_values(
self,
arg: _MT | _ST,
) -> _MT | _ST:
"""Format multiple values.
Formats all values inside a dict or list.
The function traverses the passed in dict or list recursively
so that all values in also nested dicts and lists get formatted.
A copy of the passed in dict or list is returned with all values formatted.
(The original dict or list is not modified.)
Parameters
----------
arg : Union[MutableMapping[TKey, TValue], MutableSequence[TValue]]
the dict or list containing the values to be formatted.
Returns
-------
MutableMapping[TKey, str] | MutableSequence[str]
a copy of the passed in dict or list, with all values formatted.
"""
item: TValue
_arg = copy(arg) # shallow copy is sufficient because this function is recursive
if isinstance(_arg, MutableSequence): # List
for index in range(len(_arg)):
item = _arg[index]
# ndarray -> list
if isinstance(item, ndarray):
item = cast(list[TValue], item.tolist())
if isinstance(item, MutableMapping | MutableSequence):
_arg[index] = self.format_values(cast(MutableMapping[TKey, TValue] | MutableSequence[TValue], item))
else:
_arg[index] = self.format_value(item)

else: # Dict
for key in list(_arg.keys()): # work on a copy of keys
item = _arg[key]
# ndarray -> list
if isinstance(item, ndarray):
item = cast(list[TValue], item.tolist())
if isinstance(item, MutableMapping | MutableSequence):
_arg[key] = self.format_values(cast(MutableMapping[TKey, TValue] | MutableSequence[TValue], item))
else:
_arg[key] = self.format_value(item)

return cast(_MT | _ST, _arg)

def format_key(
self,
Expand Down Expand Up @@ -423,14 +476,11 @@ def to_string(
str
string representation of the dict in dictIO native file format
"""
s: str = super().to_string(arg)

# Create a working copy of the passed in dict, to avoid modifying the original.
s_dict: SDict[TKey, TValue]
s_dict = deepcopy(arg) if isinstance(arg, SDict) else SDict(deepcopy(arg))
_arg = deepcopy(arg)

# Sort dict in a way that block comment and include statement come first
original_data = deepcopy(s_dict)
original_data = deepcopy(_arg)
sorted_data: dict[TKey, TValue] = {}
for key, element in original_data.items():
if type(key) is str and re.search(r"BLOCKCOMMENT\d{6}", key):
Expand All @@ -441,40 +491,28 @@ def to_string(
for key in sorted_data:
del original_data[key]
sorted_data |= original_data
s_dict.clear()
s_dict.update(sorted_data)

# Create the string representation of the dictionary in its basic structure.
s += self.format_dict(s_dict)

# The following elements a SDict's .data attribute
# are usually still substituted by placeholders:
# - Block comments
# - Include directives
# - Line comments
# Next step hence is to resolve and insert these three element types:
# 1. Block comments
s = self.insert_block_comments(s_dict, s)
# 2. Include directives
s = self.insert_includes(s_dict, s)
# 3. Line comments
s = self.insert_line_comments(s_dict, s)
_arg.clear()
_arg.update(sorted_data)

# Create the string representation of the dict in its basic structure.
s: str = self.format_dict(_arg)

if isinstance(_arg, SDict):
# The following elements in an SDict
# are usually still substituted by placeholders:
# - Block comments
# - Include directives
# - Line comments
# Next step hence is to resolve and insert these three element types:
# 1. Block comments
s = self.insert_block_comments(_arg, s)
# 2. Include directives
s = self.insert_includes(_arg, s)
# 3. Line comments
s = self.insert_line_comments(_arg, s)

# Remove trailing spaces (if any)
s = self.remove_trailing_spaces(s)
"""
# Log variables and includes for debugging purposes
# variables
variables = dict.variables
if variables:
log_message = self.format_dict({'_variables': variables})
logger.debug(log_message)
# includes (the paths of all included files)
includes = dict.includes
if includes:
log_message = self.format_dict({'_includes': [j for i, j in includes.values()]})
logger.debug(log_message)
"""

# Return formatted string
return s
Expand Down Expand Up @@ -543,6 +581,7 @@ def format_dict(
# single value
else:
value = self.format_value(item)
assert isinstance(value, str)
if first_item_on_this_line:
# The first item shall be indented by 1 relative to the (absolute) list level
item_level = level + 1
Expand Down Expand Up @@ -606,6 +645,7 @@ def format_dict(
# key value pair
else:
value = self.format_value(item)
assert isinstance(value, str)
skey: str = self.format_key(key)
s += self.format_dict(
f"{skey}{sep * max(8, (total_indent - len(skey) - tab_len * level))}{value};",
Expand Down Expand Up @@ -985,11 +1025,9 @@ def to_string(
"""
import json

# For the json dump, we need to distinguish between whether the passed in dict is of type dict or SDict.
d = arg
# Json dump
s = json.dumps(
d,
obj=arg,
skipkeys=True,
ensure_ascii=True,
check_circular=True,
Expand Down
46 changes: 23 additions & 23 deletions src/dictIO/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,29 +262,6 @@ def parse_value(
# Returned 'as is' they are kept unchanged, what is in fact what we want here.
return self.remove_quotes_from_string(arg)

def parse_key(
self,
arg: str,
) -> TKey:
"""Parse a single key.
Parses a single key and casts it to its native type (TKey = str | int).
Parameters
----------
arg : str
the value to be parsed
Returns
-------
TKey
the value casted to its native type (TKey = str | int)
"""
key: TSingleValue = self.parse_value(arg)
if not isinstance(key, TKey):
raise TypeError(f"Key '{key}' is not of type TKey. Found type: {type(key)}")
return key

def parse_values(self, arg: MutableMapping[TKey, TValue] | MutableSequence[TValue]) -> None:
"""Parse multiple values.
Expand Down Expand Up @@ -313,6 +290,29 @@ def parse_values(self, arg: MutableMapping[TKey, TValue] | MutableSequence[TValu
arg[index] = self.parse_value(arg[index])
return

def parse_key(
self,
arg: str,
) -> TKey:
"""Parse a single key.
Parses a single key and casts it to its native type (TKey = str | int).
Parameters
----------
arg : str
the value to be parsed
Returns
-------
TKey
the value casted to its native type (TKey = str | int)
"""
key: TSingleValue = self.parse_value(arg)
if not isinstance(key, TKey):
raise TypeError(f"Key '{key}' is not of type TKey. Found type: {type(key)}")
return key

@staticmethod
def remove_quotes_from_string(
arg: str,
Expand Down
23 changes: 0 additions & 23 deletions tests/test_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import pytest

from dictIO import CppFormatter, DictReader, FoamFormatter, SDict, XmlFormatter
from dictIO.formatter import Formatter

if TYPE_CHECKING:
from dictIO.types import TKey, TValue
Expand All @@ -18,28 +17,6 @@ class TestFormatter:
def test_returned_formatter_type(self) -> None:
pass

def test_to_string_does_not_alter_original(self) -> None:
# Prepare
dict_in = DictReader.read(Path("test_formatter_dict"))
formatter = Formatter()
dict_in_reference = dict_in
dict_in_shallowcopy = copy(dict_in)
# Execute
str_out: str = formatter.to_string(dict_in)
# Assert
assert isinstance(str_out, str)
# Make sure that the original dictionary is not modified
assert dict_in == dict_in_reference
assert dict_in is dict_in_reference
for key in dict_in:
assert dict_in[key] == dict_in_reference[key]
assert dict_in[key] is dict_in_reference[key]
assert dict_in == dict_in_shallowcopy
assert dict_in is not dict_in_shallowcopy
for key in dict_in:
assert dict_in[key] == dict_in_shallowcopy[key]
assert dict_in[key] is dict_in_shallowcopy[key]


class TestCppFormatter:
@pytest.mark.parametrize(
Expand Down

0 comments on commit 3299dd5

Please sign in to comment.