Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

👌 IMPROVE: Support for keeping abbreviations at enclosing values. #491

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 41 additions & 10 deletions bibtexparser/middlewares/enclosing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from typing import Tuple
from typing import Union

Expand Down Expand Up @@ -85,6 +86,7 @@ def __init__(
reuse_previous_enclosing: bool,
enclose_integers: bool,
default_enclosing: str,
keep_abbr_string: bool = False,
allow_inplace_modification: bool = True,
):
"""
Expand All @@ -95,6 +97,8 @@ def __init__(
(only if no previous enclosing was applied)
:param default_enclosing: The default enclosing character to use ('{', '"', or 'no-enclosing')
(only if no previous enclosing was applied, and - for ints - enclose_integers is False)
:param keep_abbr_string: Whether to keep unresolved abbreviations (e.g., 'IEEE_J_PAMI') without enclosing.
(only if no previous enclosing was applied)
:param allow_inplace_modification: Whether to allow inplace modification
(see BlockMiddleware docs).
"""
Expand All @@ -110,19 +114,31 @@ def __init__(
self._default_enclosing = default_enclosing
self._reuse_previous_enclosing = reuse_previous_enclosing
self._enclose_integers = enclose_integers
self._keep_abbr_string = keep_abbr_string

# docstr-coverage: inherited
# Returns the constant string "remove_enclosing".
# NOTE(review): presumably this is the `parser_metadata` key associated with
# this middleware - confirm against the base-class documentation.
@classmethod
def metadata_key(cls) -> str:
return "remove_enclosing"

def _enclose(self, value: str, metadata_enclosing: str, apply_int_rule: bool) -> str:
def _enclose(
self,
value: str,
metadata_enclosing: str,
apply_int_rule: bool,
replaced_abbr: bool,
) -> str:
enclosing = self._default_enclosing
if self._reuse_previous_enclosing and metadata_enclosing is not None:
enclosing = metadata_enclosing
elif apply_int_rule and not self._enclose_integers and value.isdigit():
return value
enclosing = "no-enclosing"
elif not replaced_abbr and self._keep_abbr_string:
if self._is_value_containing_abbr(value):
enclosing = "no-enclosing"
return self._enclose_value(value, enclosing)

def _enclose_value(self, value: str, enclosing: str) -> str:
if enclosing == "{":
return f"{{{value}}}"
if enclosing == '"':
Expand All @@ -133,18 +149,32 @@ def _enclose(self, value: str, metadata_enclosing: str, apply_int_rule: bool) ->
f"enclosing must be either '{{' or '\"' or 'no-enclosing', " f"not '{enclosing}'"
)

def _is_value_containing_abbr(self, value: str) -> bool:
is_invalid_abbr = False
for _s in value.split("#"):
_s = _s.strip()
# is not a valid string is enclosed in quotes,
if not (_s.startswith('"') and _s.endswith('"')):
# and is a invalid abbreviation starts with a letter and contains only letters, digits and underscores
if re.fullmatch(r"[A-Za-z][A-Za-z0-9_]*", _s) is None:
is_invalid_abbr = True
break
return not is_invalid_abbr

# docstr-coverage: inherited
def transform_entry(self, entry: Entry, *args, **kwargs) -> Entry:
field: Field
metadata_enclosing = entry.parser_metadata.pop(
RemoveEnclosingMiddleware.metadata_key(), None
)
metadata_enclosing = entry.parser_metadata.pop(RemoveEnclosingMiddleware.metadata_key(), {})
# NOTE: This is an ugly hack to check whether the string was resolved by the ResolveStringReferencesMiddleware.
# We can't import the class directly because of circular imports.
# Maybe we should add a shared module containing all metadata keys.
metadata_resolving: list = entry.parser_metadata.get("ResolveStringReferences", [])
for field in entry.fields:
apply_int_rule = field.key in ENTRY_POTENTIALLY_INT_FIELDS
prev_encoding = (
metadata_enclosing.get(field.key, None) if metadata_enclosing is not None else None
field.value = self._enclose(
field.value,
metadata_enclosing=metadata_enclosing.get(field.key, None),
apply_int_rule=field.key in ENTRY_POTENTIALLY_INT_FIELDS,
replaced_abbr=field.key in metadata_resolving,
)
field.value = self._enclose(field.value, prev_encoding, apply_int_rule=apply_int_rule)
return entry

# docstr-coverage: inherited
Expand All @@ -154,5 +184,6 @@ def transform_string(self, string: String, *args, **kwargs) -> String:
string.value,
string.parser_metadata.get(metadata_key),
apply_int_rule=STRINGS_CAN_BE_UNESCAPED_INTS,
replaced_abbr=False,
)
return string
94 changes: 94 additions & 0 deletions tests/middleware_tests/test_enclosing.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,100 @@ def _figure_out_added_enclosing(changed_value, value):
return used_enclosing


@pytest.mark.parametrize("metadata_resolving", ["", "journal"])
@pytest.mark.parametrize("metadata_enclosing", ["{", '"', "no-enclosing", None])
@pytest.mark.parametrize("default_enclosing", ["{", '"'])
@pytest.mark.parametrize("enclose_ints", [True, False], ids=["enclose_ints", "no_enclose_ints"])
@pytest.mark.parametrize(
"keep_abbr_string", [True, False], ids=["keep_abbr_string", "no_keep_abbr_string"]
)
@pytest.mark.parametrize("reuse_previous_enclosing", [True, False], ids=["reuse", "no_reuse"])
@pytest.mark.parametrize(
"value",
[
# value, is an abbreviation?
("IEEE_T_PAMI", True),
('IEEE_T_PAMI # "ieee tpami"', True),
('IEEE_T_PAMI" # ieee tpami', False),
('IEEE_T-PAMI # "ieee tpami"', False),
('IEEE_T-PAMI # "ieee # tpami"', False),
('IEEE T-PAMI # "ieee tpami"', False),
],
)
@pytest.mark.parametrize("inplace", [True, False], ids=["inplace", "not_inplace"])
def test_addition_of_enclosing_on_entry_with_abbr(
value: tuple,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
value: tuple,
value: tuple[str,bool],

metadata_resolving: str,
keep_abbr_string: bool,
metadata_enclosing: str,
default_enclosing: str,
enclose_ints: bool,
reuse_previous_enclosing: bool,
inplace: bool,
):
"""Extensive Matrix-Testing of the AddEnclosingMiddleware on Entries.

Also covers the internals for other block types (i.e., String),
which thus can be tested more light-weight."""
# These values do not matter for this unit test,
# but must not change during transformation
# (hence, they are created as variables, not directly in the Entry constructor)
value, is_abbr = value
input_entry = Entry(
start_line=5,
entry_type="article",
raw="<--- does not matter for this unit test -->",
key="someKey",
fields=[Field(value=value, start_line=6, key="journal")],
)

if metadata_resolving:
input_entry.parser_metadata["ResolveStringReferences"] = [metadata_resolving]
if metadata_enclosing is not None:
input_entry.parser_metadata["removed_enclosing"] = {"journal": metadata_enclosing}

middleware = AddEnclosingMiddleware(
allow_inplace_modification=inplace,
default_enclosing=default_enclosing,
reuse_previous_enclosing=reuse_previous_enclosing,
enclose_integers=enclose_ints,
keep_abbr_string=keep_abbr_string,
)

transformed_library = middleware.transform(library=Library([input_entry]))

# Assert correct library state
assert len(transformed_library.blocks) == 1
assert len(transformed_library.entries) == 1
# Assert correct addition of enclosing
transformed = transformed_library.entries[0]
changed_value = transformed["journal"]

# Assert correct enclosing was added
if reuse_previous_enclosing and metadata_enclosing is not None:
expected_enclosing = metadata_enclosing
elif (isinstance(value, int) or value.isdigit()) and not enclose_ints:
expected_enclosing = "no-enclosing"
elif not metadata_resolving and keep_abbr_string:
if is_abbr:
expected_enclosing = "no-enclosing"
else:
expected_enclosing = default_enclosing
else:
expected_enclosing = default_enclosing

if expected_enclosing == "no-enclosing":
_skip_pseudo_enclosing_value(value)

assert changed_value == middleware._enclose_value(value, expected_enclosing)

# Assert remaining fields are unchanged
assert_nonfield_entry_attributes_unchanged(input_entry, transformed)

# Assert `allow_inplace_modification` is respected
assert_inplace_is_respected(inplace, input_entry, transformed)


@pytest.mark.parametrize("metadata_enclosing", ["{", '"', None])
@pytest.mark.parametrize("default_enclosing", ["{", '"'])
@pytest.mark.parametrize("enclose_ints", [True, False], ids=["enclose_ints", "no_enclose_ints"])
Expand Down
Loading