-
Notifications
You must be signed in to change notification settings - Fork 406
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(data-masking): add custom mask functionalities #5837
base: develop
Are you sure you want to change the base?
Changes from all commits
d9427f5
796bd89
d931917
4c0070c
ae81dce
7630b06
6e2ec35
93c1544
92d4740
d9535d6
9dc2b56
63c7918
3a10d50
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -3,6 +3,7 @@ | |||||
import functools | ||||||
import logging | ||||||
import warnings | ||||||
from copy import deepcopy | ||||||
from typing import TYPE_CHECKING, Any, Callable, Mapping, Sequence, overload | ||||||
|
||||||
from jsonpath_ng.ext import parse | ||||||
|
@@ -94,15 +95,53 @@ def erase(self, data: tuple, fields: list[str]) -> tuple[str]: ... | |||||
@overload | ||||||
def erase(self, data: dict, fields: list[str]) -> dict: ... | ||||||
|
||||||
def erase(self, data: Sequence | Mapping, fields: list[str] | None = None) -> str | list[str] | tuple[str] | dict: | ||||||
return self._apply_action(data=data, fields=fields, action=self.provider.erase) | ||||||
@overload | ||||||
def erase(self, data: dict[Any, Any], *, masking_rules: dict[str, object]) -> dict[Any, Any]: ... | ||||||
|
||||||
@overload | ||||||
def erase( | ||||||
self, | ||||||
data: dict, | ||||||
fields: list[str], | ||||||
dynamic_mask: bool | None = None, | ||||||
custom_mask: str | None = None, | ||||||
regex_pattern: str | None = None, | ||||||
mask_format: str | None = None, | ||||||
) -> dict: ... | ||||||
|
||||||
def erase( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we will need to remove all the overloads and keep only this function implementation. We now have several different use cases, and I don't know if we can cover all of them and still keep mypy happy. |
||||||
self, | ||||||
data: Sequence | Mapping, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
fields: list[str] | None = None, | ||||||
dynamic_mask: bool | None = None, | ||||||
custom_mask: str | None = None, | ||||||
regex_pattern: str | None = None, | ||||||
mask_format: str | None = None, | ||||||
masking_rules: dict | None = None, | ||||||
) -> str | list[str] | tuple[str] | dict: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
if masking_rules: | ||||||
return self._apply_masking_rules(data, masking_rules) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
else: | ||||||
return self._apply_action( | ||||||
data=data, | ||||||
fields=fields, | ||||||
action=self.provider.erase, | ||||||
dynamic_mask=dynamic_mask, | ||||||
custom_mask=custom_mask, | ||||||
regex_pattern=regex_pattern, | ||||||
mask_format=mask_format, | ||||||
) | ||||||
|
||||||
def _apply_action( | ||||||
self, | ||||||
data, | ||||||
fields: list[str] | None, | ||||||
action: Callable, | ||||||
provider_options: dict | None = None, | ||||||
dynamic_mask: bool | None = None, | ||||||
custom_mask: str | None = None, | ||||||
regex_pattern: str | None = None, | ||||||
mask_format: str | None = None, | ||||||
**encryption_context: str, | ||||||
): | ||||||
""" | ||||||
|
@@ -136,18 +175,34 @@ def _apply_action( | |||||
fields=fields, | ||||||
action=action, | ||||||
provider_options=provider_options, | ||||||
dynamic_mask=dynamic_mask, | ||||||
custom_mask=custom_mask, | ||||||
regex_pattern=regex_pattern, | ||||||
mask_format=mask_format, | ||||||
**encryption_context, | ||||||
) | ||||||
else: | ||||||
logger.debug(f"Running action {action.__name__} with the entire data") | ||||||
return action(data=data, provider_options=provider_options, **encryption_context) | ||||||
return action( | ||||||
data=data, | ||||||
provider_options=provider_options, | ||||||
dynamic_mask=dynamic_mask, | ||||||
custom_mask=custom_mask, | ||||||
regex_pattern=regex_pattern, | ||||||
mask_format=mask_format, | ||||||
**encryption_context, | ||||||
) | ||||||
|
||||||
def _apply_action_to_fields( | ||||||
self, | ||||||
data: dict | str, | ||||||
fields: list, | ||||||
action: Callable, | ||||||
provider_options: dict | None = None, | ||||||
dynamic_mask: bool | None = None, | ||||||
custom_mask: str | None = None, | ||||||
regex_pattern: str | None = None, | ||||||
mask_format: str | None = None, | ||||||
**encryption_context: str, | ||||||
) -> dict | str: | ||||||
""" | ||||||
|
@@ -194,6 +249,8 @@ def _apply_action_to_fields( | |||||
new_dict = {'a': {'b': {'c': '*****'}}, 'x': {'y': '*****'}} | ||||||
``` | ||||||
""" | ||||||
if not fields: | ||||||
raise ValueError("Fields parameter cannot be empty") | ||||||
|
||||||
data_parsed: dict = self._normalize_data_to_parse(fields, data) | ||||||
|
||||||
|
@@ -204,6 +261,10 @@ def _apply_action_to_fields( | |||||
self._call_action, | ||||||
action=action, | ||||||
provider_options=provider_options, | ||||||
dynamic_mask=dynamic_mask, | ||||||
custom_mask=custom_mask, | ||||||
regex_pattern=regex_pattern, | ||||||
mask_format=mask_format, | ||||||
**encryption_context, # type: ignore[arg-type] | ||||||
) | ||||||
|
||||||
|
@@ -225,12 +286,6 @@ def _apply_action_to_fields( | |||||
# For in-place updates, json_parse accepts a callback function | ||||||
# that receives 3 args: field_value, fields, field_name | ||||||
# We create a partial callback to pre-populate known provider options (action, provider opts, enc ctx) | ||||||
update_callback = functools.partial( | ||||||
self._call_action, | ||||||
action=action, | ||||||
provider_options=provider_options, | ||||||
**encryption_context, # type: ignore[arg-type] | ||||||
) | ||||||
|
||||||
json_parse.update( | ||||||
data_parsed, | ||||||
|
@@ -239,13 +294,66 @@ def _apply_action_to_fields( | |||||
|
||||||
return data_parsed | ||||||
|
||||||
def _apply_masking_rules(self, data: dict, masking_rules: dict) -> dict:
    """
    Mask a deep copy of ``data`` according to per-path masking rules.

    Each key of ``masking_rules`` is appended to ``$.`` and parsed as a JSONPath
    expression; each value is unpacked as keyword arguments to ``self.provider.erase``.
    The input ``data`` is never modified: all updates are written to a deep copy.

    Failures are reported through ``warnings.warn`` rather than raised, so one bad
    path or value does not stop the remaining rules from being applied.

    Args:
        data: The dictionary containing data to mask
        masking_rules: Dictionary mapping field names or path expressions to masking rules

    Returns:
        dict: The masked copy of the data dictionary
    """
    masked_data = deepcopy(data)

    for path, rule in masking_rules.items():
        try:
            found = parse(f"$.{path}").find(masked_data)

            if not found:
                warnings.warn(f"No matches found for path: {path}", stacklevel=2)
                continue

            for hit in found:
                try:
                    current_value = hit.value
                    # None values are left untouched; there is nothing to mask
                    if current_value is None:
                        continue
                    # The provider always receives the value as a string
                    replacement = self.provider.erase(str(current_value), **rule)
                    hit.full_path.update(masked_data, replacement)
                except Exception as exc:
                    # A failure on one match must not prevent masking the others
                    warnings.warn(f"Error masking value for path {path}: {exc!s}", stacklevel=2)
        except Exception as exc:
            # Covers parse/find errors and anything escaping the per-match handler
            warnings.warn(f"Error processing path {path}: {exc!s}", stacklevel=2)

    return masked_data
|
||||||
def _mask_nested_field(self, data: dict, field_path: str, mask_function: Callable[[Any], Any]) -> None:
    """
    Mask, in place, the value stored at a dot-separated path inside a nested dictionary.

    The path is split on ``.``; every segment except the last is used to walk down
    through nested dictionaries, and the last segment names the key whose value is
    replaced by ``mask_function(value)``.

    The call is a no-op when the path cannot be followed: a segment is missing, or
    any container along the way (including ``data`` itself) is not a dictionary.

    Args:
        data: The dictionary to update in place
        field_path: Dot-separated path to the field, e.g. ``"a.b.c"``
        mask_function: Callable that receives the current value and returns the masked value

    Returns:
        None: ``data`` is modified in place
    """
    *parent_keys, leaf_key = field_path.split(".")

    current: Any = data
    for key in parent_keys:
        # Check before descending so a non-dict root is handled like any other level
        if not isinstance(current, dict):
            return
        current = current.get(key, {})

    # The final container must be a dict too; otherwise `in` / item assignment
    # would either misbehave (str, list) or raise
    if isinstance(current, dict) and leaf_key in current:
        current[leaf_key] = mask_function(current[leaf_key])
|
||||||
@staticmethod | ||||||
def _call_action( | ||||||
field_value: Any, | ||||||
fields: dict[str, Any], | ||||||
field_name: str, | ||||||
action: Callable, | ||||||
provider_options: dict[str, Any] | None = None, | ||||||
dynamic_mask: bool | None = None, | ||||||
custom_mask: str | None = None, | ||||||
regex_pattern: str | None = None, | ||||||
mask_format: str | None = None, | ||||||
**encryption_context, | ||||||
) -> None: | ||||||
""" | ||||||
|
@@ -263,7 +371,15 @@ def _call_action( | |||||
Returns: | ||||||
- fields[field_name]: Returns the processed field value | ||||||
""" | ||||||
fields[field_name] = action(field_value, provider_options=provider_options, **encryption_context) | ||||||
fields[field_name] = action( | ||||||
field_value, | ||||||
provider_options=provider_options, | ||||||
dynamic_mask=dynamic_mask, | ||||||
custom_mask=custom_mask, | ||||||
regex_pattern=regex_pattern, | ||||||
mask_format=mask_format, | ||||||
**encryption_context, | ||||||
) | ||||||
return fields[field_name] | ||||||
|
||||||
def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if we need the arg `*` here. Can you try to remove this method signature and see if mypy complains?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried removing it, but that increases the number of mypy errors.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's keep this conversation open until we find a solution for this.