Skip to content

Commit

Permalink
Merge branch 'development'
Browse files Browse the repository at this point in the history
  • Loading branch information
hkage committed Feb 29, 2024
2 parents a58acc5 + 66d4133 commit 5fa9230
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 4 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,18 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tox "coverage<5"
pip install --use-pep517 tox "coverage<5"
- name: Run tests
run: |
export TOXENV=$(echo "py${{ matrix.python-version }}" | sed 's/\.//g')
tox -- -p no:warnings
- name: Generate coverage report
run: coverage html
if: ${{ success() }}
- name: Upload coverage data
uses: actions/upload-artifact@v3
with:
name: coverage
path: htmlcov
if-no-files-found: ignore
if: ${{ success() }}
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ Changelog
Development
-----------

0.11.0 (2024-02-29)
-------------------

* `#52 <https://github.com/rheinwerk-verlag/pganonymize/pull/52>`_: Add update_json provider (`bobslee <https://github.com/bobslee>`_)

0.10.0 (2022-11-29)
-------------------

Expand Down
31 changes: 31 additions & 0 deletions docs/schema.rst
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,37 @@ This provider will replace values with a unique UUID4.
provider:
name: uuid4
``update_json``
~~~~~~~~~~~~~~~

**Arguments:**

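* ``update_values_type``: Mapping of a Python type name (e.g. ``str``, ``int``, ``float``) to the provider configuration that is applied to every value of that type.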

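This provider replaces the values stored inside ``json`` and ``jsonb`` columns. Each value is looked up by its data type, and the provider configured for that type generates the replacement; values whose type has no configuration are left unchanged. Nested objects are processed recursively.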


**Example usage**:

.. code-block:: yaml
tables:
- payment_transaction:
fields:
- data:
provider:
name: update_json
update_values_type:
str:
provider:
name: uuid4
int:
provider:
name: fake.pyint
float:
provider:
name: fake.pyfloat
.. _Faker: https://github.com/joke2k/faker
.. _Faker documentation: http://faker.rtfd.org/
.. _native UUIDs: https://www.postgresql.org/docs/current/datatype-uuid.html
26 changes: 26 additions & 0 deletions pganonymize/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,29 @@ class UUID4Provider(Provider):
@classmethod
def alter_value(cls, original_value, **kwargs):
return uuid4()


@register('update_json')
class UpdateJSONProvider(Provider):
    """Provider to update JSON data (currently values) by providers.

    The provider walks a decoded JSON document and replaces each value with
    the result of another registered provider, selected by the Python type
    name of the value (``type(value).__name__``, e.g. ``str``, ``int``,
    ``float``). Keys are never altered.
    """

    @classmethod
    def alter_value(cls, original_value, **kwargs):
        """Replace the values inside ``original_value`` according to their type.

        :param original_value: The decoded JSON value of the column. Objects
            (``dict``) and arrays (``list``) are updated in place.
        :param kwargs: Provider arguments. ``update_values_type`` maps a type
            name to a dict of the form ``{'provider': {'name': ..., ...}}``;
            the inner dict is passed as keyword arguments to the selected
            provider's ``alter_value``.
        :return: The updated value. For objects and arrays this is the same
            instance that was passed in.
        """
        update_values_type = kwargs.get('update_values_type')
        # Nothing to do without a type mapping; ``None`` is returned untouched
        # so that it is not dereferenced like a dict.
        if original_value is None or not update_values_type:
            return original_value
        return cls._alter_node(original_value, update_values_type)

    @classmethod
    def _alter_node(cls, node, update_values_type):
        """Return the replacement for ``node``, recursing into containers."""
        if isinstance(node, dict):
            # Re-assigning existing keys while iterating is safe because the
            # size of the dictionary never changes.
            for key, val in node.items():
                node[key] = cls._alter_node(val, update_values_type)
            return node

        val_update = update_values_type.get(type(node).__name__)
        provider_config = val_update.get('provider') if val_update else None
        if provider_config:
            provider_class = provider_registry.get_provider(provider_config['name'])
            return provider_class.alter_value(node, **provider_config)

        # A list without its own ``list`` provider is traversed so that values
        # inside JSON arrays are not silently skipped.
        if isinstance(node, list):
            for index, item in enumerate(node):
                node[index] = cls._alter_node(item, update_values_type)
        return node
2 changes: 1 addition & 1 deletion pganonymize/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def escape_str_replace(value):
:return: Escaped value
"""
if isinstance(value, dict):
return json.dumps(value).encode()
return json.dumps(value, default=str).encode()
return value


Expand Down
2 changes: 1 addition & 1 deletion pganonymize/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- coding: utf-8 -*-

__version__ = '0.10.0'
__version__ = '0.11.0'
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[pytest]
addopts = --cov=pganonymize --cov-report term-missing --cov-config setup.cfg
addopts = --cov=pganonymize --cov-append --cov-report term-missing --cov-config setup.cfg
testpaths = tests pganonymize
python_paths = pganonymize
105 changes: 105 additions & 0 deletions tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,108 @@ class TestUUID4Provider(object):
@pytest.mark.parametrize('value, expected', [(None, uuid.UUID), ('Foo', uuid.UUID)])
def test_alter_value(self, value, expected):
assert type(providers.UUID4Provider.alter_value(value)) == expected


class TestUpdateJSONProvider(object):
    """Tests for the ``update_json`` provider."""

    @pytest.mark.parametrize('value, update_values_type, expected', [
        ({'foo': 'bar'}, {'str': {'provider': {'name': 'set', 'value': 'foobar'}}}, {'foo': 'foobar'}),
        ({'chef': 'cuisine'}, {'str': {'provider': {'name': 'uuid4'}}}, {'chef': uuid.UUID}),
    ])
    def test_str_type(self, value, update_values_type, expected):
        res = providers.UpdateJSONProvider.alter_value(value, update_values_type=update_values_type)
        # Check every expected key unconditionally, so that a missing or falsy
        # result fails the test instead of skipping the assertion.
        assert set(res) == set(expected)
        for key, want in expected.items():
            if isinstance(want, type):
                # Generated values (e.g. UUIDs) can only be checked by type.
                assert type(res[key]) is want
            else:
                assert res[key] == want

    def test_int_type(self):
        update_values_type = {'int': {'provider': {'name': 'set', 'value': 999}}}
        res = providers.UpdateJSONProvider.alter_value({'foo': 123}, update_values_type=update_values_type)
        assert res['foo'] == 999

        # A fresh input is used so this case does not depend on the previous
        # call having modified the dictionary.
        update_values_type = {'int': {'provider': {'name': 'clear'}}}
        res = providers.UpdateJSONProvider.alter_value({'foo': 123}, update_values_type=update_values_type)
        assert res['foo'] is None

    def test_float_type(self):
        update_values_type = {'float': {'provider': {'name': 'set', 'value': 999.99}}}
        res = providers.UpdateJSONProvider.alter_value(
            {'foo': 123.45, 'bar': 234.77}, update_values_type=update_values_type
        )
        assert res['foo'] == 999.99
        assert res['bar'] == 999.99

        update_values_type = {'float': {'provider': {'name': 'clear'}}}
        res = providers.UpdateJSONProvider.alter_value(
            {'foo': 123.45, 'bar': 234.77}, update_values_type=update_values_type
        )
        assert res['foo'] is None
        assert res['bar'] is None

    def test_multi_types(self):
        test_dict = {
            'fooInt': 123,
            'fooFloat': 123.45,
            'fooStr': 'some foo',
            'barStr': 'some bar',
        }
        update_values_type = {
            'int': {'provider': {'name': 'set', 'value': 999}},
            'float': {'provider': {'name': 'set', 'value': 999.99}},
            'str': {'provider': {'name': 'set', 'value': 'foobar'}},
        }
        res = providers.UpdateJSONProvider.alter_value(test_dict, update_values_type=update_values_type)
        assert res['fooInt'] == 999
        assert res['fooFloat'] == 999.99
        assert res['fooStr'] == 'foobar'
        assert res['barStr'] == 'foobar'

    def test_type_not_specified(self):
        test_dict = {
            'fooInt': 123,
            'fooFloat': 123.45,
            'fooStr': 'some foo',
            'barStr': 'some bar',
        }
        update_values_type = {
            'int': {'provider': {'name': 'set', 'value': 999}},
        }
        res = providers.UpdateJSONProvider.alter_value(test_dict, update_values_type=update_values_type)
        assert res['fooInt'] == 999
        # Types without a configured provider must stay untouched.
        assert res['fooFloat'] == 123.45
        assert res['fooStr'] == 'some foo'
        assert res['barStr'] == 'some bar'

    def test_nested_json(self):
        test_dict = {
            'fooInt': 123,
            'fooFloat': 123.45,
            'fooStr': 'some foo',
            'barStr': 'some bar',
            'nested': {
                'fooInt': 444,
                'fooFloat': 50.9,
                'fooStr': 'abc',
                'anotherNested': {
                    'fooInt': 555,
                    'fooFloat': 6000.123,
                    'fooStr': 'xyz',
                },
            },
        }
        update_values_type = {
            'int': {'provider': {'name': 'set', 'value': 999}},
            'float': {'provider': {'name': 'set', 'value': 999.99}},
            'str': {'provider': {'name': 'set', 'value': 'foobar'}},
        }
        res = providers.UpdateJSONProvider.alter_value(test_dict, update_values_type=update_values_type)
        assert res['fooInt'] == 999
        assert res['fooFloat'] == 999.99
        assert res['fooStr'] == 'foobar'
        assert res['barStr'] == 'foobar'
        assert res['nested']['fooInt'] == 999
        assert res['nested']['fooFloat'] == 999.99
        assert res['nested']['fooStr'] == 'foobar'
        assert res['nested']['anotherNested']['fooInt'] == 999
        assert res['nested']['anotherNested']['fooFloat'] == 999.99
        assert res['nested']['anotherNested']['fooStr'] == 'foobar'

0 comments on commit 5fa9230

Please sign in to comment.