diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a3ca31..001aeba 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,8 +34,18 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox "coverage<5" + pip install --use-pep517 tox "coverage<5" - name: Run tests run: | export TOXENV=$(echo "py${{ matrix.python-version }}" | sed 's/\.//g') tox -- -p no:warnings + - name: Generate coverage report + run: coverage html + if: ${{ success() }} + - name: Upload coverage data + uses: actions/upload-artifact@v3 + with: + name: coverage + path: htmlcov + if-no-files-found: ignore + if: ${{ success() }} diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 057454f..7f73dc5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,11 @@ Changelog Development ----------- +0.11.0 (2024-02-29) +------------------- + +* `#52 `_: Add update_json provider (`bobslee `_) + 0.10.0 (2022-11-29) ------------------- diff --git a/docs/schema.rst b/docs/schema.rst index b8a99b3..866a44e 100644 --- a/docs/schema.rst +++ b/docs/schema.rst @@ -423,6 +423,37 @@ This provider will replace values with a unique UUID4. provider: name: uuid4 +``update_json`` +~~~~~~~~~~~~~~~ + +**Arguments:** + +* ``update_values_type`` + +This provider will replace json and jsonb data values with a specified provider configuration per data type. + + +**Example usage**: + +.. code-block:: yaml + + tables: + - payment_transaction: + fields: + - data: + provider: + name: update_json + update_values_type: + str: + provider: + name: uuid4 + int: + provider: + name: fake.pyint + float: + provider: + name: fake.pyfloat + .. _Faker: https://github.com/joke2k/faker .. _Faker documentation: http://faker.rtfd.org/ .. 
@register('update_json')
class UpdateJSONProvider(Provider):
    """Provider that replaces the values inside JSON data, chosen by value type.

    The ``update_values_type`` argument maps a Python type name (the result
    of ``type(value).__name__``, e.g. ``str``, ``int``, ``float``) to a
    mapping holding a nested ``provider`` configuration. Every value whose
    type name has such a configuration is replaced by the result of that
    provider; values without a matching configuration are left untouched.
    """

    @classmethod
    def alter_value(cls, original_value, **kwargs):
        """Return ``original_value`` with its values replaced per type.

        Dictionaries are walked recursively and updated in place. Lists are
        walked recursively as well, unless a provider is configured for the
        ``list`` type name, in which case the whole list is handed to that
        provider. A value that is not a container (including ``None`` and a
        scalar top-level document) is replaced only if its own type name has
        a provider configured.

        :param original_value: The JSON data to alter (dict, list or scalar)
        :param kwargs: Provider arguments; ``update_values_type`` holds the
            per-type provider configurations
        :return: The altered data. For dict and list input this is the same
            object that was passed in.
        """
        # ``or {}`` also covers an explicit ``update_values_type: null``.
        rules = kwargs.get('update_values_type') or {}
        if not rules:
            return original_value

        def replace(value):
            """Return the replacement for ``value``, recursing into containers."""
            if isinstance(value, dict):
                # Only existing keys are reassigned, so the dict size does
                # not change while iterating.
                for key, item in value.items():
                    value[key] = replace(item)
                return value

            rule = rules.get(type(value).__name__)
            provider_config = rule.get('provider') if rule else None
            if provider_config:
                provider_class = provider_registry.get_provider(provider_config['name'])
                return provider_class.alter_value(value, **provider_config)

            if isinstance(value, list):
                # No explicit ``list`` rule: anonymize the elements instead,
                # otherwise nested data inside arrays would be skipped.
                value[:] = [replace(item) for item in value]
            return value

        return replace(original_value)
class TestUpdateJSONProvider(object):
    """Tests for the ``update_json`` provider.

    Each call gets a freshly built input dict so that no assertion depends
    on a previous call having modified the input.
    """

    @pytest.mark.parametrize('value, update_values_type, expected', [
        ({'foo': 'bar'}, {'str': {'provider': {'name': 'set', 'value': 'foobar'}}}, {'foo': 'foobar'}),
    ])
    def test_str_type(self, value, update_values_type, expected):
        res = providers.UpdateJSONProvider.alter_value(value, update_values_type=update_values_type)
        # Compare the whole result so a missing or falsy key cannot pass unnoticed.
        assert res == expected

    def test_str_type_uuid4(self):
        update_values_type = {'str': {'provider': {'name': 'uuid4'}}}
        res = providers.UpdateJSONProvider.alter_value({'chef': 'cuisine'}, update_values_type=update_values_type)
        assert isinstance(res['chef'], uuid.UUID)

    def test_int_type(self):
        update_values_type = {'int': {'provider': {'name': 'set', 'value': 999}}}
        res = providers.UpdateJSONProvider.alter_value({'foo': 123}, update_values_type=update_values_type)
        assert res['foo'] == 999

        update_values_type = {'int': {'provider': {'name': 'clear'}}}
        res = providers.UpdateJSONProvider.alter_value({'foo': 123}, update_values_type=update_values_type)
        assert res['foo'] is None

    def test_float_type(self):
        update_values_type = {'float': {'provider': {'name': 'set', 'value': 999.99}}}
        res = providers.UpdateJSONProvider.alter_value(
            {'foo': 123.45, 'bar': 234.77}, update_values_type=update_values_type)
        assert res['foo'] == 999.99
        assert res['bar'] == 999.99

        update_values_type = {'float': {'provider': {'name': 'clear'}}}
        res = providers.UpdateJSONProvider.alter_value(
            {'foo': 123.45, 'bar': 234.77}, update_values_type=update_values_type)
        assert res['foo'] is None
        assert res['bar'] is None

    def test_multi_types(self):
        test_dict = {
            'fooInt': 123,
            'fooFloat': 123.45,
            'fooStr': 'some foo',
            'barStr': 'some bar',
        }
        update_values_type = {
            'int': {'provider': {'name': 'set', 'value': 999}},
            'float': {'provider': {'name': 'set', 'value': 999.99}},
            'str': {'provider': {'name': 'set', 'value': 'foobar'}},
        }
        res = providers.UpdateJSONProvider.alter_value(test_dict, update_values_type=update_values_type)
        assert res['fooInt'] == 999
        assert res['fooFloat'] == 999.99
        assert res['fooStr'] == 'foobar'
        assert res['barStr'] == 'foobar'

    def test_type_not_specified(self):
        test_dict = {
            'fooInt': 123,
            'fooFloat': 123.45,
            'fooStr': 'some foo',
            'barStr': 'some bar',
        }
        update_values_type = {
            'int': {'provider': {'name': 'set', 'value': 999}},
        }
        res = providers.UpdateJSONProvider.alter_value(test_dict, update_values_type=update_values_type)
        assert res['fooInt'] == 999
        # Types without a configured provider must be left untouched.
        assert res['fooFloat'] == 123.45
        assert res['fooStr'] == 'some foo'
        assert res['barStr'] == 'some bar'

    def test_nested_json(self):
        test_dict = {
            'fooInt': 123,
            'fooFloat': 123.45,
            'fooStr': 'some foo',
            'barStr': 'some bar',
            'nested': {
                'fooInt': 444,
                'fooFloat': 50.9,
                'fooStr': 'abc',
                'anotherNested': {
                    'fooInt': 555,
                    'fooFloat': 6000.123,
                    'fooStr': 'xyz',
                },
            },
        }
        update_values_type = {
            'int': {'provider': {'name': 'set', 'value': 999}},
            'float': {'provider': {'name': 'set', 'value': 999.99}},
            'str': {'provider': {'name': 'set', 'value': 'foobar'}},
        }
        res = providers.UpdateJSONProvider.alter_value(test_dict, update_values_type=update_values_type)
        assert res['fooInt'] == 999
        assert res['fooFloat'] == 999.99
        assert res['fooStr'] == 'foobar'
        assert res['barStr'] == 'foobar'
        assert res['nested']['fooInt'] == 999
        assert res['nested']['fooFloat'] == 999.99
        assert res['nested']['fooStr'] == 'foobar'
        assert res['nested']['anotherNested']['fooInt'] == 999
        assert res['nested']['anotherNested']['fooFloat'] == 999.99
        assert res['nested']['anotherNested']['fooStr'] == 'foobar'