diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..97c5cba --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,26 @@ +name: Release + +on: + release: + types: [created] + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.isort.cfg b/.isort.cfg index d3ff88a..5835cbb 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -3,6 +3,6 @@ line_length=120 multi_line_output=0 sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,PGA,LOCALFOLDER default_section=THIRDPARTY -known_pga=pganonymizer +known_pga=pganonymize no_lines_before=LOCALFOLDER diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f91dd0..365ac84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,18 @@ ## Development +## 0.8.0 (2022-03-15) + +* [#39](https://github.com/rheinwerk-verlag/pganonymize/issues/39): Renamed project to "pganonymize" +* [#38](https://github.com/rheinwerk-verlag/pganonymize/pull/38): Allow environment variables in schema definition ([nurikk](https://github.com/nurikk)) + ## 0.7.0 (2021-11-30) -* [#34](https://github.com/rheinwerk-verlag/postgresql-anonymizer/issues/34): Subprocess "run" being used on Python2.7 -* [#35](https://github.com/rheinwerk-verlag/postgresql-anonymizer/issues/35): parmap no longer supports Python 2.7 +* [#34](https://github.com/rheinwerk-verlag/pganonymize/issues/34): Subprocess "run" being used on Python2.7 +* [#35](https://github.com/rheinwerk-verlag/pganonymize/issues/35): parmap no longer supports Python 2.7 * Dropped Python 3.5 support * Pinned libraries Python 2.7 -* 
[#32](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/32): Fixed pg_dump arguments ([korsar182](https://github.com/korsar182)) +* [#32](https://github.com/rheinwerk-verlag/pganonymize/pull/32): Fixed pg_dump arguments ([korsar182](https://github.com/korsar182)) * Simplified provider registration (no metaclass usage anymore) ## 0.6.1 (2021-07-13) @@ -17,35 +22,35 @@ ## 0.6.0 (2021-07-13) -* [#28](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/25): Add json support ([nurikk](https://github.com/nurikk)) -* [#27](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/25): Better anonymisation ([nurikk](https://github.com/nurikk)) -* [#25](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/25): Remove column specification for `cursor.copy_from` call ([nurikk](https://github.com/nurikk)) +* [#28](https://github.com/rheinwerk-verlag/pganonymize/pull/28): Add json support ([nurikk](https://github.com/nurikk)) +* [#27](https://github.com/rheinwerk-verlag/pganonymize/pull/27): Better anonymisation ([nurikk](https://github.com/nurikk)) +* [#25](https://github.com/rheinwerk-verlag/pganonymize/pull/25): Remove column specification for `cursor.copy_from` call ([nurikk](https://github.com/nurikk)) ## 0.5.0 (2021-06-30) -* [#22](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/22): Fix table and column name quotes in `cursor.copy_from` call ([nurikk](https://github.com/nurikk)) -* [#23](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/23): Allow uniq faker ([nurikk](https://github.com/nurikk)) +* [#22](https://github.com/rheinwerk-verlag/pganonymize/pull/22): Fix table and column name quotes in `cursor.copy_from` call ([nurikk](https://github.com/nurikk)) +* [#23](https://github.com/rheinwerk-verlag/pganonymize/pull/23): Allow uniq faker ([nurikk](https://github.com/nurikk)) ## 0.4.1 (2021-05-27) -* [#19](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/19): Make chunk size in the table
definition dynamic ([halilkaya](https://github.com/halilkaya)) +* [#19](https://github.com/rheinwerk-verlag/pganonymize/pull/19): Make chunk size in the table definition dynamic ([halilkaya](https://github.com/halilkaya)) ## 0.4.0 (2021-05-05) -* [#18](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/18): Specify (SQL WHERE) search_condition, to filter the table for rows to be anonymized ([bobslee](https://github.com/bobslee)) -* [#17](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/17): Fix anonymizing error if there is a JSONB column in a table ([koptelovav](https://github.com/koptelovav)) +* [#18](https://github.com/rheinwerk-verlag/pganonymize/pull/18): Specify (SQL WHERE) search_condition, to filter the table for rows to be anonymized ([bobslee](https://github.com/bobslee)) +* [#17](https://github.com/rheinwerk-verlag/pganonymize/pull/17): Fix anonymizing error if there is a JSONB column in a table ([koptelovav](https://github.com/koptelovav)) ## 0.3.3 (2021-04-16) -* [#16](https://github.com/rheinwerk-verlag/postgresql-anonymizer/issues/16): Preserve column and table cases during the copy process +* [#16](https://github.com/rheinwerk-verlag/pganonymize/issues/16): Preserve column and table cases during the copy process ## 0.3.2 (2021-01-25) -* [#15](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/15): Fix for exclude bug ([abhinavvaidya90](https://github.com/abhinavvaidya90)) +* [#15](https://github.com/rheinwerk-verlag/pganonymize/pull/15): Fix for exclude bug ([abhinavvaidya90](https://github.com/abhinavvaidya90)) ## 0.3.1 (2020-12-04) -* [#13](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/13): Fixed a syntax error if no truncated tables are defined ([ray-man](https://github.com/ray-man)) +* [#13](https://github.com/rheinwerk-verlag/pganonymize/pull/13): Fixed a syntax error if no truncated tables are defined ([ray-man](https://github.com/ray-man)) ## 0.3.0 (2020-02-11) diff --git a/MANIFEST.in 
b/MANIFEST.in index 5f4027b..db2a0b2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,4 +2,4 @@ include LICENSE.rst include README.rst include CHANGELOG.md -recursive-include pganonymizer *.html *.js *.css *.png *.gif*.jpg *.jpeg *.svg *.po \ No newline at end of file +recursive-include pganonymize *.html *.js *.css *.png *.gif*.jpg *.jpeg *.svg *.po diff --git a/Makefile b/Makefile index ea43309..ac94085 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ BROWSER ?= xdg-open -PYTHON_PACKAGE = pganonymizer +PYTHON_PACKAGE = pganonymize TESTS_PACKAGE = tests .PHONY: clean clean-test clean-pyc clean-build docs help @@ -61,4 +61,4 @@ test: @poetry run pytest --cov=poetry --cov-config .coveragerc tests/ -sq test-all: ## run tests on every Python version with tox - @tox \ No newline at end of file + @tox diff --git a/README.rst b/README.rst index e3adbad..91bc1ee 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,10 @@ -PostgreSQL Anonymizer -===================== +pganonymize +=========== A commandline tool to anonymize PostgreSQL databases for DSGVO/GDPR purposes. -It uses a YAML file to define which tables and fields should be anonymized and provides various methods of anonymization. The tool requires a direct PostgreSQL connection to perform the anonymization. +It uses a YAML file to define which tables and fields should be anonymized and provides various methods of +anonymization. The tool requires a direct PostgreSQL connection to perform the anonymization. .. class:: no-web no-pdf @@ -136,7 +137,7 @@ If you want to run the anonymizer within a Docker container you first have to bu .. code-block:: sh - $ docker build -t pganonymizer . + $ docker build -t pganonymize . 
After that you can pass a schema file to the container, using Docker volumes, and call the anonymizer: @@ -144,7 +145,7 @@ After that you can pass a schema file to the container, using Docker volumes, an $ docker run \ -v :/schema.yml \ - -it pganonymizer \ + -it pganonymize \ /usr/local/bin/pganonymize \ --schema=/schema.yml \ --dbname= \ @@ -157,13 +158,13 @@ After that you can pass a schema file to the container, using Docker volumes, an .. _uuid4: https://www.postgresql.org/docs/current/datatype-uuid.html .. _documentation: https://python-postgresql-anonymizer.readthedocs.io/en/latest/ .. _schema documentation: https://python-postgresql-anonymizer.readthedocs.io/en/latest/schema.html -.. _YAML sample schema: https://github.com/rheinwerk-verlag/postgresql-anonymizer/blob/master/sample_schema.yml +.. _YAML sample schema: https://github.com/rheinwerk-verlag/pganonymize/blob/master/sample_schema.yml .. |python| image:: https://img.shields.io/pypi/pyversions/pganonymize :alt: PyPI - Python Version .. |license| image:: https://img.shields.io/badge/license-MIT-green.svg - :target: https://github.com/rheinwerk-verlag/postgresql-anonymizer/blob/master/LICENSE.rst + :target: https://github.com/rheinwerk-verlag/pganonymize/blob/master/LICENSE.rst .. |pypi| image:: https://badge.fury.io/py/pganonymize.svg :target: https://badge.fury.io/py/pganonymize @@ -173,7 +174,7 @@ After that you can pass a schema file to the container, using Docker volumes, an :alt: Download count .. |build| image:: https://github.com/rheinwerk-verlag/postgresql-anonymizer/workflows/Test/badge.svg - :target: https://github.com/rheinwerk-verlag/postgresql-anonymizer/actions + :target: https://github.com/rheinwerk-verlag/pganonymize/actions .. 
|health| image:: https://snyk.io/advisor/python/pganonymize/badge.svg :target: https://snyk.io/advisor/python/pganonymize diff --git a/docs/Makefile b/docs/Makefile index b6f4929..9cb0448 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -85,17 +85,17 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pganonymizer.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pganonymize.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pganonymizer.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pganonymize.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/pganonymizer" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pganonymizer" + @echo "# mkdir -p $$HOME/.local/share/devhelp/pganonymize" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pganonymize" @echo "# devhelp" epub: diff --git a/docs/api.rst b/docs/api.rst index 47b33ee..4ac86b4 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -4,4 +4,4 @@ API .. toctree:: :maxdepth: 4 - pganonymizer + pganonymize diff --git a/docs/conf.py b/docs/conf.py index 3ba8a7c..4f535cd 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# pganonymizer documentation build configuration file +# pganonymize documentation build configuration file # # This file is execfile()d with the current directory set to its # containing dir. @@ -33,7 +33,7 @@ # exec version.py instead of importing it. Importing may trigger unwanted # side-effects (if autodoc is used, the pypackage may be imported anyway). 
meta = {} -exec(open(os.path.join(project_root, 'pganonymizer', 'version.py')).read(), {}, meta) +exec(open(os.path.join(project_root, 'pganonymize', 'version.py')).read(), {}, meta) # -- General configuration --------------------------------------------- @@ -61,7 +61,7 @@ master_doc = 'index' # General information about the project. -project = u'PostgreSQL Anonymizer' +project = u'pganonymize' copyright = u'2019, Rheinwerk Verlag GmbH, Henning Kage' # The version info for the project you're documenting, acts as replacement @@ -194,7 +194,7 @@ #html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'pganonymizerdoc' +htmlhelp_basename = 'pganonymizedoc' # -- Options for LaTeX output ------------------------------------------ @@ -214,8 +214,8 @@ # (source start file, target name, title, author, documentclass # [howto/manual]). latex_documents = [ - ('index', 'pganonymizer.tex', - u'PostgreSQL Anonymizer Documentation', + ('index', 'pganonymize.tex', + u'pganonymize Documentation', u'Henning Kage', 'manual'), ] @@ -245,8 +245,8 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - ('index', 'pganonymizer', - u'PostgreSQL Anonymizer Documentation', + ('index', 'pganonymize', + u'pganonymize Anonymizer Documentation', [u'Henning Kage'], 1) ] @@ -260,10 +260,10 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'pganonymizer', - u'PostgreSQL Anonymizer Documentation', + ('index', 'pganonymize', + u'pganonymize Documentation', u'Henning Kage', - 'pganonymizer', + 'pganonymize', 'A cookiecutter template for Rheinwerk Python packages', 'Miscellaneous'), ] diff --git a/docs/index.rst b/docs/index.rst index 363728d..091dd01 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,7 @@ Contents: deploy license changelog + links Indices and tables diff --git a/docs/links.rst b/docs/links.rst new file mode 100644 index 0000000..c2c9794 --- /dev/null +++ b/docs/links.rst @@ -0,0 +1,14 @@ +Links +===== + +The following links refer to projects that have a similar purpose of anonymizing a PostgreSQL database. Thanks to the +authors of these projects. Some of them inspired the author of this project, e.g. `pgantomizer`_ for using a +human-readable declaration file in YAML. + +* `PostgreSQL Anonymizer`_: Anonymization & Data Masking for PostgreSQL +* `pg-anonymizer`_: Dump anonymized PostgreSQL database with a NodeJS CLI +* `pgantomizer`_: Anonymize data in your PostgreSQL database with ease + +.. _PostgreSQL Anonymizer: https://gitlab.com/dalibo/postgresql_anonymizer +.. _pg-anonymizer: https://github.com/rap2hpoutre/pg-anonymizer +..
_pgantomizer: https://github.com/asgeirrr/pgantomizer diff --git a/docs/pganonymizer.rst b/docs/pganonymize.rst similarity index 59% rename from docs/pganonymizer.rst rename to docs/pganonymize.rst index e210de0..f460a56 100644 --- a/docs/pganonymizer.rst +++ b/docs/pganonymize.rst @@ -1,53 +1,53 @@ -pganonymizer package +pganonymize package ==================== Submodules ---------- -pganonymizer.cli module +pganonymize.cli module ----------------------- -.. automodule:: pganonymizer.cli +.. automodule:: pganonymize.cli :members: :undoc-members: :show-inheritance: -pganonymizer.constants module +pganonymize.constants module ----------------------------- -.. automodule:: pganonymizer.constants +.. automodule:: pganonymize.constants :members: :undoc-members: :show-inheritance: -pganonymizer.exceptions module +pganonymize.exceptions module ------------------------------ -.. automodule:: pganonymizer.exceptions +.. automodule:: pganonymize.exceptions :members: :undoc-members: :show-inheritance: -pganonymizer.providers module +pganonymize.providers module ----------------------------- -.. automodule:: pganonymizer.providers +.. automodule:: pganonymize.providers :members: :undoc-members: :show-inheritance: -pganonymizer.utils module +pganonymize.utils module ------------------------- -.. automodule:: pganonymizer.utils +.. automodule:: pganonymize.utils :members: :undoc-members: :show-inheritance: -pganonymizer.version module +pganonymize.version module --------------------------- -.. automodule:: pganonymizer.version +.. automodule:: pganonymize.version :members: :undoc-members: :show-inheritance: @@ -56,7 +56,7 @@ pganonymizer.version module Module contents --------------- -.. automodule:: pganonymizer +.. 
automodule:: pganonymize :members: :undoc-members: :show-inheritance: diff --git a/docs/schema.rst b/docs/schema.rst index ee841db..e3d9421 100644 --- a/docs/schema.rst +++ b/docs/schema.rst @@ -114,6 +114,33 @@ This is useful if you need to anonymize one or more specific records, eg for "Ri provider: name: clear +The YAML schema file supports placeholders for environment variables, e.g.: + +``!ENV ${HOST}`` + +``!ENV '/var/${LOG_PATH}'`` + +So you can construct dynamic filter conditions like: + +.. code-block:: sh + + $ export COMPANY_ID=123 + + $ export ACTION_TO_BE_TAKEN=clear + + $ pganonymize + +**Example**:: + + - login: + search: id = '!ENV ${COMPANY_ID}' + search2: id = ${COMPANY_ID} + search3: username = '${USER_TO_BE_SEARCHED}' + fields: + - first_name: + provider: + name: ${ACTION_TO_BE_TAKEN} + ``chunk_size`` ~~~~~~~~~~~~~~ diff --git a/pganonymizer/__init__.py b/pganonymize/__init__.py similarity index 100% rename from pganonymizer/__init__.py rename to pganonymize/__init__.py diff --git a/pganonymizer/__main__.py b/pganonymize/__main__.py similarity index 84% rename from pganonymizer/__main__.py rename to pganonymize/__main__.py index a18a742..382025f 100644 --- a/pganonymizer/__main__.py +++ b/pganonymize/__main__.py @@ -5,7 +5,7 @@ def main(): - from pganonymizer.cli import get_arg_parser, main + from pganonymize.cli import get_arg_parser, main try: args = get_arg_parser().parse_args() diff --git a/pganonymizer/cli.py b/pganonymize/cli.py similarity index 90% rename from pganonymizer/cli.py rename to pganonymize/cli.py index d05cd2f..dbf22a6 100644 --- a/pganonymizer/cli.py +++ b/pganonymize/cli.py @@ -6,11 +6,9 @@ import logging import time -import yaml - -from pganonymizer.constants import DATABASE_ARGS, DEFAULT_SCHEMA_FILE -from pganonymizer.providers import provider_registry -from pganonymizer.utils import anonymize_tables, create_database_dump, get_connection, truncate_tables +from pganonymize.constants import DATABASE_ARGS, DEFAULT_SCHEMA_FILE +from
pganonymize.providers import provider_registry +from pganonymize.utils import anonymize_tables, create_database_dump, get_connection, load_config, truncate_tables def get_pg_args(args): @@ -64,7 +62,7 @@ def main(args): list_provider_classes() return 0 - schema = yaml.load(open(args.schema), Loader=yaml.FullLoader) + schema = load_config(args.schema) pg_args = get_pg_args(args) connection = get_connection(pg_args) diff --git a/pganonymizer/constants.py b/pganonymize/constants.py similarity index 100% rename from pganonymizer/constants.py rename to pganonymize/constants.py diff --git a/pganonymizer/exceptions.py b/pganonymize/exceptions.py similarity index 100% rename from pganonymizer/exceptions.py rename to pganonymize/exceptions.py diff --git a/pganonymizer/providers.py b/pganonymize/providers.py similarity index 95% rename from pganonymizer/providers.py rename to pganonymize/providers.py index cec1535..f27e420 100644 --- a/pganonymizer/providers.py +++ b/pganonymize/providers.py @@ -7,7 +7,7 @@ from faker import Faker -from pganonymizer.exceptions import InvalidProvider, InvalidProviderArgument, ProviderAlreadyRegistered +from pganonymize.exceptions import InvalidProvider, InvalidProviderArgument, ProviderAlreadyRegistered fake_data = Faker() @@ -22,7 +22,7 @@ def register(self, provider_class, provider_id): """ Register a provider class. 
- :param pganonymizer.providers.Provider provider_class: Provider class that should be registered + :param pganonymize.providers.Provider provider_class: Provider class that should be registered :param str provider_id: A string id to register the provider for :raises ProviderAlreadyRegistered: If another provider with the given id has been registered """ diff --git a/pganonymizer/utils.py b/pganonymize/utils.py similarity index 89% rename from pganonymizer/utils.py rename to pganonymize/utils.py index 188bce5..45e6fd8 100644 --- a/pganonymizer/utils.py +++ b/pganonymize/utils.py @@ -5,6 +5,7 @@ import json import logging import math +import os import re import subprocess import time @@ -12,12 +13,13 @@ import parmap import psycopg2 import psycopg2.extras +import yaml from pgcopy import CopyManager from psycopg2.sql import SQL, Composed, Identifier from tqdm import trange -from pganonymizer.constants import DEFAULT_CHUNK_SIZE, DEFAULT_PRIMARY_KEY -from pganonymizer.providers import provider_registry +from pganonymize.constants import DEFAULT_CHUNK_SIZE, DEFAULT_PRIMARY_KEY +from pganonymize.providers import provider_registry def anonymize_tables(connection, definitions, verbose=False, dry_run=False): @@ -82,6 +84,7 @@ def build_and_then_import_data(connection, table, primary_key, columns, sql_select = Composed([sql_select, SQL(" WHERE {search_condition}".format(search_condition=search))]) if dry_run: sql_select = Composed([sql_select, SQL(" LIMIT 100")]) + logging.info(sql_select.as_string(connection)) cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor, name='fetch_large_result') cursor.execute(sql_select.as_string(connection)) temp_table = 'tmp_{table}'.format(table=table) @@ -155,7 +158,7 @@ def create_temporary_table(connection, definitions, source_table, temp_table, pr FROM {source_table} WITH NO DATA""") cursor = connection.cursor() cursor.execute(ctas_query.format(temp_table=Identifier(temp_table), - source_table=Identifier(source_table), 
columns=sql_columns) + source_table=Identifier(source_table), columns=sql_columns) .as_string(connection) ) cursor.close() @@ -350,3 +353,33 @@ def nested_set(dic, path, value, delimiter='.'): for key in keys[:-1]: dic = dic.get(key, {}) dic[keys[-1]] = value + + +def load_config(schema): + # Original code from here https://gist.github.com/mkaranasou/ba83e25c835a8f7629e34dd7ede01931 + tag = '!ENV' + pattern = re.compile(r'.*?\${(\w+)}.*?') + custom_loader = yaml.FullLoader + custom_loader.add_implicit_resolver(tag, pattern, None) + + def constructor_env_variables(loader, node): + """ + Extracts the environment variable from the node's value + :param yaml.Loader loader: the yaml loader + :param node: the current node in the yaml + :return: the parsed string that contains the value of the environment + variable + """ + value = loader.construct_scalar(node) + match = pattern.findall(value) # to find all env variables in line + if match: + full_value = value + for g in match: + full_value = full_value.replace( + f'${{{g}}}', os.environ.get(g, g) + ) + return full_value + return value + + custom_loader.add_constructor(tag, constructor_env_variables) + return yaml.load(open(schema), Loader=custom_loader) diff --git a/pganonymizer/version.py b/pganonymize/version.py similarity index 53% rename from pganonymizer/version.py rename to pganonymize/version.py index 82dc6dc..0c6d7b8 100644 --- a/pganonymizer/version.py +++ b/pganonymize/version.py @@ -1,3 +1,3 @@ # -*- coding: utf-8 -*- -__version__ = '0.7.0' +__version__ = '0.8.0' diff --git a/pyproject.toml b/pyproject.toml index 9dfec34..9557349 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,14 @@ [tool.poetry] -name = "postgresql-anonymizer" -version = "0.7.0" +name = "pganonymize" +version = "0.8.0" description = "Commandline tool to anonymize PostgreSQL databases" authors = [ "Henning Kage " ] license = "MIT" readme = "README.rst" -homepage = "https://github.com/rheinwerk-verlag/postgresql-anonymizer" 
-repository = "https://github.com/rheinwerk-verlag/postgresql-anonymizer" +homepage = "https://github.com/rheinwerk-verlag/pganonymize/" +repository = "https://github.com/rheinwerk-verlag/pganonymize.git" [tool.poetry.dependencies] python = "~2.7 || ^3.6" diff --git a/pytest.ini b/pytest.ini index e308cf3..58f54ac 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -addopts = --cov=pganonymizer --cov-report term-missing --cov-config setup.cfg -testpaths = tests pganonymizer -python_paths = pganonymizer +addopts = --cov=pganonymize --cov-report term-missing --cov-config setup.cfg +testpaths = tests pganonymize +python_paths = pganonymize diff --git a/setup.py b/setup.py index 64e43ff..f822c34 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def run(self): return subprocess.call(['tox']) -exec(read('pganonymizer', 'version.py')) +exec(read('pganonymize', 'version.py')) install_requires = [ 'faker', @@ -73,7 +73,7 @@ def run(self): author_email='henning.kage@rheinwerk-verlag.de', maintainer='Henning Kage', maintainer_email='henning.kage@rheinwerk-verlag.de', - url='https://github.com/rheinwerk-verlag/postgresql-anonymizer', + url='https://github.com/rheinwerk-verlag/pganonymize', license='MIT license', classifiers=[ 'Development Status :: 3 - Alpha', @@ -92,7 +92,7 @@ def run(self): 'Environment :: Console', 'Topic :: Database' ], - packages=find_packages(include=['pganonymizer*']), + packages=find_packages(include=['pganonymize*']), include_package_data=True, install_requires=install_requires, tests_require=tests_require, @@ -101,7 +101,7 @@ def run(self): }, entry_points={ 'console_scripts': [ - 'pganonymize = pganonymizer.__main__:main' + 'pganonymize = pganonymize.__main__:main' ] } ) diff --git a/tests/schemes/schema_with_env_variables.yml b/tests/schemes/schema_with_env_variables.yml new file mode 100644 index 0000000..efa0da4 --- /dev/null +++ b/tests/schemes/schema_with_env_variables.yml @@ -0,0 +1,13 @@ +primary_key: !ENV 
${TEST_PRIMARY_KEY} +primary_key2: !ENV ${TEST_PRIMARY_KEY} +chunk_size: !ENV ${TEST_CHUNK_SIZE} +concat_missing: !ENV 'Hello, ${MISSING_ENV_VAL}' +concat_missing2: 'Hello, ${MISSING_ENV_VAL}' +concat_present: !ENV 'Hello, ${PRESENT_WORLD_NAME}' +concat_present2: ${PRESENT_WORLD_NAME} +concat_present3: Hello, ${PRESENT_WORLD_NAME} +search: id = ${COMPANY_ID} +search2: username = '${USER_TO_BE_SEARCHED}' +corrupted: username = '${CORRUPTED +corrupted2: !ENV +corrupted3: !ENV $ diff --git a/tests/test_cli.py b/tests/test_cli.py index f4961bd..02d821a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,13 +6,13 @@ from tests.utils import quote_ident -from pganonymizer.cli import get_arg_parser, main +from pganonymize.cli import get_arg_parser, main class TestCli: @patch('psycopg2.extensions.quote_ident', side_effect=quote_ident) - @patch('pganonymizer.utils.psycopg2.connect') - @patch('pganonymizer.utils.subprocess') + @patch('pganonymize.utils.psycopg2.connect') + @patch('pganonymize.utils.subprocess') @pytest.mark.parametrize('cli_args, expected, expected_executes, commit_calls, call_dump', [ ['--host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root', diff --git a/tests/test_providers.py b/tests/test_providers.py index a33e0f0..acf43b3 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -6,7 +6,7 @@ import six from mock import MagicMock, Mock, patch -from pganonymizer import exceptions, providers +from pganonymize import exceptions, providers def test_register(): @@ -121,7 +121,7 @@ class TestFakeProvider: ('fake.first_name', 'first_name'), ('fake.unique.first_name', 'unique.first_name'), ]) - @patch('pganonymizer.providers.fake_data') + @patch('pganonymize.providers.fake_data') def test_alter_value(self, mock_fake_data, 
name, function_name): provider = providers.FakeProvider(name=name) provider.alter_value('Foo') diff --git a/tests/test_utils.py b/tests/test_utils.py index e274856..c0afc79 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,18 +1,20 @@ import math +import os from collections import OrderedDict, namedtuple +from unittest import mock import pytest from mock import ANY, Mock, call, patch from tests.utils import quote_ident -from pganonymizer.utils import (anonymize_tables, build_and_then_import_data, create_database_dump, - get_column_values, get_connection, import_data, truncate_tables) +from pganonymize.utils import (anonymize_tables, build_and_then_import_data, create_database_dump, + get_column_values, get_connection, import_data, load_config, truncate_tables) class TestGetConnection: - @patch('pganonymizer.utils.psycopg2.connect') + @patch('pganonymize.utils.psycopg2.connect') def test(self, mock_connect): connection_data = { 'dbname': 'test', @@ -79,7 +81,7 @@ def test(self, inspect, util, quote_ident, tmp_table, cols, data): expected = [call('COPY "public"."src_tbl" ("id", "location") FROM STDIN WITH BINARY', ANY)] assert mock_cursor.copy_expert.call_args_list == expected - @patch('pganonymizer.utils.CopyManager') + @patch('pganonymize.utils.CopyManager') @patch('psycopg2.extensions.quote_ident', side_effect=quote_ident) def test_anonymize_tables(self, quote_ident, copy_manager): mock_cursor = Mock() @@ -163,7 +165,7 @@ def test_anonymize_tables(self, quote_ident, copy_manager): class TestBuildAndThenImport: @patch('psycopg2.extensions.quote_ident', side_effect=quote_ident) - @patch('pganonymizer.utils.CopyManager') + @patch('pganonymize.utils.CopyManager') @pytest.mark.parametrize('table, primary_key, columns, total_count, chunk_size', [ ['src_tbl', 'id', [{'col1': {'provider': {'name': 'md5'}}}, {'COL2': {'provider': {'name': 'md5'}}}], 10, 3] @@ -190,7 +192,7 @@ def test(self, quote_ident, copy_manager, table, primary_key, columns, total_cou 
call('UPDATE "src_tbl" t SET "col1" = s."col1", "COL2" = s."COL2" FROM "tmp_src_tbl" s WHERE t."id" = s."id"')] # noqa assert mock_cursor.execute.call_args_list == expected_execute_calls - @patch('pganonymizer.utils.CopyManager') + @patch('pganonymize.utils.CopyManager') def test_column_format(self, copy_manager): columns = [ { @@ -231,8 +233,46 @@ def test_column_format(self, copy_manager): class TestCreateDatabaseDump: - @patch('pganonymizer.utils.subprocess.call') + @patch('pganonymize.utils.subprocess.call') def test(self, mock_call): create_database_dump('/tmp/dump.gz', {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}) mock_call.assert_called_once_with('pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz', shell=True) + + +class TestConfigLoader: + + @pytest.mark.parametrize('file, envs, expected', [ + ['./tests/schemes/valid_schema.yml', {}, { + 'tables': [{'auth_user': {'primary_key': 'id', 'chunk_size': 5000, 'fields': [ + {'first_name': {'provider': {'name': 'fake.first_name'}}}, + {'last_name': {'provider': {'name': 'set', 'value': 'Bar'}}}, + {'email': {'provider': {'name': 'md5'}, 'append': '@localhost'}} + ], 'excludes': [{'email': ['\\S[^@]*@example\\.com']}]}}], 'truncate': ['django_session']}], + ['./tests/schemes/schema_with_env_variables.yml', { + "TEST_CHUNK_SIZE": "123", + "TEST_PRIMARY_KEY": "foo-bar", + "PRESENT_WORLD_NAME": "beautiful world", + "COMPANY_ID": "42", + "USER_TO_BE_SEARCHED": "i wanna be forgotten", + }, { + 'primary_key': 'foo-bar', + 'primary_key2': 'foo-bar', + 'chunk_size': '123', + 'concat_missing': 'Hello, MISSING_ENV_VAL', + 'concat_missing2': 'Hello, ${MISSING_ENV_VAL}', + 'concat_present': 'Hello, beautiful world', + 'concat_present2': 'beautiful world', + 'concat_present3': 'Hello, beautiful world', + 'search': 'id = 42', + 'search2': "username = 'i wanna be forgotten'", + 'corrupted': "username = '${CORRUPTED", + 'corrupted2': '', + 'corrupted3': '$' + } + ] + ]) + def 
test(self, file, envs, expected): + with mock.patch.dict(os.environ, envs): + print(load_config(file)) + assert load_config(file) == expected diff --git a/tox.ini b/tox.ini index c431537..9c36a12 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ skip_missing_interpreters=True [testenv:flake8] deps = flake8 -commands = flake8 {toxinidir}/pganonymizer {toxinidir}/tests +commands = flake8 {toxinidir}/pganonymize {toxinidir}/tests [testenv] whitelist_externals = poetry