Skip to content

Commit

Permalink
Merge pull request #28 from yojee/add_json_support
Browse files Browse the repository at this point in the history
Add json support
  • Loading branch information
hkage authored Jul 13, 2021
2 parents 8d97bbe + 4d6771f commit f05c8fb
Show file tree
Hide file tree
Showing 13 changed files with 580 additions and 265 deletions.
4 changes: 4 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[report]
exclude_lines =
# Don't complain if tests don't hit defensive assertion code:
raise NotImplementedError
40 changes: 23 additions & 17 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,30 @@ Features
* Exclude data for anonymization depending on regular expressions
* Truncate entire tables for unwanted data

+----------------+----------------------+-----------------------+----------------------------------+
| Field | Value | Provider | Output |
+================+======================+=======================+==================================+
| ``first_name`` | John | ``choice`` | (Bob|Larry|Lisa) |
+----------------+----------------------+-----------------------+----------------------------------+
| ``title`` | Dr. | ``clear`` | |
+----------------+----------------------+-----------------------+----------------------------------+
| ``street`` | Irving St | ``faker.street_name`` | Miller Station |
+----------------+----------------------+-----------------------+----------------------------------+
| ``password`` | dsf82hFxcM | ``mask`` | XXXXXXXXXX |
+----------------+----------------------+-----------------------+----------------------------------+
| ``email`` | [email protected] | ``md5`` | 0cba00ca3da1b283a57287bcceb17e35 |
+----------------+----------------------+-----------------------+----------------------------------+
| ``email`` | [email protected] | ``faker.unique.email``| [email protected] |
+----------------+----------------------+-----------------------+----------------------------------+
| ``ip`` | 157.50.1.20 | ``set`` | 127.0.0.1 |
+----------------+----------------------+-----------------------+----------------------------------+
+----------------+----------------------+------------------------+----------------------------------+
| Field | Value | Provider | Output |
+================+======================+========================+==================================+
| ``first_name`` | John | ``choice`` | (Bob|Larry|Lisa) |
+----------------+----------------------+------------------------+----------------------------------+
| ``title`` | Dr. | ``clear`` | |
+----------------+----------------------+------------------------+----------------------------------+
| ``street`` | Irving St | ``faker.street_name`` | Miller Station |
+----------------+----------------------+------------------------+----------------------------------+
| ``password`` | dsf82hFxcM | ``mask`` | XXXXXXXXXX |
+----------------+----------------------+------------------------+----------------------------------+
| ``email`` | [email protected] | ``md5`` | 0cba00ca3da1b283a57287bcceb17e35 |
+----------------+----------------------+------------------------+----------------------------------+
| ``email`` | [email protected] | ``faker.unique.email`` | [email protected] |
+----------------+----------------------+------------------------+----------------------------------+
| ``phone_num`` | 65923473 | ``md5`` as_number: True | 39482934 |
+----------------+----------------------+------------------------+----------------------------------+
| ``ip`` | 157.50.1.20 | ``set`` | 127.0.0.1 |
+----------------+----------------------+------------------------+----------------------------------+
| ``uuid_col`` | 00010203-0405-...... | ``uuid4`` | f7c1bd87-4d.... |
+----------------+----------------------+------------------------+----------------------------------+

Note: ``faker.unique.[provider]`` is only supported on Python 3.5+ (the minimum Python version supported by the Faker library).

Note: ``uuid4`` can only be used on columns of the native PostgreSQL `uuid <https://www.postgresql.org/docs/current/datatype-uuid.html>`_ type.

See the `documentation`_ for a more detailed description of the provided anonymization methods.

Expand Down Expand Up @@ -81,6 +86,7 @@ Despite the database connection values, you will have to define a YAML schema fi
all anonymization rules for that database. Take a look at the `schema documentation`_ or the
`YAML sample schema`_.


Example call::

$ pganonymize --schema=myschema.yml \
Expand Down
10 changes: 2 additions & 8 deletions pganonymizer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import argparse
import logging
import sys
import time

import yaml
Expand Down Expand Up @@ -63,7 +62,7 @@ def main(args):

if args.list_providers:
list_provider_classes()
sys.exit(0)
return 0

schema = yaml.load(open(args.schema), Loader=yaml.FullLoader)

Expand All @@ -77,7 +76,7 @@ def main(args):

start_time = time.time()
truncate_tables(connection, schema.get('truncate', []))
anonymize_tables(connection, schema.get('tables', []), verbose=args.verbose)
anonymize_tables(connection, schema.get('tables', []), verbose=args.verbose, dry_run=args.dry_run)

if not args.dry_run:
connection.commit()
Expand All @@ -88,8 +87,3 @@ def main(args):

if args.dump_file:
create_database_dump(args.dump_file, pg_args)


if __name__ == '__main__':
args = get_arg_parser().parse_args()
main(args)
5 changes: 1 addition & 4 deletions pganonymizer/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@
# Default name for the primary key column
DEFAULT_PRIMARY_KEY = 'id'

# Delimiter used to buffer and import database data.
COPY_DB_DELIMITER = '\x1f'

# Filename of the default schema
DEFAULT_SCHEMA_FILE = 'schema.yml'

# Default chunk size for data fetch
DEFAULT_CHUNK_SIZE = 2000
DEFAULT_CHUNK_SIZE = 100000
19 changes: 18 additions & 1 deletion pganonymizer/providers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import operator
import random
from hashlib import md5
from uuid import uuid4

from faker import Faker
from six import with_metaclass
Expand Down Expand Up @@ -111,9 +112,16 @@ class MD5Provider(with_metaclass(ProviderMeta, Provider)):
"""Provider to hash a value with the md5 algorithm."""

id = 'md5'
default_max_length = 8

def alter_value(self, value):
return md5(value.encode('utf-8')).hexdigest()
as_number = self.kwargs.get('as_number', False)
as_number_length = self.kwargs.get('as_number_length', self.default_max_length)
hashed = md5(value.encode('utf-8')).hexdigest()
if as_number:
return int(hashed, 16) % (10 ** as_number_length)
else:
return hashed


class SetProvider(with_metaclass(ProviderMeta, Provider)):
Expand All @@ -123,3 +131,12 @@ class SetProvider(with_metaclass(ProviderMeta, Provider)):

def alter_value(self, value):
return self.kwargs.get('value')


class UUID4Provider(with_metaclass(ProviderMeta, Provider)):
    """Provider that replaces a value with a newly generated random UUID."""

    id = 'uuid4'

    def alter_value(self, value):
        """Return a fresh random UUID; the incoming ``value`` is ignored.

        The result is a ``uuid.UUID`` object, not its string form.
        """
        replacement = uuid4()
        return replacement
Loading

0 comments on commit f05c8fb

Please sign in to comment.