Skip to content

Commit

Permalink
Merge pull request #139 from maxmind/ugexe/email-nfc
Browse files Browse the repository at this point in the history
Normalize email addresses to NFC
  • Loading branch information
oschwald authored Apr 12, 2024
2 parents 6364583 + b96f65a commit ee2db20
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ History
become ``gmail.com``.
* Additional ``gmail.com`` typos are now normalized when ``hash_email`` is
used. For example, ``gmali.com`` will become ``gmail.com``.
* When ``hash_email`` is used, the local part of an email address is now
normalized to NFC.

2.9.0 (2023-12-05)
++++++++++++++++++
Expand Down
3 changes: 3 additions & 0 deletions minfraud/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import warnings
import hashlib
import unicodedata
from typing import Any, Dict
from voluptuous import MultipleInvalid

Expand Down Expand Up @@ -364,6 +365,8 @@ def _clean_email(address):
domain = _clean_domain(address[at_idx + 1 :]) # noqa
local_part = address[:at_idx]

local_part = unicodedata.normalize("NFC", local_part)

# Strip off aliased part of email address.
if domain in _YAHOO_DOMAINS:
divider = "-"
Expand Down
22 changes: 22 additions & 0 deletions tests/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,26 @@ def test_maybe_hash_email(self):
}
},
},
{
"name": "email local part nfc normalization form 1",
"input": {"email": {"address": "bu\u0308cher@example.com"}},
"expected": {
"email": {
"address": "53550c712b146287a2d0dd30e5ed6f4b",
"domain": "example.com",
}
},
},
{
"name": "email local part nfc normalization form 2",
"input": {"email": {"address": "b\u00FCcher@example.com"}},
"expected": {
"email": {
"address": "53550c712b146287a2d0dd30e5ed6f4b",
"domain": "example.com",
}
},
},
]

for test in tests:
Expand Down Expand Up @@ -231,6 +251,8 @@ def test_clean_email():
{"input": "[email protected]", "output": "[email protected]"},
{"input": "[email protected].", "output": "[email protected]"},
{"input": "[email protected]...", "output": "[email protected]"},
{"input": "example@bu\u0308cher.com", "output": "example@xn--bcher-kva.com"},
{"input": "example@b\u00FCcher.com", "output": "example@xn--bcher-kva.com"},
]

for test in tests:
Expand Down

0 comments on commit ee2db20

Please sign in to comment.