Skip to content

Commit

Permalink
Normalize email local part to NFC
Browse files Browse the repository at this point in the history
  • Loading branch information
ugexe committed Apr 11, 2024
1 parent 7b54c0a commit 194f922
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 0 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ History
become ``gmail.com``.
* Additional ``gmail.com`` typos are now normalized when ``hash_email`` is
used. For example, ``gmali.com`` will become ``gmail.com``.
* When ``hash_email`` is used, the local part of an email address is now
normalized to NFC.

2.9.0 (2023-12-05)
++++++++++++++++++
Expand Down
3 changes: 3 additions & 0 deletions minfraud/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import warnings
import hashlib
import unicodedata
from typing import Any, Dict
from voluptuous import MultipleInvalid

Expand Down Expand Up @@ -364,6 +365,8 @@ def _clean_email(address):
domain = _clean_domain(address[at_idx + 1 :]) # noqa
local_part = address[:at_idx]

local_part = unicodedata.normalize('NFC', local_part)

# Strip off aliased part of email address.
if domain in _YAHOO_DOMAINS:
divider = "-"
Expand Down
20 changes: 20 additions & 0 deletions tests/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,26 @@ def test_maybe_hash_email(self):
}
},
},
{
"name": "email local part nfc normalization form 1",
"input": {"email": {"address": "bu\u0308cher@example.com"}},
"expected": {
"email": {
"address": "53550c712b146287a2d0dd30e5ed6f4b",
"domain": "example.com",
}
},
},
{
"name": "email local part nfc normalization form 2",
"input": {"email": {"address": "b\u00FCcher@example.com"}},
"expected": {
"email": {
"address": "53550c712b146287a2d0dd30e5ed6f4b",
"domain": "example.com",
}
},
},
]

for test in tests:
Expand Down

0 comments on commit 194f922

Please sign in to comment.