Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix email name parser #85

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 41 additions & 38 deletions emailparser/name_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,44 @@ var (
surnames map[string]bool
)

// Parse normalizes email with syntax.NormalizeEmailAddress and then tries to
// recognize a name pattern in the lowercased username part.
//
// It returns ErrInvalidEmail (with a zero ParsedEmail) when normalization
// reports the address as invalid. Otherwise the error is nil and the returned
// ParsedEmail always carries the normalized address in Email; when no matcher
// accepts the username, Pattern is set to PatternUnknown.
func Parse(email string) (ParsedEmail, error) {
	// The fourth value returned by the normalizer is not used here.
	valid, normalized, local, _ := syntax.NormalizeEmailAddress(email)
	if !valid {
		return ParsedEmail{}, ErrInvalidEmail
	}

	local = strings.ToLower(local)

	// A username containing dots gets one attempt at the delimited format.
	// If that attempt fails, the dots are stripped so the remaining matchers
	// see a single undelimited token.
	if strings.Contains(local, ".") {
		parsed, matched := tryDelimitedFormat(local)
		if matched {
			parsed.Email = normalized
			return parsed, nil
		}
		local = strings.ReplaceAll(local, ".", "")
	}

	// Matchers run in this order and the first one that accepts wins, so
	// trySingleName takes precedence over the multi-part matchers.
	fallbacks := []func(string) (ParsedEmail, bool){
		trySingleName,
		tryCombinedName,
		tryNameWithInitial,
		tryInitialSurname,
	}
	for i := range fallbacks {
		parsed, matched := fallbacks[i](local)
		if !matched {
			continue
		}
		parsed.Email = normalized
		return parsed, nil
	}

	// Nothing matched: report the normalized address with an unknown pattern.
	unknown := ParsedEmail{
		Email:   normalized,
		Pattern: string(PatternUnknown),
	}
	return unknown, nil
}

// tryDelimitedFormat handles all patterns with dots
func tryDelimitedFormat(username string) (ParsedEmail, bool) {
parts := strings.Split(username, ".")
Expand Down Expand Up @@ -97,6 +135,9 @@ func tryDelimitedFormat(username string) (ParsedEmail, bool) {

// tryNameWithInitial handles trailing initials without dots
func tryNameWithInitial(username string) (ParsedEmail, bool) {
if len(username) < 5 {
return ParsedEmail{}, false
}
// Try to find a known first name
for i := 2; i < len(username); i++ {
possibleName := username[:i]
Expand Down Expand Up @@ -177,44 +218,6 @@ func trySingleName(username string) (ParsedEmail, bool) {
return ParsedEmail{}, false
}

// Parse normalizes email via syntax.NormalizeEmailAddress and tries to match
// the lowercased username against the known name patterns.
//
// It returns ErrInvalidEmail with a zero ParsedEmail when normalization
// reports failure. Otherwise the error is nil and the result's Email field
// holds the normalized address; if no pattern matches, Pattern is set to
// PatternUnknown.
func Parse(email string) (ParsedEmail, error) {
// The fourth return value of the normalizer is discarded.
ok, cleanEmail, username, _ := syntax.NormalizeEmailAddress(email)
if !ok {
return ParsedEmail{}, ErrInvalidEmail
}

username = strings.ToLower(username)

// Try delimited format first if it contains a dot
if strings.Contains(username, ".") {
if result, ok := tryDelimitedFormat(username); ok {
result.Email = cleanEmail
return result, nil
}

// If delimited format didn't match, remove dots before trying other patterns
username = strings.ReplaceAll(username, ".", "")
}

// Try other patterns with cleaned username.
// The matchers run in slice order and the first match wins, so in this
// ordering trySingleName is consulted only after the other three fail.
for _, tryPattern := range []func(string) (ParsedEmail, bool){
tryCombinedName,
tryNameWithInitial,
tryInitialSurname,
trySingleName,
} {
if result, ok := tryPattern(username); ok {
result.Email = cleanEmail
return result, nil
}
}

// No matcher accepted the username: return the address with an unknown pattern.
return ParsedEmail{
Email: cleanEmail,
Pattern: string(PatternUnknown),
}, nil
}

func init() {
var err error
firstNames, err = names.LoadFirstNames()
Expand Down
10 changes: 10 additions & 0 deletions emailparser/name_parses_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ func TestParse(t *testing.T) {
Pattern: string(PatternFirstName),
},
},
{
name: "firstname only #3",
email: "[email protected]",
want: ParsedEmail{
Email: "[email protected]",
FirstName: "Alex",
LastName: "",
Pattern: string(PatternFirstName),
},
},
}

for _, tt := range tests {
Expand Down
Loading