Skip to content

Commit

Permalink
Merge pull request #14 from eugene-gurevich/master
Browse files Browse the repository at this point in the history
Fix cache strict look up bug and unify cache management
  • Loading branch information
nicpottier authored Sep 3, 2018
2 parents 2e133f7 + ff6ee83 commit d7a8a43
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 63 deletions.
85 changes: 22 additions & 63 deletions phonenumbers.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,15 @@ func writeToRegexCache(key string, value *regexp.Regexp) {
regCacheMutex.Unlock()
}

func regexFor(pattern string) *regexp.Regexp {
regex, found := readFromRegexCache(pattern)
if !found {
regex = regexp.MustCompile(pattern)
writeToRegexCache(pattern, regex)
}
return regex
}

func readFromNanpaRegions(key string) (struct{}, bool) {
v, ok := nanpaRegions[key]
return v, ok
Expand Down Expand Up @@ -1789,11 +1798,7 @@ func chooseFormattingPatternForNumber(
size := len(leadingDigitsPattern)

patP := `^(?:` + numFormat.GetPattern() + `)$` // Strictly match
m, ok := readFromRegexCache(numFormat.GetPattern())
if !ok {
m = regexp.MustCompile(patP)
writeToRegexCache(patP, m)
}
m := regexFor(patP)

if size == 0 {
mat := m.FindString(nationalNumber)
Expand All @@ -1806,12 +1811,7 @@ func chooseFormattingPatternForNumber(

// We always use the last leading_digits_pattern, as it is the
// most detailed.
reg, ok := readFromRegexCache(leadingDigitsPattern[size-1])
if !ok {
pat := leadingDigitsPattern[size-1]
reg = regexp.MustCompile(pat)
writeToRegexCache(pat, reg)
}
reg := regexFor(leadingDigitsPattern[size-1])

inds := reg.FindStringIndex(nationalNumber)
if len(inds) > 0 && inds[0] == 0 && m.MatchString(nationalNumber) { // inds[0] == 0 ensures strict match of leading digits
Expand Down Expand Up @@ -1839,12 +1839,7 @@ func formatNsnUsingPatternWithCarrier(
carrierCode string) string {

numberFormatRule := formattingPattern.GetFormat()
m, ok := readFromRegexCache(formattingPattern.GetPattern())
if !ok {
pat := formattingPattern.GetPattern()
m = regexp.MustCompile(pat)
writeToRegexCache(pat, m)
}
m := regexFor(formattingPattern.GetPattern())

formattedNationalNumber := ""
if numberFormat == NATIONAL &&
Expand Down Expand Up @@ -2114,21 +2109,13 @@ func isNumberPossibleForDesc(nationalNumber string, numberDesc *PhoneNumberDesc)
}
}
possiblePattern := "^(?:" + numberDesc.GetNationalNumberPattern() + ")$" // Strictly match
pat, ok := readFromRegexCache(possiblePattern)
if !ok {
pat = regexp.MustCompile(possiblePattern)
writeToRegexCache(possiblePattern, pat)
}
pat := regexFor(possiblePattern)
return pat.MatchString(nationalNumber)
}

func isNumberMatchingDesc(nationalNumber string, numberDesc *PhoneNumberDesc) bool {
patP := "^(?:" + numberDesc.GetNationalNumberPattern() + ")$" // Strictly match
pat, ok := readFromRegexCache(patP)
if !ok {
pat = regexp.MustCompile(patP)
writeToRegexCache(patP, pat)
}
pat := regexFor(patP)
return isNumberPossibleForDesc(nationalNumber, numberDesc) &&
pat.MatchString(nationalNumber)

Expand Down Expand Up @@ -2192,11 +2179,7 @@ func getRegionCodeForNumberFromRegionList(
var metadata *PhoneMetadata = getMetadataForRegion(regionCode)
if len(metadata.GetLeadingDigits()) > 0 {
patP := "^(?:" + metadata.GetLeadingDigits() + ")" // Non capturing grouping to support OR'ed alternatives (e.g. 555|1[78]|2)
pat, ok := readFromRegexCache(patP)
if !ok {
pat = regexp.MustCompile(patP)
writeToRegexCache(patP, pat)
}
pat := regexFor(patP)
if pat.MatchString(nationalNumber) {
return regionCode
}
Expand Down Expand Up @@ -2608,24 +2591,15 @@ func maybeExtractCountryCode(
var (
potentialNationalNumber = NewBuilderString(
normalizedNumber[len(defaultCountryCodeString):])
generalDesc = defaultRegionMetadata.GetGeneralDesc()
patP = `^(?:` + generalDesc.GetNationalNumberPattern() + `)$` // Strictly match
validNumberPattern, ok = readFromRegexCache(patP)
generalDesc = defaultRegionMetadata.GetGeneralDesc()
patP = `^(?:` + generalDesc.GetNationalNumberPattern() + `)$` // Strictly match
validNumberPattern = regexFor(patP)
)
if !ok {
validNumberPattern = regexp.MustCompile(patP)
writeToRegexCache(patP, validNumberPattern)
}
maybeStripNationalPrefixAndCarrierCode(
potentialNationalNumber,
defaultRegionMetadata,
NewBuilder(nil) /* Don't need the carrier code */)
nationalNumberPattern, ok := readFromRegexCache(generalDesc.GetNationalNumberPattern())
if !ok {
pat := generalDesc.GetNationalNumberPattern()
nationalNumberPattern = regexp.MustCompile(pat)
writeToRegexCache(pat, nationalNumberPattern)
}

// If the number was not valid before but is valid now, or
// if it was too long before, we consider the number with
// the country calling code stripped to be a better result and
Expand Down Expand Up @@ -2693,12 +2667,7 @@ func maybeStripInternationalPrefixAndNormalize(
}

// Attempt to parse the first digits as an international prefix.
iddPattern, ok := readFromRegexCache(possibleIddPrefix)
if !ok {
pat := possibleIddPrefix
iddPattern = regexp.MustCompile(pat)
writeToRegexCache(pat, iddPattern)
}
iddPattern := regexFor(possibleIddPrefix)
number.ResetWithString(normalize(string(numBytes)))
if parsePrefixAsIdd(iddPattern, number) {
return PhoneNumber_FROM_NUMBER_WITH_IDD
Expand All @@ -2721,20 +2690,10 @@ func maybeStripNationalPrefixAndCarrierCode(
}
possibleNationalPrefix = "^(?:" + possibleNationalPrefix + ")" // Strictly match from string start
// Attempt to parse the first digits as a national prefix.
prefixMatcher, ok := readFromRegexCache(possibleNationalPrefix)
if !ok {
pat := possibleNationalPrefix
prefixMatcher = regexp.MustCompile(pat)
writeToRegexCache(pat, prefixMatcher)
}
prefixMatcher := regexFor(possibleNationalPrefix)
if prefixMatcher.MatchString(number.String()) {
natRulePattern := "^(?:" + metadata.GetGeneralDesc().GetNationalNumberPattern() + ")$" // Strictly match
nationalNumberRule, ok :=
readFromRegexCache(natRulePattern)
if !ok {
nationalNumberRule = regexp.MustCompile(natRulePattern)
writeToRegexCache(natRulePattern, nationalNumberRule)
}
nationalNumberRule := regexFor(natRulePattern)
// Check if the original number is viable.
isViableOriginalNumber := nationalNumberRule.Match(number.Bytes())
// prefixMatcher.group(numOfGroups) == null implies nothing was
Expand Down
62 changes: 62 additions & 0 deletions phonenumbers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package phonenumbers

import (
"reflect"
"regexp"
"testing"

"github.com/golang/protobuf/proto"
"github.com/theothertomelliott/go-must"
)

func TestParse(t *testing.T) {
Expand Down Expand Up @@ -1209,3 +1211,63 @@ func TestMergeLengths(t *testing.T) {
}
}
}

func TestRegexCacheWrite(t *testing.T) {
pattern1 := "TestRegexCacheWrite"
if _, found1 := readFromRegexCache(pattern1); found1 {
t.Errorf("pattern |%v| is in the cache", pattern1)
}
regex1 := regexFor(pattern1)
cachedRegex1, found1 := readFromRegexCache(pattern1)
if !found1 {
t.Errorf("pattern |%v| is not in the cache", pattern1)
}
if regex1 != cachedRegex1 {
t.Error("expected the same instance, but got a different one")
}
pattern2 := pattern1 + "."
if _, found2 := readFromRegexCache(pattern2); found2 {
t.Errorf("pattern |%v| is in the cache", pattern2)
}
}

func TestRegexCacheRead(t *testing.T) {
pattern1 := "TestRegexCacheRead"
if _, found1 := readFromRegexCache(pattern1); found1 {
t.Errorf("pattern |%v| is in the cache", pattern1)
}
regex1 := regexp.MustCompile(pattern1)
writeToRegexCache(pattern1, regex1)
if cachedRegex1 := regexFor(pattern1); cachedRegex1 != regex1 {
t.Error("expected the same instance, but got a different one")
}
cachedRegex1, found1 := readFromRegexCache(pattern1)
if !found1 {
t.Errorf("pattern |%v| is not in the cache", pattern1)
}
if cachedRegex1 != regex1 {
t.Error("expected the same instance, but got a different one")
}
pattern2 := pattern1 + "."
if _, found2 := readFromRegexCache(pattern2); found2 {
t.Errorf("pattern |%v| is in the cache", pattern2)
}
}

func TestRegexCacheStrict(t *testing.T) {
const expectedResult = "(41) 3020-3445"
phoneToTest := &PhoneNumber{
CountryCode: proto.Int32(55),
NationalNumber: proto.Uint64(4130203445),
}
firstRunResult := Format(phoneToTest, NATIONAL)
must.BeEqual(t, expectedResult, firstRunResult, "phone number formatting not as expected")
// This adds value to the regex cache that would break the following lookup if the regex-s
// in cache were not strict.
Format(&PhoneNumber{
CountryCode: proto.Int32(973),
NationalNumber: proto.Uint64(17112724),
}, NATIONAL)
secondRunResult := Format(phoneToTest, NATIONAL)
must.BeEqual(t, expectedResult, secondRunResult, "phone number formatting not as expected")
}

0 comments on commit d7a8a43

Please sign in to comment.