From e9503e5cff0c7329dee6c7435a3054e8e6606288 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 09:28:05 +0100 Subject: [PATCH 01/30] Added first version of phone matcher port --- matcher.go | 8 -- phonenumbermatcher.go | 264 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+), 8 deletions(-) create mode 100644 phonenumbermatcher.go diff --git a/matcher.go b/matcher.go index 3bbf427..1e5696e 100644 --- a/matcher.go +++ b/matcher.go @@ -6,14 +6,6 @@ import ( "unicode" ) -type PhoneNumberMatcher struct { -} - -func NewPhoneNumberMatcher(seq string) *PhoneNumberMatcher { - // TODO(ttacon): to be implemented - return nil -} - func ContainsOnlyValidXChars(number *PhoneNumber, candidate string) bool { // The characters 'x' and 'X' can be (1) a carrier code, in which // case they always precede the national significant number or (2) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go new file mode 100644 index 0000000..2349357 --- /dev/null +++ b/phonenumbermatcher.go @@ -0,0 +1,264 @@ +package phonenumbers + +import ( + "io" + "regexp" + "strconv" + "unicode" +) + +/* +A stateful class that finds and extracts telephone numbers fom text. + +Vanity numbers (phone numbers using alphabetic digits such as '1-800-SIX-FLAGS' are not found. +*/ +type PhoneNumberMatcher struct { + text string + preferredRegion string + leniency Leniency + maxTries int + state int + lastMatch *PhoneNumberMatch + searchIndex int +} + +const ( + notReady = 0 + ready = 1 + done = 2 +) + +var ( + OPENING_PARENS = "(\\[\uFF08\uFF3B" + CLOSING_PARENS = ")\\]\uFF09\uFF3D" + NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" + BRACKET_PAIR_LIMIT = "{0,3}" + + LEAD_CLASS = OPENING_PARENS + PLUS_CHARS + LEAD_PATTERN = regexp.MustCompile(LEAD_CLASS) + LEAD_LIMIT = "{0,2}" + + DIGIT_BLOCK_LIMIT = 17 + 3 + DIGIT_SEQUENCE = "\\d{1," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" + + PUNCTIATION_LIMIT = "{0,4}" + PUNCTUATION = "[" + VALID_PUNCTUATION + "]" + PUNCTIATION_LIMIT + + BLOCK_LIMIT = "{0," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" + + PATTERN = regexp.MustCompile("(?:" + LEAD_CLASS + PUNCTUATION + ")" + LEAD_LIMIT + DIGIT_SEQUENCE + "(?:" + PUNCTUATION + DIGIT_SEQUENCE + ")" + BLOCK_LIMIT + "(?:" + EXTN_PATTERNS_FOR_MATCHING + ")?") + + SLASH_SEPARATED_DATES = regexp.MustCompile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}") + TIME_STAMPS = regexp.MustCompile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$") + + MATCHING_BRACKETS = regexp.MustCompile("(?:[" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") + + PUB_PAGES = regexp.MustCompile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}") + + /** + * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are + * ordered according to specificity. For example, white-space is last since that is frequently + * used in numbers, not just to separate two numbers. We have separate patterns since we don't + * want to break up the phone-number-like text on more than one different kind of symbol at one + * time, although symbols of the same type (e.g. space) can be safely grouped together. + * + * Note that if there is a match, we will always check any text found up to the first match as + * well. + */ + INNER_MATCHES = []*regexp.Regexp{ + // Breaks on the slash - e.g. "651-234-2345/332-445-1234" + regexp.MustCompile("/+(.*)"), + // Note that the bracket here is inside the capturing group, since we consider it part of the + // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321". + regexp.MustCompile("(\\([^(]*)"), + // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number." + // We require a space on either side of the hyphen for it to be considered a separator. + regexp.MustCompile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"), + // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's + // possible that it's supposed to be used to break two numbers without spaces, and we haven't + // seen many instances of it used within a number. + regexp.MustCompile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"), + // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number." + regexp.MustCompile("\\.+\\p{Z}*([^.]+)"), + // Breaks on space - e.g. "3324451234 8002341234" + regexp.MustCompile("\\p{Z}+(\\P{Z}+)"), + } +) + +func NewPhoneNumberMatcher(text string, region string) PhoneNumberMatcher { + m := PhoneNumberMatcher{ + text: text, + preferredRegion: region, + leniency: Leniency(1), + maxTries: 65535, + state: notReady, + lastMatch: nil, + searchIndex: 0, + } + + return m +} + +func (*PhoneNumberMatcher) trimAfterFirstMatch(pattern *regexp.Regexp, candidate string) string { + trailingCharsMatch := pattern.FindStringIndex(candidate) + if trailingCharsMatch != nil { + candidate = candidate[:trailingCharsMatch[0]] + } + return candidate +} + +func (*PhoneNumberMatcher) isInvalidPunctuationSymbol(char rune) bool { + return char == '%' || unicode.In(char, unicode.Sc) +} + +func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*PhoneNumberMatch, error) { + if MATCHING_BRACKETS.FindStringIndex(candidate) == nil || PUB_PAGES.FindStringIndex(candidate) != nil { + return nil, nil + } + + if p.leniency >= VALID { + if offset > 0 && LEAD_PATTERN.FindStringIndex(candidate) == nil { + previousChar := []rune(p.text)[offset-1] + if p.isInvalidPunctuationSymbol(previousChar) || unicode.IsLetter(previousChar) { + return nil, nil + } + } + lastCharIndex := offset + len(candidate) + if lastCharIndex < len(p.text) { + nextChar := []rune(p.text)[lastCharIndex] + if p.isInvalidPunctuationSymbol(nextChar) || unicode.IsLetter(nextChar) { + return nil, nil + } + } + } + + number, err := ParseAndKeepRawInput(candidate, p.preferredRegion) + if err != nil { + return nil, err + } + + if p.leniency.Verify(number, candidate) { + match := NewPhoneNumberMatch(offset, candidate, *number) + + return &match, nil + } + + return nil, nil +} + +func (p *PhoneNumberMatcher) extractMatch(candidate string, offset int) *PhoneNumberMatch { + if SLASH_SEPARATED_DATES.FindStringIndex(candidate) != nil { + return nil + } + + if TIME_STAMPS.FindStringIndex(candidate) != nil { + return nil + } + + match, _ := p.parseAndVerify(candidate, offset) + if match != nil { + return match + } + + return p.extractInnerMatch(candidate, offset) +} + +func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *PhoneNumberMatch { + for _, possibleInnerMatch := range INNER_MATCHES { + groupMatch := possibleInnerMatch.FindStringIndex(candidate) + isFirstMatch := true + for { + if groupMatch == nil || p.maxTries == 0 { + break + } + if isFirstMatch { + group := p.trimAfterFirstMatch(UNWANTED_END_CHAR_PATTERN, candidate[:groupMatch[0]]) + match, _ := p.parseAndVerify(group, offset+groupMatch[0]) + if match != nil { + return match + } + p.maxTries-- + isFirstMatch = false + } + group := p.trimAfterFirstMatch(UNWANTED_END_CHAR_PATTERN, candidate[groupMatch[0]:groupMatch[1]]) + match, _ := p.parseAndVerify(group, offset+groupMatch[0]) + if match != nil { + return match + } + p.maxTries-- + groupMatch = possibleInnerMatch.FindStringIndex(candidate[groupMatch[1]:]) + } + } + return nil +} + +func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { + matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:len(p.text)]) + for { + if p.maxTries > 0 && matcher == nil { + break + } + start := matcher[0] + candidate := p.text[start:matcher[1]] + + // Check for extra numbers at the end. + // TODO: This is the place to start when trying to support extraction of multiple phone number + // from split notations (+41 79 123 45 67 / 68). + candidate = p.trimAfterFirstMatch(SECOND_NUMBER_START_PATTERN, candidate) + + match := p.extractMatch(candidate, start) + if match != nil { + return match + } + + matcher = PATTERN.FindStringIndex(p.text[start+len(candidate) : len(p.text)]) + p.maxTries-- + } + return nil +} + +func (p *PhoneNumberMatcher) hasNext() bool { + if p.state == notReady { + p.lastMatch = p.find() + if p.lastMatch == nil { + p.state = done + } else { + p.searchIndex = p.lastMatch.end + p.state = ready + } + } + return p.state == ready +} + +func (p *PhoneNumberMatcher) Next() (*PhoneNumberMatch, error) { + if !p.hasNext() { + return nil, io.EOF + } + // Remove from memory after use + result := p.lastMatch + p.lastMatch = nil + p.state = notReady + return result, nil +} + +/* +The immutable match of a phone number within a piece of text. + +Matches may be found using the find() method of PhoneNumberMatcher. + +A match consists of the phone number (in .number) as well as the .start and .end offsets of the corresponding subsequence of the searched text. Use .raw_string to obtain a copy of the matched subsequence. +*/ +type PhoneNumberMatch struct { + start, end int + rawString string + number PhoneNumber +} + +func NewPhoneNumberMatch(start int, rawString string, number PhoneNumber) PhoneNumberMatch { + return PhoneNumberMatch{ + start: start, + end: start + len(rawString), + rawString: rawString, + number: number, + } +} From b286ba74974b9cf5cf5363287759ccb237fd2082 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 09:45:10 +0100 Subject: [PATCH 02/30] Added first version of phone matcher port --- cmd/phoneserver/main.go | 111 +++++++++++++++++++++------------------- go.mod | 4 +- go.sum | 17 ++++++ 3 files changed, 77 insertions(+), 55 deletions(-) diff --git a/cmd/phoneserver/main.go b/cmd/phoneserver/main.go index 2af237e..d057f36 100644 --- a/cmd/phoneserver/main.go +++ b/cmd/phoneserver/main.go @@ -1,13 +1,16 @@ package main -import ( - "encoding/json" - "net/http" - - "github.com/aws/aws-lambda-go/events" - "github.com/aws/aws-lambda-go/lambda" - "github.com/nyaruka/phonenumbers" -) +//import ( +// "encoding/json" +// "net/http" +// +// "github.com/nyaruka/phonenumbers" +//)import ( +// "encoding/json" +// "net/http" +// +// "github.com/nyaruka/phonenumbers" +//) var Version = "dev" @@ -26,49 +29,49 @@ type successResponse struct { Version string `json:"version"` } -func writeResponse(status int, body interface{}) (events.APIGatewayProxyResponse, error) { - js, err := json.MarshalIndent(body, "", " ") - if err != nil { - return events.APIGatewayProxyResponse{ - StatusCode: 500, - Body: err.Error(), - }, nil - } - - return events.APIGatewayProxyResponse{ - StatusCode: 200, - Body: string(js), - Headers: map[string]string{"Content-Type": "application/json"}, - }, nil -} - -func parse(request events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) { - phone := request.QueryStringParameters["phone"] - - // required phone number - if phone == "" { - return writeResponse(http.StatusBadRequest, errorResponse{"missing body", "missing 'phone' parameter"}) - } - - // optional country code - country := request.QueryStringParameters["country"] - - metadata, err := phonenumbers.Parse(phone, country) - if err != nil { - return writeResponse(http.StatusBadRequest, errorResponse{"error parsing phone", err.Error()}) - } - - return writeResponse(http.StatusOK, successResponse{ - NationalNumber: *metadata.NationalNumber, - CountryCode: *metadata.CountryCode, - IsPossible: phonenumbers.IsPossibleNumber(metadata), - IsValid: phonenumbers.IsValidNumber(metadata), - NationalFormatted: phonenumbers.Format(metadata, phonenumbers.NATIONAL), - InternationalFormatted: phonenumbers.Format(metadata, phonenumbers.INTERNATIONAL), - Version: Version, - }) -} - -func main() { - lambda.Start(parse) -} +//func writeResponse(status int, body interface{}) (events.APIGatewayProxyResponse, error) { +// js, err := json.MarshalIndent(body, "", " ") +// if err != nil { +// return events.APIGatewayProxyResponse{ +// StatusCode: 500, +// Body: err.Error(), +// }, nil +// } +// +// return events.APIGatewayProxyResponse{ +// StatusCode: 200, +// Body: string(js), +// Headers: map[string]string{"Content-Type": "application/json"}, +// }, nil +//} +// +//func parse(request events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) { +// phone := request.QueryStringParameters["phone"] +// +// // required phone number +// if phone == "" { +// return writeResponse(http.StatusBadRequest, errorResponse{"missing body", "missing 'phone' parameter"}) +// } +// +// // optional country code +// country := request.QueryStringParameters["country"] +// +// metadata, err := phonenumbers.Parse(phone, country) +// if err != nil { +// return writeResponse(http.StatusBadRequest, errorResponse{"error parsing phone", err.Error()}) +// } +// +// return writeResponse(http.StatusOK, successResponse{ +// NationalNumber: *metadata.NationalNumber, +// CountryCode: *metadata.CountryCode, +// IsPossible: phonenumbers.IsPossibleNumber(metadata), +// IsValid: phonenumbers.IsValidNumber(metadata), +// NationalFormatted: phonenumbers.Format(metadata, phonenumbers.NATIONAL), +// InternationalFormatted: phonenumbers.Format(metadata, phonenumbers.INTERNATIONAL), +// Version: Version, +// }) +//} +// +//func main() { +// lambda.Start(parse) +//} diff --git a/go.mod b/go.mod index b3c5356..badd47d 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,7 @@ module github.com/nyaruka/phonenumbers -require github.com/golang/protobuf v1.3.2 +require ( + github.com/golang/protobuf v1.3.2 +) go 1.13 diff --git a/go.sum b/go.sum index 9a46bf9..83d6e04 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,19 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/aws/aws-lambda-go v1.23.0 h1:Vjwow5COkFJp7GePkk9kjAo/DyX36b7wVPKwseQZbRo= +github.com/aws/aws-lambda-go v1.23.0/go.mod h1:jJmlefzPfGnckuHdXX7/80O3BvUUi12XOkbv4w9SGLU= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 7a5e4a48c1651b44e23c661c10bd8ad2734254fb Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 09:46:50 +0100 Subject: [PATCH 03/30] Fix module name --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index badd47d..9dd1e4b 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/nyaruka/phonenumbers +module github.com/mauritsderuiter95/phonenumbers require ( github.com/golang/protobuf v1.3.2 From 50bf9c3d967c66261ac09aa6b8136e0a2efe5cbd Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 09:47:49 +0100 Subject: [PATCH 04/30] Update go version --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 9dd1e4b..24c16e7 100644 --- a/go.mod +++ b/go.mod @@ -4,4 +4,4 @@ require ( github.com/golang/protobuf v1.3.2 ) -go 1.13 +go 1.16 From 314f08e84d49a1d3bf2e49309aaea3a0f9d4c1c2 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 09:52:05 +0100 Subject: [PATCH 05/30] Escape parentheses --- phonenumbermatcher.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 2349357..d37a4de 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -29,8 +29,8 @@ const ( ) var ( - OPENING_PARENS = "(\\[\uFF08\uFF3B" - CLOSING_PARENS = ")\\]\uFF09\uFF3D" + OPENING_PARENS = "((\\[\uFF08\uFF3B" + CLOSING_PARENS = "))\\]\uFF09\uFF3D" NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" BRACKET_PAIR_LIMIT = "{0,3}" From c9c99eb9e0d7166b5c409c4d647d8f90892b14a4 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 09:53:19 +0100 Subject: [PATCH 06/30] Escape parentheses --- phonenumbermatcher.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index d37a4de..7b90d62 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -29,8 +29,8 @@ const ( ) var ( - OPENING_PARENS = "((\\[\uFF08\uFF3B" - CLOSING_PARENS = "))\\]\uFF09\uFF3D" + OPENING_PARENS = "\\(\\[\uFF08\uFF3B" + CLOSING_PARENS = "\\)\\]\uFF09\uFF3D" NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" BRACKET_PAIR_LIMIT = "{0,3}" From 3bebb51cd0b89502e73fe2eea61d46d6ff832b56 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 10:32:34 +0100 Subject: [PATCH 07/30] Fix endless loop --- phonenumbermatcher.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 7b90d62..784c695 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -29,8 +29,8 @@ const ( ) var ( - OPENING_PARENS = "\\(\\[\uFF08\uFF3B" - CLOSING_PARENS = "\\)\\]\uFF09\uFF3D" + OPENING_PARENS = `\(\[\\uFF08\\uFF3B` + CLOSING_PARENS = `\\)\\]\uFF09\uFF3D` NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" BRACKET_PAIR_LIMIT = "{0,3}" @@ -193,13 +193,15 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph } func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { - matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:len(p.text)]) + matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:]) + index := 0 for { if p.maxTries > 0 && matcher == nil { break } - start := matcher[0] - candidate := p.text[start:matcher[1]] + start := index + matcher[0] + end := index + matcher[1] + candidate := p.text[start:end] // Check for extra numbers at the end. // TODO: This is the place to start when trying to support extraction of multiple phone number @@ -211,7 +213,8 @@ func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { return match } - matcher = PATTERN.FindStringIndex(p.text[start+len(candidate) : len(p.text)]) + index = start + len(candidate) + matcher = PATTERN.FindStringIndex(p.text[index:]) p.maxTries-- } return nil From f6fd65dd91cc73d49ebbee1b99dee41c3dff2635 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 10:35:07 +0100 Subject: [PATCH 08/30] Fix wrong regex --- phonenumbermatcher.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 784c695..ef4130f 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -29,8 +29,8 @@ const ( ) var ( - OPENING_PARENS = `\(\[\\uFF08\\uFF3B` - CLOSING_PARENS = `\\)\\]\uFF09\uFF3D` + OPENING_PARENS = "\\(\\[\uFF08\uFF3B" + CLOSING_PARENS = "\\)\\]\uFF09\uFF3D" NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" BRACKET_PAIR_LIMIT = "{0,3}" From b6a7ec4a2b3a0336851ecaf7b7e6d8e8c0e5b6d2 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 10:42:31 +0100 Subject: [PATCH 09/30] Fix new endless loop --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index ef4130f..384644e 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -194,7 +194,7 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:]) - index := 0 + index := 0 + p.searchIndex for { if p.maxTries > 0 && matcher == nil { break From 44ad373d68654050e1d8f0500bfb2d285fea9395 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 10:46:10 +0100 Subject: [PATCH 10/30] Export Number --- phonenumbermatcher.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 384644e..7e0d5c8 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -254,7 +254,7 @@ A match consists of the phone number (in .number) as well as the .start and .end type PhoneNumberMatch struct { start, end int rawString string - number PhoneNumber + Number PhoneNumber } func NewPhoneNumberMatch(start int, rawString string, number PhoneNumber) PhoneNumberMatch { @@ -262,6 +262,6 @@ func NewPhoneNumberMatch(start int, rawString string, number PhoneNumber) PhoneN start: start, end: start + len(rawString), rawString: rawString, - number: number, + Number: number, } } From b8e3569c5c441a6894a0f87a4a1d923c21c07b41 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 13:42:34 +0100 Subject: [PATCH 11/30] Remove rune conversion --- phonenumbermatcher.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 7e0d5c8..1f5c510 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -118,15 +118,15 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon if p.leniency >= VALID { if offset > 0 && LEAD_PATTERN.FindStringIndex(candidate) == nil { - previousChar := []rune(p.text)[offset-1] - if p.isInvalidPunctuationSymbol(previousChar) || unicode.IsLetter(previousChar) { + previousChar := p.text[offset-1] + if p.isInvalidPunctuationSymbol(rune(previousChar)) || unicode.IsLetter(rune(previousChar)) { return nil, nil } } lastCharIndex := offset + len(candidate) if lastCharIndex < len(p.text) { - nextChar := []rune(p.text)[lastCharIndex] - if p.isInvalidPunctuationSymbol(nextChar) || unicode.IsLetter(nextChar) { + nextChar := p.text[lastCharIndex] + if p.isInvalidPunctuationSymbol(rune(nextChar)) || unicode.IsLetter(rune(nextChar)) { return nil, nil } } From 67b632ffa3c64ae0eea8d05ec5433eadfaabe908 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 13:49:25 +0100 Subject: [PATCH 12/30] Fix nil value check --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 1f5c510..a52c3ab 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -196,7 +196,7 @@ func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:]) index := 0 + p.searchIndex for { - if p.maxTries > 0 && matcher == nil { + if p.maxTries > 0 || matcher == nil { break } start := index + matcher[0] From 877d5d5e734944c267559158b2714453595d548d Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 13:54:33 +0100 Subject: [PATCH 13/30] Fix conditional logic --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index a52c3ab..dd4dfff 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -196,7 +196,7 @@ func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:]) index := 0 + p.searchIndex for { - if p.maxTries > 0 || matcher == nil { + if p.maxTries == 0 || matcher == nil { break } start := index + matcher[0] From 8516f175c7cd4e818fdbfb38ac9d4c3dc0a039dc Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 14:07:44 +0100 Subject: [PATCH 14/30] Fix infinite loop --- phonenumbermatcher.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index dd4dfff..8b0b374 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -168,7 +168,7 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph groupMatch := possibleInnerMatch.FindStringIndex(candidate) isFirstMatch := true for { - if groupMatch == nil || p.maxTries == 0 { + if p.maxTries <= 0 || groupMatch == nil { break } if isFirstMatch { @@ -196,7 +196,7 @@ func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:]) index := 0 + p.searchIndex for { - if p.maxTries == 0 || matcher == nil { + if p.maxTries <= 0 || matcher == nil { break } start := index + matcher[0] From 53888a6a13ad9396840834addecf81adcd7d0f88 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 15:30:28 +0100 Subject: [PATCH 15/30] Ignore invalid utf-8 chars --- phonenumbermatcher.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 8b0b374..6c7d9f8 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "unicode" + "unicode/utf8" ) /* @@ -119,6 +120,12 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon if p.leniency >= VALID { if offset > 0 && LEAD_PATTERN.FindStringIndex(candidate) == nil { previousChar := p.text[offset-1] + for i := 0; i < 4; i++ { + if utf8.Valid([]byte{previousChar}) { + break + } + previousChar = p.text[offset+i] + } if p.isInvalidPunctuationSymbol(rune(previousChar)) || unicode.IsLetter(rune(previousChar)) { return nil, nil } @@ -126,6 +133,12 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon lastCharIndex := offset + len(candidate) if lastCharIndex < len(p.text) { nextChar := p.text[lastCharIndex] + for i := 1; i < 5; i++ { + if utf8.Valid([]byte{nextChar}) { + break + } + nextChar = p.text[lastCharIndex-i] + } if p.isInvalidPunctuationSymbol(rune(nextChar)) || unicode.IsLetter(rune(nextChar)) { return nil, nil } From 3a248d7f8c157b237b892d91547b7ad138874e4e Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 16:51:32 +0100 Subject: [PATCH 16/30] Fix unintended infinite loop at extractInnerMatch --- phonenumbermatcher.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 6c7d9f8..226cc13 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -180,6 +180,7 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph for _, possibleInnerMatch := range INNER_MATCHES { groupMatch := possibleInnerMatch.FindStringIndex(candidate) isFirstMatch := true + index := 0 + offset for { if p.maxTries <= 0 || groupMatch == nil { break @@ -193,13 +194,18 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph p.maxTries-- isFirstMatch = false } - group := p.trimAfterFirstMatch(UNWANTED_END_CHAR_PATTERN, candidate[groupMatch[0]:groupMatch[1]]) - match, _ := p.parseAndVerify(group, offset+groupMatch[0]) + start := index + groupMatch[0] + end := index + groupMatch[1] + innerCandidate := candidate[start:end] + group := p.trimAfterFirstMatch(UNWANTED_END_CHAR_PATTERN, innerCandidate) + match, _ := p.parseAndVerify(group, offset+start) if match != nil { return match } + + index = start + len(innerCandidate) + groupMatch = possibleInnerMatch.FindStringIndex(candidate[index:]) p.maxTries-- - groupMatch = possibleInnerMatch.FindStringIndex(candidate[groupMatch[1]:]) } } return nil From 213aabadeac9846d203dfad4a82d4827151a548a Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 16:54:21 +0100 Subject: [PATCH 17/30] Fix unintended infinite loop at extractInnerMatch --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 226cc13..b1ed1da 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -180,7 +180,7 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph for _, possibleInnerMatch := range INNER_MATCHES { groupMatch := possibleInnerMatch.FindStringIndex(candidate) isFirstMatch := true - index := 0 + offset + index := 0 for { if p.maxTries <= 0 || groupMatch == nil { break From 6c13f2b3b36cbd447420ddd7abba5534e08dae35 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 17:15:24 +0100 Subject: [PATCH 18/30] Escape plus sign --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index b1ed1da..77f93e4 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -35,7 +35,7 @@ var ( NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" BRACKET_PAIR_LIMIT = "{0,3}" - LEAD_CLASS = OPENING_PARENS + PLUS_CHARS + LEAD_CLASS = OPENING_PARENS + "\\" + PLUS_CHARS LEAD_PATTERN = regexp.MustCompile(LEAD_CLASS) LEAD_LIMIT = "{0,2}" From 9e8f8e240dc2685af8c3c91c2b813d3a1b0550d4 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 17:32:43 +0100 Subject: [PATCH 19/30] Get last char, instead of next char after candidate --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 77f93e4..71357f4 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -130,7 +130,7 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon return nil, nil } } - lastCharIndex := offset + len(candidate) + lastCharIndex := offset + len(candidate) - 1 if lastCharIndex < len(p.text) { nextChar := p.text[lastCharIndex] for i := 1; i < 5; i++ { From 2a3bf650dd2ad2471cd295f25265c51eee1ab4ed Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 17:59:17 +0100 Subject: [PATCH 20/30] Catch + before numbers --- phonenumbermatcher.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 71357f4..47f6b81 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -35,7 +35,7 @@ var ( NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" BRACKET_PAIR_LIMIT = "{0,3}" - LEAD_CLASS = OPENING_PARENS + "\\" + PLUS_CHARS + LEAD_CLASS = OPENING_PARENS + PLUS_CHARS LEAD_PATTERN = regexp.MustCompile(LEAD_CLASS) LEAD_LIMIT = "{0,2}" @@ -47,12 +47,12 @@ var ( BLOCK_LIMIT = "{0," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" - PATTERN = regexp.MustCompile("(?:" + LEAD_CLASS + PUNCTUATION + ")" + LEAD_LIMIT + DIGIT_SEQUENCE + "(?:" + PUNCTUATION + DIGIT_SEQUENCE + ")" + BLOCK_LIMIT + "(?:" + EXTN_PATTERNS_FOR_MATCHING + ")?") + PATTERN = regexp.MustCompile("(?i)(?:\\+){0,1}(?:" + LEAD_CLASS + PUNCTUATION + ")" + LEAD_LIMIT + DIGIT_SEQUENCE + "(?:" + PUNCTUATION + DIGIT_SEQUENCE + ")" + BLOCK_LIMIT + "(?:" + EXTN_PATTERNS_FOR_MATCHING + ")?") SLASH_SEPARATED_DATES = regexp.MustCompile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}") TIME_STAMPS = regexp.MustCompile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$") - MATCHING_BRACKETS = regexp.MustCompile("(?:[" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") + MATCHING_BRACKETS = regexp.MustCompile("(?:([" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") PUB_PAGES = regexp.MustCompile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}") From 3989f2d26b0c3ced9d2dac02db7a89e503102d2a Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Tue, 23 Mar 2021 18:01:07 +0100 Subject: [PATCH 21/30] Typo --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 47f6b81..147cc05 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -52,7 +52,7 @@ var ( SLASH_SEPARATED_DATES = regexp.MustCompile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}") TIME_STAMPS = regexp.MustCompile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$") - MATCHING_BRACKETS = regexp.MustCompile("(?:([" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") + MATCHING_BRACKETS = regexp.MustCompile("(?:[" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") PUB_PAGES = regexp.MustCompile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}") From 0a638d745bf59bc84778787cb650853dd5e32a3c Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 09:15:40 +0100 Subject: [PATCH 22/30] Fixed logic error --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 147cc05..30b4191 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -130,7 +130,7 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon return nil, nil } } - lastCharIndex := offset + len(candidate) - 1 + lastCharIndex := offset + len(candidate) if lastCharIndex < len(p.text) { nextChar := p.text[lastCharIndex] for i := 1; i < 5; i++ { From 89d8133e1642fd42cce97473f6709027590efa93 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 12:42:31 +0100 Subject: [PATCH 23/30] Fix wrong offset --- phonenumbermatcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 30b4191..67a1ffb 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -187,7 +187,7 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph } if isFirstMatch { group := p.trimAfterFirstMatch(UNWANTED_END_CHAR_PATTERN, candidate[:groupMatch[0]]) - match, _ := p.parseAndVerify(group, offset+groupMatch[0]) + match, _ := p.parseAndVerify(group, offset) if match != nil { return match } From a8bd4e6f0a0589a1200f9f23d053d3b2cb40cd5b Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 15:36:46 +0100 Subject: [PATCH 24/30] Remove valid rune checking --- phonenumbermatcher.go | 169 ++++++++++++++++++++++++++++++------------ 1 file changed, 120 insertions(+), 49 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 67a1ffb..289c4f4 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -5,14 +5,11 @@ import ( "regexp" "strconv" "unicode" - "unicode/utf8" ) -/* -A stateful class that finds and extracts telephone numbers fom text. - -Vanity numbers (phone numbers using alphabetic digits such as '1-800-SIX-FLAGS' are not found. -*/ +// A stateful class that finds and extracts telephone numbers fom text. +// +// Vanity numbers (phone numbers using alphabetic digits such as '1-800-SIX-FLAGS' are not found. type PhoneNumberMatcher struct { text string preferredRegion string @@ -30,42 +27,51 @@ const ( ) var ( - OPENING_PARENS = "\\(\\[\uFF08\uFF3B" - CLOSING_PARENS = "\\)\\]\uFF09\uFF3D" - NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" - BRACKET_PAIR_LIMIT = "{0,3}" - - LEAD_CLASS = OPENING_PARENS + PLUS_CHARS - LEAD_PATTERN = regexp.MustCompile(LEAD_CLASS) - LEAD_LIMIT = "{0,2}" - - DIGIT_BLOCK_LIMIT = 17 + 3 - DIGIT_SEQUENCE = "\\d{1," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" - - PUNCTIATION_LIMIT = "{0,4}" - PUNCTUATION = "[" + VALID_PUNCTUATION + "]" + PUNCTIATION_LIMIT - - BLOCK_LIMIT = "{0," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" - + // The phone number pattern used by {@link #find}, similar to + // {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences: + //
    + //
  • All captures are limited in order to place an upper bound to the text matched by the + // pattern. + //
      + //
    • Leading punctuation / plus signs are limited. + //
    • Consecutive occurrences of punctuation are limited. + //
    • Number of digits is limited. + //
    + //
  • No whitespace is allowed at the start or end. + //
  • No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. + //
PATTERN = regexp.MustCompile("(?i)(?:\\+){0,1}(?:" + LEAD_CLASS + PUNCTUATION + ")" + LEAD_LIMIT + DIGIT_SEQUENCE + "(?:" + PUNCTUATION + DIGIT_SEQUENCE + ")" + BLOCK_LIMIT + "(?:" + EXTN_PATTERNS_FOR_MATCHING + ")?") + // Matches strings that look like publication pages. Example: + //
Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
+	//  Chen Li. VLDB J. 12(3): 211-227 (2003).
+ // + // The string "211-227 (2003)" is not a telephone number. + PUB_PAGES = regexp.MustCompile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}") + + // Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or 08/31/95. SLASH_SEPARATED_DATES = regexp.MustCompile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}") - TIME_STAMPS = regexp.MustCompile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$") - MATCHING_BRACKETS = regexp.MustCompile("(?:[" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") + // Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. + TIME_STAMPS = regexp.MustCompile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$") + TIME_STAMPS_SUFFIX = regexp.MustCompile(":[0-5]\\d") - PUB_PAGES = regexp.MustCompile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}") + // Pattern to check that brackets match. Opening brackets should be closed within a phone number. + // This also checks that there is something inside the brackets. Having no brackets at all is also + // fine. + // An opening bracket at the beginning may not be closed, but subsequent ones should be. It's + // also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a + // closing bracket first. We limit the sets of brackets in a phone number to four. + MATCHING_BRACKETS = regexp.MustCompile("(?:[" + OPENING_PARENS + "])?" + "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?" + NON_PARENS + "+" + "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT + NON_PARENS + "*") - /** - * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are - * ordered according to specificity. For example, white-space is last since that is frequently - * used in numbers, not just to separate two numbers. We have separate patterns since we don't - * want to break up the phone-number-like text on more than one different kind of symbol at one - * time, although symbols of the same type (e.g. space) can be safely grouped together. - * - * Note that if there is a match, we will always check any text found up to the first match as - * well. - */ + // Patterns used to extract phone numbers from a larger phone-number-like pattern. These are + // ordered according to specificity. For example, white-space is last since that is frequently + // used in numbers, not just to separate two numbers. We have separate patterns since we don't + // want to break up the phone-number-like text on more than one different kind of symbol at one + // time, although symbols of the same type (e.g. space) can be safely grouped together. + // + // Note that if there is a match, we will always check any text found up to the first match as + // well. INNER_MATCHES = []*regexp.Regexp{ // Breaks on the slash - e.g. "651-234-2345/332-445-1234" regexp.MustCompile("/+(.*)"), @@ -84,8 +90,49 @@ var ( // Breaks on space - e.g. "3324451234 8002341234" regexp.MustCompile("\\p{Z}+(\\P{Z}+)"), } + + // Punctuation that may be at the start of a phone number - brackets and plus signs. + LEAD_CLASS = OPENING_PARENS + PLUS_CHARS + LEAD_PATTERN = regexp.MustCompile(LEAD_CLASS) + + // Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist to make the pattern more easily understood. + OPENING_PARENS = "\\(\\[\uFF08\uFF3B" + CLOSING_PARENS = "\\)\\]\uFF09\uFF3D" + NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]" + + // Limit on the number of pairs of brackets in a phone number. + BRACKET_PAIR_LIMIT = "{0,3}" + + // Limit on the number of leading (plus) characters. + LEAD_LIMIT = "{0,2}" + + // Limit on the number of consecutive punctuation characters. + PUNCTIATION_LIMIT = "{0,4}" + + // The maximum number of digits allowed in a digit-separated block. As we allow all digits in a + //single block, set high enough to accommodate the entire national number and the international + //country code. + DIGIT_BLOCK_LIMIT = 17 + 3 + + // Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some + // formats use spaces to separate each digit. + BLOCK_LIMIT = "{0," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" + + // A punctuation sequence allowing white space. + PUNCTUATION = "[" + VALID_PUNCTUATION + "]" + PUNCTIATION_LIMIT + + // A digits block without punctuation. + DIGIT_SEQUENCE = "\\d{1," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" ) +// Creates a new instance. +// +// Arguments: +// text -- The character sequence that we will search +// country -- The country to assume for phone numbers not written in +// international format (with a leading plus, or with the +// international dialing prefix of the specified region). May be +// "ZZ" if only numbers with a leading plus should be considered. func NewPhoneNumberMatcher(text string, region string) PhoneNumberMatcher { m := PhoneNumberMatcher{ text: text, @@ -100,6 +147,8 @@ func NewPhoneNumberMatcher(text string, region string) PhoneNumberMatcher { return m } +// Trims away any characters after the first match of pattern in +// candidate, returning the trimmed version. func (*PhoneNumberMatcher) trimAfterFirstMatch(pattern *regexp.Regexp, candidate string) string { trailingCharsMatch := pattern.FindStringIndex(candidate) if trailingCharsMatch != nil { @@ -112,20 +161,39 @@ func (*PhoneNumberMatcher) isInvalidPunctuationSymbol(char rune) bool { return char == '%' || unicode.In(char, unicode.Sc) } +// Parses a phone number from the candidate using phonenumberutil.parse and +// verifies it matches the requested leniency. If parsing and verification succeed, a +// corresponding PhoneNumberMatch is returned, otherwise this method returns None. +// +// Arguments: +// +// candidate -- The candidate match. +// +// offset -- The offset of candidate within self.text. +// +// Returns the parsed and validated phone number match, or None. func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*PhoneNumberMatch, error) { + // Check the candidate doesn't contain any formatting which would + // indicate that it really isn't a phone number. if MATCHING_BRACKETS.FindStringIndex(candidate) == nil || PUB_PAGES.FindStringIndex(candidate) != nil { return nil, nil } + // If leniency is set to VALID or stricter, we also want to skip + // numbers that are surrounded by Latin alphabetic characters, to + // skip cases like abc8005001234 or 8005001234def. if p.leniency >= VALID { if offset > 0 && LEAD_PATTERN.FindStringIndex(candidate) == nil { + // If the candidate is not at the start of the text, and does + // not start with phone-number punctuation, check the previous + // character previousChar := p.text[offset-1] - for i := 0; i < 4; i++ { - if utf8.Valid([]byte{previousChar}) { - break - } - previousChar = p.text[offset+i] - } + //for i := 0; i < 4; i++ { + // if utf8.Valid([]byte{previousChar}) { + // break + // } + // previousChar = p.text[offset+i] + //} if p.isInvalidPunctuationSymbol(rune(previousChar)) || unicode.IsLetter(rune(previousChar)) { return nil, nil } @@ -133,12 +201,12 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon lastCharIndex := offset + len(candidate) if lastCharIndex < len(p.text) { nextChar := p.text[lastCharIndex] - for i := 1; i < 5; i++ { - if utf8.Valid([]byte{nextChar}) { - break - } - nextChar = p.text[lastCharIndex-i] - } + //for i := 1; i < 5; i++ { + // if utf8.Valid([]byte{nextChar}) { + // break + // } + // nextChar = p.text[lastCharIndex-i] + //} if p.isInvalidPunctuationSymbol(rune(nextChar)) || unicode.IsLetter(rune(nextChar)) { return nil, nil } @@ -165,7 +233,10 @@ func (p *PhoneNumberMatcher) extractMatch(candidate string, offset int) *PhoneNu } if TIME_STAMPS.FindStringIndex(candidate) != nil { - return nil + followingText := p.text[offset+len(candidate):] + if TIME_STAMPS_SUFFIX.FindStringIndex(followingText) != nil { + return nil + } } match, _ := p.parseAndVerify(candidate, offset) From c11d83c36ae3681bbdea2009ea342cadc73cfe71 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 15:47:19 +0100 Subject: [PATCH 25/30] Added comments --- phonenumbermatcher.go | 55 +++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 289c4f4..1fc81d0 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -188,12 +188,8 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon // not start with phone-number punctuation, check the previous // character previousChar := p.text[offset-1] - //for i := 0; i < 4; i++ { - // if utf8.Valid([]byte{previousChar}) { - // break - // } - // previousChar = p.text[offset+i] - //} + // We return nil if it is a latin letter or an invalid + // punctuation symbol if p.isInvalidPunctuationSymbol(rune(previousChar)) || unicode.IsLetter(rune(previousChar)) { return nil, nil } @@ -201,12 +197,6 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon lastCharIndex := offset + len(candidate) if lastCharIndex < len(p.text) { nextChar := p.text[lastCharIndex] - //for i := 1; i < 5; i++ { - // if utf8.Valid([]byte{nextChar}) { - // break - // } - // nextChar = p.text[lastCharIndex-i] - //} if p.isInvalidPunctuationSymbol(rune(nextChar)) || unicode.IsLetter(rune(nextChar)) { return nil, nil } @@ -219,6 +209,11 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon } if p.leniency.Verify(number, candidate) { + // We used parse(keep_raw_input=True) to create this number, + // but for now we don't return the extra values parsed. + // TODO: stop clearing all values here and switch all users + // over to using raw_input rather than the raw_string of + // PhoneNumberMatch. match := NewPhoneNumberMatch(offset, candidate, *number) return &match, nil @@ -227,11 +222,23 @@ func (p *PhoneNumberMatcher) parseAndVerify(candidate string, offset int) (*Phon return nil, nil } +// Attempts to extract a match from a candidate string. +// +// Arguments: +// +// candidate -- The candidate text that might contain a phone number. +// +// offset -- The offset of candidate within self.text +// +// Returns the match found, None if none can be found func (p *PhoneNumberMatcher) extractMatch(candidate string, offset int) *PhoneNumberMatch { + // Skip a match that is more likely a publication page reference or a + // date. if SLASH_SEPARATED_DATES.FindStringIndex(candidate) != nil { return nil } + // Skip potential time-stamps. if TIME_STAMPS.FindStringIndex(candidate) != nil { followingText := p.text[offset+len(candidate):] if TIME_STAMPS_SUFFIX.FindStringIndex(followingText) != nil { @@ -239,14 +246,27 @@ func (p *PhoneNumberMatcher) extractMatch(candidate string, offset int) *PhoneNu } } + // Try to come up with a valid match given the entire candidate. match, _ := p.parseAndVerify(candidate, offset) if match != nil { return match } + // If that failed, try to find an "inner match" -- there might be a + // phone number within this candidate. return p.extractInnerMatch(candidate, offset) } +// Attempts to extract a match from candidate if the whole candidate +// does not qualify as a match. +// +// Arguments: +// +// candidate -- The candidate text that might contain a phone number +// +// offset -- The current offset of candidate within text +// +// Returns the match found, None if none can be found func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *PhoneNumberMatch { for _, possibleInnerMatch := range INNER_MATCHES { groupMatch := possibleInnerMatch.FindStringIndex(candidate) @@ -257,6 +277,7 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph break } if isFirstMatch { + // We should handle any group before this one too. group := p.trimAfterFirstMatch(UNWANTED_END_CHAR_PATTERN, candidate[:groupMatch[0]]) match, _ := p.parseAndVerify(group, offset) if match != nil { @@ -282,6 +303,14 @@ func (p *PhoneNumberMatcher) extractInnerMatch(candidate string, offset int) *Ph return nil } +// Attempts to find the next subsequence in the searched sequence on or after index +// that represents a phone number. Returns the next match, None if none was found. +// +// Arguments: +// +// index -- The search index to start searching at. +// +// Returns the phone number match found, None if none can be found. func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { matcher := PATTERN.FindStringIndex(p.text[p.searchIndex:]) index := 0 + p.searchIndex @@ -310,6 +339,7 @@ func (p *PhoneNumberMatcher) find() *PhoneNumberMatch { return nil } +// Indicates whether there is another match available func (p *PhoneNumberMatcher) hasNext() bool { if p.state == notReady { p.lastMatch = p.find() @@ -323,6 +353,7 @@ func (p *PhoneNumberMatcher) hasNext() bool { return p.state == ready } +// Return the next match; raises Exception if no next match available func (p *PhoneNumberMatcher) Next() (*PhoneNumberMatch, error) { if !p.hasNext() { return nil, io.EOF From a46b3749bf04d3108cffd33ab625c183e03a091b Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 15:52:19 +0100 Subject: [PATCH 26/30] Reset unused files --- cmd/phoneserver/go.mod | 2 +- cmd/phoneserver/main.go | 111 +++++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/cmd/phoneserver/go.mod b/cmd/phoneserver/go.mod index fc7907f..c3fa7cb 100644 --- a/cmd/phoneserver/go.mod +++ b/cmd/phoneserver/go.mod @@ -7,4 +7,4 @@ replace github.com/nyaruka/phonenumbers => ../../ require ( github.com/aws/aws-lambda-go v1.13.1 github.com/nyaruka/phonenumbers v0.0.0-00010101000000-000000000000 -) +) \ No newline at end of file diff --git a/cmd/phoneserver/main.go b/cmd/phoneserver/main.go index d057f36..2af237e 100644 --- a/cmd/phoneserver/main.go +++ b/cmd/phoneserver/main.go @@ -1,16 +1,13 @@ package main -//import ( -// "encoding/json" -// "net/http" -// -// "github.com/nyaruka/phonenumbers" -//)import ( -// "encoding/json" -// "net/http" -// -// "github.com/nyaruka/phonenumbers" -//) +import ( + "encoding/json" + "net/http" + + "github.com/aws/aws-lambda-go/events" + "github.com/aws/aws-lambda-go/lambda" + "github.com/nyaruka/phonenumbers" +) var Version = "dev" @@ -29,49 +26,49 @@ type successResponse struct { Version string `json:"version"` } -//func writeResponse(status int, body interface{}) (events.APIGatewayProxyResponse, error) { -// js, err := json.MarshalIndent(body, "", " ") -// if err != nil { -// return events.APIGatewayProxyResponse{ -// StatusCode: 500, -// Body: err.Error(), -// }, nil -// } -// -// return events.APIGatewayProxyResponse{ -// StatusCode: 200, -// Body: string(js), -// Headers: map[string]string{"Content-Type": "application/json"}, -// }, nil -//} -// -//func parse(request events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) { -// phone := request.QueryStringParameters["phone"] -// -// // required phone number -// if phone == "" { -// return writeResponse(http.StatusBadRequest, errorResponse{"missing body", "missing 'phone' parameter"}) -// } -// -// // optional country code -// country := request.QueryStringParameters["country"] -// -// metadata, err := phonenumbers.Parse(phone, country) -// if err != nil { -// return writeResponse(http.StatusBadRequest, errorResponse{"error parsing phone", err.Error()}) -// } -// -// return writeResponse(http.StatusOK, successResponse{ -// NationalNumber: *metadata.NationalNumber, -// CountryCode: *metadata.CountryCode, -// IsPossible: phonenumbers.IsPossibleNumber(metadata), -// IsValid: phonenumbers.IsValidNumber(metadata), -// NationalFormatted: phonenumbers.Format(metadata, phonenumbers.NATIONAL), -// InternationalFormatted: phonenumbers.Format(metadata, phonenumbers.INTERNATIONAL), -// Version: Version, -// }) -//} -// -//func main() { -// lambda.Start(parse) -//} +func writeResponse(status int, body interface{}) (events.APIGatewayProxyResponse, error) { + js, err := json.MarshalIndent(body, "", " ") + if err != nil { + return events.APIGatewayProxyResponse{ + StatusCode: 500, + Body: err.Error(), + }, nil + } + + return events.APIGatewayProxyResponse{ + StatusCode: 200, + Body: string(js), + Headers: map[string]string{"Content-Type": "application/json"}, + }, nil +} + +func parse(request events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) { + phone := request.QueryStringParameters["phone"] + + // required phone number + if phone == "" { + return writeResponse(http.StatusBadRequest, errorResponse{"missing body", "missing 'phone' parameter"}) + } + + // optional country code + country := request.QueryStringParameters["country"] + + metadata, err := phonenumbers.Parse(phone, country) + if err != nil { + return writeResponse(http.StatusBadRequest, errorResponse{"error parsing phone", err.Error()}) + } + + return writeResponse(http.StatusOK, successResponse{ + NationalNumber: *metadata.NationalNumber, + CountryCode: *metadata.CountryCode, + IsPossible: phonenumbers.IsPossibleNumber(metadata), + IsValid: phonenumbers.IsValidNumber(metadata), + NationalFormatted: phonenumbers.Format(metadata, phonenumbers.NATIONAL), + InternationalFormatted: phonenumbers.Format(metadata, phonenumbers.INTERNATIONAL), + Version: Version, + }) +} + +func main() { + lambda.Start(parse) +} From 1832dc624d090ca036752bed747d07960d62fd96 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 15:54:54 +0100 Subject: [PATCH 27/30] Set mod to nyaruka --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 24c16e7..fad8daa 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/mauritsderuiter95/phonenumbers +module github.com/nyaruka/phonenumbers require ( github.com/golang/protobuf v1.3.2 From 30bc7151ca7feaf7c1287026d9fd5f7690caaabf Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Thu, 25 Mar 2021 15:56:25 +0100 Subject: [PATCH 28/30] Set go version to 1.13 --- go.mod | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index fad8daa..b3c5356 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,5 @@ module github.com/nyaruka/phonenumbers -require ( - github.com/golang/protobuf v1.3.2 -) +require github.com/golang/protobuf v1.3.2 -go 1.16 +go 1.13 From 7592927b95b9be1b4b7bac2c7c2e76d6fe5ec759 Mon Sep 17 00:00:00 2001 From: mauritsderuiter Date: Fri, 26 Mar 2021 13:23:02 +0100 Subject: [PATCH 29/30] Ran go mod tidy, reset cmd/phonnumbers/go.mod --- cmd/phoneserver/go.mod | 2 +- go.sum | 17 ----------------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/cmd/phoneserver/go.mod b/cmd/phoneserver/go.mod index c3fa7cb..fc7907f 100644 --- a/cmd/phoneserver/go.mod +++ b/cmd/phoneserver/go.mod @@ -7,4 +7,4 @@ replace github.com/nyaruka/phonenumbers => ../../ require ( github.com/aws/aws-lambda-go v1.13.1 github.com/nyaruka/phonenumbers v0.0.0-00010101000000-000000000000 -) \ No newline at end of file +) diff --git a/go.sum b/go.sum index 83d6e04..9a46bf9 100644 --- a/go.sum +++ b/go.sum @@ -1,19 +1,2 @@ -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/aws/aws-lambda-go v1.23.0 h1:Vjwow5COkFJp7GePkk9kjAo/DyX36b7wVPKwseQZbRo= -github.com/aws/aws-lambda-go v1.23.0/go.mod h1:jJmlefzPfGnckuHdXX7/80O3BvUUi12XOkbv4w9SGLU= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 0e9208758775143175f8c1d9cee3ad7b47487d49 Mon Sep 17 00:00:00 2001 From: Sami Kammoun Date: Sat, 27 Jul 2024 10:57:07 +0100 Subject: [PATCH 30/30] allow gradual non-breaking releases --- phonenumbermatcher.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/phonenumbermatcher.go b/phonenumbermatcher.go index 1fc81d0..086f46a 100644 --- a/phonenumbermatcher.go +++ b/phonenumbermatcher.go @@ -125,6 +125,11 @@ var ( DIGIT_SEQUENCE = "\\d{1," + strconv.Itoa(DIGIT_BLOCK_LIMIT) + "}" ) +func NewPhoneNumberMatcher(seq string) *PhoneNumberMatcher { + // TODO: Add region parameter and implement in the next major release (2.0) + return nil +} + // Creates a new instance. // // Arguments: @@ -133,7 +138,8 @@ var ( // international format (with a leading plus, or with the // international dialing prefix of the specified region). May be // "ZZ" if only numbers with a leading plus should be considered. -func NewPhoneNumberMatcher(text string, region string) PhoneNumberMatcher { +func NewPhoneNumberMatcherForRegion(text string, region string) PhoneNumberMatcher { + // TODO: Deprecate in the next major release (2.0) and remove in the following major release (3.0). m := PhoneNumberMatcher{ text: text, preferredRegion: region,