• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "phonenumbers/phonenumbermatcher.h"
16 
17 #include <string>
18 #include <vector>
19 
20 #include <gtest/gtest.h>
21 #include <unicode/unistr.h>
22 
23 #include "phonenumbers/base/basictypes.h"
24 #include "phonenumbers/base/memory/scoped_ptr.h"
25 #include "phonenumbers/base/memory/singleton.h"
26 #include "phonenumbers/default_logger.h"
27 #include "phonenumbers/phonenumber.h"
28 #include "phonenumbers/phonenumber.pb.h"
29 #include "phonenumbers/phonenumbermatch.h"
30 #include "phonenumbers/phonenumberutil.h"
31 #include "phonenumbers/stringutil.h"
32 #include "phonenumbers/test_util.h"
33 
34 namespace i18n {
35 namespace phonenumbers {
36 
37 using std::string;
38 using icu::UnicodeString;
39 
40 namespace {
// Text surrounding a phone number under test. A test builds its input by
// concatenating leading_text_, the number itself and trailing_text_.
struct NumberContext {
  NumberContext(const string& leading_text, const string& trailing_text)
      : leading_text_(leading_text), trailing_text_(trailing_text) {}

  // Text placed immediately before the number.
  string leading_text_;
  // Text placed immediately after the number.
  string trailing_text_;
};
52 
53 // Small class that holds the number we want to test and the region for which it
54 // should be valid.
55 struct NumberTest {
56   string raw_string_;
57   string region_;
58 
ToStringi18n::phonenumbers::__anon197a462c0111::NumberTest59   string ToString() const {
60     return StrCat(raw_string_, " (", region_, ")");
61   }
62 
NumberTesti18n::phonenumbers::__anon197a462c0111::NumberTest63   NumberTest(const string& raw_string, const string& region)
64       : raw_string_(raw_string),
65         region_(region) {
66   }
67 };
68 }  // namespace
69 
70 class PhoneNumberMatcherTest : public testing::Test {
71  protected:
PhoneNumberMatcherTest()72   PhoneNumberMatcherTest()
73       : phone_util_(*PhoneNumberUtil::GetInstance()),
74         matcher_(phone_util_, "",
75                  RegionCode::US(),
76                  PhoneNumberMatcher::VALID, 5),
77         offset_(0) {
78     PhoneNumberUtil::GetInstance()->SetLogger(new StdoutLogger());
79   }
80 
IsLatinLetter(char32 letter)81   bool IsLatinLetter(char32 letter) {
82     return PhoneNumberMatcher::IsLatinLetter(letter);
83   }
84 
ContainsMoreThanOneSlashInNationalNumber(const PhoneNumber & phone_number,const string & candidate)85   bool ContainsMoreThanOneSlashInNationalNumber(
86       const PhoneNumber& phone_number, const string& candidate) {
87     return PhoneNumberMatcher::ContainsMoreThanOneSlashInNationalNumber(
88         phone_number, candidate, phone_util_);
89   }
90 
ExtractMatch(const string & text,PhoneNumberMatch * match)91   bool ExtractMatch(const string& text, PhoneNumberMatch* match) {
92     return matcher_.ExtractMatch(text, offset_, match);
93   }
94 
GetMatcherWithLeniency(const string & text,const string & region,PhoneNumberMatcher::Leniency leniency) const95   PhoneNumberMatcher* GetMatcherWithLeniency(
96       const string& text, const string& region,
97       PhoneNumberMatcher::Leniency leniency) const {
98     return new PhoneNumberMatcher(phone_util_, text, region, leniency,
99                                   100 /* max_tries */);
100   }
101 
102   // Tests each number in the test cases provided is found in its entirety for
103   // the specified leniency level.
DoTestNumberMatchesForLeniency(const std::vector<NumberTest> & test_cases,PhoneNumberMatcher::Leniency leniency) const104   void DoTestNumberMatchesForLeniency(
105       const std::vector<NumberTest>& test_cases,
106       PhoneNumberMatcher::Leniency leniency) const {
107     scoped_ptr<PhoneNumberMatcher> matcher;
108     for (std::vector<NumberTest>::const_iterator test = test_cases.begin();
109          test != test_cases.end(); ++test) {
110       matcher.reset(GetMatcherWithLeniency(
111           test->raw_string_, test->region_, leniency));
112       EXPECT_TRUE(matcher->HasNext())
113           << "No match found in " << test->ToString()
114           << " for leniency: " << leniency;
115       if (matcher->HasNext()) {
116         PhoneNumberMatch match;
117         matcher->Next(&match);
118         EXPECT_EQ(test->raw_string_, match.raw_string())
119             << "Found wrong match in test " << test->ToString()
120             << ". Found " << match.raw_string();
121       }
122     }
123   }
124 
125   // Tests no number in the test cases provided is found for the specified
126   // leniency level.
DoTestNumberNonMatchesForLeniency(const std::vector<NumberTest> & test_cases,PhoneNumberMatcher::Leniency leniency) const127   void DoTestNumberNonMatchesForLeniency(
128       const std::vector<NumberTest>& test_cases,
129       PhoneNumberMatcher::Leniency leniency) const {
130     scoped_ptr<PhoneNumberMatcher> matcher;
131     for (std::vector<NumberTest>::const_iterator test = test_cases.begin();
132          test != test_cases.end(); ++test) {
133       matcher.reset(GetMatcherWithLeniency(
134           test->raw_string_, test->region_, leniency));
135       EXPECT_FALSE(matcher->HasNext()) << "Match found in " << test->ToString()
136                                        << " for leniency: " << leniency;
137     }
138   }
139 
140   // Asserts that the raw string and expected proto buffer for a match are set
141   // appropriately.
AssertMatchProperties(const PhoneNumberMatch & match,const string & text,const string & number,const string & region_code)142   void AssertMatchProperties(const PhoneNumberMatch& match, const string& text,
143                              const string& number, const string& region_code) {
144     PhoneNumber expected_result;
145     phone_util_.Parse(number, region_code, &expected_result);
146 
147     EXPECT_EQ(expected_result, match.number());
148     EXPECT_EQ(number, match.raw_string()) << " Wrong number found in " << text;
149   }
150 
151   // Asserts that another number can be found in "text" starting at "index", and
152   // that its corresponding range is [start, end).
AssertEqualRange(const string & text,int index,int start,int end)153   void AssertEqualRange(const string& text, int index, int start, int end) {
154     string sub = text.substr(index);
155     PhoneNumberMatcher matcher(phone_util_, sub, RegionCode::NZ(),
156                                PhoneNumberMatcher::POSSIBLE,
157                                1000000 /* max_tries */);
158     PhoneNumberMatch match;
159     ASSERT_TRUE(matcher.HasNext());
160     matcher.Next(&match);
161     EXPECT_EQ(start - index, match.start());
162     EXPECT_EQ(end - index, match.end());
163     EXPECT_EQ(sub.substr(match.start(), match.length()), match.raw_string());
164   }
165 
166   // Tests numbers found by the PhoneNumberMatcher in various textual contexts.
DoTestFindInContext(const string & number,const string & default_country)167   void DoTestFindInContext(const string& number,
168                            const string& default_country) {
169     FindPossibleInContext(number, default_country);
170 
171     PhoneNumber parsed;
172     phone_util_.Parse(number, default_country, &parsed);
173     if (phone_util_.IsValidNumber(parsed)) {
174       FindValidInContext(number, default_country);
175     }
176   }
177 
178   // Helper method which tests the contexts provided and ensures that:
179   // -- if is_valid is true, they all find a test number inserted in the middle
180   //   when leniency of matching is set to VALID; else no test number should be
181   //   extracted at that leniency level
182   // -- if is_possible is true, they all find a test number inserted in the
183   //   middle when leniency of matching is set to POSSIBLE; else no test number
184   //   should be extracted at that leniency level
FindMatchesInContexts(const std::vector<NumberContext> & contexts,bool is_valid,bool is_possible,const string & region,const string & number)185   void FindMatchesInContexts(const std::vector<NumberContext>& contexts,
186                              bool is_valid, bool is_possible,
187                              const string& region, const string& number) {
188     if (is_valid) {
189       DoTestInContext(number, region, contexts, PhoneNumberMatcher::VALID);
190     } else {
191       for (std::vector<NumberContext>::const_iterator it = contexts.begin();
192            it != contexts.end(); ++it) {
193         string text = StrCat(it->leading_text_, number, it->trailing_text_);
194         PhoneNumberMatcher matcher(text, region);
195         EXPECT_FALSE(matcher.HasNext());
196       }
197     }
198     if (is_possible) {
199       DoTestInContext(number, region, contexts, PhoneNumberMatcher::POSSIBLE);
200     } else {
201       for (std::vector<NumberContext>::const_iterator it = contexts.begin();
202            it != contexts.end(); ++it) {
203         string text = StrCat(it->leading_text_, number, it->trailing_text_);
204         PhoneNumberMatcher matcher(phone_util_, text, region,
205                                    PhoneNumberMatcher::POSSIBLE,
206                                    10000);  // Number of matches.
207         EXPECT_FALSE(matcher.HasNext());
208       }
209     }
210   }
211 
212   // Variant of FindMatchesInContexts that uses a default number and region.
FindMatchesInContexts(const std::vector<NumberContext> & contexts,bool is_valid,bool is_possible)213   void FindMatchesInContexts(const std::vector<NumberContext>& contexts,
214                              bool is_valid, bool is_possible) {
215     const string& region = RegionCode::US();
216     const string number("415-666-7777");
217 
218     FindMatchesInContexts(contexts, is_valid, is_possible, region, number);
219   }
220 
221   // Tests valid numbers in contexts that should pass for
222   // PhoneNumberMatcher::POSSIBLE.
FindPossibleInContext(const string & number,const string & default_country)223   void FindPossibleInContext(const string& number,
224                              const string& default_country) {
225     std::vector<NumberContext> context_pairs;
226     context_pairs.push_back(NumberContext("", ""));  // no context
227     context_pairs.push_back(NumberContext("   ", "\t"));  // whitespace only
228     context_pairs.push_back(NumberContext("Hello ", ""));  // no context at end
229     // No context at start.
230     context_pairs.push_back(NumberContext("", " to call me!"));
231     context_pairs.push_back(NumberContext("Hi there, call ", " to reach me!"));
232     // With commas.
233     context_pairs.push_back(NumberContext("Hi there, call ", ", or don't"));
234     // Three examples without whitespace around the number.
235     context_pairs.push_back(NumberContext("Hi call", ""));
236     context_pairs.push_back(NumberContext("", "forme"));
237     context_pairs.push_back(NumberContext("Hi call", "forme"));
238     // With other small numbers.
239     context_pairs.push_back(NumberContext("It's cheap! Call ", " before 6:30"));
240     // With a second number later.
241     context_pairs.push_back(NumberContext("Call ", " or +1800-123-4567!"));
242     // With a Month-Day date.
243     context_pairs.push_back(NumberContext("Call me on June 2 at", ""));
244     // With publication pages.
245     context_pairs.push_back(NumberContext(
246         "As quoted by Alfonso 12-15 (2009), you may call me at ", ""));
247     context_pairs.push_back(NumberContext(
248         "As quoted by Alfonso et al. 12-15 (2009), you may call me at ", ""));
249     // With dates, written in the American style.
250     context_pairs.push_back(NumberContext(
251         "As I said on 03/10/2011, you may call me at ", ""));
252     // With trailing numbers after a comma. The 45 should not be considered an
253     // extension.
254     context_pairs.push_back(NumberContext("", ", 45 days a year"));
255     // When matching we don't consider semicolon along with legitimate extension
256     // symbol to indicate an extension. The 7246433 should not be considered an
257     // extension.
258     context_pairs.push_back(NumberContext("", ";x 7246433"));
259     // With a postfix stripped off as it looks like the start of another number.
260     context_pairs.push_back(NumberContext("Call ", "/x12 more"));
261 
262     DoTestInContext(number, default_country, context_pairs,
263                     PhoneNumberMatcher::POSSIBLE);
264   }
265 
266   // Tests valid numbers in contexts that fail for PhoneNumberMatcher::POSSIBLE
267   // but are valid for PhoneNumberMatcher::VALID.
FindValidInContext(const string & number,const string & default_country)268   void FindValidInContext(const string& number, const string& default_country) {
269     std::vector<NumberContext> context_pairs;
270     // With other small numbers.
271     context_pairs.push_back(NumberContext("It's only 9.99! Call ", " to buy"));
272     // With a number Day.Month.Year date.
273     context_pairs.push_back(NumberContext("Call me on 21.6.1984 at ", ""));
274     // With a number Month/Day date.
275     context_pairs.push_back(NumberContext("Call me on 06/21 at ", ""));
276     // With a number Day.Month date.
277     context_pairs.push_back(NumberContext("Call me on 21.6. at ", ""));
278     // With a number Month/Day/Year date.
279     context_pairs.push_back(NumberContext("Call me on 06/21/84 at ", ""));
280 
281     DoTestInContext(number, default_country, context_pairs,
282                     PhoneNumberMatcher::VALID);
283   }
284 
DoTestInContext(const string & number,const string & default_country,const std::vector<NumberContext> & context_pairs,PhoneNumberMatcher::Leniency leniency)285   void DoTestInContext(const string& number, const string& default_country,
286                        const std::vector<NumberContext>& context_pairs,
287                        PhoneNumberMatcher::Leniency leniency) {
288     for (std::vector<NumberContext>::const_iterator it = context_pairs.begin();
289          it != context_pairs.end(); ++it) {
290       string prefix = it->leading_text_;
291       string text = StrCat(prefix, number, it->trailing_text_);
292 
293       int start = prefix.length();
294       int end = start + number.length();
295       PhoneNumberMatcher matcher(phone_util_, text, default_country, leniency,
296                                  1000000 /* max_tries */);
297       PhoneNumberMatch match;
298       ASSERT_TRUE(matcher.HasNext())
299           << "Did not find a number in '" << text << "'; expected '"
300           << number << "'";
301       matcher.Next(&match);
302 
303       string extracted = text.substr(match.start(), match.length());
304       EXPECT_EQ(start, match.start());
305       EXPECT_EQ(end, match.end());
306       EXPECT_EQ(number, extracted);
307       EXPECT_EQ(extracted, match.raw_string())
308           << "Unexpected phone region in '" << text << "'; extracted '"
309           << extracted << "'";
310       EnsureTermination(text, default_country, leniency);
311     }
312   }
313 
314   // Exhaustively searches for phone numbers from each index within "text" to
315   // test that finding matches always terminates.
EnsureTermination(const string & text,const string & default_country,PhoneNumberMatcher::Leniency leniency)316   void EnsureTermination(const string& text, const string& default_country,
317                          PhoneNumberMatcher::Leniency leniency) {
318     for (size_t index = 0; index <= text.length(); ++index) {
319       string sub = text.substr(index);
320       // Iterates over all matches.
321       PhoneNumberMatcher matcher(phone_util_, text, default_country, leniency,
322                                  1000000 /* max_tries */);
323       string matches;
324       PhoneNumberMatch match;
325       int match_count = 0;
326       while (matcher.HasNext()) {
327         matcher.Next(&match);
328         StrAppend(&matches, ",", match.ToString());
329         ++match_count;
330       }
331       // We should not ever find more than 10 matches in a single candidate text
332       // in these test cases, so we check here that the matcher was limited by
333       // the number of matches, rather than by max_tries.
334       ASSERT_LT(match_count, 10);
335     }
336   }
337 
338   const PhoneNumberUtil& phone_util_;
339 
340  private:
341   PhoneNumberMatcher matcher_;
342   int offset_;
343 };
344 
// Checks ContainsMoreThanOneSlashInNationalNumber against slash-separated
// candidates, varying the country code and how it was detected.
TEST_F(PhoneNumberMatcherTest, ContainsMoreThanOneSlashInNationalNumber) {
  PhoneNumber parsed;

  // A date should return true.
  parsed.set_country_code(1);
  parsed.set_country_code_source(PhoneNumber::FROM_DEFAULT_COUNTRY);
  EXPECT_TRUE(ContainsMoreThanOneSlashInNationalNumber(parsed, "1/05/2013"));

  // The country code source claims the number started with a country calling
  // code, but that code differs from the digits before the first slash, so
  // the answer is still true.
  parsed.Clear();
  parsed.set_country_code(274);
  parsed.set_country_code_source(PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
  EXPECT_TRUE(ContainsMoreThanOneSlashInNationalNumber(parsed, "27/4/2013"));

  // False now: the first slash comes right after the country calling code.
  parsed.Clear();
  parsed.set_country_code(49);
  parsed.set_country_code_source(PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN);
  EXPECT_FALSE(ContainsMoreThanOneSlashInNationalNumber(parsed, "49/69/2013"));

  // The three candidates below all reuse this country code and source.
  parsed.Clear();
  parsed.set_country_code(49);
  parsed.set_country_code_source(PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
  EXPECT_FALSE(
      ContainsMoreThanOneSlashInNationalNumber(parsed, "+49/69/2013"));
  EXPECT_FALSE(
      ContainsMoreThanOneSlashInNationalNumber(parsed, "+ 49/69/2013"));
  EXPECT_TRUE(
      ContainsMoreThanOneSlashInNationalNumber(parsed, "+ 49/69/20/13"));

  // With FROM_DEFAULT_COUNTRY the leading group is not taken to be the
  // country calling code even though it equals it, so this returns true.
  parsed.Clear();
  parsed.set_country_code(49);
  parsed.set_country_code_source(PhoneNumber::FROM_DEFAULT_COUNTRY);
  EXPECT_TRUE(ContainsMoreThanOneSlashInNationalNumber(parsed, "49/69/2013"));
}
390 
391 // See PhoneNumberUtilTest::ParseNationalNumber.
TEST_F(PhoneNumberMatcherTest,FindNationalNumber)392 TEST_F(PhoneNumberMatcherTest, FindNationalNumber) {
393   // Same cases as in ParseNationalNumber.
394   DoTestFindInContext("033316005", RegionCode::NZ());
395   // "33316005", RegionCode::NZ() is omitted since the national-prefix is
396   // obligatory for these types of numbers in New Zealand.
397   // National prefix attached and some formatting present.
398   DoTestFindInContext("03-331 6005", RegionCode::NZ());
399   DoTestFindInContext("03 331 6005", RegionCode::NZ());
400   // Testing international prefixes.
401   // Should strip country code.
402   DoTestFindInContext("0064 3 331 6005", RegionCode::NZ());
403   // Try again, but this time we have an international number with Region Code
404   // US. It should recognize the country code and parse accordingly.
405   DoTestFindInContext("01164 3 331 6005", RegionCode::US());
406   DoTestFindInContext("+64 3 331 6005", RegionCode::US());
407 
408   DoTestFindInContext("64(0)64123456", RegionCode::NZ());
409   // Check that using a "/" is fine in a phone number.
410   // Note that real Polish numbers do *not* start with a 0.
411   DoTestFindInContext("0123/456789", RegionCode::PL());
412   DoTestFindInContext("123-456-7890", RegionCode::US());
413 }
414 
415 // See PhoneNumberUtilTest::ParseWithInternationalPrefixes.
TEST_F(PhoneNumberMatcherTest,FindWithInternationalPrefixes)416 TEST_F(PhoneNumberMatcherTest, FindWithInternationalPrefixes) {
417   DoTestFindInContext("+1 (650) 333-6000", RegionCode::NZ());
418   DoTestFindInContext("1-650-333-6000", RegionCode::US());
419   // Calling the US number from Singapore by using different service providers
420   // 1st test: calling using SingTel IDD service (IDD is 001)
421   DoTestFindInContext("0011-650-333-6000", RegionCode::SG());
422   // 2nd test: calling using StarHub IDD service (IDD is 008)
423   DoTestFindInContext("0081-650-333-6000", RegionCode::SG());
424   // 3rd test: calling using SingTel V019 service (IDD is 019)
425   DoTestFindInContext("0191-650-333-6000", RegionCode::SG());
426   // Calling the US number from Poland
427   DoTestFindInContext("0~01-650-333-6000", RegionCode::PL());
428   // Using "++" at the start.
429   DoTestFindInContext("++1 (650) 333-6000", RegionCode::PL());
430   // Using a full-width plus sign.
431   DoTestFindInContext(
432       "\xEF\xBC\x8B""1 (650) 333-6000" /* "+1 (650) 333-6000" */,
433       RegionCode::SG());
434   // The whole number, including punctuation, is here represented in full-width
435   // form.
436   DoTestFindInContext(
437       /* "+1 (650) 333-6000" */
438       "\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95"
439       "\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93"
440       "\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",
441       RegionCode::SG());
442 }
443 
444 // See PhoneNumberUtilTest::ParseWithLeadingZero.
TEST_F(PhoneNumberMatcherTest,FindWithLeadingZero)445 TEST_F(PhoneNumberMatcherTest, FindWithLeadingZero) {
446   DoTestFindInContext("+39 02-36618 300", RegionCode::NZ());
447   DoTestFindInContext("02-36618 300", RegionCode::IT());
448   DoTestFindInContext("312 345 678", RegionCode::IT());
449 }
450 
451 // See PhoneNumberUtilTest::ParseNationalNumberArgentina.
TEST_F(PhoneNumberMatcherTest,FindNationalNumberArgentina)452 TEST_F(PhoneNumberMatcherTest, FindNationalNumberArgentina) {
453   // Test parsing mobile numbers of Argentina.
454   DoTestFindInContext("+54 9 343 555 1212", RegionCode::AR());
455   DoTestFindInContext("0343 15 555 1212", RegionCode::AR());
456 
457   DoTestFindInContext("+54 9 3715 65 4320", RegionCode::AR());
458   DoTestFindInContext("03715 15 65 4320", RegionCode::AR());
459 
460   // Test parsing fixed-line numbers of Argentina.
461   DoTestFindInContext("+54 11 3797 0000", RegionCode::AR());
462   DoTestFindInContext("011 3797 0000", RegionCode::AR());
463 
464   DoTestFindInContext("+54 3715 65 4321", RegionCode::AR());
465   DoTestFindInContext("03715 65 4321", RegionCode::AR());
466 
467   DoTestFindInContext("+54 23 1234 0000", RegionCode::AR());
468   DoTestFindInContext("023 1234 0000", RegionCode::AR());
469 }
470 
// See PhoneNumberUtilTest::ParseWithXInNumber.
TEST_F(PhoneNumberMatcherTest, FindWithXInNumber) {
  const string argentina(RegionCode::AR());

  const string carrier_code_only("(0xx) 123456789");
  DoTestFindInContext(carrier_code_only, argentina);

  // Here x denotes both the carrier code and the extension symbol.
  const string carrier_code_and_extension("(0xx) 123456789 x 1234");
  DoTestFindInContext(carrier_code_and_extension, argentina);

  // Deliberately built with more than 7 digits after the xx, so that they
  // cannot be mistaken for an extension (extensions of up to 7 digits are
  // allowed). This assumption is okay for now as all the countries where a
  // carrier selection code is written in the form of xx have a national
  // significant number of length larger than 7.
  const string long_number_after_xx("011xx5481429712");
  DoTestFindInContext(long_number_after_xx, RegionCode::US());
}
484 
485 // See PhoneNumberUtilTest::ParseNumbersMexico.
TEST_F(PhoneNumberMatcherTest,FindNumbersMexico)486 TEST_F(PhoneNumberMatcherTest, FindNumbersMexico) {
487   // Test parsing fixed-line numbers of Mexico.
488   DoTestFindInContext("+52 (449)978-0001", RegionCode::MX());
489   DoTestFindInContext("01 (449)978-0001", RegionCode::MX());
490   DoTestFindInContext("(449)978-0001", RegionCode::MX());
491 
492   // Test parsing mobile numbers of Mexico.
493   DoTestFindInContext("+52 1 33 1234-5678", RegionCode::MX());
494   DoTestFindInContext("044 (33) 1234-5678", RegionCode::MX());
495   DoTestFindInContext("045 33 1234-5678", RegionCode::MX());
496 }
497 
498 // See PhoneNumberUtilTest::ParseNumbersWithPlusWithNoRegion.
TEST_F(PhoneNumberMatcherTest,FindNumbersWithPlusWithNoRegion)499 TEST_F(PhoneNumberMatcherTest, FindNumbersWithPlusWithNoRegion) {
500   // RegionCode::ZZ() is allowed only if the number starts with a '+' - then the
501   // country code can be calculated.
502   DoTestFindInContext("+64 3 331 6005", RegionCode::ZZ());
503 }
504 
505 // See PhoneNumberUtilTest::ParseExtensions.
TEST_F(PhoneNumberMatcherTest,FindExtensions)506 TEST_F(PhoneNumberMatcherTest, FindExtensions) {
507   DoTestFindInContext("03 331 6005 ext 3456", RegionCode::NZ());
508   DoTestFindInContext("03-3316005x3456", RegionCode::NZ());
509   DoTestFindInContext("03-3316005 int.3456", RegionCode::NZ());
510   DoTestFindInContext("03 3316005 #3456", RegionCode::NZ());
511   DoTestFindInContext("0~0 1800 7493 524", RegionCode::PL());
512   DoTestFindInContext("(1800) 7493.524", RegionCode::US());
513   // Check that the last instance of an extension token is matched.
514   DoTestFindInContext("0~0 1800 7493 524 ~1234", RegionCode::PL());
515   // Verifying bug-fix where the last digit of a number was previously omitted
516   // if it was a 0 when extracting the extension. Also verifying a few different
517   // cases of extensions.
518   DoTestFindInContext("+44 2034567890x456", RegionCode::NZ());
519   DoTestFindInContext("+44 2034567890x456", RegionCode::GB());
520   DoTestFindInContext("+44 2034567890 x456", RegionCode::GB());
521   DoTestFindInContext("+44 2034567890 X456", RegionCode::GB());
522   DoTestFindInContext("+44 2034567890 X 456", RegionCode::GB());
523   DoTestFindInContext("+44 2034567890 X  456", RegionCode::GB());
524   DoTestFindInContext("+44 2034567890  X 456", RegionCode::GB());
525 
526   DoTestFindInContext("(800) 901-3355 x 7246433", RegionCode::US());
527   DoTestFindInContext("(800) 901-3355 , ext 7246433", RegionCode::US());
528   DoTestFindInContext("(800) 901-3355 ,extension 7246433", RegionCode::US());
529   // The next test differs from PhoneNumberUtil -> when matching we don't
530   // consider a lone comma to indicate an extension, although we accept it when
531   // parsing.
532   DoTestFindInContext("(800) 901-3355 ,x 7246433", RegionCode::US());
533   DoTestFindInContext("(800) 901-3355 ext: 7246433", RegionCode::US());
534 }
535 
// A number whose digits are separated by single and triple spaces must still
// be found as one number.
TEST_F(PhoneNumberMatcherTest, FindInterspersedWithSpace) {
  const string spaced_out_number("0 3   3 3 1   6 0 0 5");
  DoTestFindInContext(spaced_out_number, RegionCode::NZ());
}
539 
540 // Test matching behavior when starting in the middle of a phone number.
TEST_F(PhoneNumberMatcherTest,IntermediateParsePositions)541 TEST_F(PhoneNumberMatcherTest, IntermediateParsePositions) {
542   string text = "Call 033316005  or 032316005!";
543   //             |    |    |    |    |    |
544   //             0    5   10   15   20   25
545 
546   // Iterate over all possible indices.
547   for (int i = 0; i <= 5; ++i) {
548     AssertEqualRange(text, i, 5, 14);
549   }
550   // 7 and 8 digits in a row are still parsed as number.
551   AssertEqualRange(text, 6, 6, 14);
552   AssertEqualRange(text, 7, 7, 14);
553   // Anything smaller is skipped to the second instance.
554   for (int i = 8; i <= 19; ++i) {
555     AssertEqualRange(text, i, 19, 28);
556   }
557 }
558 
// Four US numbers separated by " - " must be returned one after another, in
// the order in which they appear in the text.
TEST_F(PhoneNumberMatcherTest, FourMatchesInARow) {
  const string kNumbers[] = {
    "415-666-7777",
    "800-443-1223",
    "212-443-1223",
    "650-443-1223",
  };
  const size_t number_count = sizeof(kNumbers) / sizeof(kNumbers[0]);
  const string text = StrCat(kNumbers[0], " - ", kNumbers[1], " - ",
                             kNumbers[2], " - ", kNumbers[3]);

  PhoneNumberMatcher matcher(text, RegionCode::US());
  PhoneNumberMatch match;

  for (size_t i = 0; i < number_count; ++i) {
    EXPECT_TRUE(matcher.HasNext());
    EXPECT_TRUE(matcher.Next(&match));
    AssertMatchProperties(match, text, kNumbers[i], RegionCode::US());
  }
}
585 
// Two US numbers joined by a space must both be found, in order.
TEST_F(PhoneNumberMatcherTest, MatchesFoundWithMultipleSpaces) {
  const string kNumbers[] = {
    "415-666-7777",
    "800-443-1223",
  };
  const size_t number_count = sizeof(kNumbers) / sizeof(kNumbers[0]);
  const string text = StrCat(kNumbers[0], " ", kNumbers[1]);

  PhoneNumberMatcher matcher(text, RegionCode::US());
  PhoneNumberMatch match;

  for (size_t i = 0; i < number_count; ++i) {
    EXPECT_TRUE(matcher.HasNext());
    EXPECT_TRUE(matcher.Next(&match));
    AssertMatchProperties(match, text, kNumbers[i], RegionCode::US());
  }
}
602 
// A five-digit zip code next to a phone number must not be swallowed into the
// match or prevent it: the number is found both when the zip code precedes it
// and when it follows it.
TEST_F(PhoneNumberMatcherTest, MatchWithSurroundingZipcodes) {
  string number = "415-666-7777";
  string zip_preceding =
      StrCat("My address is CA 34215 - ", number, " is my number.");

  // No expected PhoneNumber is parsed here: AssertMatchProperties parses
  // "number" itself before comparing it with the match.
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency(zip_preceding, RegionCode::US(),
                             PhoneNumberMatcher::VALID));

  PhoneNumberMatch match;
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match));
  AssertMatchProperties(match, zip_preceding, number, RegionCode::US());

  // Now repeat, but this time the phone number has spaces in it. It should
  // still be found.
  number = "(415) 666 7777";

  string zip_following =
      StrCat("My number is ", number, ". 34215 is my zip-code.");
  matcher.reset(
      GetMatcherWithLeniency(zip_following, RegionCode::US(),
                             PhoneNumberMatcher::VALID));

  PhoneNumberMatch match_with_spaces;
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match_with_spaces));
  AssertMatchProperties(
      match_with_spaces, zip_following, number, RegionCode::US());
}
635 
// Checks IsLatinLetter on ASCII letters, accented and combining characters,
// punctuation, digits and non-Latin scripts.
TEST_F(PhoneNumberMatcherTest, IsLatinLetter) {
  // Plain ASCII letters.
  EXPECT_TRUE(IsLatinLetter('c'));
  EXPECT_TRUE(IsLatinLetter('C'));

  // "É" (U+00C9).
  const UnicodeString e_acute = UnicodeString::fromUTF8("\xC3\x89");
  EXPECT_TRUE(IsLatinLetter(e_acute[0]));

  // Combining acute accent.
  const UnicodeString combining_acute = UnicodeString::fromUTF8("\xCC\x81");
  EXPECT_TRUE(IsLatinLetter(combining_acute[0]));

  // Punctuation, a digit and a space.
  const char kNonLetters[] = {':', '5', '-', '.', ' '};
  const size_t non_letter_count = sizeof(kNonLetters) / sizeof(kNonLetters[0]);
  for (size_t i = 0; i < non_letter_count; ++i) {
    EXPECT_FALSE(IsLatinLetter(kNonLetters[i]));
  }

  // "我" (U+6211).
  const UnicodeString han_character = UnicodeString::fromUTF8("\xE6\x88\x91");
  EXPECT_FALSE(IsLatinLetter(han_character[0]));

  // Hiragana letter no (U+306E) - this should neither seem to start nor end
  // with a Latin letter.
  // NOTE(review): the bytes decode to a single code point, so index 2 looks
  // like it is past the end of the UnicodeString - confirm that this is the
  // intended input.
  const UnicodeString hiragana_no = UnicodeString::fromUTF8("\xE3\x81\xAE");
  EXPECT_FALSE(IsLatinLetter(hiragana_no[0]));
  EXPECT_FALSE(IsLatinLetter(hiragana_no[2]));
}
654 
// Numbers that touch Latin letters on either side must be rejected as valid
// matches while still being reported as possible ones.
TEST_F(PhoneNumberMatcherTest, MatchesWithSurroundingLatinChars) {
  const NumberContext kLatinContexts[] = {
    NumberContext("abc", "def"),
    NumberContext("abc", ""),
    NumberContext("", "def"),
    NumberContext("\xC3\x89" /* "É" */, ""),
    // Leading text ending in a decomposed e-acute ("e" followed by the
    // combining acute accent).
    NumberContext("\x20\x22\xCC\x81""e\xCC\x81", ""),
  };
  const std::vector<NumberContext> possible_only_contexts(
      kLatinContexts, kLatinContexts + arraysize(kLatinContexts));

  // Not valid, but possible.
  FindMatchesInContexts(possible_only_contexts, false, true);
}
669 
// A number directly adjacent to a currency symbol is treated as possible
// only, never as a valid match.
TEST_F(PhoneNumberMatcherTest, MoneyNotSeenAsPhoneNumber) {
  const NumberContext kCurrencyContexts[] = {
    NumberContext("$", ""),
    NumberContext("", "$"),
    NumberContext("\xC2\xA3" /* "£" */, ""),
    NumberContext("\xC2\xA5" /* "¥" */, ""),
  };
  const std::vector<NumberContext> possible_only_contexts(
      kCurrencyContexts, kCurrencyContexts + arraysize(kCurrencyContexts));
  FindMatchesInContexts(possible_only_contexts, false, true);
}
678 
// A number immediately followed by "%" should be dropped as a valid match
// (it is still reported as possible).
TEST_F(PhoneNumberMatcherTest, PercentageNotSeenAsPhoneNumber) {
  const std::vector<NumberContext> possible_only_contexts(
      1, NumberContext("", "%"));
  FindMatchesInContexts(possible_only_contexts, false, true);
}
685 
// Dollar amounts separated from the number by a space (after the 20, or
// before the 100) must not prevent the actual number from being found.
TEST_F(PhoneNumberMatcherTest, PhoneNumberWithLeadingOrTrailingMoneyMatches) {
  const NumberContext kSpacedMoneyContexts[] = {
    NumberContext("$20 ", ""),
    NumberContext("", " 100$"),
  };
  const std::vector<NumberContext> contexts(
      kSpacedMoneyContexts,
      kSpacedMoneyContexts + arraysize(kSpacedMoneyContexts));
  FindMatchesInContexts(contexts, true, true);
}
694 
// Numbers that begin with punctuation ("+" or "(") may directly follow Latin
// letters, but may still not be directly followed by them.
TEST_F(PhoneNumberMatcherTest,
       MatchesWithSurroundingLatinCharsAndLeadingPunctuation) {
  string number_with_plus = "+14156667777";
  string number_with_brackets = "(415)6667777";

  // Every context here has trailing Latin characters. Leading characters are
  // acceptable because the inserted numbers start with punctuation; trailing
  // ones are not, so these are possible but not valid.
  const NumberContext kTrailingLatin[] = {
    NumberContext("abc", "def"),
    NumberContext("", "def"),
    NumberContext("", "\xC3\x89" /* "É" */),
  };
  const std::vector<NumberContext> possible_only_contexts(
      kTrailingLatin, kTrailingLatin + arraysize(kTrailingLatin));
  FindMatchesInContexts(possible_only_contexts, false, true, RegionCode::US(),
                        number_with_plus);
  FindMatchesInContexts(possible_only_contexts, false, true, RegionCode::US(),
                        number_with_brackets);

  // Only leading Latin characters (optionally followed by punctuation or
  // white-space after the number): valid, since the numbers start with
  // punctuation.
  const NumberContext kLeadingLatin[] = {
    NumberContext("abc", ""),
    NumberContext("\xC3\x89" /* "É" */, ""),
    NumberContext("\xC3\x89" /* "É" */, "."),     // Trailing punctuation.
    NumberContext("\xC3\x89" /* "É" */, " def"),  // Trailing white-space.
  };
  const std::vector<NumberContext> valid_contexts(
      kLeadingLatin, kLeadingLatin + arraysize(kLeadingLatin));
  FindMatchesInContexts(valid_contexts, true, true, RegionCode::US(),
                        number_with_plus);
  FindMatchesInContexts(valid_contexts, true, true, RegionCode::US(),
                        number_with_brackets);
}
728 
// Numbers directly surrounded by Chinese characters should be considered
// valid matches.
TEST_F(PhoneNumberMatcherTest, MatchesWithSurroundingChineseChars) {
  const NumberContext kChineseContexts[] = {
    NumberContext(
        /* "我的电话号码是" */
        "\xE6\x88\x91\xE7\x9A\x84\xE7\x94\xB5\xE8\xAF\x9D\xE5\x8F\xB7\xE7\xA0\x81"
        "\xE6\x98\xAF",
        ""),
    NumberContext(
        "",
        /* "是我的电话号码" */
        "\xE6\x98\xAF\xE6\x88\x91\xE7\x9A\x84\xE7\x94\xB5\xE8\xAF\x9D\xE5\x8F\xB7"
        "\xE7\xA0\x81"),
    NumberContext(
        "\xE8\xAF\xB7\xE6\x8B\xA8\xE6\x89\x93" /* "请拨打" */,
        "\xE6\x88\x91\xE5\x9C\xA8\xE6\x98\x8E\xE5\xA4\xA9" /* "我在明天" */),
  };
  const std::vector<NumberContext> valid_contexts(
      kChineseContexts, kChineseContexts + arraysize(kChineseContexts));

  FindMatchesInContexts(valid_contexts, true, true);
}
747 
// Numbers delimited by punctuation or white-space should be considered valid
// matches.
TEST_F(PhoneNumberMatcherTest, MatchesWithSurroundingPunctuation) {
  const NumberContext kPunctuationContexts[] = {
    NumberContext("My number-", ""),             // At end of text.
    NumberContext("", ".Nice day."),             // At start of text.
    NumberContext("Tel:", "."),                  // Punctuation surrounds number.
    NumberContext("Tel: ", " on Saturdays."),    // White-space is also fine.
  };
  const std::vector<NumberContext> valid_contexts(
      kPunctuationContexts,
      kPunctuationContexts + arraysize(kPunctuationContexts));

  FindMatchesInContexts(valid_contexts, true, true);
}
763 
// Two numbers separated by " -- " (punctuation that may also occur inside a
// phone number) must be reported as two separate matches.
TEST_F(PhoneNumberMatcherTest,
       MatchesMultiplePhoneNumbersSeparatedByPhoneNumberPunctuation) {
  const string text = "Call 650-253-4561 -- 455-234-3451";
  const string& region = RegionCode::US();
  PhoneNumber number1;
  number1.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number1.set_national_number(6502534561ULL);
  PhoneNumberMatch match1(5, "650-253-4561", number1);

  PhoneNumber number2;
  number2.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number2.set_national_number(4552343451ULL);
  PhoneNumberMatch match2(21, "455-234-3451", number2);

  PhoneNumberMatcher matcher(
      phone_util_, text, region, PhoneNumberMatcher::VALID, 100);

  PhoneNumberMatch actual_match1;
  PhoneNumberMatch actual_match2;
  // Check that each call actually produced a match, so a missing match is
  // reported directly instead of only through the comparisons below.
  EXPECT_TRUE(matcher.Next(&actual_match1));
  EXPECT_TRUE(matcher.Next(&actual_match2));
  EXPECT_TRUE(match1.Equals(actual_match1))
      << "Got: " << actual_match1.ToString();
  EXPECT_TRUE(match2.Equals(actual_match2))
      << "Got: " << actual_match2.ToString();
}
790 
// Two numbers joined by "--" with no surrounding white-space must not yield
// any match at VALID leniency.
TEST_F(PhoneNumberMatcherTest,
       DoesNotMatchMultiplePhoneNumbersSeparatedWithNoWhiteSpace) {
  const string joined_numbers = "Call 650-253-4561--455-234-3451";
  PhoneNumberMatcher matcher(phone_util_, joined_numbers, RegionCode::US(),
                             PhoneNumberMatcher::VALID, 100);
  EXPECT_FALSE(matcher.HasNext());
}
799 
// Strings with number-like things that shouldn't be found under any level.
// Used by every NonMatchesWith*Leniency test in this file.
static const NumberTest kImpossibleCases[] = {
  NumberTest("12345", RegionCode::US()),
  NumberTest("23456789", RegionCode::US()),
  NumberTest("234567890112", RegionCode::US()),
  NumberTest("650+253+1234", RegionCode::US()),
  // Date-like strings.
  NumberTest("3/10/1984", RegionCode::CA()),
  NumberTest("03/27/2011", RegionCode::US()),
  NumberTest("31/8/2011", RegionCode::US()),
  NumberTest("1/12/2011", RegionCode::US()),
  NumberTest("10/12/82", RegionCode::DE()),
  NumberTest("650x2531234", RegionCode::US()),
  // Timestamp-like strings.
  NumberTest("2012-01-02 08:00", RegionCode::US()),
  NumberTest("2012/01/02 08:00", RegionCode::US()),
  NumberTest("20120102 08:00", RegionCode::US()),
  NumberTest("2014-04-12 04:04 PM", RegionCode::US()),
  // NOTE(review): the next two entries are byte-for-byte identical; confirm
  // whether one of them was meant to use different spacing.
  NumberTest("2014-04-12 &nbsp;04:04 PM", RegionCode::US()),
  NumberTest("2014-04-12 &nbsp;04:04 PM", RegionCode::US()),
  NumberTest("2014-04-12  04:04 PM", RegionCode::US()),
};
820 
// Strings with number-like things that should only be found under "possible".
// Expected to match in MatchesWithPossibleLeniency and to be rejected by the
// NonMatchesWith{Valid,StrictGrouping,ExactGrouping}Leniency tests.
static const NumberTest kPossibleOnlyCases[] = {
  // US numbers cannot start with 7 in the test metadata to be valid.
  NumberTest("7121115678", RegionCode::US()),
  // 'X' should not be found in numbers at leniencies stricter than POSSIBLE,
  // unless it represents a carrier code or extension.
  NumberTest("1650 x 253 - 1234", RegionCode::US()),
  NumberTest("650 x 253 - 1234", RegionCode::US()),
  NumberTest("6502531x234", RegionCode::US()),
  NumberTest("(20) 3346 1234", RegionCode::GB()),  // Non-optional NP omitted
};
832 
// Strings with number-like things that should only be found up to and including
// the "valid" leniency level.
// Expected to match at POSSIBLE and VALID, and to be rejected at
// STRICT_GROUPING and EXACT_GROUPING (see the leniency tests below).
static const NumberTest kValidCases[] = {
  NumberTest("65 02 53 00 00", RegionCode::US()),
  NumberTest("6502 538365", RegionCode::US()),
  // 2 slashes are illegal at higher levels.
  NumberTest("650//253-1234", RegionCode::US()),
  NumberTest("650/253/1234", RegionCode::US()),
  NumberTest("9002309. 158", RegionCode::US()),
  NumberTest("12 7/8 - 14 12/34 - 5", RegionCode::US()),
  NumberTest("12.1 - 23.71 - 23.45", RegionCode::US()),
  NumberTest("800 234 1 111x1111", RegionCode::US()),
  NumberTest("1979-2011 100", RegionCode::US()),
  // National number in wrong format.
  NumberTest("+494949-4-94", RegionCode::DE()),
  // Full-width digits (U+FF11..U+FF17) with an ASCII hyphen ("\x2D").
  NumberTest(
      /* "415666-7777" */
      "\xEF\xBC\x94\xEF\xBC\x91\xEF\xBC\x95\xEF\xBC\x96\xEF\xBC\x96\xEF\xBC\x96"
      "\x2D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
  NumberTest("2012-0102 08", RegionCode::US()),  // Very strange formatting.
  NumberTest("2012-01-02 08", RegionCode::US()),
  // Breakdown assistance number with unexpected formatting.
  NumberTest("1800-1-0-10 22", RegionCode::AU()),
  NumberTest("030-3-2 23 12 34", RegionCode::DE()),
  NumberTest("03 0 -3 2 23 12 34", RegionCode::DE()),
  NumberTest("(0)3 0 -3 2 23 12 34", RegionCode::DE()),
  NumberTest("0 3 0 -3 2 23 12 34", RegionCode::DE()),
#ifdef I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // Fits an alternate pattern, but the leading digits don't match.
  NumberTest("+52 332 123 23 23", RegionCode::MX()),
#endif  // I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
};
865 
// Strings with number-like things that should only be found up to and including
// the "strict_grouping" leniency level.
// Expected to match at POSSIBLE, VALID and STRICT_GROUPING, and to be rejected
// at EXACT_GROUPING (see the leniency tests below).
static const NumberTest kStrictGroupingCases[] = {
  NumberTest("(415) 6667777", RegionCode::US()),
  NumberTest("415-6667777", RegionCode::US()),
  // Should be found by strict grouping but not exact grouping, as the last two
  // groups are formatted together as a block.
  NumberTest("0800-2491234", RegionCode::DE()),
  // If the user is using alternate formats, test that numbers formatted in
  // that way are found.
#ifdef I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // Doesn't match any formatting in the test file, but almost matches an
  // alternate format (the last two groups have been squashed together here).
  NumberTest("0900-1 123123", RegionCode::DE()),
  NumberTest("(0)900-1 123123", RegionCode::DE()),
  NumberTest("0 900-1 123123", RegionCode::DE()),
#endif  // I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // NDC also found as part of the country calling code; this shouldn't ruin the
  // grouping expectations.
  NumberTest("+33 3 34 2312", RegionCode::FR()),
};
887 
// Strings with number-like things that should be found at all levels.
// Included in every MatchesWith*Leniency test in this file.
static const NumberTest kExactGroupingCases[] = {
  // Full-width digits (U+FF11..U+FF17), no separators.
  NumberTest(
      /* "4156667777" */
      "\xEF\xBC\x94\xEF\xBC\x91\xEF\xBC\x95\xEF\xBC\x96\xEF\xBC\x96\xEF\xBC\x96"
      "\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
  // Full-width digits separated by full-width hyphen-minus (U+FF0D).
  NumberTest(
      /* "415-666-7777" */
      "\xEF\xBC\x94\xEF\xBC\x91\xEF\xBC\x95\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x96"
      "\xEF\xBC\x96\xEF\xBC\x8D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97"
      "\xEF\xBC\x97", RegionCode::US()),
  NumberTest("4156667777", RegionCode::US()),
  NumberTest("4156667777 x 123", RegionCode::US()),
  NumberTest("415-666-7777", RegionCode::US()),
  NumberTest("415/666-7777", RegionCode::US()),
  NumberTest("415-666-7777 ext. 503", RegionCode::US()),
  NumberTest("1 415 666 7777 x 123", RegionCode::US()),
  NumberTest("+1 415-666-7777", RegionCode::US()),
  NumberTest("+494949 49", RegionCode::DE()),
  NumberTest("+49-49-34", RegionCode::DE()),
  NumberTest("+49-4931-49", RegionCode::DE()),
  NumberTest("04931-49", RegionCode::DE()),  // With National Prefix
  NumberTest("+49-494949", RegionCode::DE()),  // One group with country code
  NumberTest("+49-494949 ext. 49", RegionCode::DE()),
  NumberTest("+49494949 ext. 49", RegionCode::DE()),
  NumberTest("0494949", RegionCode::DE()),
  NumberTest("0494949 ext. 49", RegionCode::DE()),
  NumberTest("01 (33) 3461 2234", RegionCode::MX()),  // Optional NP present
  NumberTest("(33) 3461 2234", RegionCode::MX()),  // Optional NP omitted
  // If the user is using alternate formats, test that numbers formatted in
  // that way are found.
#ifdef I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // Breakdown assistance number using alternate formatting pattern.
  NumberTest("1800-10-10 22", RegionCode::AU()),
  // Doesn't match any formatting in the test file, but matches an alternate
  // format exactly.
  NumberTest("0900-1 123 123", RegionCode::DE()),
  NumberTest("(0)900-1 123 123", RegionCode::DE()),
  NumberTest("0 900-1 123 123", RegionCode::DE()),
#endif  // I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  NumberTest("+33 3 34 23 12", RegionCode::FR()),
};
930 
// At POSSIBLE leniency every table except kImpossibleCases must match.
TEST_F(PhoneNumberMatcherTest, MatchesWithPossibleLeniency) {
  // Resulting order: exact-grouping, strict-grouping, valid, possible-only.
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  test_cases.insert(test_cases.end(), kStrictGroupingCases,
                    kStrictGroupingCases + arraysize(kStrictGroupingCases));
  test_cases.insert(test_cases.end(), kValidCases,
                    kValidCases + arraysize(kValidCases));
  test_cases.insert(test_cases.end(), kPossibleOnlyCases,
                    kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  DoTestNumberMatchesForLeniency(test_cases, PhoneNumberMatcher::POSSIBLE);
}
944 
// At POSSIBLE leniency only the impossible cases must be rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithPossibleLeniency) {
  const std::vector<NumberTest> test_cases(
      kImpossibleCases, kImpossibleCases + arraysize(kImpossibleCases));
  DoTestNumberNonMatchesForLeniency(test_cases, PhoneNumberMatcher::POSSIBLE);
}
951 
// At VALID leniency the valid, strict-grouping and exact-grouping tables must
// all match.
TEST_F(PhoneNumberMatcherTest, MatchesWithValidLeniency) {
  // Resulting order: exact-grouping, strict-grouping, valid.
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  test_cases.insert(test_cases.end(), kStrictGroupingCases,
                    kStrictGroupingCases + arraysize(kStrictGroupingCases));
  test_cases.insert(test_cases.end(), kValidCases,
                    kValidCases + arraysize(kValidCases));
  DoTestNumberMatchesForLeniency(test_cases, PhoneNumberMatcher::VALID);
}
963 
// At VALID leniency the impossible and possible-only tables must be rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithValidLeniency) {
  // Resulting order: possible-only, impossible.
  std::vector<NumberTest> test_cases(
      kPossibleOnlyCases, kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  test_cases.insert(test_cases.end(), kImpossibleCases,
                    kImpossibleCases + arraysize(kImpossibleCases));
  DoTestNumberNonMatchesForLeniency(test_cases, PhoneNumberMatcher::VALID);
}
972 
// At STRICT_GROUPING leniency the strict-grouping and exact-grouping tables
// must match.
TEST_F(PhoneNumberMatcherTest, MatchesWithStrictGroupingLeniency) {
  // Resulting order: exact-grouping, strict-grouping.
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  test_cases.insert(test_cases.end(), kStrictGroupingCases,
                    kStrictGroupingCases + arraysize(kStrictGroupingCases));
  DoTestNumberMatchesForLeniency(test_cases,
                                 PhoneNumberMatcher::STRICT_GROUPING);
}
983 
// At STRICT_GROUPING leniency the impossible, possible-only and valid tables
// must be rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithStrictGroupingLeniency) {
  // Resulting order: valid, possible-only, impossible.
  std::vector<NumberTest> test_cases(
      kValidCases, kValidCases + arraysize(kValidCases));
  test_cases.insert(test_cases.end(), kPossibleOnlyCases,
                    kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  test_cases.insert(test_cases.end(), kImpossibleCases,
                    kImpossibleCases + arraysize(kImpossibleCases));
  DoTestNumberNonMatchesForLeniency(test_cases,
                                    PhoneNumberMatcher::STRICT_GROUPING);
}
995 
// At EXACT_GROUPING leniency only the exact-grouping table must match.
TEST_F(PhoneNumberMatcherTest, MatchesWithExactGroupingLeniency) {
  const std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  DoTestNumberMatchesForLeniency(test_cases,
                                 PhoneNumberMatcher::EXACT_GROUPING);
}
1003 
// At EXACT_GROUPING leniency every table except kExactGroupingCases must be
// rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithExactGroupingLeniency) {
  // Resulting order: strict-grouping, valid, possible-only, impossible.
  std::vector<NumberTest> test_cases(
      kStrictGroupingCases,
      kStrictGroupingCases + arraysize(kStrictGroupingCases));
  test_cases.insert(test_cases.end(), kValidCases,
                    kValidCases + arraysize(kValidCases));
  test_cases.insert(test_cases.end(), kPossibleOnlyCases,
                    kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  test_cases.insert(test_cases.end(), kImpossibleCases,
                    kImpossibleCases + arraysize(kImpossibleCases));
  DoTestNumberNonMatchesForLeniency(test_cases,
                                    PhoneNumberMatcher::EXACT_GROUPING);
}
1018 
// Slash-separated dates embedded in a sentence must not be extracted as
// phone numbers.
TEST_F(PhoneNumberMatcherTest, ExtractMatchIgnoresAmericanDates) {
  PhoneNumberMatch match;

  string two_digit_day_and_month =
      "As I said on 03/10/2011, you may call me at ";
  EXPECT_FALSE(ExtractMatch(two_digit_day_and_month, &match));

  string month_first = "As I said on 03/27/2011, you may call me at ";
  EXPECT_FALSE(ExtractMatch(month_first, &match));

  string day_first = "As I said on 31/8/2011, you may call me at ";
  EXPECT_FALSE(ExtractMatch(day_first, &match));

  string single_digit_lead = "As I said on 1/12/2011, you may call me at ";
  EXPECT_FALSE(ExtractMatch(single_digit_lead, &match));

  string two_digit_year = "I was born on 10/12/82. Please call me at ";
  EXPECT_FALSE(ExtractMatch(two_digit_year, &match));
}
1032 
// Candidates with unbalanced or excessive brackets must not be matched at
// VALID leniency, even when their digits would form a valid US number.
TEST_F(PhoneNumberMatcherTest, NonMatchingBracketsAreInvalid) {
  // The digits up to the ", " form a valid US number, but it shouldn't be
  // matched as one since there was a non-matching bracket present.
  scoped_ptr<PhoneNumberMatcher> unclosed_square(GetMatcherWithLeniency(
      "80.585 [79.964, 81.191]", RegionCode::US(),
      PhoneNumberMatcher::VALID));
  EXPECT_FALSE(unclosed_square->HasNext());

  // The trailing "]" is thrown away before parsing, so the resultant number,
  // while a valid US number, does not have matching brackets.
  scoped_ptr<PhoneNumberMatcher> trailing_square(GetMatcherWithLeniency(
      "80.585 [79.964]", RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(trailing_square->HasNext());

  scoped_ptr<PhoneNumberMatcher> doubled_open_paren(GetMatcherWithLeniency(
      "80.585 ((79.964)", RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(doubled_open_paren->HasNext());

  // This case has too many sets of brackets to be valid.
  scoped_ptr<PhoneNumberMatcher> too_many_groups(GetMatcherWithLeniency(
      "(80).(585) (79).(9)64", RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(too_many_groups->HasNext());
}
1056 
// A number without an international prefix must not be matched when the
// region code is ZZ.
TEST_F(PhoneNumberMatcherTest, NoMatchIfRegionIsUnknown) {
  const string text = "Random text body - number is 0331 6005, see you there";
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      text, RegionCode::ZZ(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(matcher->HasNext());
}
1064 
// Empty and white-space-only input must produce no matches.
TEST_F(PhoneNumberMatcherTest, NoMatchInEmptyString) {
  scoped_ptr<PhoneNumberMatcher> empty_text_matcher(GetMatcherWithLeniency(
      "", RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(empty_text_matcher->HasNext());

  scoped_ptr<PhoneNumberMatcher> blank_text_matcher(GetMatcherWithLeniency(
      "  ", RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(blank_text_matcher->HasNext());
}
1073 
// Text that contains no digits at all must produce no matches.
TEST_F(PhoneNumberMatcherTest, NoMatchIfNoNumber) {
  const string text = "Random text body - number is foobar, see you there";
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      text, RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(matcher->HasNext());
}
1080 
// Feeds the matcher a long text containing an invalid UTF-8 byte ("\356") and
// checks that it reports no matches.
TEST_F(PhoneNumberMatcherTest, NoErrorWithSpecialCharacters) {
  const string fragment =
      "Myfuzzvar1152: \"My info:%415-666-7777 123 fake street\"\nfuzzvar1155: "
      "47\nfuzzvar1158: %415-666-1234 "
      "i18n_phonenumbers_Pho\356eNumberMatcher_Leniency_VALID_1"
      "\nfuzzvar1159: 20316 info:%415-666-7777 123 fake str79ee\nt";

  // Repeat the fragment 100 times to build the input text.
  string repeated_text;
  for (int repetition = 0; repetition < 100; ++repetition) {
    repeated_text.append(fragment);
  }

  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency(repeated_text, RegionCode::US(),
                             PhoneNumberMatcher::POSSIBLE));
  // Since the input text contains invalid UTF-8, we do not return
  // any matches.
  EXPECT_FALSE(matcher->HasNext());
}
1097 
// Two NZ numbers in one text must be returned as two successive matches with
// the expected offsets, raw strings and parsed numbers.
TEST_F(PhoneNumberMatcherTest, Sequences) {
  // Test multiple occurrences.
  const string text = "Call 033316005  or 032316005!";
  const string& region = RegionCode::NZ();

  PhoneNumber number1;
  number1.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number1.set_national_number(33316005ULL);
  PhoneNumberMatch match1(5, "033316005", number1);

  PhoneNumber number2;
  number2.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number2.set_national_number(32316005ULL);
  PhoneNumberMatch match2(19, "032316005", number2);

  PhoneNumberMatcher matcher(
      phone_util_, text, region, PhoneNumberMatcher::POSSIBLE, 100);

  PhoneNumberMatch actual_match1;
  PhoneNumberMatch actual_match2;
  // Check that each call actually produced a match, so a missing match is
  // reported directly instead of only through the comparisons below.
  EXPECT_TRUE(matcher.Next(&actual_match1));
  EXPECT_TRUE(matcher.Next(&actual_match2));
  EXPECT_TRUE(match1.Equals(actual_match1))
      << "Got: " << actual_match1.ToString();
  EXPECT_TRUE(match2.Equals(actual_match2))
      << "Got: " << actual_match2.ToString();
}
1123 
// With max_tries set to 10, all 100 valid numbers are still returned: the
// limit only applies to failed candidates.
TEST_F(PhoneNumberMatcherTest, MaxMatches) {
  // Set up text with 100 valid phone numbers.
  string numbers;
  for (int i = 0; i < 100; ++i) {
    numbers.append("My info: 415-666-7777,");
  }

  // Matches all 100. Max only applies to failed cases.
  PhoneNumber number;
  // Stop here if the reference number cannot be parsed; the comparison below
  // would otherwise be made against a default-constructed PhoneNumber.
  ASSERT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
            phone_util_.Parse("+14156667777", RegionCode::US(), &number));
  std::vector<PhoneNumber> expected(100, number);

  PhoneNumberMatcher matcher(
      phone_util_, numbers, RegionCode::US(), PhoneNumberMatcher::VALID, 10);
  std::vector<PhoneNumber> actual;
  PhoneNumberMatch match;
  while (matcher.HasNext()) {
    // HasNext() just reported a match, so Next() must succeed.
    EXPECT_TRUE(matcher.Next(&match));
    actual.push_back(match.number());
  }
  EXPECT_EQ(expected, actual);
}
1146 
// With max_tries set to 10, ten leading invalid candidates leave the matcher
// with no match, even though 100 valid numbers follow.
TEST_F(PhoneNumberMatcherTest, MaxMatchesInvalid) {
  string text;
  // 10 invalid phone numbers...
  for (int invalid_count = 0; invalid_count < 10; ++invalid_count) {
    text.append("My address 949-8945-0");
  }
  // ...followed by 100 valid ones.
  for (int valid_count = 0; valid_count < 100; ++valid_count) {
    text.append("My info: 415-666-7777,");
  }

  PhoneNumberMatcher matcher(
      phone_util_, text, RegionCode::US(), PhoneNumberMatcher::VALID, 10);
  EXPECT_FALSE(matcher.HasNext());
}
1161 
// With max_tries set to 10 and each valid number embedded in an invalid
// candidate, exactly 10 matches are expected.
TEST_F(PhoneNumberMatcherTest, MaxMatchesMixed) {
  // Set up text with 100 valid numbers inside an invalid number.
  string numbers;
  for (int i = 0; i < 100; ++i) {
    numbers.append("My info: 415-666-7777 123 fake street");
  }

  PhoneNumber number;
  // Stop here if the reference number cannot be parsed; the comparison below
  // would otherwise be made against a default-constructed PhoneNumber.
  ASSERT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
            phone_util_.Parse("+14156667777", RegionCode::ZZ(), &number));
  std::vector<PhoneNumber> expected(10, number);

  PhoneNumberMatcher matcher(
      phone_util_, numbers, RegionCode::US(), PhoneNumberMatcher::VALID, 10);
  std::vector<PhoneNumber> actual;
  PhoneNumberMatch match;
  while (matcher.HasNext()) {
    // HasNext() just reported a match, so Next() must succeed.
    EXPECT_TRUE(matcher.Next(&match));
    actual.push_back(match.number());
  }
  EXPECT_EQ(expected, actual);
}
1183 
// A number without a leading "+" must not be found when the region is
// unknown; HasNext() and Next() must both keep reporting false.
TEST_F(PhoneNumberMatcherTest, NonPlusPrefixedNumbersNotFoundForInvalidRegion) {
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency("1 456 764 156", RegionCode::GetUnknown(),
                             PhoneNumberMatcher::VALID));
  PhoneNumberMatch unused_match;

  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&unused_match));
  EXPECT_FALSE(matcher->HasNext());
}
1193 
// Iterating over empty text: HasNext() and Next() must report false however
// often, and in whatever order, they are called.
TEST_F(PhoneNumberMatcherTest, EmptyIteration) {
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency("", RegionCode::GetUnknown(),
                             PhoneNumberMatcher::VALID));
  PhoneNumberMatch unused_match;

  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&unused_match));
  EXPECT_FALSE(matcher->HasNext());
}
1204 
// Text with exactly one number: one successful Next(), after which the
// matcher is exhausted.
TEST_F(PhoneNumberMatcherTest, SingleIteration) {
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency("+14156667777", RegionCode::GetUnknown(),
                             PhoneNumberMatcher::VALID));
  PhoneNumberMatch found;

  // HasNext() is called twice to ensure it does not advance the matcher.
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&found));

  // Nothing is left after the single match.
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&found));
}
1219 
// Text with exactly one number, iterated with Next() alone (no HasNext()).
TEST_F(PhoneNumberMatcherTest, SingleIteration_WithNextOnly) {
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency("+14156667777", RegionCode::GetUnknown(),
                             PhoneNumberMatcher::VALID));
  PhoneNumberMatch found;

  EXPECT_TRUE(matcher->Next(&found));
  EXPECT_FALSE(matcher->Next(&found));
}
1228 
// Text with two numbers: two successful Next() calls, after which the matcher
// is exhausted.
TEST_F(PhoneNumberMatcherTest, DoubleIteration) {
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency("+14156667777 foobar +14156667777 ",
                             RegionCode::GetUnknown(),
                             PhoneNumberMatcher::VALID));
  PhoneNumberMatch found;

  // First number. HasNext() is doubled to ensure it does not advance.
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&found));

  // Second number, same pattern.
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&found));

  // Exhausted.
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&found));
  EXPECT_FALSE(matcher->HasNext());
}
1248 
// Text with two numbers, iterated with Next() alone (no HasNext()).
TEST_F(PhoneNumberMatcherTest, DoubleIteration_WithNextOnly) {
  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency("+14156667777 foobar +14156667777 ",
                             RegionCode::GetUnknown(),
                             PhoneNumberMatcher::VALID));
  PhoneNumberMatch found;

  EXPECT_TRUE(matcher->Next(&found));
  EXPECT_TRUE(matcher->Next(&found));
  EXPECT_FALSE(matcher->Next(&found));
}
1260 
1261 }  // namespace phonenumbers
1262 }  // namespace i18n
1263