1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "phonenumbers/phonenumbermatcher.h"
16
17 #include <string>
18 #include <vector>
19
20 #include <gtest/gtest.h>
21 #include <unicode/unistr.h>
22
23 #include "phonenumbers/base/basictypes.h"
24 #include "phonenumbers/base/memory/scoped_ptr.h"
25 #include "phonenumbers/base/memory/singleton.h"
26 #include "phonenumbers/default_logger.h"
27 #include "phonenumbers/phonenumber.h"
28 #include "phonenumbers/phonenumber.pb.h"
29 #include "phonenumbers/phonenumbermatch.h"
30 #include "phonenumbers/phonenumberutil.h"
31 #include "phonenumbers/stringutil.h"
32 #include "phonenumbers/test_util.h"
33
34 namespace i18n {
35 namespace phonenumbers {
36
37 using std::string;
38 using icu::UnicodeString;
39
40 namespace {
41 // Small class that holds the context of the number we are testing against. The
42 // test will insert the phone number to be found between leading_text and
43 // trailing_text.
struct NumberContext {
  // Stores copies of the text that goes before and after the number.
  NumberContext(const string& leading_text, const string& trailing_text)
      : leading_text_(leading_text), trailing_text_(trailing_text) {}

  // Text placed directly in front of the number under test.
  string leading_text_;
  // Text placed directly behind the number under test.
  string trailing_text_;
};
52
53 // Small class that holds the number we want to test and the region for which it
54 // should be valid.
55 struct NumberTest {
56 string raw_string_;
57 string region_;
58
ToStringi18n::phonenumbers::__anon197a462c0111::NumberTest59 string ToString() const {
60 return StrCat(raw_string_, " (", region_, ")");
61 }
62
NumberTesti18n::phonenumbers::__anon197a462c0111::NumberTest63 NumberTest(const string& raw_string, const string& region)
64 : raw_string_(raw_string),
65 region_(region) {
66 }
67 };
68 } // namespace
69
70 class PhoneNumberMatcherTest : public testing::Test {
71 protected:
PhoneNumberMatcherTest()72 PhoneNumberMatcherTest()
73 : phone_util_(*PhoneNumberUtil::GetInstance()),
74 matcher_(phone_util_, "",
75 RegionCode::US(),
76 PhoneNumberMatcher::VALID, 5),
77 offset_(0) {
78 PhoneNumberUtil::GetInstance()->SetLogger(new StdoutLogger());
79 }
80
IsLatinLetter(char32 letter)81 bool IsLatinLetter(char32 letter) {
82 return PhoneNumberMatcher::IsLatinLetter(letter);
83 }
84
ContainsMoreThanOneSlashInNationalNumber(const PhoneNumber & phone_number,const string & candidate)85 bool ContainsMoreThanOneSlashInNationalNumber(
86 const PhoneNumber& phone_number, const string& candidate) {
87 return PhoneNumberMatcher::ContainsMoreThanOneSlashInNationalNumber(
88 phone_number, candidate, phone_util_);
89 }
90
ExtractMatch(const string & text,PhoneNumberMatch * match)91 bool ExtractMatch(const string& text, PhoneNumberMatch* match) {
92 return matcher_.ExtractMatch(text, offset_, match);
93 }
94
GetMatcherWithLeniency(const string & text,const string & region,PhoneNumberMatcher::Leniency leniency) const95 PhoneNumberMatcher* GetMatcherWithLeniency(
96 const string& text, const string& region,
97 PhoneNumberMatcher::Leniency leniency) const {
98 return new PhoneNumberMatcher(phone_util_, text, region, leniency,
99 100 /* max_tries */);
100 }
101
102 // Tests each number in the test cases provided is found in its entirety for
103 // the specified leniency level.
DoTestNumberMatchesForLeniency(const std::vector<NumberTest> & test_cases,PhoneNumberMatcher::Leniency leniency) const104 void DoTestNumberMatchesForLeniency(
105 const std::vector<NumberTest>& test_cases,
106 PhoneNumberMatcher::Leniency leniency) const {
107 scoped_ptr<PhoneNumberMatcher> matcher;
108 for (std::vector<NumberTest>::const_iterator test = test_cases.begin();
109 test != test_cases.end(); ++test) {
110 matcher.reset(GetMatcherWithLeniency(
111 test->raw_string_, test->region_, leniency));
112 EXPECT_TRUE(matcher->HasNext())
113 << "No match found in " << test->ToString()
114 << " for leniency: " << leniency;
115 if (matcher->HasNext()) {
116 PhoneNumberMatch match;
117 matcher->Next(&match);
118 EXPECT_EQ(test->raw_string_, match.raw_string())
119 << "Found wrong match in test " << test->ToString()
120 << ". Found " << match.raw_string();
121 }
122 }
123 }
124
125 // Tests no number in the test cases provided is found for the specified
126 // leniency level.
DoTestNumberNonMatchesForLeniency(const std::vector<NumberTest> & test_cases,PhoneNumberMatcher::Leniency leniency) const127 void DoTestNumberNonMatchesForLeniency(
128 const std::vector<NumberTest>& test_cases,
129 PhoneNumberMatcher::Leniency leniency) const {
130 scoped_ptr<PhoneNumberMatcher> matcher;
131 for (std::vector<NumberTest>::const_iterator test = test_cases.begin();
132 test != test_cases.end(); ++test) {
133 matcher.reset(GetMatcherWithLeniency(
134 test->raw_string_, test->region_, leniency));
135 EXPECT_FALSE(matcher->HasNext()) << "Match found in " << test->ToString()
136 << " for leniency: " << leniency;
137 }
138 }
139
140 // Asserts that the raw string and expected proto buffer for a match are set
141 // appropriately.
AssertMatchProperties(const PhoneNumberMatch & match,const string & text,const string & number,const string & region_code)142 void AssertMatchProperties(const PhoneNumberMatch& match, const string& text,
143 const string& number, const string& region_code) {
144 PhoneNumber expected_result;
145 phone_util_.Parse(number, region_code, &expected_result);
146
147 EXPECT_EQ(expected_result, match.number());
148 EXPECT_EQ(number, match.raw_string()) << " Wrong number found in " << text;
149 }
150
151 // Asserts that another number can be found in "text" starting at "index", and
152 // that its corresponding range is [start, end).
AssertEqualRange(const string & text,int index,int start,int end)153 void AssertEqualRange(const string& text, int index, int start, int end) {
154 string sub = text.substr(index);
155 PhoneNumberMatcher matcher(phone_util_, sub, RegionCode::NZ(),
156 PhoneNumberMatcher::POSSIBLE,
157 1000000 /* max_tries */);
158 PhoneNumberMatch match;
159 ASSERT_TRUE(matcher.HasNext());
160 matcher.Next(&match);
161 EXPECT_EQ(start - index, match.start());
162 EXPECT_EQ(end - index, match.end());
163 EXPECT_EQ(sub.substr(match.start(), match.length()), match.raw_string());
164 }
165
166 // Tests numbers found by the PhoneNumberMatcher in various textual contexts.
DoTestFindInContext(const string & number,const string & default_country)167 void DoTestFindInContext(const string& number,
168 const string& default_country) {
169 FindPossibleInContext(number, default_country);
170
171 PhoneNumber parsed;
172 phone_util_.Parse(number, default_country, &parsed);
173 if (phone_util_.IsValidNumber(parsed)) {
174 FindValidInContext(number, default_country);
175 }
176 }
177
178 // Helper method which tests the contexts provided and ensures that:
179 // -- if is_valid is true, they all find a test number inserted in the middle
180 // when leniency of matching is set to VALID; else no test number should be
181 // extracted at that leniency level
182 // -- if is_possible is true, they all find a test number inserted in the
183 // middle when leniency of matching is set to POSSIBLE; else no test number
184 // should be extracted at that leniency level
FindMatchesInContexts(const std::vector<NumberContext> & contexts,bool is_valid,bool is_possible,const string & region,const string & number)185 void FindMatchesInContexts(const std::vector<NumberContext>& contexts,
186 bool is_valid, bool is_possible,
187 const string& region, const string& number) {
188 if (is_valid) {
189 DoTestInContext(number, region, contexts, PhoneNumberMatcher::VALID);
190 } else {
191 for (std::vector<NumberContext>::const_iterator it = contexts.begin();
192 it != contexts.end(); ++it) {
193 string text = StrCat(it->leading_text_, number, it->trailing_text_);
194 PhoneNumberMatcher matcher(text, region);
195 EXPECT_FALSE(matcher.HasNext());
196 }
197 }
198 if (is_possible) {
199 DoTestInContext(number, region, contexts, PhoneNumberMatcher::POSSIBLE);
200 } else {
201 for (std::vector<NumberContext>::const_iterator it = contexts.begin();
202 it != contexts.end(); ++it) {
203 string text = StrCat(it->leading_text_, number, it->trailing_text_);
204 PhoneNumberMatcher matcher(phone_util_, text, region,
205 PhoneNumberMatcher::POSSIBLE,
206 10000); // Number of matches.
207 EXPECT_FALSE(matcher.HasNext());
208 }
209 }
210 }
211
212 // Variant of FindMatchesInContexts that uses a default number and region.
FindMatchesInContexts(const std::vector<NumberContext> & contexts,bool is_valid,bool is_possible)213 void FindMatchesInContexts(const std::vector<NumberContext>& contexts,
214 bool is_valid, bool is_possible) {
215 const string& region = RegionCode::US();
216 const string number("415-666-7777");
217
218 FindMatchesInContexts(contexts, is_valid, is_possible, region, number);
219 }
220
221 // Tests valid numbers in contexts that should pass for
222 // PhoneNumberMatcher::POSSIBLE.
FindPossibleInContext(const string & number,const string & default_country)223 void FindPossibleInContext(const string& number,
224 const string& default_country) {
225 std::vector<NumberContext> context_pairs;
226 context_pairs.push_back(NumberContext("", "")); // no context
227 context_pairs.push_back(NumberContext(" ", "\t")); // whitespace only
228 context_pairs.push_back(NumberContext("Hello ", "")); // no context at end
229 // No context at start.
230 context_pairs.push_back(NumberContext("", " to call me!"));
231 context_pairs.push_back(NumberContext("Hi there, call ", " to reach me!"));
232 // With commas.
233 context_pairs.push_back(NumberContext("Hi there, call ", ", or don't"));
234 // Three examples without whitespace around the number.
235 context_pairs.push_back(NumberContext("Hi call", ""));
236 context_pairs.push_back(NumberContext("", "forme"));
237 context_pairs.push_back(NumberContext("Hi call", "forme"));
238 // With other small numbers.
239 context_pairs.push_back(NumberContext("It's cheap! Call ", " before 6:30"));
240 // With a second number later.
241 context_pairs.push_back(NumberContext("Call ", " or +1800-123-4567!"));
242 // With a Month-Day date.
243 context_pairs.push_back(NumberContext("Call me on June 2 at", ""));
244 // With publication pages.
245 context_pairs.push_back(NumberContext(
246 "As quoted by Alfonso 12-15 (2009), you may call me at ", ""));
247 context_pairs.push_back(NumberContext(
248 "As quoted by Alfonso et al. 12-15 (2009), you may call me at ", ""));
249 // With dates, written in the American style.
250 context_pairs.push_back(NumberContext(
251 "As I said on 03/10/2011, you may call me at ", ""));
252 // With trailing numbers after a comma. The 45 should not be considered an
253 // extension.
254 context_pairs.push_back(NumberContext("", ", 45 days a year"));
255 // When matching we don't consider semicolon along with legitimate extension
256 // symbol to indicate an extension. The 7246433 should not be considered an
257 // extension.
258 context_pairs.push_back(NumberContext("", ";x 7246433"));
259 // With a postfix stripped off as it looks like the start of another number.
260 context_pairs.push_back(NumberContext("Call ", "/x12 more"));
261
262 DoTestInContext(number, default_country, context_pairs,
263 PhoneNumberMatcher::POSSIBLE);
264 }
265
266 // Tests valid numbers in contexts that fail for PhoneNumberMatcher::POSSIBLE
267 // but are valid for PhoneNumberMatcher::VALID.
FindValidInContext(const string & number,const string & default_country)268 void FindValidInContext(const string& number, const string& default_country) {
269 std::vector<NumberContext> context_pairs;
270 // With other small numbers.
271 context_pairs.push_back(NumberContext("It's only 9.99! Call ", " to buy"));
272 // With a number Day.Month.Year date.
273 context_pairs.push_back(NumberContext("Call me on 21.6.1984 at ", ""));
274 // With a number Month/Day date.
275 context_pairs.push_back(NumberContext("Call me on 06/21 at ", ""));
276 // With a number Day.Month date.
277 context_pairs.push_back(NumberContext("Call me on 21.6. at ", ""));
278 // With a number Month/Day/Year date.
279 context_pairs.push_back(NumberContext("Call me on 06/21/84 at ", ""));
280
281 DoTestInContext(number, default_country, context_pairs,
282 PhoneNumberMatcher::VALID);
283 }
284
DoTestInContext(const string & number,const string & default_country,const std::vector<NumberContext> & context_pairs,PhoneNumberMatcher::Leniency leniency)285 void DoTestInContext(const string& number, const string& default_country,
286 const std::vector<NumberContext>& context_pairs,
287 PhoneNumberMatcher::Leniency leniency) {
288 for (std::vector<NumberContext>::const_iterator it = context_pairs.begin();
289 it != context_pairs.end(); ++it) {
290 string prefix = it->leading_text_;
291 string text = StrCat(prefix, number, it->trailing_text_);
292
293 int start = prefix.length();
294 int end = start + number.length();
295 PhoneNumberMatcher matcher(phone_util_, text, default_country, leniency,
296 1000000 /* max_tries */);
297 PhoneNumberMatch match;
298 ASSERT_TRUE(matcher.HasNext())
299 << "Did not find a number in '" << text << "'; expected '"
300 << number << "'";
301 matcher.Next(&match);
302
303 string extracted = text.substr(match.start(), match.length());
304 EXPECT_EQ(start, match.start());
305 EXPECT_EQ(end, match.end());
306 EXPECT_EQ(number, extracted);
307 EXPECT_EQ(extracted, match.raw_string())
308 << "Unexpected phone region in '" << text << "'; extracted '"
309 << extracted << "'";
310 EnsureTermination(text, default_country, leniency);
311 }
312 }
313
314 // Exhaustively searches for phone numbers from each index within "text" to
315 // test that finding matches always terminates.
EnsureTermination(const string & text,const string & default_country,PhoneNumberMatcher::Leniency leniency)316 void EnsureTermination(const string& text, const string& default_country,
317 PhoneNumberMatcher::Leniency leniency) {
318 for (size_t index = 0; index <= text.length(); ++index) {
319 string sub = text.substr(index);
320 // Iterates over all matches.
321 PhoneNumberMatcher matcher(phone_util_, text, default_country, leniency,
322 1000000 /* max_tries */);
323 string matches;
324 PhoneNumberMatch match;
325 int match_count = 0;
326 while (matcher.HasNext()) {
327 matcher.Next(&match);
328 StrAppend(&matches, ",", match.ToString());
329 ++match_count;
330 }
331 // We should not ever find more than 10 matches in a single candidate text
332 // in these test cases, so we check here that the matcher was limited by
333 // the number of matches, rather than by max_tries.
334 ASSERT_LT(match_count, 10);
335 }
336 }
337
338 const PhoneNumberUtil& phone_util_;
339
340 private:
341 PhoneNumberMatcher matcher_;
342 int offset_;
343 };
344
// Exercises ContainsMoreThanOneSlashInNationalNumber for different
// combinations of country code, country code source and candidate text.
TEST_F(PhoneNumberMatcherTest, ContainsMoreThanOneSlashInNationalNumber) {
  {
    // A date should return true.
    PhoneNumber date_like;
    date_like.set_country_code(1);
    date_like.set_country_code_source(PhoneNumber::FROM_DEFAULT_COUNTRY);
    EXPECT_TRUE(
        ContainsMoreThanOneSlashInNationalNumber(date_like, "1/05/2013"));
  }
  {
    // Here, the country code source thinks it started with a country calling
    // code, but this is not the same as the part before the slash, so it's
    // still true.
    PhoneNumber mismatched_code;
    mismatched_code.set_country_code(274);
    mismatched_code.set_country_code_source(
        PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
    EXPECT_TRUE(
        ContainsMoreThanOneSlashInNationalNumber(mismatched_code, "27/4/2013"));
  }
  {
    // Now it should be false, because the first slash is after the country
    // calling code.
    PhoneNumber with_plus_source;
    with_plus_source.set_country_code(49);
    with_plus_source.set_country_code_source(
        PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN);
    EXPECT_FALSE(ContainsMoreThanOneSlashInNationalNumber(with_plus_source,
                                                          "49/69/2013"));
  }
  {
    // The same number object is checked against three candidates that start
    // with a plus sign.
    PhoneNumber without_plus_source;
    without_plus_source.set_country_code(49);
    without_plus_source.set_country_code_source(
        PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
    EXPECT_FALSE(ContainsMoreThanOneSlashInNationalNumber(without_plus_source,
                                                          "+49/69/2013"));
    EXPECT_FALSE(ContainsMoreThanOneSlashInNationalNumber(without_plus_source,
                                                          "+ 49/69/2013"));
    EXPECT_TRUE(ContainsMoreThanOneSlashInNationalNumber(without_plus_source,
                                                         "+ 49/69/20/13"));
  }
  {
    // Here, the first group is not assumed to be the country calling code,
    // even though it is the same as it, so this should return true.
    PhoneNumber default_country_source;
    default_country_source.set_country_code(49);
    default_country_source.set_country_code_source(
        PhoneNumber::FROM_DEFAULT_COUNTRY);
    EXPECT_TRUE(ContainsMoreThanOneSlashInNationalNumber(default_country_source,
                                                         "49/69/2013"));
  }
}
390
391 // See PhoneNumberUtilTest::ParseNationalNumber.
TEST_F(PhoneNumberMatcherTest, FindNationalNumber) {
  // Each entry is run through DoTestFindInContext in the order listed.
  struct Sample {
    const char* number;
    string region;
  };
  const Sample samples[] = {
    // Same cases as in ParseNationalNumber.
    {"033316005", RegionCode::NZ()},
    // "33316005", RegionCode::NZ() is omitted since the national-prefix is
    // obligatory for these types of numbers in New Zealand.
    // National prefix attached and some formatting present.
    {"03-331 6005", RegionCode::NZ()},
    {"03 331 6005", RegionCode::NZ()},
    // Testing international prefixes.
    // Should strip country code.
    {"0064 3 331 6005", RegionCode::NZ()},
    // Try again, but this time we have an international number with Region
    // Code US. It should recognize the country code and parse accordingly.
    {"01164 3 331 6005", RegionCode::US()},
    {"+64 3 331 6005", RegionCode::US()},

    {"64(0)64123456", RegionCode::NZ()},
    // Check that using a "/" is fine in a phone number.
    // Note that real Polish numbers do *not* start with a 0.
    {"0123/456789", RegionCode::PL()},
    {"123-456-7890", RegionCode::US()},
  };
  const size_t sample_count = sizeof(samples) / sizeof(samples[0]);
  for (size_t i = 0; i < sample_count; ++i) {
    DoTestFindInContext(samples[i].number, samples[i].region);
  }
}
414
415 // See PhoneNumberUtilTest::ParseWithInternationalPrefixes.
TEST_F(PhoneNumberMatcherTest, FindWithInternationalPrefixes) {
  // Each entry is run through DoTestFindInContext in the order listed.
  struct Sample {
    const char* number;
    string region;
  };
  const Sample samples[] = {
    {"+1 (650) 333-6000", RegionCode::NZ()},
    {"1-650-333-6000", RegionCode::US()},
    // Calling the US number from Singapore by using different service
    // providers.
    // 1st test: calling using SingTel IDD service (IDD is 001)
    {"0011-650-333-6000", RegionCode::SG()},
    // 2nd test: calling using StarHub IDD service (IDD is 008)
    {"0081-650-333-6000", RegionCode::SG()},
    // 3rd test: calling using SingTel V019 service (IDD is 019)
    {"0191-650-333-6000", RegionCode::SG()},
    // Calling the US number from Poland
    {"0~01-650-333-6000", RegionCode::PL()},
    // Using "++" at the start.
    {"++1 (650) 333-6000", RegionCode::PL()},
    // Using a full-width plus sign in front of "1 (650) 333-6000".
    {"\xEF\xBC\x8B""1 (650) 333-6000", RegionCode::SG()},
    // The whole number "+1 (650) 333-6000", including punctuation, is here
    // represented in full-width form.
    {"\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95"
     "\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93"
     "\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",
     RegionCode::SG()},
  };
  const size_t sample_count = sizeof(samples) / sizeof(samples[0]);
  for (size_t i = 0; i < sample_count; ++i) {
    DoTestFindInContext(samples[i].number, samples[i].region);
  }
}
443
444 // See PhoneNumberUtilTest::ParseWithLeadingZero.
TEST_F(PhoneNumberMatcherTest, FindWithLeadingZero) {
  // International form, searched with New Zealand as the default region.
  const string international_form = "+39 02-36618 300";
  DoTestFindInContext(international_form, RegionCode::NZ());

  // National forms, searched with Italy as the default region.
  const string italy = RegionCode::IT();
  const string national_with_zero = "02-36618 300";
  const string national_without_zero = "312 345 678";
  DoTestFindInContext(national_with_zero, italy);
  DoTestFindInContext(national_without_zero, italy);
}
450
451 // See PhoneNumberUtilTest::ParseNationalNumberArgentina.
TEST_F(PhoneNumberMatcherTest, FindNationalNumberArgentina) {
  // Every number below is searched with Argentina as the default region, in
  // the order listed.
  const char* const numbers[] = {
    // Test parsing mobile numbers of Argentina.
    "+54 9 343 555 1212",
    "0343 15 555 1212",

    "+54 9 3715 65 4320",
    "03715 15 65 4320",

    // Test parsing fixed-line numbers of Argentina.
    "+54 11 3797 0000",
    "011 3797 0000",

    "+54 3715 65 4321",
    "03715 65 4321",

    "+54 23 1234 0000",
    "023 1234 0000",
  };
  const size_t number_count = sizeof(numbers) / sizeof(numbers[0]);
  for (size_t i = 0; i < number_count; ++i) {
    DoTestFindInContext(numbers[i], RegionCode::AR());
  }
}
470
// See PhoneNumberUtilTest::ParseWithXInNumber.
TEST_F(PhoneNumberMatcherTest, FindWithXInNumber) {
  const string argentina = RegionCode::AR();

  const string carrier_code_only = "(0xx) 123456789";
  DoTestFindInContext(carrier_code_only, argentina);

  // A case where x denotes both carrier codes and extension symbol.
  const string carrier_code_and_extension = "(0xx) 123456789 x 1234";
  DoTestFindInContext(carrier_code_and_extension, argentina);

  // This test is intentionally constructed such that the number of digit after
  // xx is larger than 7, so that the number won't be mistakenly treated as an
  // extension, as we allow extensions up to 7 digits. This assumption is okay
  // for now as all the countries where a carrier selection code is written in
  // the form of xx have a national significant number of length larger than 7.
  const string long_number_after_xx = "011xx5481429712";
  DoTestFindInContext(long_number_after_xx, RegionCode::US());
}
484
485 // See PhoneNumberUtilTest::ParseNumbersMexico.
TEST_F(PhoneNumberMatcherTest, FindNumbersMexico) {
  // Every number below is searched with Mexico as the default region, in the
  // order listed.
  const char* const numbers[] = {
    // Test parsing fixed-line numbers of Mexico.
    "+52 (449)978-0001",
    "01 (449)978-0001",
    "(449)978-0001",

    // Test parsing mobile numbers of Mexico.
    "+52 1 33 1234-5678",
    "044 (33) 1234-5678",
    "045 33 1234-5678",
  };
  const size_t number_count = sizeof(numbers) / sizeof(numbers[0]);
  for (size_t i = 0; i < number_count; ++i) {
    DoTestFindInContext(numbers[i], RegionCode::MX());
  }
}
497
498 // See PhoneNumberUtilTest::ParseNumbersWithPlusWithNoRegion.
TEST_F(PhoneNumberMatcherTest, FindNumbersWithPlusWithNoRegion) {
  // RegionCode::ZZ() is allowed only if the number starts with a '+' - then the
  // country code can be calculated.
  const string number_with_plus = "+64 3 331 6005";
  DoTestFindInContext(number_with_plus, RegionCode::ZZ());
}
504
505 // See PhoneNumberUtilTest::ParseExtensions.
TEST_F(PhoneNumberMatcherTest, FindExtensions) {
  // Each entry is run through DoTestFindInContext in the order listed.
  struct Sample {
    const char* number;
    string region;
  };
  const Sample samples[] = {
    {"03 331 6005 ext 3456", RegionCode::NZ()},
    {"03-3316005x3456", RegionCode::NZ()},
    {"03-3316005 int.3456", RegionCode::NZ()},
    {"03 3316005 #3456", RegionCode::NZ()},
    {"0~0 1800 7493 524", RegionCode::PL()},
    {"(1800) 7493.524", RegionCode::US()},
    // Check that the last instance of an extension token is matched.
    {"0~0 1800 7493 524 ~1234", RegionCode::PL()},
    // Verifying bug-fix where the last digit of a number was previously
    // omitted if it was a 0 when extracting the extension. Also verifying a
    // few different cases of extensions.
    {"+44 2034567890x456", RegionCode::NZ()},
    {"+44 2034567890x456", RegionCode::GB()},
    {"+44 2034567890 x456", RegionCode::GB()},
    {"+44 2034567890 X456", RegionCode::GB()},
    // NOTE(review): the next three entries are textually identical; they may
    // have been meant to differ in whitespace - confirm.
    {"+44 2034567890 X 456", RegionCode::GB()},
    {"+44 2034567890 X 456", RegionCode::GB()},
    {"+44 2034567890 X 456", RegionCode::GB()},

    {"(800) 901-3355 x 7246433", RegionCode::US()},
    {"(800) 901-3355 , ext 7246433", RegionCode::US()},
    {"(800) 901-3355 ,extension 7246433", RegionCode::US()},
    // The next test differs from PhoneNumberUtil -> when matching we don't
    // consider a lone comma to indicate an extension, although we accept it
    // when parsing.
    {"(800) 901-3355 ,x 7246433", RegionCode::US()},
    {"(800) 901-3355 ext: 7246433", RegionCode::US()},
  };
  const size_t sample_count = sizeof(samples) / sizeof(samples[0]);
  for (size_t i = 0; i < sample_count; ++i) {
    DoTestFindInContext(samples[i].number, samples[i].region);
  }
}
535
// A number whose digits are each separated by a single space, searched with
// New Zealand as the default region.
TEST_F(PhoneNumberMatcherTest, FindInterspersedWithSpace) {
  const string spaced_digits = "0 3 3 3 1 6 0 0 5";
  DoTestFindInContext(spaced_digits, RegionCode::NZ());
}
539
540 // Test matching behavior when starting in the middle of a phone number.
TEST_F(PhoneNumberMatcherTest, IntermediateParsePositions) {
  // Two spaces precede "or": the assertions below rely on the first number
  // occupying [5, 14) and the second one occupying [19, 28).
  string text = "Call 033316005  or 032316005!";
  //             |    |    |    |    |    |
  //             0    5   10   15   20   25

  // Iterate over all possible indices.
  for (int i = 0; i <= 5; ++i) {
    AssertEqualRange(text, i, 5, 14);
  }
  // 7 and 8 digits in a row are still parsed as number.
  AssertEqualRange(text, 6, 6, 14);
  AssertEqualRange(text, 7, 7, 14);
  // Anything smaller is skipped to the second instance.
  for (int i = 8; i <= 19; ++i) {
    AssertEqualRange(text, i, 19, 28);
  }
}
558
// Four numbers joined by " - " must be returned one after another, in order.
TEST_F(PhoneNumberMatcherTest, FourMatchesInARow) {
  const string expected_numbers[] = {
    "415-666-7777",
    "800-443-1223",
    "212-443-1223",
    "650-443-1223",
  };
  const size_t expected_count =
      sizeof(expected_numbers) / sizeof(expected_numbers[0]);
  const string text = StrCat(expected_numbers[0], " - ", expected_numbers[1],
                             " - ", expected_numbers[2], " - ",
                             expected_numbers[3]);

  PhoneNumberMatcher matcher(text, RegionCode::US());
  PhoneNumberMatch match;

  // The same match object is reused for every step of the iteration.
  for (size_t i = 0; i < expected_count; ++i) {
    EXPECT_TRUE(matcher.HasNext());
    EXPECT_TRUE(matcher.Next(&match));
    AssertMatchProperties(match, text, expected_numbers[i], RegionCode::US());
  }
}
585
// Two numbers separated only by the whitespace in the literal below must both
// be found, in order.
TEST_F(PhoneNumberMatcherTest, MatchesFoundWithMultipleSpaces) {
  const string expected_numbers[] = {
    "415-666-7777",
    "800-443-1223",
  };
  const size_t expected_count =
      sizeof(expected_numbers) / sizeof(expected_numbers[0]);
  const string text = StrCat(expected_numbers[0], " ", expected_numbers[1]);

  PhoneNumberMatcher matcher(text, RegionCode::US());
  PhoneNumberMatch match;

  // The same match object is reused for both steps of the iteration.
  for (size_t i = 0; i < expected_count; ++i) {
    EXPECT_TRUE(matcher.HasNext());
    EXPECT_TRUE(matcher.Next(&match));
    AssertMatchProperties(match, text, expected_numbers[i], RegionCode::US());
  }
}
602
// A five-digit zip code next to a phone number must not prevent the number
// from being found at VALID leniency, whether the zip code comes before or
// after it.
TEST_F(PhoneNumberMatcherTest, MatchWithSurroundingZipcodes) {
  string number = "415-666-7777";
  string zip_preceding =
      StrCat("My address is CA 34215 - ", number, " is my number.");

  scoped_ptr<PhoneNumberMatcher> matcher(
      GetMatcherWithLeniency(zip_preceding, RegionCode::US(),
                             PhoneNumberMatcher::VALID));

  // AssertMatchProperties parses "number" itself to build the expected
  // PhoneNumber, so no expectation needs to be prepared here.
  PhoneNumberMatch match;
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match));
  AssertMatchProperties(match, zip_preceding, number, RegionCode::US());

  // Now repeat, but this time the phone number has spaces in it. It should
  // still be found.
  number = "(415) 666 7777";

  string zip_following =
      StrCat("My number is ", number, ". 34215 is my zip-code.");
  matcher.reset(
      GetMatcherWithLeniency(zip_following, RegionCode::US(),
                             PhoneNumberMatcher::VALID));

  PhoneNumberMatch match_with_spaces;
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match_with_spaces));
  AssertMatchProperties(
      match_with_spaces, zip_following, number, RegionCode::US());
}
635
// Checks IsLatinLetter against ASCII letters, an accented Latin letter, a
// combining mark, punctuation, a digit, whitespace and non-Latin letters.
TEST_F(PhoneNumberMatcherTest, IsLatinLetter) {
  EXPECT_TRUE(IsLatinLetter('c'));
  EXPECT_TRUE(IsLatinLetter('C'));
  EXPECT_TRUE(IsLatinLetter(UnicodeString::fromUTF8("\xC3\x89" /* "É" */)[0]));
  // Combining acute accent.
  EXPECT_TRUE(IsLatinLetter(UnicodeString::fromUTF8("\xCC\x81")[0]));
  EXPECT_FALSE(IsLatinLetter(':'));
  EXPECT_FALSE(IsLatinLetter('5'));
  EXPECT_FALSE(IsLatinLetter('-'));
  EXPECT_FALSE(IsLatinLetter('.'));
  EXPECT_FALSE(IsLatinLetter(' '));
  EXPECT_FALSE(
      IsLatinLetter(UnicodeString::fromUTF8("\xE6\x88\x91" /* "我" */)[0]));
  /* Hiragana letter no (の) - this should neither seem to start or end with a
     Latin letter. */
  const UnicodeString hiragana_no = UnicodeString::fromUTF8("\xE3\x81\xAE");
  // Index the first and the last code unit of the string. The three UTF-8
  // bytes decode to fewer code units than bytes, so a fixed index of 2 would
  // be out of range and would only test the value returned for an invalid
  // offset.
  ASSERT_GT(hiragana_no.length(), 0);
  EXPECT_FALSE(IsLatinLetter(hiragana_no[0]));
  EXPECT_FALSE(IsLatinLetter(hiragana_no[hiragana_no.length() - 1]));
}
654
TEST_F(PhoneNumberMatcherTest, MatchesWithSurroundingLatinChars) {
  const NumberContext latin_contexts[] = {
    NumberContext("abc", "def"),
    NumberContext("abc", ""),
    NumberContext("", "def"),
    NumberContext("\xC3\x89" /* "É" */, ""),
    // e with an acute accent decomposed (with combining mark).
    NumberContext("\x20\x22\xCC\x81""e\xCC\x81", ""),
  };
  const size_t context_count =
      sizeof(latin_contexts) / sizeof(latin_contexts[0]);
  const std::vector<NumberContext> possible_only_contexts(
      latin_contexts, latin_contexts + context_count);

  // Numbers should not be considered valid, if they are surrounded by Latin
  // characters, but should be considered possible.
  const bool is_valid = false;
  const bool is_possible = true;
  FindMatchesInContexts(possible_only_contexts, is_valid, is_possible);
}
669
// A currency symbol directly attached to the number must stop it from being
// found at VALID leniency, while it is still found at POSSIBLE leniency.
TEST_F(PhoneNumberMatcherTest, MoneyNotSeenAsPhoneNumber) {
  const NumberContext money_contexts[] = {
    NumberContext("$", ""),
    NumberContext("", "$"),
    NumberContext("\xC2\xA3" /* "£" */, ""),
    NumberContext("\xC2\xA5" /* "¥" */, ""),
  };
  const size_t context_count =
      sizeof(money_contexts) / sizeof(money_contexts[0]);
  const std::vector<NumberContext> possible_only_contexts(
      money_contexts, money_contexts + context_count);

  const bool is_valid = false;
  const bool is_possible = true;
  FindMatchesInContexts(possible_only_contexts, is_valid, is_possible);
}
678
TEST_F(PhoneNumberMatcherTest, PercentageNotSeenAsPhoneNumber) {
  // A single context: the number is directly followed by a percent sign.
  const std::vector<NumberContext> possible_only_contexts(
      1, NumberContext("", "%"));

  // Numbers followed by % should be dropped.
  const bool is_valid = false;
  const bool is_possible = true;
  FindMatchesInContexts(possible_only_contexts, is_valid, is_possible);
}
685
TEST_F(PhoneNumberMatcherTest, PhoneNumberWithLeadingOrTrailingMoneyMatches) {
  const NumberContext money_nearby[] = {
    NumberContext("$20 ", ""),
    NumberContext("", " 100$"),
  };
  const size_t context_count = sizeof(money_nearby) / sizeof(money_nearby[0]);
  const std::vector<NumberContext> contexts(money_nearby,
                                            money_nearby + context_count);

  // Because of the space after the 20 (or before the 100) these dollar amounts
  // should not stop the actual number from being found.
  const bool is_valid = true;
  const bool is_possible = true;
  FindMatchesInContexts(contexts, is_valid, is_possible);
}
694
// Exercises numbers that begin with punctuation ("+" or "("): leading Latin
// characters are tolerated for such numbers, trailing ones are not.
TEST_F(PhoneNumberMatcherTest,
       MatchesWithSurroundingLatinCharsAndLeadingPunctuation) {
  const string number_with_plus("+14156667777");
  const string number_with_brackets("(415)6667777");

  // Contexts with trailing characters. Leading characters are okay here since
  // the inserted numbers start with punctuation, but trailing characters are
  // still not allowed.
  const NumberContext kTrailingLatinContexts[] = {
    NumberContext("abc", "def"),
    NumberContext("", "def"),
    NumberContext("", "\xC3\x89" /* "É" */),
  };
  std::vector<NumberContext> possible_only_contexts(
      kTrailingLatinContexts,
      kTrailingLatinContexts + arraysize(kTrailingLatinContexts));

  // With trailing Latin characters the numbers should not be considered
  // valid, but should be considered possible.
  FindMatchesInContexts(possible_only_contexts, false, true, RegionCode::US(),
                        number_with_plus);
  FindMatchesInContexts(possible_only_contexts, false, true, RegionCode::US(),
                        number_with_brackets);

  const NumberContext kLeadingLatinContexts[] = {
    NumberContext("abc", ""),
    NumberContext("\xC3\x89" /* "É" */, ""),
    NumberContext("\xC3\x89" /* "É" */, "."),     // Trailing punctuation.
    NumberContext("\xC3\x89" /* "É" */, " def"),  // Trailing white-space.
  };
  std::vector<NumberContext> valid_contexts(
      kLeadingLatinContexts,
      kLeadingLatinContexts + arraysize(kLeadingLatinContexts));

  // Here the numbers should be considered valid, since they start with
  // punctuation.
  FindMatchesInContexts(valid_contexts, true, true, RegionCode::US(),
                        number_with_plus);
  FindMatchesInContexts(valid_contexts, true, true, RegionCode::US(),
                        number_with_brackets);
}
728
// Numbers surrounded by Chinese text (given as UTF-8 byte escapes) should be
// considered valid.
TEST_F(PhoneNumberMatcherTest, MatchesWithSurroundingChineseChars) {
  const NumberContext kChineseContexts[] = {
    // Leading text only.
    NumberContext(
        /* "我的电话号码是" */
        "\xE6\x88\x91\xE7\x9A\x84\xE7\x94\xB5\xE8\xAF\x9D\xE5\x8F\xB7\xE7\xA0\x81"
        "\xE6\x98\xAF",
        ""),
    // Trailing text only.
    NumberContext(
        "",
        /* "是我的电话号码" */
        "\xE6\x98\xAF\xE6\x88\x91\xE7\x9A\x84\xE7\x94\xB5\xE8\xAF\x9D\xE5\x8F\xB7"
        "\xE7\xA0\x81"),
    // Text on both sides.
    NumberContext(
        "\xE8\xAF\xB7\xE6\x8B\xA8\xE6\x89\x93" /* "请拨打" */,
        "\xE6\x88\x91\xE5\x9C\xA8\xE6\x98\x8E\xE5\xA4\xA9" /* "我在明天" */),
  };
  std::vector<NumberContext> valid_contexts(
      kChineseContexts, kChineseContexts + arraysize(kChineseContexts));

  FindMatchesInContexts(valid_contexts, true, true);
}
747
// Numbers delimited by punctuation or white-space should be considered valid.
TEST_F(PhoneNumberMatcherTest, MatchesWithSurroundingPunctuation) {
  const NumberContext kPunctuationContexts[] = {
    NumberContext("My number-", ""),             // At end of text.
    NumberContext("", ".Nice day."),             // At start of text.
    NumberContext("Tel:", "."),                  // Punctuation surrounds number.
    NumberContext("Tel: ", " on Saturdays."),    // White-space is also fine.
  };
  std::vector<NumberContext> valid_contexts(
      kPunctuationContexts,
      kPunctuationContexts + arraysize(kPunctuationContexts));

  FindMatchesInContexts(valid_contexts, true, true);
}
763
// Two US numbers separated by " -- " must be reported as two distinct
// matches, at offsets 5 and 21 of the text.
TEST_F(PhoneNumberMatcherTest,
       MatchesMultiplePhoneNumbersSeparatedByPhoneNumberPunctuation) {
  const string text = "Call 650-253-4561 -- 455-234-3451";
  const string& region = RegionCode::US();

  // Expected first match: "650-253-4561" starting at index 5.
  PhoneNumber number1;
  number1.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number1.set_national_number(6502534561ULL);
  PhoneNumberMatch match1(5, "650-253-4561", number1);

  // Expected second match: "455-234-3451" starting at index 21.
  PhoneNumber number2;
  number2.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number2.set_national_number(4552343451ULL);
  PhoneNumberMatch match2(21, "455-234-3451", number2);

  PhoneNumberMatcher matcher(
      phone_util_, text, region, PhoneNumberMatcher::VALID, 100);

  PhoneNumberMatch actual_match1;
  PhoneNumberMatch actual_match2;
  // Stop right away if a match is missing: comparing against a match object
  // that Next() never filled in would only produce a confusing failure.
  ASSERT_TRUE(matcher.Next(&actual_match1));
  ASSERT_TRUE(matcher.Next(&actual_match2));
  EXPECT_TRUE(match1.Equals(actual_match1))
      << "Got: " << actual_match1.ToString();
  EXPECT_TRUE(match2.Equals(actual_match2))
      << "Got: " << actual_match2.ToString();
}
790
// Two numbers glued together by "--" with no white-space in between must not
// yield any match at VALID leniency.
TEST_F(PhoneNumberMatcherTest,
       DoesNotMatchMultiplePhoneNumbersSeparatedWithNoWhiteSpace) {
  const string glued_numbers("Call 650-253-4561--455-234-3451");
  PhoneNumberMatcher matcher(phone_util_, glued_numbers, RegionCode::US(),
                             PhoneNumberMatcher::VALID, 100);
  EXPECT_FALSE(matcher.HasNext());
}
799
800 // Strings with number-like things that shouldn't be found under any level.
801 static const NumberTest kImpossibleCases[] = {
802 NumberTest("12345", RegionCode::US()),
803 NumberTest("23456789", RegionCode::US()),
804 NumberTest("234567890112", RegionCode::US()),
805 NumberTest("650+253+1234", RegionCode::US()),
806 NumberTest("3/10/1984", RegionCode::CA()),
807 NumberTest("03/27/2011", RegionCode::US()),
808 NumberTest("31/8/2011", RegionCode::US()),
809 NumberTest("1/12/2011", RegionCode::US()),
810 NumberTest("10/12/82", RegionCode::DE()),
811 NumberTest("650x2531234", RegionCode::US()),
812 NumberTest("2012-01-02 08:00", RegionCode::US()),
813 NumberTest("2012/01/02 08:00", RegionCode::US()),
814 NumberTest("20120102 08:00", RegionCode::US()),
815 NumberTest("2014-04-12 04:04 PM", RegionCode::US()),
816 NumberTest("2014-04-12 04:04 PM", RegionCode::US()),
817 NumberTest("2014-04-12 04:04 PM", RegionCode::US()),
818 NumberTest("2014-04-12 04:04 PM", RegionCode::US()),
819 };
820
// Strings with number-like things that should only be found under "possible".
// Each entry pairs the raw text with the region it is tested against.
static const NumberTest kPossibleOnlyCases[] = {
  // US numbers cannot start with 7 in the test metadata to be valid.
  NumberTest("7121115678", RegionCode::US()),
  // 'X' should not be found in numbers at leniencies stricter than POSSIBLE,
  // unless it represents a carrier code or extension.
  NumberTest("1650 x 253 - 1234", RegionCode::US()),
  NumberTest("650 x 253 - 1234", RegionCode::US()),
  NumberTest("6502531x234", RegionCode::US()),
  // The national prefix (NP) is not optional here, yet it has been left out.
  NumberTest("(20) 3346 1234", RegionCode::GB()), // Non-optional NP omitted
};
832
// Strings with number-like things that should only be found up to and including
// the "valid" leniency level.
// Each entry pairs the raw text with the region it is tested against.
static const NumberTest kValidCases[] = {
  NumberTest("65 02 53 00 00", RegionCode::US()),
  NumberTest("6502 538365", RegionCode::US()),
  // 2 slashes are illegal at higher levels.
  NumberTest("650//253-1234", RegionCode::US()),
  NumberTest("650/253/1234", RegionCode::US()),
  NumberTest("9002309. 158", RegionCode::US()),
  NumberTest("12 7/8 - 14 12/34 - 5", RegionCode::US()),
  NumberTest("12.1 - 23.71 - 23.45", RegionCode::US()),
  NumberTest("800 234 1 111x1111", RegionCode::US()),
  NumberTest("1979-2011 100", RegionCode::US()),
  // National number in wrong format.
  NumberTest("+494949-4-94", RegionCode::DE()),
  NumberTest(
      /* "４１５６６６-７７７７": full-width digits (U+FF11..U+FF17, UTF-8
         encoded) around an ASCII hyphen (\x2D). */
      "\xEF\xBC\x94\xEF\xBC\x91\xEF\xBC\x95\xEF\xBC\x96\xEF\xBC\x96\xEF\xBC\x96"
      "\x2D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
  NumberTest("2012-0102 08", RegionCode::US()), // Very strange formatting.
  NumberTest("2012-01-02 08", RegionCode::US()),
  // Breakdown assistance number with unexpected formatting.
  NumberTest("1800-1-0-10 22", RegionCode::AU()),
  NumberTest("030-3-2 23 12 34", RegionCode::DE()),
  NumberTest("03 0 -3 2 23 12 34", RegionCode::DE()),
  NumberTest("(0)3 0 -3 2 23 12 34", RegionCode::DE()),
  NumberTest("0 3 0 -3 2 23 12 34", RegionCode::DE()),
  // The next entry is only compiled in when alternate formats are enabled.
#ifdef I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // Fits an alternate pattern, but the leading digits don't match.
  NumberTest("+52 332 123 23 23", RegionCode::MX()),
#endif // I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
};
865
// Strings with number-like things that should only be found up to and including
// the "strict_grouping" leniency level.
// Each entry pairs the raw text with the region it is tested against.
static const NumberTest kStrictGroupingCases[] = {
  NumberTest("(415) 6667777", RegionCode::US()),
  NumberTest("415-6667777", RegionCode::US()),
  // Should be found by strict grouping but not exact grouping, as the last two
  // groups are formatted together as a block.
  NumberTest("0800-2491234", RegionCode::DE()),
  // If the user is using alternate formats, test that numbers formatted in
  // that way are found.
#ifdef I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // Doesn't match any formatting in the test file, but almost matches an
  // alternate format (the last two groups have been squashed together here).
  NumberTest("0900-1 123123", RegionCode::DE()),
  NumberTest("(0)900-1 123123", RegionCode::DE()),
  NumberTest("0 900-1 123123", RegionCode::DE()),
#endif // I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // NDC also found as part of the country calling code; this shouldn't ruin the
  // grouping expectations.
  NumberTest("+33 3 34 2312", RegionCode::FR()),
};
887
// Strings with number-like things that should be found at all levels.
// Each entry pairs the raw text with the region it is tested against.
static const NumberTest kExactGroupingCases[] = {
  NumberTest(
      /* "４１５６６６７７７７": full-width digits (U+FF11..U+FF17, UTF-8
         encoded), no separators. */
      "\xEF\xBC\x94\xEF\xBC\x91\xEF\xBC\x95\xEF\xBC\x96\xEF\xBC\x96\xEF\xBC\x96"
      "\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
  NumberTest(
      /* "４１５－６６６－７７７７": full-width digits separated by the
         full-width hyphen-minus U+FF0D (\xEF\xBC\x8D). */
      "\xEF\xBC\x94\xEF\xBC\x91\xEF\xBC\x95\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x96"
      "\xEF\xBC\x96\xEF\xBC\x8D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97"
      "\xEF\xBC\x97", RegionCode::US()),
  NumberTest("4156667777", RegionCode::US()),
  NumberTest("4156667777 x 123", RegionCode::US()),
  NumberTest("415-666-7777", RegionCode::US()),
  NumberTest("415/666-7777", RegionCode::US()),
  NumberTest("415-666-7777 ext. 503", RegionCode::US()),
  NumberTest("1 415 666 7777 x 123", RegionCode::US()),
  NumberTest("+1 415-666-7777", RegionCode::US()),
  NumberTest("+494949 49", RegionCode::DE()),
  NumberTest("+49-49-34", RegionCode::DE()),
  NumberTest("+49-4931-49", RegionCode::DE()),
  NumberTest("04931-49", RegionCode::DE()), // With National Prefix
  NumberTest("+49-494949", RegionCode::DE()), // One group with country code
  NumberTest("+49-494949 ext. 49", RegionCode::DE()),
  NumberTest("+49494949 ext. 49", RegionCode::DE()),
  NumberTest("0494949", RegionCode::DE()),
  NumberTest("0494949 ext. 49", RegionCode::DE()),
  // "NP" below stands for national prefix.
  NumberTest("01 (33) 3461 2234", RegionCode::MX()), // Optional NP present
  NumberTest("(33) 3461 2234", RegionCode::MX()), // Optional NP omitted
  // If the user is using alternate formats, test that numbers formatted in
  // that way are found.
#ifdef I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  // Breakdown assistance number using alternate formatting pattern.
  NumberTest("1800-10-10 22", RegionCode::AU()),
  // Doesn't match any formatting in the test file, but matches an alternate
  // format exactly.
  NumberTest("0900-1 123 123", RegionCode::DE()),
  NumberTest("(0)900-1 123 123", RegionCode::DE()),
  NumberTest("0 900-1 123 123", RegionCode::DE()),
#endif // I18N_PHONENUMBERS_USE_ALTERNATE_FORMATS
  NumberTest("+33 3 34 23 12", RegionCode::FR()),
};
930
// At POSSIBLE leniency every case table except kImpossibleCases is expected
// to match.
TEST_F(PhoneNumberMatcherTest, MatchesWithPossibleLeniency) {
  // Resulting order: exact grouping, strict grouping, valid, possible-only.
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  test_cases.insert(test_cases.end(), kStrictGroupingCases,
                    kStrictGroupingCases + arraysize(kStrictGroupingCases));
  test_cases.insert(test_cases.end(), kValidCases,
                    kValidCases + arraysize(kValidCases));
  test_cases.insert(test_cases.end(), kPossibleOnlyCases,
                    kPossibleOnlyCases + arraysize(kPossibleOnlyCases));

  DoTestNumberMatchesForLeniency(test_cases, PhoneNumberMatcher::POSSIBLE);
}
944
// Only kImpossibleCases is expected to be rejected at POSSIBLE leniency.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithPossibleLeniency) {
  std::vector<NumberTest> test_cases(
      kImpossibleCases, kImpossibleCases + arraysize(kImpossibleCases));

  DoTestNumberNonMatchesForLeniency(test_cases, PhoneNumberMatcher::POSSIBLE);
}
951
// At VALID leniency the valid, strict-grouping and exact-grouping tables are
// expected to match.
TEST_F(PhoneNumberMatcherTest, MatchesWithValidLeniency) {
  // Resulting order: exact grouping, strict grouping, valid.
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  test_cases.insert(test_cases.end(), kStrictGroupingCases,
                    kStrictGroupingCases + arraysize(kStrictGroupingCases));
  test_cases.insert(test_cases.end(), kValidCases,
                    kValidCases + arraysize(kValidCases));

  DoTestNumberMatchesForLeniency(test_cases, PhoneNumberMatcher::VALID);
}
963
// At VALID leniency the impossible and possible-only tables are expected to
// be rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithValidLeniency) {
  // Resulting order: possible-only, impossible.
  std::vector<NumberTest> test_cases(
      kPossibleOnlyCases, kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  test_cases.insert(test_cases.end(), kImpossibleCases,
                    kImpossibleCases + arraysize(kImpossibleCases));

  DoTestNumberNonMatchesForLeniency(test_cases, PhoneNumberMatcher::VALID);
}
972
// At STRICT_GROUPING leniency the strict-grouping and exact-grouping tables
// are expected to match.
TEST_F(PhoneNumberMatcherTest, MatchesWithStrictGroupingLeniency) {
  // Resulting order: exact grouping, strict grouping.
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));
  test_cases.insert(test_cases.end(), kStrictGroupingCases,
                    kStrictGroupingCases + arraysize(kStrictGroupingCases));

  DoTestNumberMatchesForLeniency(test_cases,
                                 PhoneNumberMatcher::STRICT_GROUPING);
}
983
// At STRICT_GROUPING leniency the impossible, possible-only and valid tables
// are expected to be rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithStrictGroupingLeniency) {
  // Resulting order: valid, possible-only, impossible.
  std::vector<NumberTest> test_cases(
      kValidCases, kValidCases + arraysize(kValidCases));
  test_cases.insert(test_cases.end(), kPossibleOnlyCases,
                    kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  test_cases.insert(test_cases.end(), kImpossibleCases,
                    kImpossibleCases + arraysize(kImpossibleCases));

  DoTestNumberNonMatchesForLeniency(test_cases,
                                    PhoneNumberMatcher::STRICT_GROUPING);
}
995
// At EXACT_GROUPING leniency only the exact-grouping table is expected to
// match.
TEST_F(PhoneNumberMatcherTest, MatchesWithExactGroupingLeniency) {
  std::vector<NumberTest> test_cases(
      kExactGroupingCases,
      kExactGroupingCases + arraysize(kExactGroupingCases));

  DoTestNumberMatchesForLeniency(test_cases,
                                 PhoneNumberMatcher::EXACT_GROUPING);
}
1003
// At EXACT_GROUPING leniency every table other than kExactGroupingCases is
// expected to be rejected.
TEST_F(PhoneNumberMatcherTest, NonMatchesWithExactGroupingLeniency) {
  // Resulting order: strict grouping, valid, possible-only, impossible.
  std::vector<NumberTest> test_cases(
      kStrictGroupingCases,
      kStrictGroupingCases + arraysize(kStrictGroupingCases));
  test_cases.insert(test_cases.end(), kValidCases,
                    kValidCases + arraysize(kValidCases));
  test_cases.insert(test_cases.end(), kPossibleOnlyCases,
                    kPossibleOnlyCases + arraysize(kPossibleOnlyCases));
  test_cases.insert(test_cases.end(), kImpossibleCases,
                    kImpossibleCases + arraysize(kImpossibleCases));

  DoTestNumberNonMatchesForLeniency(test_cases,
                                    PhoneNumberMatcher::EXACT_GROUPING);
}
1018
// ExtractMatch() must not report a match for sentences whose only digit
// groups are slash-separated dates.
TEST_F(PhoneNumberMatcherTest, ExtractMatchIgnoresAmericanDates) {
  PhoneNumberMatch match;

  string low_day_date("As I said on 03/10/2011, you may call me at ");
  EXPECT_FALSE(ExtractMatch(low_day_date, &match));

  string high_day_date("As I said on 03/27/2011, you may call me at ");
  EXPECT_FALSE(ExtractMatch(high_day_date, &match));

  string day_first_date("As I said on 31/8/2011, you may call me at ");
  EXPECT_FALSE(ExtractMatch(day_first_date, &match));

  string single_digit_date("As I said on 1/12/2011, you may call me at ");
  EXPECT_FALSE(ExtractMatch(single_digit_date, &match));

  string short_year_date("I was born on 10/12/82. Please call me at ");
  EXPECT_FALSE(ExtractMatch(short_year_date, &match));
}
1032
// Candidates whose brackets do not pair up must not be matched at VALID
// leniency.
TEST_F(PhoneNumberMatcherTest, NonMatchingBracketsAreInvalid) {
  {
    // The digits up to the ", " form a valid US number, but it shouldn't be
    // matched as one since there was a non-matching bracket present.
    scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
        "80.585 [79.964, 81.191]", RegionCode::US(),
        PhoneNumberMatcher::VALID));
    EXPECT_FALSE(matcher->HasNext());
  }
  {
    // The trailing "]" is thrown away before parsing, so the resultant
    // number, while a valid US number, does not have matching brackets.
    scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
        "80.585 [79.964]", RegionCode::US(), PhoneNumberMatcher::VALID));
    EXPECT_FALSE(matcher->HasNext());
  }
  {
    // One opening bracket too many.
    scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
        "80.585 ((79.964)", RegionCode::US(), PhoneNumberMatcher::VALID));
    EXPECT_FALSE(matcher->HasNext());
  }
  {
    // This case has too many sets of brackets to be valid.
    scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
        "(80).(585) (79).(9)64", RegionCode::US(),
        PhoneNumberMatcher::VALID));
    EXPECT_FALSE(matcher->HasNext());
  }
}
1056
// A number without an international prefix must not be matched when the
// region code is ZZ.
TEST_F(PhoneNumberMatcherTest, NoMatchIfRegionIsUnknown) {
  const string national_only_text(
      "Random text body - number is 0331 6005, see you there");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      national_only_text, RegionCode::ZZ(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(matcher->HasNext());
}
1064
// Neither an empty string nor a white-space-only string contains a match.
TEST_F(PhoneNumberMatcherTest, NoMatchInEmptyString) {
  {
    scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
        "", RegionCode::US(), PhoneNumberMatcher::VALID));
    EXPECT_FALSE(matcher->HasNext());
  }
  {
    scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
        " ", RegionCode::US(), PhoneNumberMatcher::VALID));
    EXPECT_FALSE(matcher->HasNext());
  }
}
1073
// Text that contains no digits at all must not produce a match.
TEST_F(PhoneNumberMatcherTest, NoMatchIfNoNumber) {
  const string text_without_number(
      "Random text body - number is foobar, see you there");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      text_without_number, RegionCode::US(), PhoneNumberMatcher::VALID));
  EXPECT_FALSE(matcher->HasNext());
}
1080
// Feeds the matcher a long text containing a stray non-UTF-8 byte (\356) and
// checks that it simply reports no matches.
TEST_F(PhoneNumberMatcherTest, NoErrorWithSpecialCharacters) {
  const string fragment_with_bad_byte =
      "Myfuzzvar1152: \"My info:%415-666-7777 123 fake street\"\nfuzzvar1155: "
      "47\nfuzzvar1158: %415-666-1234 "
      "i18n_phonenumbers_Pho\356eNumberMatcher_Leniency_VALID_1"
      "\nfuzzvar1159: 20316 info:%415-666-7777 123 fake str79ee\nt";

  // Repeat the fragment 100 times to build the input text.
  string repeated_text;
  for (int repetition = 0; repetition < 100; ++repetition) {
    repeated_text += fragment_with_bad_byte;
  }

  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      repeated_text, RegionCode::US(), PhoneNumberMatcher::POSSIBLE));
  // Since the input text contains invalid UTF-8, we do not return
  // any matches.
  EXPECT_FALSE(matcher->HasNext());
}
1097
// Test multiple occurrences: two NZ numbers in one text must be returned in
// order, each with the correct start offset.
TEST_F(PhoneNumberMatcherTest, Sequences) {
  // Note the two spaces before "or": they place the second number at index
  // 19, which is the offset expected below.
  const string text = "Call 033316005  or 032316005!";
  const string& region = RegionCode::NZ();

  // Expected first match: "033316005" starting at index 5.
  PhoneNumber number1;
  number1.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number1.set_national_number(33316005ULL);
  PhoneNumberMatch match1(5, "033316005", number1);

  // Expected second match: "032316005" starting at index 19.
  PhoneNumber number2;
  number2.set_country_code(phone_util_.GetCountryCodeForRegion(region));
  number2.set_national_number(32316005ULL);
  PhoneNumberMatch match2(19, "032316005", number2);

  PhoneNumberMatcher matcher(
      phone_util_, text, region, PhoneNumberMatcher::POSSIBLE, 100);

  PhoneNumberMatch actual_match1;
  PhoneNumberMatch actual_match2;
  // Stop right away if a match is missing: comparing against a match object
  // that Next() never filled in would only produce a confusing failure.
  ASSERT_TRUE(matcher.Next(&actual_match1));
  ASSERT_TRUE(matcher.Next(&actual_match2));
  EXPECT_TRUE(match1.Equals(actual_match1))
      << "Got: " << actual_match1.ToString();
  EXPECT_TRUE(match2.Equals(actual_match2))
      << "Got: " << actual_match2.ToString();
}
1123
// With a limit of 10 passed to the matcher, all 100 valid numbers in the text
// are still returned.
TEST_F(PhoneNumberMatcherTest, MaxMatches) {
  // Set up text with 100 valid phone numbers.
  string numbers;
  for (int repetition = 0; repetition < 100; ++repetition) {
    numbers += "My info: 415-666-7777,";
  }

  // Matches all 100. Max only applies to failed cases.
  PhoneNumber number;
  phone_util_.Parse("+14156667777", RegionCode::US(), &number);
  std::vector<PhoneNumber> expected(100, number);

  PhoneNumberMatcher matcher(
      phone_util_, numbers, RegionCode::US(), PhoneNumberMatcher::VALID, 10);

  // Drain the matcher, collecting the parsed number of every match.
  std::vector<PhoneNumber> actual;
  for (PhoneNumberMatch match; matcher.HasNext();) {
    matcher.Next(&match);
    actual.push_back(match.number());
  }
  EXPECT_EQ(expected, actual);
}
1146
// With a limit of 10 passed to the matcher, ten leading invalid numbers are
// enough for it to report no match, even though valid numbers follow.
TEST_F(PhoneNumberMatcherTest, MaxMatchesInvalid) {
  // Set up text with 10 invalid phone numbers followed by 100 valid.
  string numbers;
  for (int repetition = 0; repetition < 10; ++repetition) {
    numbers += "My address 949-8945-0";
  }
  for (int repetition = 0; repetition < 100; ++repetition) {
    numbers += "My info: 415-666-7777,";
  }

  PhoneNumberMatcher matcher(
      phone_util_, numbers, RegionCode::US(), PhoneNumberMatcher::VALID, 10);
  EXPECT_FALSE(matcher.HasNext());
}
1161
// With a limit of 10 passed to the matcher, exactly ten matches are expected
// when every valid number is embedded in a larger invalid candidate.
TEST_F(PhoneNumberMatcherTest, MaxMatchesMixed) {
  // Set up text with 100 valid numbers inside an invalid number.
  string numbers;
  for (int repetition = 0; repetition < 100; ++repetition) {
    numbers += "My info: 415-666-7777 123 fake street";
  }

  PhoneNumber number;
  phone_util_.Parse("+14156667777", RegionCode::ZZ(), &number);
  std::vector<PhoneNumber> expected(10, number);

  PhoneNumberMatcher matcher(
      phone_util_, numbers, RegionCode::US(), PhoneNumberMatcher::VALID, 10);

  // Drain the matcher, collecting the parsed number of every match.
  std::vector<PhoneNumber> actual;
  for (PhoneNumberMatch match; matcher.HasNext();) {
    matcher.Next(&match);
    actual.push_back(match.number());
  }
  EXPECT_EQ(expected, actual);
}
1183
// A number without a leading "+" must not be found when the region is
// unknown; both HasNext() and Next() have to report that.
TEST_F(PhoneNumberMatcherTest, NonPlusPrefixedNumbersNotFoundForInvalidRegion) {
  const string text_without_plus("1 456 764 156");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      text_without_plus, RegionCode::GetUnknown(),
      PhoneNumberMatcher::VALID));

  PhoneNumberMatch match;
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&match));
  EXPECT_FALSE(matcher->HasNext());
}
1193
// Iterating over empty text: HasNext() and Next() keep returning false no
// matter how often or in which order they are called.
TEST_F(PhoneNumberMatcherTest, EmptyIteration) {
  const string empty_text("");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      empty_text, RegionCode::GetUnknown(), PhoneNumberMatcher::VALID));

  PhoneNumberMatch match;
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&match));
  EXPECT_FALSE(matcher->HasNext());
}
1204
// Iterating over text with exactly one number, mixing HasNext() and Next().
TEST_F(PhoneNumberMatcherTest, SingleIteration) {
  const string single_number_text("+14156667777");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      single_number_text, RegionCode::GetUnknown(),
      PhoneNumberMatcher::VALID));
  PhoneNumberMatch match;

  // Try HasNext() twice to ensure it does not advance.
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match));

  // The only match has been consumed.
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&match));
}
1219
// Iterating over text with exactly one number using Next() alone: the first
// call succeeds, the second fails.
TEST_F(PhoneNumberMatcherTest, SingleIteration_WithNextOnly) {
  const string single_number_text("+14156667777");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      single_number_text, RegionCode::GetUnknown(),
      PhoneNumberMatcher::VALID));

  PhoneNumberMatch match;
  EXPECT_TRUE(matcher->Next(&match));
  EXPECT_FALSE(matcher->Next(&match));
}
1228
// Iterating over text with two numbers, mixing HasNext() and Next().
TEST_F(PhoneNumberMatcherTest, DoubleIteration) {
  const string two_numbers_text("+14156667777 foobar +14156667777 ");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      two_numbers_text, RegionCode::GetUnknown(), PhoneNumberMatcher::VALID));
  PhoneNumberMatch match;

  // Double HasNext() to ensure it does not advance.
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match));
  // Same again for the second number.
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->HasNext());
  EXPECT_TRUE(matcher->Next(&match));

  // Both matches have been consumed.
  EXPECT_FALSE(matcher->HasNext());
  EXPECT_FALSE(matcher->Next(&match));
  EXPECT_FALSE(matcher->HasNext());
}
1248
// Iterating over text with two numbers using Next() alone: two calls succeed,
// the third fails.
TEST_F(PhoneNumberMatcherTest, DoubleIteration_WithNextOnly) {
  const string two_numbers_text("+14156667777 foobar +14156667777 ");
  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      two_numbers_text, RegionCode::GetUnknown(), PhoneNumberMatcher::VALID));

  PhoneNumberMatch match;
  EXPECT_TRUE(matcher->Next(&match));
  EXPECT_TRUE(matcher->Next(&match));
  EXPECT_FALSE(matcher->Next(&match));
}
1260
1261 } // namespace phonenumbers
1262 } // namespace i18n
1263