• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2009 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "phonenumbers/phonenumberutil.h"
16 
17 #include <algorithm>
18 #include <cctype>
19 #include <cstring>
20 #include <iterator>
21 #include <map>
22 #include <utility>
23 #include <vector>
24 
25 #include <unicode/uchar.h>
26 #include <unicode/utf8.h>
27 
28 #include "phonenumbers/asyoutypeformatter.h"
29 #include "phonenumbers/base/basictypes.h"
30 #include "phonenumbers/base/logging.h"
31 #include "phonenumbers/base/memory/singleton.h"
32 #include "phonenumbers/default_logger.h"
33 #include "phonenumbers/encoding_utils.h"
34 #include "phonenumbers/matcher_api.h"
35 #include "phonenumbers/metadata.h"
36 #include "phonenumbers/normalize_utf8.h"
37 #ifdef LIBPHONENUMBER_UPGRADE
38 #include "phonenumbers/ohos/update_metadata.h"
39 #include "phonenumbers/ohos/update_libphonenumber.h"
40 #endif
41 #include "phonenumbers/phonemetadata.pb.h"
42 #include "phonenumbers/phonenumber.h"
43 #include "phonenumbers/phonenumber.pb.h"
44 #include "phonenumbers/regex_based_matcher.h"
45 #include "phonenumbers/regexp_adapter.h"
46 #include "phonenumbers/regexp_cache.h"
47 #include "phonenumbers/regexp_factory.h"
48 #include "phonenumbers/region_code.h"
49 #include "phonenumbers/stl_util.h"
50 #include "phonenumbers/stringutil.h"
51 #include "phonenumbers/utf/unicodetext.h"
52 #include "phonenumbers/utf/utf.h"
53 
54 namespace i18n {
55 namespace phonenumbers {
56 
57 using google::protobuf::RepeatedField;
58 using gtl::OrderByFirst;
59 
// static constants
// Out-of-line definitions of PhoneNumberUtil's integral static constants. No
// initializers appear here, so the values are presumably supplied with the
// in-class declarations (see phonenumberutil.h).
const size_t PhoneNumberUtil::kMinLengthForNsn;
const size_t PhoneNumberUtil::kMaxLengthForNsn;
const size_t PhoneNumberUtil::kMaxLengthCountryCode;
const int PhoneNumberUtil::kNanpaCountryCode;

// static
// The characters treated as a plus sign, as UTF-8: ASCII '+' followed by
// U+FF0B (full-width plus sign).
const char PhoneNumberUtil::kPlusChars[] = "+\xEF\xBC\x8B";
// Regular expression of acceptable punctuation found in phone numbers, used to
// find numbers in text and to decide what is a viable phone number. This
// excludes diallable characters.
// This consists of dash characters, white space characters, full stops,
// slashes, square brackets, parentheses and tildes. It also includes the letter
// 'x' as that is found as a placeholder for carrier information in some phone
// numbers. Full-width variants are also present.
// To find out the unicode code-point of the characters below in vim, highlight
// the character and type 'ga'. Note that the - is used to express ranges of
// full-width punctuation below, as well as being present in the expression
// itself. In emacs, you can use M-x unicode-what to query information about the
// unicode character.
//
// Decoded from the byte escapes, the expression contains, in order:
//   '-', 'x', the range U+2010-U+2015 (hyphens and dashes), U+2212 (minus
//   sign), U+30FC (katakana prolonged sound mark), the range U+FF0D-U+FF0F
//   (full-width hyphen-minus, full stop and solidus), ' ', U+00A0 (no-break
//   space), U+00AD (soft hyphen), U+200B (zero-width space), U+2060 (word
//   joiner), U+3000 (ideographic space), '(', ')', U+FF08 and U+FF09
//   (full-width parentheses), U+FF3B and U+FF3D (full-width square brackets),
//   '.', the escaped pair "\[" "\]", '/', '~', U+2053 (swung dash) and U+223C
//   (tilde operator).
// static
const char PhoneNumberUtil::kValidPunctuation[] =
    "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC"
    "\x8F \xC2\xA0\xC2\xAD\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88"
    "\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC";

// static
// Regular expression "(.*)[\\/] *x": captures everything that precedes a
// backslash or forward slash which is followed by optional spaces and an 'x'.
const char PhoneNumberUtil::kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x";

// static
// Region code string used for non-geographical entities.
const char PhoneNumberUtil::kRegionCodeForNonGeoEntity[] = "001";
92 
93 namespace {
94 
// The kPlusSign signifies the international prefix.
const char kPlusSign[] = "+";

// The ASCII star character.
const char kStarSign[] = "*";

// Literal tokens of the RFC 3966 ("tel:" URI) textual form. kRfc3966Prefix is
// prepended when formatting in RFC3966 format and kRfc3966ExtnPrefix is
// matched when looking for extensions.
const char kRfc3966ExtnPrefix[] = ";ext=";
const char kRfc3966Prefix[] = "tel:";
const char kRfc3966PhoneContext[] = ";phone-context=";
const char kRfc3966IsdnSubaddress[] = ";isub=";
// Regular expression fragment: at most one of '-', '.', '(' or ')'.
const char kRfc3966VisualSeparator[] = "[\\-\\.\\(\\)]?";

// Unicode "decimal digit" property class. It carries no brackets of its own, so
// it is meant to be embedded inside a character class (see ExtnDigits).
const char kDigits[] = "\\p{Nd}";
// We accept alpha characters in phone numbers, ASCII only. We store lower-case
// here only since our regular expressions are case-insensitive.
const char kValidAlpha[] = "a-z";
const char kValidAlphaInclUppercase[] = "A-Za-z";

// Default extension prefix to use when formatting. This will be put in front of
// any extension component of the number, after the main national number is
// formatted. For example, if you wish the default extension formatting to be "
// extn: 3456", then you should specify " extn: " here as the default extension
// prefix. This can be overridden by region-specific preferences.
const char kDefaultExtnPrefix[] = " ext. ";

// Zero or more spaces, no-break spaces (U+00A0), tabs or commas that may sit
// between the number and the extension label.
const char kPossibleSeparatorsBetweenNumberAndExtLabel[] =
    "[ \xC2\xA0\\t,]*";

// Optional colon, full stop (.) or full-width full stop (U+FF0E), followed by
// zero or more spaces, no-break spaces (U+00A0), tabs, commas or hyphens.
const char kPossibleCharsAfterExtLabel[] =
    "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*";
// An optional trailing '#' after the extension digits.
const char kOptionalExtSuffix[] = "#?";
127 
LoadCompiledInMetadata(PhoneMetadataCollection * metadata)128 bool LoadCompiledInMetadata(PhoneMetadataCollection* metadata) {
129   if (!metadata->ParseFromArray(metadata_get(), metadata_size())) {
130     LOG(ERROR) << "Could not parse binary data.";
131     return false;
132   }
133   return true;
134 }
135 
136 // Returns a pointer to the description inside the metadata of the appropriate
137 // type.
GetNumberDescByType(const PhoneMetadata & metadata,PhoneNumberUtil::PhoneNumberType type)138 const PhoneNumberDesc* GetNumberDescByType(
139     const PhoneMetadata& metadata,
140     PhoneNumberUtil::PhoneNumberType type) {
141   switch (type) {
142     case PhoneNumberUtil::PREMIUM_RATE:
143       return &metadata.premium_rate();
144     case PhoneNumberUtil::TOLL_FREE:
145       return &metadata.toll_free();
146     case PhoneNumberUtil::MOBILE:
147       return &metadata.mobile();
148     case PhoneNumberUtil::FIXED_LINE:
149     case PhoneNumberUtil::FIXED_LINE_OR_MOBILE:
150       return &metadata.fixed_line();
151     case PhoneNumberUtil::SHARED_COST:
152       return &metadata.shared_cost();
153     case PhoneNumberUtil::VOIP:
154       return &metadata.voip();
155     case PhoneNumberUtil::PERSONAL_NUMBER:
156       return &metadata.personal_number();
157     case PhoneNumberUtil::PAGER:
158       return &metadata.pager();
159     case PhoneNumberUtil::UAN:
160       return &metadata.uan();
161     case PhoneNumberUtil::VOICEMAIL:
162       return &metadata.voicemail();
163     default:
164       return &metadata.general_desc();
165   }
166 }
167 
168 // A helper function that is used by Format and FormatByPattern.
PrefixNumberWithCountryCallingCode(int country_calling_code,PhoneNumberUtil::PhoneNumberFormat number_format,string * formatted_number)169 void PrefixNumberWithCountryCallingCode(
170     int country_calling_code,
171     PhoneNumberUtil::PhoneNumberFormat number_format,
172     string* formatted_number) {
173   switch (number_format) {
174     case PhoneNumberUtil::E164:
175       formatted_number->insert(0, StrCat(kPlusSign, country_calling_code));
176       return;
177     case PhoneNumberUtil::INTERNATIONAL:
178       formatted_number->insert(0, StrCat(kPlusSign, country_calling_code, " "));
179       return;
180     case PhoneNumberUtil::RFC3966:
181       formatted_number->insert(0, StrCat(kRfc3966Prefix, kPlusSign,
182                                          country_calling_code, "-"));
183       return;
184     case PhoneNumberUtil::NATIONAL:
185     default:
186       // Do nothing.
187       return;
188   }
189 }
190 
191 // Returns true when one national number is the suffix of the other or both are
192 // the same.
IsNationalNumberSuffixOfTheOther(const PhoneNumber & first_number,const PhoneNumber & second_number)193 bool IsNationalNumberSuffixOfTheOther(const PhoneNumber& first_number,
194                                       const PhoneNumber& second_number) {
195   const string& first_number_national_number =
196     SimpleItoa(static_cast<uint64>(first_number.national_number()));
197   const string& second_number_national_number =
198     SimpleItoa(static_cast<uint64>(second_number.national_number()));
199   // Note that HasSuffixString returns true if the numbers are equal.
200   return HasSuffixString(first_number_national_number,
201                          second_number_national_number) ||
202          HasSuffixString(second_number_national_number,
203                          first_number_national_number);
204 }
205 
ToUnicodeCodepoint(const char * unicode_char)206 char32 ToUnicodeCodepoint(const char* unicode_char) {
207   char32 codepoint;
208   EncodingUtils::DecodeUTF8Char(unicode_char, &codepoint);
209   return codepoint;
210 }
211 
212 // Helper method for constructing regular expressions for parsing. Creates an
213 // expression that captures up to max_length digits.
ExtnDigits(int max_length)214 std::string ExtnDigits(int max_length) {
215   return StrCat("([", kDigits, "]{1,", max_length, "})");
216 }
217 
218 // Helper initialiser method to create the regular-expression pattern to match
219 // extensions. Note that:
220 // - There are currently six capturing groups for the extension itself. If this
221 // number is changed, MaybeStripExtension needs to be updated.
222 // - The only capturing groups should be around the digits that you want to
223 // capture as part of the extension, or else parsing will fail!
CreateExtnPattern(bool for_parsing)224 std::string CreateExtnPattern(bool for_parsing) {
225   // We cap the maximum length of an extension based on the ambiguity of the
226   // way the extension is prefixed. As per ITU, the officially allowed
227   // length for extensions is actually 40, but we don't support this since we
228   // haven't seen real examples and this introduces many false interpretations
229   // as the extension labels are not standardized.
230   int ext_limit_after_explicit_label = 20;
231   int ext_limit_after_likely_label = 15;
232   int ext_limit_after_ambiguous_char = 9;
233   int ext_limit_when_not_sure = 6;
234 
235   // Canonical-equivalence doesn't seem to be an option with RE2, so we allow
236   // two options for representing any non-ASCII character like ó - the character
237   // itself, and one in the unicode decomposed form with the combining acute
238   // accent.
239 
240   // Here the extension is called out in a more explicit way, i.e mentioning it
241   // obvious patterns like "ext.".
242   string explicit_ext_labels =
243       "(?:e?xt(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|(?:\xEF\xBD\x85)?"
244       "\xEF\xBD\x98\xEF\xBD\x94(?:\xEF\xBD\x8E)?|\xD0\xB4\xD0\xBE\xD0\xB1|"
245       "anexo)";
246   // One-character symbols that can be used to indicate an extension, and less
247   // commonly used or more ambiguous extension labels.
248   string ambiguous_ext_labels =
249       "(?:[x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E]|int|"
250       "\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94)";
251   // When extension is not separated clearly.
252   string ambiguous_separator = "[- ]+";
253 
254   string rfc_extn = StrCat(kRfc3966ExtnPrefix,
255                            ExtnDigits(ext_limit_after_explicit_label));
256   string explicit_extn = StrCat(
257       kPossibleSeparatorsBetweenNumberAndExtLabel,
258       explicit_ext_labels, kPossibleCharsAfterExtLabel,
259       ExtnDigits(ext_limit_after_explicit_label),
260       kOptionalExtSuffix);
261   string ambiguous_extn = StrCat(
262       kPossibleSeparatorsBetweenNumberAndExtLabel,
263       ambiguous_ext_labels, kPossibleCharsAfterExtLabel,
264       ExtnDigits(ext_limit_after_ambiguous_char),
265       kOptionalExtSuffix);
266   string american_style_extn_with_suffix = StrCat(
267       ambiguous_separator, ExtnDigits(ext_limit_when_not_sure), "#");
268 
269   // The first regular expression covers RFC 3966 format, where the extension is
270   // added using ";ext=". The second more generic where extension is mentioned
271   // with explicit labels like "ext:". In both the above cases we allow more
272   // numbers in extension than any other extension labels. The third one
273   // captures when single character extension labels or less commonly used
274   // labels are present. In such cases we capture fewer extension digits in
275   // order to reduce the chance of falsely interpreting two numbers beside each
276   // other as a number + extension. The fourth one covers the special case of
277   // American numbers where the extension is written with a hash at the end,
278   // such as "- 503#".
279   string extension_pattern = StrCat(
280       rfc_extn, "|",
281       explicit_extn, "|",
282       ambiguous_extn, "|",
283       american_style_extn_with_suffix);
284   // Additional pattern that is supported when parsing extensions, not when
285   // matching.
286   if (for_parsing) {
287     // ",," is commonly used for auto dialling the extension when connected.
288     // Semi-colon works in Iphone and also in Android to pop up a button with
289     // the extension number following.
290     string auto_dialling_and_ext_labels_found = "(?:,{2}|;)";
291     // This is same as kPossibleSeparatorsBetweenNumberAndExtLabel, but not
292     // matching comma as extension label may have it.
293     string possible_separators_number_extLabel_no_comma = "[ \xC2\xA0\\t]*";
294 
295     string auto_dialling_extn = StrCat(
296       possible_separators_number_extLabel_no_comma,
297       auto_dialling_and_ext_labels_found, kPossibleCharsAfterExtLabel,
298       ExtnDigits(ext_limit_after_likely_label),
299       kOptionalExtSuffix);
300     string only_commas_extn = StrCat(
301       possible_separators_number_extLabel_no_comma,
302       "(?:,)+", kPossibleCharsAfterExtLabel,
303       ExtnDigits(ext_limit_after_ambiguous_char),
304       kOptionalExtSuffix);
305     // Here the first pattern is exclusive for extension autodialling formats
306     // which are used when dialling and in this case we accept longer
307     // extensions. However, the second pattern is more liberal on number of
308     // commas that acts as extension labels, so we have strict cap on number of
309     // digits in such extensions.
310     return StrCat(extension_pattern, "|",
311                   auto_dialling_extn, "|",
312                   only_commas_extn);
313   }
314   return extension_pattern;
315 }
316 
317 // Normalizes a string of characters representing a phone number by replacing
318 // all characters found in the accompanying map with the values therein, and
319 // stripping all other characters if remove_non_matches is true.
320 // Parameters:
321 // number - a pointer to a string of characters representing a phone number to
322 //   be normalized.
323 // normalization_replacements - a mapping of characters to what they should be
324 //   replaced by in the normalized version of the phone number
325 // remove_non_matches - indicates whether characters that are not able to be
326 //   replaced should be stripped from the number. If this is false, they will be
327 //   left unchanged in the number.
NormalizeHelper(const std::map<char32,char> & normalization_replacements,bool remove_non_matches,string * number)328 void NormalizeHelper(const std::map<char32, char>& normalization_replacements,
329                      bool remove_non_matches,
330                      string* number) {
331   DCHECK(number);
332   UnicodeText number_as_unicode;
333   number_as_unicode.PointToUTF8(number->data(), static_cast<int>(number->size()));
334   if (!number_as_unicode.UTF8WasValid()) {
335     // The input wasn't valid UTF-8. Produce an empty string to indicate an error.
336     number->clear();
337     return;
338   }
339   string normalized_number;
340   char unicode_char[5];
341   for (UnicodeText::const_iterator it = number_as_unicode.begin();
342        it != number_as_unicode.end();
343        ++it) {
344     std::map<char32, char>::const_iterator found_glyph_pair =
345         normalization_replacements.find(*it);
346     if (found_glyph_pair != normalization_replacements.end()) {
347       normalized_number.push_back(found_glyph_pair->second);
348     } else if (!remove_non_matches) {
349       // Find out how long this unicode char is so we can append it all.
350       int char_len = it.get_utf8(unicode_char);
351       normalized_number.append(unicode_char, char_len);
352     }
353     // If neither of the above are true, we remove this character.
354   }
355   number->assign(normalized_number);
356 }
357 
358 // Returns true if there is any possible number data set for a particular
359 // PhoneNumberDesc.
DescHasPossibleNumberData(const PhoneNumberDesc & desc)360 bool DescHasPossibleNumberData(const PhoneNumberDesc& desc) {
361   // If this is empty, it means numbers of this type inherit from the "general
362   // desc" -> the value "-1" means that no numbers exist for this type.
363   return desc.possible_length_size() != 1 || desc.possible_length(0) != -1;
364 }
365 
366 // Note: DescHasData must account for any of MetadataFilter's
367 // excludableChildFields potentially being absent from the metadata. It must
368 // check them all. For any changes in DescHasData, ensure that all the
369 // excludableChildFields are still being checked. If your change is safe simply
370 // mention why during a review without needing to change MetadataFilter.
371 // Returns true if there is any data set for a particular PhoneNumberDesc.
DescHasData(const PhoneNumberDesc & desc)372 bool DescHasData(const PhoneNumberDesc& desc) {
373   // Checking most properties since we don't know what's present, since a custom
374   // build may have stripped just one of them (e.g. USE_METADATA_LITE strips
375   // exampleNumber). We don't bother checking the PossibleLengthsLocalOnly,
376   // since if this is the only thing that's present we don't really support the
377   // type at all: no type-specific methods will work with only this data.
378   return desc.has_example_number() || DescHasPossibleNumberData(desc) ||
379          desc.has_national_number_pattern();
380 }
381 
382 // Returns the types we have metadata for based on the PhoneMetadata object
383 // passed in.
GetSupportedTypesForMetadata(const PhoneMetadata & metadata,std::set<PhoneNumberUtil::PhoneNumberType> * types)384 void GetSupportedTypesForMetadata(
385     const PhoneMetadata& metadata,
386     std::set<PhoneNumberUtil::PhoneNumberType>* types) {
387   DCHECK(types);
388   for (int i = 0; i <= static_cast<int>(PhoneNumberUtil::kMaxNumberType); ++i) {
389     PhoneNumberUtil::PhoneNumberType type =
390         static_cast<PhoneNumberUtil::PhoneNumberType>(i);
391     if (type == PhoneNumberUtil::FIXED_LINE_OR_MOBILE ||
392         type == PhoneNumberUtil::UNKNOWN) {
393       // Never return FIXED_LINE_OR_MOBILE (it is a convenience type, and
394       // represents that a particular number type can't be
395       // determined) or UNKNOWN (the non-type).
396       continue;
397     }
398     if (DescHasData(*GetNumberDescByType(metadata, type))) {
399       types->insert(type);
400     }
401   }
402 }
403 
404 // Helper method to check a number against possible lengths for this number
405 // type, and determine whether it matches, or is too short or too long.
TestNumberLength(const string & number,const PhoneMetadata & metadata,PhoneNumberUtil::PhoneNumberType type)406 PhoneNumberUtil::ValidationResult TestNumberLength(
407     const string& number, const PhoneMetadata& metadata,
408     PhoneNumberUtil::PhoneNumberType type) {
409   const PhoneNumberDesc* desc_for_type = GetNumberDescByType(metadata, type);
410   // There should always be "possibleLengths" set for every element. This is
411   // declared in the XML schema which is verified by
412   // PhoneNumberMetadataSchemaTest. For size efficiency, where a
413   // sub-description (e.g. fixed-line) has the same possibleLengths as the
414   // parent, this is missing, so we fall back to the general desc (where no
415   // numbers of the type exist at all, there is one possible length (-1) which
416   // is guaranteed not to match the length of any real phone number).
417   RepeatedField<int> possible_lengths =
418       desc_for_type->possible_length_size() == 0
419           ? metadata.general_desc().possible_length()
420           : desc_for_type->possible_length();
421   RepeatedField<int> local_lengths =
422       desc_for_type->possible_length_local_only();
423   if (type == PhoneNumberUtil::FIXED_LINE_OR_MOBILE) {
424     const PhoneNumberDesc* fixed_line_desc =
425         GetNumberDescByType(metadata, PhoneNumberUtil::FIXED_LINE);
426     if (!DescHasPossibleNumberData(*fixed_line_desc)) {
427       // The rare case has been encountered where no fixedLine data is available
428       // (true for some non-geographical entities), so we just check mobile.
429       return TestNumberLength(number, metadata, PhoneNumberUtil::MOBILE);
430     } else {
431       const PhoneNumberDesc* mobile_desc =
432           GetNumberDescByType(metadata, PhoneNumberUtil::MOBILE);
433       if (DescHasPossibleNumberData(*mobile_desc)) {
434         // Merge the mobile data in if there was any. Note that when adding the
435         // possible lengths from mobile, we have to again check they aren't
436         // empty since if they are this indicates they are the same as the
437         // general desc and should be obtained from there.
438         possible_lengths.MergeFrom(
439             mobile_desc->possible_length_size() == 0
440             ? metadata.general_desc().possible_length()
441             : mobile_desc->possible_length());
442         std::sort(possible_lengths.begin(), possible_lengths.end());
443 
444         if (local_lengths.size() == 0) {
445           local_lengths = mobile_desc->possible_length_local_only();
446         } else {
447           local_lengths.MergeFrom(mobile_desc->possible_length_local_only());
448           std::sort(local_lengths.begin(), local_lengths.end());
449         }
450       }
451     }
452   }
453 
454   // If the type is not suported at all (indicated by the possible lengths
455   // containing -1 at this point) we return invalid length.
456   if (possible_lengths.Get(0) == -1) {
457     return PhoneNumberUtil::INVALID_LENGTH;
458   }
459 
460   int actual_length = static_cast<int>(number.length());
461   // This is safe because there is never an overlap beween the possible lengths
462   // and the local-only lengths; this is checked at build time.
463   if (std::find(local_lengths.begin(), local_lengths.end(), actual_length) !=
464       local_lengths.end()) {
465     return PhoneNumberUtil::IS_POSSIBLE_LOCAL_ONLY;
466   }
467   int minimum_length = possible_lengths.Get(0);
468   if (minimum_length == actual_length) {
469     return PhoneNumberUtil::IS_POSSIBLE;
470   } else if (minimum_length > actual_length) {
471     return PhoneNumberUtil::TOO_SHORT;
472   } else if (*(possible_lengths.end() - 1) < actual_length) {
473     return PhoneNumberUtil::TOO_LONG;
474   }
475   // We skip the first element; we've already checked it.
476   return std::find(possible_lengths.begin() + 1, possible_lengths.end(),
477                    actual_length) != possible_lengths.end()
478              ? PhoneNumberUtil::IS_POSSIBLE
479              : PhoneNumberUtil::INVALID_LENGTH;
480 }
481 
482 // Helper method to check a number against possible lengths for this region,
483 // based on the metadata being passed in, and determine whether it matches, or
484 // is too short or too long.
TestNumberLength(const string & number,const PhoneMetadata & metadata)485 PhoneNumberUtil::ValidationResult TestNumberLength(
486     const string& number, const PhoneMetadata& metadata) {
487   return TestNumberLength(number, metadata, PhoneNumberUtil::UNKNOWN);
488 }
489 
490 // Returns a new phone number containing only the fields needed to uniquely
491 // identify a phone number, rather than any fields that capture the context in
492 // which the phone number was created.
493 // These fields correspond to those set in Parse() rather than
494 // ParseAndKeepRawInput().
CopyCoreFieldsOnly(const PhoneNumber & number,PhoneNumber * pruned_number)495 void CopyCoreFieldsOnly(const PhoneNumber& number, PhoneNumber* pruned_number) {
496   pruned_number->set_country_code(number.country_code());
497   pruned_number->set_national_number(number.national_number());
498   if (!number.extension().empty()) {
499     pruned_number->set_extension(number.extension());
500   }
501   if (number.italian_leading_zero()) {
502     pruned_number->set_italian_leading_zero(true);
503     // This field is only relevant if there are leading zeros at all.
504     pruned_number->set_number_of_leading_zeros(
505         number.number_of_leading_zeros());
506   }
507 }
508 
509 // Determines whether the given number is a national number match for the given
510 // PhoneNumberDesc. Does not check against possible lengths!
IsMatch(const MatcherApi & matcher_api,const string & number,const PhoneNumberDesc & desc)511 bool IsMatch(const MatcherApi& matcher_api,
512              const string& number, const PhoneNumberDesc& desc) {
513   return matcher_api.MatchNationalNumber(number, desc, false);
514 }
515 
516 }  // namespace
517 
SetLogger(Logger * logger)518 void PhoneNumberUtil::SetLogger(Logger* logger) {
519   logger_.reset(logger);
520   Logger::set_logger_impl(logger_.get());
521 }
522 
523 class PhoneNumberRegExpsAndMappings {
524  private:
InitializeMapsAndSets()525   void InitializeMapsAndSets() {
526     diallable_char_mappings_.insert(std::make_pair('+', '+'));
527     diallable_char_mappings_.insert(std::make_pair('*', '*'));
528     diallable_char_mappings_.insert(std::make_pair('#', '#'));
529     // Here we insert all punctuation symbols that we wish to respect when
530     // formatting alpha numbers, as they show the intended number groupings.
531     all_plus_number_grouping_symbols_.insert(
532         std::make_pair(ToUnicodeCodepoint("-"), '-'));
533     all_plus_number_grouping_symbols_.insert(
534         std::make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D" /* "-" */), '-'));
535     all_plus_number_grouping_symbols_.insert(
536         std::make_pair(ToUnicodeCodepoint("\xE2\x80\x90" /* "‐" */), '-'));
537     all_plus_number_grouping_symbols_.insert(
538         std::make_pair(ToUnicodeCodepoint("\xE2\x80\x91" /* "‑" */), '-'));
539     all_plus_number_grouping_symbols_.insert(
540         std::make_pair(ToUnicodeCodepoint("\xE2\x80\x92" /* "‒" */), '-'));
541     all_plus_number_grouping_symbols_.insert(
542         std::make_pair(ToUnicodeCodepoint("\xE2\x80\x93" /* "–" */), '-'));
543     all_plus_number_grouping_symbols_.insert(
544         std::make_pair(ToUnicodeCodepoint("\xE2\x80\x94" /* "—" */), '-'));
545     all_plus_number_grouping_symbols_.insert(
546         std::make_pair(ToUnicodeCodepoint("\xE2\x80\x95" /* "―" */), '-'));
547     all_plus_number_grouping_symbols_.insert(
548         std::make_pair(ToUnicodeCodepoint("\xE2\x88\x92" /* "−" */), '-'));
549     all_plus_number_grouping_symbols_.insert(
550         std::make_pair(ToUnicodeCodepoint("/"), '/'));
551     all_plus_number_grouping_symbols_.insert(
552         std::make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F" /* "/" */), '/'));
553     all_plus_number_grouping_symbols_.insert(
554         std::make_pair(ToUnicodeCodepoint(" "), ' '));
555     all_plus_number_grouping_symbols_.insert(
556         std::make_pair(ToUnicodeCodepoint("\xE3\x80\x80" /* " " */), ' '));
557     all_plus_number_grouping_symbols_.insert(
558         std::make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
559     all_plus_number_grouping_symbols_.insert(
560         std::make_pair(ToUnicodeCodepoint("."), '.'));
561     all_plus_number_grouping_symbols_.insert(
562         std::make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E" /* "." */), '.'));
563     // Only the upper-case letters are added here - the lower-case versions are
564     // added programmatically.
565     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("A"), '2'));
566     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("B"), '2'));
567     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("C"), '2'));
568     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("D"), '3'));
569     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("E"), '3'));
570     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("F"), '3'));
571     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("G"), '4'));
572     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("H"), '4'));
573     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("I"), '4'));
574     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("J"), '5'));
575     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("K"), '5'));
576     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("L"), '5'));
577     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("M"), '6'));
578     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("N"), '6'));
579     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("O"), '6'));
580     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("P"), '7'));
581     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("Q"), '7'));
582     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("R"), '7'));
583     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("S"), '7'));
584     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("T"), '8'));
585     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("U"), '8'));
586     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("V"), '8'));
587     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("W"), '9'));
588     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("X"), '9'));
589     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("Y"), '9'));
590     alpha_mappings_.insert(std::make_pair(ToUnicodeCodepoint("Z"), '9'));
591     std::map<char32, char> lower_case_mappings;
592     std::map<char32, char> alpha_letters;
593     for (std::map<char32, char>::const_iterator it = alpha_mappings_.begin();
594          it != alpha_mappings_.end();
595          ++it) {
596       // Convert all the upper-case ASCII letters to lower-case.
597       if (it->first < 128) {
598         char letter_as_upper = static_cast<char>(it->first);
599         char32 letter_as_lower = static_cast<char32>(tolower(letter_as_upper));
600         lower_case_mappings.insert(std::make_pair(letter_as_lower, it->second));
601         // Add the letters in both variants to the alpha_letters map. This just
602         // pairs each letter with its upper-case representation so that it can
603         // be retained when normalising alpha numbers.
604         alpha_letters.insert(std::make_pair(letter_as_lower, letter_as_upper));
605         alpha_letters.insert(std::make_pair(it->first, letter_as_upper));
606       }
607     }
608     // In the Java version we don't insert the lower-case mappings in the map,
609     // because we convert to upper case on the fly. Doing this here would
610     // involve pulling in all of ICU, which we don't want to do if we don't have
611     // to.
612     alpha_mappings_.insert(lower_case_mappings.begin(),
613                            lower_case_mappings.end());
614     alpha_phone_mappings_.insert(alpha_mappings_.begin(),
615                                  alpha_mappings_.end());
616     all_plus_number_grouping_symbols_.insert(alpha_letters.begin(),
617                                              alpha_letters.end());
618     // Add the ASCII digits so that they don't get deleted by NormalizeHelper().
619     for (char c = '0'; c <= '9'; ++c) {
620       diallable_char_mappings_.insert(std::make_pair(c, c));
621       alpha_phone_mappings_.insert(std::make_pair(c, c));
622       all_plus_number_grouping_symbols_.insert(std::make_pair(c, c));
623     }
624 
625     mobile_token_mappings_.insert(std::make_pair(54, '9'));
626     geo_mobile_countries_without_mobile_area_codes_.insert(86);  // China
627     geo_mobile_countries_.insert(52);  // Mexico
628     geo_mobile_countries_.insert(54);  // Argentina
629     geo_mobile_countries_.insert(55);  // Brazil
    // Indonesia: some prefixes only (fixed CDMA wireless)
631     geo_mobile_countries_.insert(62);
632     geo_mobile_countries_.insert(
633         geo_mobile_countries_without_mobile_area_codes_.begin(),
634         geo_mobile_countries_without_mobile_area_codes_.end());
635   }
636 
637   // Regular expression of viable phone numbers. This is location independent.
638   // Checks we have at least three leading digits, and only valid punctuation,
639   // alpha characters and digits in the phone number. Does not include extension
640   // data. The symbol 'x' is allowed here as valid punctuation since it is often
641   // used as a placeholder for carrier codes, for example in Brazilian phone
642   // numbers. We also allow multiple plus-signs at the start.
643   // Corresponds to the following:
644   // [digits]{minLengthNsn}|
645   // plus_sign*(([punctuation]|[star])*[digits]){3,}
646   // ([punctuation]|[star]|[digits]|[alpha])*
647   //
648   // The first reg-ex is to allow short numbers (two digits long) to be parsed
649   // if they are entered as "15" etc, but only if there is no punctuation in
650   // them. The second expression restricts the number of digits to three or
651   // more, but then allows them to be in international form, and to have
652   // alpha-characters and punctuation.
653   const string valid_phone_number_;
654 
655   // Regexp of all possible ways to write extensions, for use when parsing. This
656   // will be run as a case-insensitive regexp match. Wide character versions are
657   // also provided after each ASCII version.
658   // For parsing, we are slightly more lenient in our interpretation than for
659   // matching. Here we allow "comma" and "semicolon" as possible extension
660   // indicators. When matching, these are hardly ever used to indicate this.
661   const string extn_patterns_for_parsing_;
662 
663   // Regular expressions of different parts of the phone-context parameter,
664   // following the syntax defined in RFC3966.
665   const std::string rfc3966_phone_digit_;
666   const std::string alphanum_;
667   const std::string rfc3966_domainlabel_;
668   const std::string rfc3966_toplabel_;
669 
670  public:
671   scoped_ptr<const AbstractRegExpFactory> regexp_factory_;
672   scoped_ptr<RegExpCache> regexp_cache_;
673 
674   // A map that contains characters that are essential when dialling. That means
675   // any of the characters in this map must not be removed from a number when
676   // dialing, otherwise the call will not reach the intended destination.
677   std::map<char32, char> diallable_char_mappings_;
678   // These mappings map a character (key) to a specific digit that should
679   // replace it for normalization purposes.
680   std::map<char32, char> alpha_mappings_;
  // For performance reasons, store a single map that combines alpha_mappings_
  // with the ASCII digits.
683   std::map<char32, char> alpha_phone_mappings_;
684 
685   // Separate map of all symbols that we wish to retain when formatting alpha
686   // numbers. This includes digits, ascii letters and number grouping symbols
687   // such as "-" and " ".
688   std::map<char32, char> all_plus_number_grouping_symbols_;
689 
690   // Map of country calling codes that use a mobile token before the area code.
691   // One example of when this is relevant is when determining the length of the
692   // national destination code, which should be the length of the area code plus
693   // the length of the mobile token.
694   std::map<int, char> mobile_token_mappings_;
695 
696   // Set of country codes that have geographically assigned mobile numbers (see
697   // geo_mobile_countries_ below) which are not based on *area codes*. For
698   // example, in China mobile numbers start with a carrier indicator, and beyond
699   // that are geographically assigned: this carrier indicator is not considered
700   // to be an area code.
701   std::set<int> geo_mobile_countries_without_mobile_area_codes_;
702 
703   // Set of country calling codes that have geographically assigned mobile
704   // numbers. This may not be complete; we add calling codes case by case, as we
705   // find geographical mobile numbers or hear from user reports.
706   std::set<int> geo_mobile_countries_;
707 
708   // Pattern that makes it easy to distinguish whether a region has a single
709   // international dialing prefix or not. If a region has a single international
710   // prefix (e.g. 011 in USA), it will be represented as a string that contains
711   // a sequence of ASCII digits, and possibly a tilde, which signals waiting for
712   // the tone. If there are multiple available international prefixes in a
713   // region, they will be represented as a regex string that always contains one
714   // or more characters that are not ASCII digits or a tilde.
715   scoped_ptr<const RegExp> single_international_prefix_;
716 
717   scoped_ptr<const RegExp> digits_pattern_;
718   scoped_ptr<const RegExp> capturing_digit_pattern_;
719   scoped_ptr<const RegExp> capturing_ascii_digits_pattern_;
720 
721   // Regular expression of acceptable characters that may start a phone number
722   // for the purposes of parsing. This allows us to strip away meaningless
723   // prefixes to phone numbers that may be mistakenly given to us. This consists
724   // of digits, the plus symbol and arabic-indic digits. This does not contain
725   // alpha characters, although they may be used later in the number. It also
726   // does not include other punctuation, as this will be stripped later during
727   // parsing and is of no information value when parsing a number. The string
728   // starting with this valid character is captured.
729   // This corresponds to VALID_START_CHAR in the java version.
730   scoped_ptr<const RegExp> valid_start_char_pattern_;
731 
732   // Regular expression of valid characters before a marker that might indicate
733   // a second number.
734   scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern_;
735 
736   // Regular expression of trailing characters that we want to remove. We remove
737   // all characters that are not alpha or numerical characters. The hash
738   // character is retained here, as it may signify the previous block was an
739   // extension. Note the capturing block at the start to capture the rest of the
740   // number if this was a match.
741   // This corresponds to UNWANTED_END_CHAR_PATTERN in the java version.
742   scoped_ptr<const RegExp> unwanted_end_char_pattern_;
743 
744   // Regular expression of groups of valid punctuation characters.
745   scoped_ptr<const RegExp> separator_pattern_;
746 
747   // Regexp of all possible ways to write extensions, for use when finding phone
748   // numbers in text. This will be run as a case-insensitive regexp match. Wide
749   // character versions are also provided after each ASCII version.
750   const string extn_patterns_for_matching_;
751 
752   // Regexp of all known extension prefixes used by different regions followed
753   // by 1 or more valid digits, for use when parsing.
754   scoped_ptr<const RegExp> extn_pattern_;
755 
756   // We append optionally the extension pattern to the end here, as a valid
757   // phone number may have an extension prefix appended, followed by 1 or more
758   // digits.
759   scoped_ptr<const RegExp> valid_phone_number_pattern_;
760 
761   // We use this pattern to check if the phone number has at least three letters
762   // in it - if so, then we treat it as a number where some phone-number digits
763   // are represented by letters.
764   scoped_ptr<const RegExp> valid_alpha_phone_pattern_;
765 
766   scoped_ptr<const RegExp> first_group_capturing_pattern_;
767 
768   scoped_ptr<const RegExp> carrier_code_pattern_;
769 
770   scoped_ptr<const RegExp> plus_chars_pattern_;
771 
772   // Regular expression of valid global-number-digits for the phone-context
773   // parameter, following the syntax defined in RFC3966.
774   std::unique_ptr<const RegExp> rfc3966_global_number_digits_pattern_;
775 
776   // Regular expression of valid domainname for the phone-context parameter,
777   // following the syntax defined in RFC3966.
778   std::unique_ptr<const RegExp> rfc3966_domainname_pattern_;
779 
  // Builds every pattern string, creates the regexp factory and cache,
  // compiles all regular expressions used by PhoneNumberUtil and finally fills
  // the character / calling-code maps and sets via InitializeMapsAndSets().
  //
  // Members are initialised in the order in which they are declared in the
  // class, not in the order written here. The initialisers below depend on
  // that: the pattern strings and regexp_factory_ are declared before the
  // regular expressions that are built from them.
  PhoneNumberRegExpsAndMappings()
      : valid_phone_number_(
            StrCat(kDigits, "{", PhoneNumberUtil::kMinLengthForNsn, "}|[",
                   PhoneNumberUtil::kPlusChars, "]*(?:[",
                   PhoneNumberUtil::kValidPunctuation, kStarSign, "]*",
                   kDigits, "){3,}[", PhoneNumberUtil::kValidPunctuation,
                   kStarSign, kValidAlpha, kDigits, "]*")),
        extn_patterns_for_parsing_(CreateExtnPattern(/* for_parsing= */ true)),
        // Building blocks for the RFC3966 phone-context patterns compiled at
        // the end of this list.
        rfc3966_phone_digit_(
            StrCat("(", kDigits, "|", kRfc3966VisualSeparator, ")")),
        alphanum_(StrCat(kValidAlphaInclUppercase, kDigits)),
        rfc3966_domainlabel_(
            StrCat("[", alphanum_, "]+((\\-)*[", alphanum_, "])*")),
        rfc3966_toplabel_(StrCat("[", kValidAlphaInclUppercase,
                                 "]+((\\-)*[", alphanum_, "])*")),
        regexp_factory_(new RegExpFactory()),
        // NOTE(review): the second argument presumably sizes the cache;
        // confirm against the RegExpCache constructor.
        regexp_cache_(new RegExpCache(*regexp_factory_.get(), 128)),
        // The maps and sets start out empty; InitializeMapsAndSets() fills
        // them from the constructor body.
        diallable_char_mappings_(),
        alpha_mappings_(),
        alpha_phone_mappings_(),
        all_plus_number_grouping_symbols_(),
        mobile_token_mappings_(),
        geo_mobile_countries_without_mobile_area_codes_(),
        geo_mobile_countries_(),
        // The escaped bytes are the UTF-8 encodings of U+2053, U+223C and
        // U+FF5E, i.e. the non-ASCII tilde variants shown in the comment.
        single_international_prefix_(regexp_factory_->CreateRegExp(
            /* "[\\d]+(?:[~⁓∼～][\\d]+)?" */
            "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")),
        digits_pattern_(
            regexp_factory_->CreateRegExp(StrCat("[", kDigits, "]*"))),
        capturing_digit_pattern_(
            regexp_factory_->CreateRegExp(StrCat("([", kDigits, "])"))),
        capturing_ascii_digits_pattern_(
            regexp_factory_->CreateRegExp("(\\d+)")),
        valid_start_char_pattern_(regexp_factory_->CreateRegExp(
            StrCat("[", PhoneNumberUtil::kPlusChars, kDigits, "]"))),
        capture_up_to_second_number_start_pattern_(
            regexp_factory_->CreateRegExp(
                PhoneNumberUtil::kCaptureUpToSecondNumberStart)),
        // Matches a single character that is neither a number, a letter nor
        // the hash sign.
        unwanted_end_char_pattern_(
            regexp_factory_->CreateRegExp("[^\\p{N}\\p{L}#]")),
        separator_pattern_(regexp_factory_->CreateRegExp(
            StrCat("[", PhoneNumberUtil::kValidPunctuation, "]+"))),
        extn_patterns_for_matching_(
            CreateExtnPattern(/* for_parsing= */ false)),
        // "(?i)" makes the extension and phone-number patterns
        // case-insensitive.
        extn_pattern_(regexp_factory_->CreateRegExp(
            StrCat("(?i)(?:", extn_patterns_for_parsing_, ")$"))),
        valid_phone_number_pattern_(regexp_factory_->CreateRegExp(
            StrCat("(?i)", valid_phone_number_,
                   "(?:", extn_patterns_for_parsing_, ")?"))),
        valid_alpha_phone_pattern_(regexp_factory_->CreateRegExp(
            StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))),
        // The first_group_capturing_pattern was originally set to $1 but there
        // are some countries for which the first group is not used in the
        // national pattern (e.g. Argentina) so the $1 group does not match
        // correctly. Therefore, we use \d, so that the first group actually
        // used in the pattern will be matched.
        first_group_capturing_pattern_(
            regexp_factory_->CreateRegExp("(\\$\\d)")),
        carrier_code_pattern_(regexp_factory_->CreateRegExp("\\$CC")),
        plus_chars_pattern_(regexp_factory_->CreateRegExp(
            StrCat("[", PhoneNumberUtil::kPlusChars, "]+"))),
        // Anchored patterns for the two accepted forms of the RFC3966
        // phone-context parameter: global number digits and a domain name.
        rfc3966_global_number_digits_pattern_(regexp_factory_->CreateRegExp(
            StrCat("^\\", kPlusSign, rfc3966_phone_digit_, "*", kDigits,
                   rfc3966_phone_digit_, "*$"))),
        rfc3966_domainname_pattern_(regexp_factory_->CreateRegExp(StrCat(
            "^(", rfc3966_domainlabel_, "\\.)*", rfc3966_toplabel_, "\\.?$"))) {
    InitializeMapsAndSets();
  }
848 
 // This type is neither copyable nor movable.
  // Copy construction and copy assignment are explicitly deleted below; since
  // the copy operations are user-declared, no move operations are implicitly
  // generated either.
  PhoneNumberRegExpsAndMappings(const PhoneNumberRegExpsAndMappings&) = delete;
  PhoneNumberRegExpsAndMappings& operator=(
      const PhoneNumberRegExpsAndMappings&) = delete;
853 };
854 
855 // Private constructor. Also takes care of initialisation.
PhoneNumberUtil()856 PhoneNumberUtil::PhoneNumberUtil()
857     : logger_(Logger::set_logger_impl(new NullLogger())),
858       matcher_api_(new RegexBasedMatcher()),
859       reg_exps_(new PhoneNumberRegExpsAndMappings),
860       country_calling_code_to_region_code_map_(
861           new std::vector<IntRegionsPair>()),
862       nanpa_regions_(new std::set<string>()),
863       region_to_metadata_map_(new std::map<string, PhoneMetadata>()),
864       country_code_to_non_geographical_metadata_map_(
865           new std::map<int, PhoneMetadata>) {
866   Logger::set_logger_impl(logger_.get());
867   // TODO: Update the java version to put the contents of the init
868   // method inside the constructor as well to keep both in sync.
869   PhoneMetadataCollection metadata_collection;
870   if (!LoadCompiledInMetadata(&metadata_collection)) {
871     LOG(DFATAL) << "Could not parse compiled-in metadata.";
872     return;
873   }
874   // Storing data in a temporary map to make it easier to find other regions
875   // that share a country calling code when inserting data.
876   std::map<int, std::list<string>* > country_calling_code_to_region_map;
877   for (RepeatedPtrField<PhoneMetadata>::const_iterator it =
878            metadata_collection.metadata().begin();
879        it != metadata_collection.metadata().end();
880        ++it) {
881     const string& region_code = it->id();
882     if (region_code == RegionCode::GetUnknown()) {
883       continue;
884     }
885 
886     int country_calling_code = it->country_code();
887     if (kRegionCodeForNonGeoEntity == region_code) {
888       country_code_to_non_geographical_metadata_map_->insert(std::make_pair(country_calling_code, *it));
889     } else {
890       region_to_metadata_map_->insert(std::make_pair(region_code, *it));
891     }
892     std::map<int, std::list<string>* >::iterator calling_code_in_map =
893         country_calling_code_to_region_map.find(country_calling_code);
894     if (calling_code_in_map != country_calling_code_to_region_map.end()) {
895       if (it->main_country_for_code()) {
896         calling_code_in_map->second->push_front(region_code);
897       } else {
898         calling_code_in_map->second->push_back(region_code);
899       }
900     } else {
901       // For most country calling codes, there will be only one region code.
902       std::list<string>* list_with_region_code = new std::list<string>();
903       list_with_region_code->push_back(region_code);
904       country_calling_code_to_region_map.insert(
905           std::make_pair(country_calling_code, list_with_region_code));
906     }
907     if (country_calling_code == kNanpaCountryCode) {
908         nanpa_regions_->insert(region_code);
909     }
910   }
911 
912   country_calling_code_to_region_code_map_->insert(
913       country_calling_code_to_region_code_map_->begin(),
914       country_calling_code_to_region_map.begin(),
915       country_calling_code_to_region_map.end());
916   // Sort all the pairs in ascending order according to country calling code.
917   std::sort(country_calling_code_to_region_code_map_->begin(),
918             country_calling_code_to_region_code_map_->end(), OrderByFirst());
919 
920 #ifdef LIBPHONENUMBER_UPGRADE
921   UpdateLibphonenumber::LoadUpdateData();
922   UpdateMetadata::UpdatePhoneNumber(country_code_to_non_geographical_metadata_map_, region_to_metadata_map_);
923 #endif
924 }
925 
~PhoneNumberUtil()926 PhoneNumberUtil::~PhoneNumberUtil() {
927   gtl::STLDeleteContainerPairSecondPointers(
928       country_calling_code_to_region_code_map_->begin(),
929       country_calling_code_to_region_code_map_->end());
930 }
931 
GetSupportedRegions(std::set<string> * regions) const932 void PhoneNumberUtil::GetSupportedRegions(std::set<string>* regions)
933     const {
934   DCHECK(regions);
935   for (std::map<string, PhoneMetadata>::const_iterator it =
936        region_to_metadata_map_->begin(); it != region_to_metadata_map_->end();
937        ++it) {
938     regions->insert(it->first);
939   }
940 }
941 
GetSupportedGlobalNetworkCallingCodes(std::set<int> * calling_codes) const942 void PhoneNumberUtil::GetSupportedGlobalNetworkCallingCodes(
943     std::set<int>* calling_codes) const {
944   DCHECK(calling_codes);
945   for (std::map<int, PhoneMetadata>::const_iterator it =
946            country_code_to_non_geographical_metadata_map_->begin();
947        it != country_code_to_non_geographical_metadata_map_->end(); ++it) {
948     calling_codes->insert(it->first);
949   }
950 }
951 
GetSupportedCallingCodes(std::set<int> * calling_codes) const952 void PhoneNumberUtil::GetSupportedCallingCodes(
953     std::set<int>* calling_codes) const {
954   DCHECK(calling_codes);
955   for (std::vector<IntRegionsPair>::const_iterator it =
956            country_calling_code_to_region_code_map_->begin();
957        it != country_calling_code_to_region_code_map_->end(); ++it) {
958     calling_codes->insert(it->first);
959   }
960 }
961 
GetSupportedTypesForRegion(const string & region_code,std::set<PhoneNumberType> * types) const962 void PhoneNumberUtil::GetSupportedTypesForRegion(
963     const string& region_code,
964     std::set<PhoneNumberType>* types) const {
965   DCHECK(types);
966   if (!IsValidRegionCode(region_code)) {
967     LOG(WARNING) << "Invalid or unknown region code provided: " << region_code;
968     return;
969   }
970   const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
971   GetSupportedTypesForMetadata(*metadata, types);
972 }
973 
GetSupportedTypesForNonGeoEntity(int country_calling_code,std::set<PhoneNumberType> * types) const974 void PhoneNumberUtil::GetSupportedTypesForNonGeoEntity(
975     int country_calling_code,
976     std::set<PhoneNumberType>* types) const {
977   DCHECK(types);
978   const PhoneMetadata* metadata =
979       GetMetadataForNonGeographicalRegion(country_calling_code);
980   if (metadata == NULL) {
981     LOG(WARNING) << "Unknown country calling code for a non-geographical "
982                  << "entity provided: "
983                  << country_calling_code;
984     return;
985   }
986   GetSupportedTypesForMetadata(*metadata, types);
987 }
988 
989 // Public wrapper function to get a PhoneNumberUtil instance with the default
990 // metadata file.
991 // static
GetInstance()992 PhoneNumberUtil* PhoneNumberUtil::GetInstance() {
993   return Singleton<PhoneNumberUtil>::GetInstance();
994 }
995 
GetExtnPatternsForMatching() const996 const string& PhoneNumberUtil::GetExtnPatternsForMatching() const {
997   return reg_exps_->extn_patterns_for_matching_;
998 }
999 
StartsWithPlusCharsPattern(const string & number) const1000 bool PhoneNumberUtil::StartsWithPlusCharsPattern(const string& number)
1001     const {
1002   const scoped_ptr<RegExpInput> number_string_piece(
1003       reg_exps_->regexp_factory_->CreateInput(number));
1004   return reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get());
1005 }
1006 
ContainsOnlyValidDigits(const string & s) const1007 bool PhoneNumberUtil::ContainsOnlyValidDigits(const string& s) const {
1008   return reg_exps_->digits_pattern_->FullMatch(s);
1009 }
1010 
TrimUnwantedEndChars(string * number) const1011 void PhoneNumberUtil::TrimUnwantedEndChars(string* number) const {
1012   DCHECK(number);
1013   UnicodeText number_as_unicode;
1014   number_as_unicode.PointToUTF8(number->data(), static_cast<int>(number->size()));
1015   if (!number_as_unicode.UTF8WasValid()) {
1016     // The input wasn't valid UTF-8. Produce an empty string to indicate an error.
1017     number->clear();
1018     return;
1019   }
1020   char current_char[5];
1021   int len;
1022   UnicodeText::const_reverse_iterator reverse_it(number_as_unicode.end());
1023   for (; reverse_it.base() != number_as_unicode.begin(); ++reverse_it) {
1024     len = reverse_it.get_utf8(current_char);
1025     current_char[len] = '\0';
1026     if (!reg_exps_->unwanted_end_char_pattern_->FullMatch(current_char)) {
1027       break;
1028     }
1029   }
1030 
1031   number->assign(UnicodeText::UTF8Substring(number_as_unicode.begin(),
1032                                             reverse_it.base()));
1033 }
1034 
IsFormatEligibleForAsYouTypeFormatter(const string & format) const1035 bool PhoneNumberUtil::IsFormatEligibleForAsYouTypeFormatter(
1036     const string& format) const {
1037   // A pattern that is used to determine if a numberFormat under
1038   // availableFormats is eligible to be used by the AYTF. It is eligible when
1039   // the format element under numberFormat contains groups of the dollar sign
1040   // followed by a single digit, separated by valid phone number punctuation.
1041   // This prevents invalid punctuation (such as the star sign in Israeli star
1042   // numbers) getting into the output of the AYTF. We require that the first
1043   // group is present in the output pattern to ensure no data is lost while
1044   // formatting; when we format as you type, this should always be the case.
1045   const RegExp& eligible_format_pattern = reg_exps_->regexp_cache_->GetRegExp(
1046       StrCat("[", kValidPunctuation, "]*", "\\$1",
1047              "[", kValidPunctuation, "]*", "(\\$\\d",
1048              "[", kValidPunctuation, "]*)*"));
1049   return eligible_format_pattern.FullMatch(format);
1050 }
1051 
FormattingRuleHasFirstGroupOnly(const string & national_prefix_formatting_rule) const1052 bool PhoneNumberUtil::FormattingRuleHasFirstGroupOnly(
1053     const string& national_prefix_formatting_rule) const {
1054   // A pattern that is used to determine if the national prefix formatting rule
1055   // has the first group only, i.e., does not start with the national prefix.
1056   // Note that the pattern explicitly allows for unbalanced parentheses.
1057   const RegExp& first_group_only_prefix_pattern =
1058       reg_exps_->regexp_cache_->GetRegExp("\\(?\\$1\\)?");
1059   return national_prefix_formatting_rule.empty() ||
1060       first_group_only_prefix_pattern.FullMatch(
1061           national_prefix_formatting_rule);
1062 }
1063 
GetNddPrefixForRegion(const string & region_code,bool strip_non_digits,string * national_prefix) const1064 void PhoneNumberUtil::GetNddPrefixForRegion(const string& region_code,
1065                                             bool strip_non_digits,
1066                                             string* national_prefix) const {
1067   DCHECK(national_prefix);
1068   const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
1069   if (!metadata) {
1070     LOG(WARNING) << "Invalid or unknown region code (" << region_code
1071                  << ") provided.";
1072     return;
1073   }
1074   national_prefix->assign(metadata->national_prefix());
1075   if (strip_non_digits) {
1076     // Note: if any other non-numeric symbols are ever used in national
1077     // prefixes, these would have to be removed here as well.
1078     strrmm(national_prefix, "~");
1079   }
1080 }
1081 
IsValidRegionCode(const string & region_code) const1082 bool PhoneNumberUtil::IsValidRegionCode(const string& region_code) const {
1083   return (region_to_metadata_map_->find(region_code) !=
1084           region_to_metadata_map_->end());
1085 }
1086 
HasValidCountryCallingCode(int country_calling_code) const1087 bool PhoneNumberUtil::HasValidCountryCallingCode(
1088     int country_calling_code) const {
1089   // Create an IntRegionsPair with the country_code passed in, and use it to
1090   // locate the pair with the same country_code in the sorted vector.
1091   IntRegionsPair target_pair;
1092   target_pair.first = country_calling_code;
1093   return (std::binary_search(country_calling_code_to_region_code_map_->begin(),
1094                              country_calling_code_to_region_code_map_->end(),
1095                              target_pair, OrderByFirst()));
1096 }
1097 
1098 // Returns a pointer to the phone metadata for the appropriate region or NULL
1099 // if the region code is invalid or unknown.
GetMetadataForRegion(const string & region_code) const1100 const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegion(
1101     const string& region_code) const {
1102   std::map<string, PhoneMetadata>::const_iterator it =
1103       region_to_metadata_map_->find(region_code);
1104   if (it != region_to_metadata_map_->end()) {
1105     return &it->second;
1106   }
1107   return NULL;
1108 }
1109 
GetMetadataForNonGeographicalRegion(int country_calling_code) const1110 const PhoneMetadata* PhoneNumberUtil::GetMetadataForNonGeographicalRegion(
1111     int country_calling_code) const {
1112   std::map<int, PhoneMetadata>::const_iterator it =
1113       country_code_to_non_geographical_metadata_map_->find(
1114           country_calling_code);
1115   if (it != country_code_to_non_geographical_metadata_map_->end()) {
1116     return &it->second;
1117   }
1118   return NULL;
1119 }
1120 
Format(const PhoneNumber & number,PhoneNumberFormat number_format,string * formatted_number) const1121 void PhoneNumberUtil::Format(const PhoneNumber& number,
1122                              PhoneNumberFormat number_format,
1123                              string* formatted_number) const {
1124   DCHECK(formatted_number);
1125   if (number.national_number() == 0) {
1126     const string& raw_input = number.raw_input();
1127     if (!raw_input.empty()) {
1128       // Unparseable numbers that kept their raw input just use that.
1129       // This is the only case where a number can be formatted as E164 without a
1130       // leading '+' symbol (but the original number wasn't parseable anyway).
1131       // TODO: Consider removing the 'if' above so that unparseable
1132       // strings without raw input format to the empty string instead of "+00".
1133       formatted_number->assign(raw_input);
1134       return;
1135     }
1136   }
1137   int country_calling_code = number.country_code();
1138   string national_significant_number;
1139   GetNationalSignificantNumber(number, &national_significant_number);
1140   if (number_format == E164) {
1141     // Early exit for E164 case (even if the country calling code is invalid)
1142     // since no formatting of the national number needs to be applied.
1143     // Extensions are not formatted.
1144     formatted_number->assign(national_significant_number);
1145     PrefixNumberWithCountryCallingCode(country_calling_code, E164,
1146                                        formatted_number);
1147     return;
1148   }
1149   if (!HasValidCountryCallingCode(country_calling_code)) {
1150     formatted_number->assign(national_significant_number);
1151     return;
1152   }
1153   // Note here that all NANPA formatting rules are contained by US, so we use
1154   // that to format NANPA numbers. The same applies to Russian Fed regions -
1155   // rules are contained by Russia. French Indian Ocean country rules are
1156   // contained by Réunion.
1157   string region_code;
1158   GetRegionCodeForCountryCode(country_calling_code, &region_code);
1159   // Metadata cannot be NULL because the country calling code is valid (which
1160   // means that the region code cannot be ZZ and must be one of our supported
1161   // region codes).
1162   const PhoneMetadata* metadata =
1163       GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
1164   FormatNsn(national_significant_number, *metadata, number_format,
1165             formatted_number);
1166   MaybeAppendFormattedExtension(number, *metadata, number_format,
1167                                 formatted_number);
1168   PrefixNumberWithCountryCallingCode(country_calling_code, number_format,
1169                                      formatted_number);
1170 }
1171 
FormatByPattern(const PhoneNumber & number,PhoneNumberFormat number_format,const RepeatedPtrField<NumberFormat> & user_defined_formats,string * formatted_number) const1172 void PhoneNumberUtil::FormatByPattern(
1173     const PhoneNumber& number,
1174     PhoneNumberFormat number_format,
1175     const RepeatedPtrField<NumberFormat>& user_defined_formats,
1176     string* formatted_number) const {
1177   DCHECK(formatted_number);
1178   int country_calling_code = number.country_code();
1179   // Note GetRegionCodeForCountryCode() is used because formatting information
1180   // for regions which share a country calling code is contained by only one
1181   // region for performance reasons. For example, for NANPA regions it will be
1182   // contained in the metadata for US.
1183   string national_significant_number;
1184   GetNationalSignificantNumber(number, &national_significant_number);
1185   if (!HasValidCountryCallingCode(country_calling_code)) {
1186     formatted_number->assign(national_significant_number);
1187     return;
1188   }
1189   string region_code;
1190   GetRegionCodeForCountryCode(country_calling_code, &region_code);
1191   // Metadata cannot be NULL because the country calling code is valid.
1192   const PhoneMetadata* metadata =
1193       GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
1194   const NumberFormat* formatting_pattern =
1195       ChooseFormattingPatternForNumber(user_defined_formats,
1196                                        national_significant_number);
1197   if (!formatting_pattern) {
1198     // If no pattern above is matched, we format the number as a whole.
1199     formatted_number->assign(national_significant_number);
1200   } else {
1201     NumberFormat num_format_copy;
1202     // Before we do a replacement of the national prefix pattern $NP with the
1203     // national prefix, we need to copy the rule so that subsequent replacements
1204     // for different numbers have the appropriate national prefix.
1205     num_format_copy.MergeFrom(*formatting_pattern);
1206     string national_prefix_formatting_rule(
1207         formatting_pattern->national_prefix_formatting_rule());
1208     if (!national_prefix_formatting_rule.empty()) {
1209       const string& national_prefix = metadata->national_prefix();
1210       if (!national_prefix.empty()) {
1211         // Replace $NP with national prefix and $FG with the first group ($1).
1212         GlobalReplaceSubstring("$NP", national_prefix,
1213                             &national_prefix_formatting_rule);
1214         GlobalReplaceSubstring("$FG", "$1", &national_prefix_formatting_rule);
1215         num_format_copy.set_national_prefix_formatting_rule(
1216             national_prefix_formatting_rule);
1217       } else {
1218         // We don't want to have a rule for how to format the national prefix if
1219         // there isn't one.
1220         num_format_copy.clear_national_prefix_formatting_rule();
1221       }
1222     }
1223     FormatNsnUsingPattern(national_significant_number, num_format_copy,
1224                           number_format, formatted_number);
1225   }
1226   MaybeAppendFormattedExtension(number, *metadata, NATIONAL, formatted_number);
1227   PrefixNumberWithCountryCallingCode(country_calling_code, number_format,
1228                                      formatted_number);
1229 }
1230 
FormatNationalNumberWithCarrierCode(const PhoneNumber & number,const string & carrier_code,string * formatted_number) const1231 void PhoneNumberUtil::FormatNationalNumberWithCarrierCode(
1232     const PhoneNumber& number,
1233     const string& carrier_code,
1234     string* formatted_number) const {
1235   int country_calling_code = number.country_code();
1236   string national_significant_number;
1237   GetNationalSignificantNumber(number, &national_significant_number);
1238   if (!HasValidCountryCallingCode(country_calling_code)) {
1239     formatted_number->assign(national_significant_number);
1240     return;
1241   }
1242 
1243   // Note GetRegionCodeForCountryCode() is used because formatting information
1244   // for regions which share a country calling code is contained by only one
1245   // region for performance reasons. For example, for NANPA regions it will be
1246   // contained in the metadata for US.
1247   string region_code;
1248   GetRegionCodeForCountryCode(country_calling_code, &region_code);
1249   // Metadata cannot be NULL because the country calling code is valid.
1250   const PhoneMetadata* metadata =
1251       GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
1252   FormatNsnWithCarrier(national_significant_number, *metadata, NATIONAL,
1253                        carrier_code, formatted_number);
1254   MaybeAppendFormattedExtension(number, *metadata, NATIONAL, formatted_number);
1255   PrefixNumberWithCountryCallingCode(country_calling_code, NATIONAL,
1256                                      formatted_number);
1257 }
1258 
GetMetadataForRegionOrCallingCode(int country_calling_code,const string & region_code) const1259 const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegionOrCallingCode(
1260       int country_calling_code, const string& region_code) const {
1261   return kRegionCodeForNonGeoEntity == region_code
1262       ? GetMetadataForNonGeographicalRegion(country_calling_code)
1263       : GetMetadataForRegion(region_code);
1264 }
1265 
FormatNationalNumberWithPreferredCarrierCode(const PhoneNumber & number,const string & fallback_carrier_code,string * formatted_number) const1266 void PhoneNumberUtil::FormatNationalNumberWithPreferredCarrierCode(
1267     const PhoneNumber& number,
1268     const string& fallback_carrier_code,
1269     string* formatted_number) const {
1270   FormatNationalNumberWithCarrierCode(
1271       number,
1272       // Historically, we set this to an empty string when parsing with raw
1273       // input if none was found in the input string. However, this doesn't
1274       // result in a number we can dial. For this reason, we treat the empty
1275       // string the same as if it isn't set at all.
1276       !number.preferred_domestic_carrier_code().empty()
1277           ? number.preferred_domestic_carrier_code()
1278           : fallback_carrier_code,
1279       formatted_number);
1280 }
1281 
FormatNumberForMobileDialing(const PhoneNumber & number,const string & calling_from,bool with_formatting,string * formatted_number) const1282 void PhoneNumberUtil::FormatNumberForMobileDialing(
1283     const PhoneNumber& number,
1284     const string& calling_from,
1285     bool with_formatting,
1286     string* formatted_number) const {
1287   int country_calling_code = number.country_code();
1288   if (!HasValidCountryCallingCode(country_calling_code)) {
1289     formatted_number->assign(number.has_raw_input() ? number.raw_input() : "");
1290     return;
1291   }
1292 
1293   formatted_number->assign("");
1294   // Clear the extension, as that part cannot normally be dialed together with
1295   // the main number.
1296   PhoneNumber number_no_extension(number);
1297   number_no_extension.clear_extension();
1298   string region_code;
1299   GetRegionCodeForCountryCode(country_calling_code, &region_code);
1300   PhoneNumberType number_type = GetNumberType(number_no_extension);
1301   bool is_valid_number = (number_type != UNKNOWN);
1302   if (calling_from == region_code) {
1303     bool is_fixed_line_or_mobile =
1304         (number_type == FIXED_LINE) || (number_type == MOBILE) ||
1305         (number_type == FIXED_LINE_OR_MOBILE);
1306     // Carrier codes may be needed in some countries. We handle this here.
1307     if ((region_code == "BR") && (is_fixed_line_or_mobile)) {
1308       // Historically, we set this to an empty string when parsing with raw
1309       // input if none was found in the input string. However, this doesn't
1310       // result in a number we can dial. For this reason, we treat the empty
1311       // string the same as if it isn't set at all.
1312       if (!number_no_extension.preferred_domestic_carrier_code().empty()) {
1313         FormatNationalNumberWithPreferredCarrierCode(number_no_extension, "",
1314                                                      formatted_number);
1315       } else {
1316         // Brazilian fixed line and mobile numbers need to be dialed with a
1317         // carrier code when called within Brazil. Without that, most of the
1318         // carriers won't connect the call. Because of that, we return an empty
1319         // string here.
1320         formatted_number->assign("");
1321       }
1322     } else if (country_calling_code == kNanpaCountryCode) {
1323       // For NANPA countries, we output international format for numbers that
1324       // can be dialed internationally, since that always works, except for
1325       // numbers which might potentially be short numbers, which are always
1326       // dialled in national format.
1327       const PhoneMetadata* region_metadata = GetMetadataForRegion(calling_from);
1328       string national_number;
1329       GetNationalSignificantNumber(number_no_extension, &national_number);
1330       if (CanBeInternationallyDialled(number_no_extension) &&
1331           TestNumberLength(national_number, *region_metadata) != TOO_SHORT) {
1332         Format(number_no_extension, INTERNATIONAL, formatted_number);
1333       } else {
1334         Format(number_no_extension, NATIONAL, formatted_number);
1335       }
1336     } else {
1337       // For non-geographical countries, and Mexican, Chilean and Uzbek fixed
1338       // line and mobile numbers, we output international format for numbers
1339       // that can be dialed internationally as that always works.
1340       if ((region_code == kRegionCodeForNonGeoEntity ||
1341            // MX fixed line and mobile numbers should always be formatted in
1342            // international format, even when dialed within MX. For national
1343            // format to work, a carrier code needs to be used, and the correct
1344            // carrier code depends on if the caller and callee are from the same
1345            // local area. It is trickier to get that to work correctly than
1346            // using international format, which is tested to work fine on all
1347            // carriers.
1348            // CL fixed line numbers need the national prefix when dialing in the
1349            // national format, but don't have it when used for display. The
1350            // reverse is true for mobile numbers. As a result, we output them in
1351            // the international format to make it work.
1352 	   // UZ mobile and fixed-line numbers have to be formatted in
1353            // international format or prefixed with special codes like 03, 04
1354            // (for fixed-line) and 05 (for mobile) for dialling successfully
1355            // from mobile devices. As we do not have complete information on
1356            // special codes and to be consistent with formatting across all
1357            // phone types we return the number in international format here.
1358            ((region_code == "MX" ||
1359              region_code == "CL" ||
1360              region_code == "UZ") &&
1361             is_fixed_line_or_mobile)) &&
1362           CanBeInternationallyDialled(number_no_extension)) {
1363         Format(number_no_extension, INTERNATIONAL, formatted_number);
1364       } else {
1365         Format(number_no_extension, NATIONAL, formatted_number);
1366       }
1367     }
1368   } else if (is_valid_number &&
1369       CanBeInternationallyDialled(number_no_extension)) {
1370     // We assume that short numbers are not diallable from outside their
1371     // region, so if a number is not a valid regular length phone number, we
1372     // treat it as if it cannot be internationally dialled.
1373     with_formatting
1374         ? Format(number_no_extension, INTERNATIONAL, formatted_number)
1375         : Format(number_no_extension, E164, formatted_number);
1376     return;
1377   }
1378   if (!with_formatting) {
1379     NormalizeDiallableCharsOnly(formatted_number);
1380   }
1381 }
1382 
FormatOutOfCountryCallingNumber(const PhoneNumber & number,const string & calling_from,string * formatted_number) const1383 void PhoneNumberUtil::FormatOutOfCountryCallingNumber(
1384     const PhoneNumber& number,
1385     const string& calling_from,
1386     string* formatted_number) const {
1387   DCHECK(formatted_number);
1388   if (!IsValidRegionCode(calling_from)) {
1389     VLOG(1) << "Trying to format number from invalid region " << calling_from
1390             << ". International formatting applied.";
1391     Format(number, INTERNATIONAL, formatted_number);
1392     return;
1393   }
1394   int country_code = number.country_code();
1395   string national_significant_number;
1396   GetNationalSignificantNumber(number, &national_significant_number);
1397   if (!HasValidCountryCallingCode(country_code)) {
1398     formatted_number->assign(national_significant_number);
1399     return;
1400   }
1401   if (country_code == kNanpaCountryCode) {
1402     if (IsNANPACountry(calling_from)) {
1403       // For NANPA regions, return the national format for these regions but
1404       // prefix it with the country calling code.
1405       Format(number, NATIONAL, formatted_number);
1406       formatted_number->insert(0, StrCat(country_code, " "));
1407       return;
1408     }
1409   } else if (country_code == GetCountryCodeForValidRegion(calling_from)) {
1410     // If neither region is a NANPA region, then we check to see if the
1411     // country calling code of the number and the country calling code of the
1412     // region we are calling from are the same.
1413     // For regions that share a country calling code, the country calling code
1414     // need not be dialled. This also applies when dialling within a region, so
1415     // this if clause covers both these cases.
1416     // Technically this is the case for dialling from la Réunion to other
1417     // overseas departments of France (French Guiana, Martinique, Guadeloupe),
1418     // but not vice versa - so we don't cover this edge case for now and for
1419     // those cases return the version including country calling code.
1420     // Details here:
1421     // http://www.petitfute.com/voyage/225-info-pratiques-reunion
1422     Format(number, NATIONAL, formatted_number);
1423     return;
1424   }
1425   // Metadata cannot be NULL because we checked 'IsValidRegionCode()' above.
1426   const PhoneMetadata* metadata_calling_from =
1427       GetMetadataForRegion(calling_from);
1428   const string& international_prefix =
1429       metadata_calling_from->international_prefix();
1430 
1431   // In general, if there is a preferred international prefix, use that.
1432   // Otherwise, for regions that have multiple international prefixes, the
1433   // international format of the number is returned since we would not know
1434   // which one to use.
1435   std::string international_prefix_for_formatting;
1436   if (metadata_calling_from->has_preferred_international_prefix()) {
1437     international_prefix_for_formatting =
1438         metadata_calling_from->preferred_international_prefix();
1439   } else if (reg_exps_->single_international_prefix_->FullMatch(
1440                  international_prefix)) {
1441     international_prefix_for_formatting = international_prefix;
1442   }
1443 
1444   string region_code;
1445   GetRegionCodeForCountryCode(country_code, &region_code);
1446   // Metadata cannot be NULL because the country_code is valid.
1447   const PhoneMetadata* metadata_for_region =
1448       GetMetadataForRegionOrCallingCode(country_code, region_code);
1449   FormatNsn(national_significant_number, *metadata_for_region, INTERNATIONAL,
1450             formatted_number);
1451   MaybeAppendFormattedExtension(number, *metadata_for_region, INTERNATIONAL,
1452                                 formatted_number);
1453   if (!international_prefix_for_formatting.empty()) {
1454     formatted_number->insert(
1455         0, StrCat(international_prefix_for_formatting, " ", country_code, " "));
1456   } else {
1457     PrefixNumberWithCountryCallingCode(country_code, INTERNATIONAL,
1458                                        formatted_number);
1459   }
1460 }
1461 
FormatInOriginalFormat(const PhoneNumber & number,const string & region_calling_from,string * formatted_number) const1462 void PhoneNumberUtil::FormatInOriginalFormat(const PhoneNumber& number,
1463                                              const string& region_calling_from,
1464                                              string* formatted_number) const {
1465   DCHECK(formatted_number);
1466 
1467   if (number.has_raw_input() && !HasFormattingPatternForNumber(number)) {
1468     // We check if we have the formatting pattern because without that, we might
1469     // format the number as a group without national prefix.
1470     formatted_number->assign(number.raw_input());
1471     return;
1472   }
1473   if (!number.has_country_code_source()) {
1474     Format(number, NATIONAL, formatted_number);
1475     return;
1476   }
1477   switch (number.country_code_source()) {
1478     case PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN:
1479       Format(number, INTERNATIONAL, formatted_number);
1480       break;
1481     case PhoneNumber::FROM_NUMBER_WITH_IDD:
1482       FormatOutOfCountryCallingNumber(number, region_calling_from,
1483                                       formatted_number);
1484       break;
1485     case PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN:
1486       Format(number, INTERNATIONAL, formatted_number);
1487       formatted_number->erase(formatted_number->begin());
1488       break;
1489     case PhoneNumber::FROM_DEFAULT_COUNTRY:
1490       // Fall-through to default case.
1491     default:
1492       string region_code;
1493       GetRegionCodeForCountryCode(number.country_code(), &region_code);
1494       // We strip non-digits from the NDD here, and from the raw input later, so
1495       // that we can compare them easily.
1496       string national_prefix;
1497       GetNddPrefixForRegion(region_code, true /* strip non-digits */,
1498                             &national_prefix);
1499       if (national_prefix.empty()) {
1500         // If the region doesn't have a national prefix at all, we can safely
1501         // return the national format without worrying about a national prefix
1502         // being added.
1503         Format(number, NATIONAL, formatted_number);
1504         break;
1505       }
1506       // Otherwise, we check if the original number was entered with a national
1507       // prefix.
1508       if (RawInputContainsNationalPrefix(number.raw_input(), national_prefix,
1509                                          region_code)) {
1510         // If so, we can safely return the national format.
1511         Format(number, NATIONAL, formatted_number);
1512         break;
1513       }
1514       // Metadata cannot be NULL here because GetNddPrefixForRegion() (above)
1515       // leaves the prefix empty if there is no metadata for the region.
1516       const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
1517       string national_number;
1518       GetNationalSignificantNumber(number, &national_number);
1519       // This shouldn't be NULL, because we have checked that above with
1520       // HasFormattingPatternForNumber.
1521       const NumberFormat* format_rule =
1522           ChooseFormattingPatternForNumber(metadata->number_format(),
1523                                            national_number);
1524       // The format rule could still be NULL here if the national number was 0
1525       // and there was no raw input (this should not be possible for numbers
1526       // generated by the phonenumber library as they would also not have a
1527       // country calling code and we would have exited earlier).
1528       if (!format_rule) {
1529         Format(number, NATIONAL, formatted_number);
1530         break;
1531       }
1532       // When the format we apply to this number doesn't contain national
1533       // prefix, we can just return the national format.
1534       // TODO: Refactor the code below with the code in
1535       // IsNationalPrefixPresentIfRequired.
1536       string candidate_national_prefix_rule(
1537           format_rule->national_prefix_formatting_rule());
1538       // We assume that the first-group symbol will never be _before_ the
1539       // national prefix.
1540       if (!candidate_national_prefix_rule.empty()) {
1541         size_t index_of_first_group = candidate_national_prefix_rule.find("$1");
1542         if (index_of_first_group == string::npos) {
1543           LOG(ERROR) << "First group missing in national prefix rule: "
1544               << candidate_national_prefix_rule;
1545           Format(number, NATIONAL, formatted_number);
1546           break;
1547         }
1548         candidate_national_prefix_rule.erase(index_of_first_group);
1549         NormalizeDigitsOnly(&candidate_national_prefix_rule);
1550       }
1551       if (candidate_national_prefix_rule.empty()) {
1552         // National prefix not used when formatting this number.
1553         Format(number, NATIONAL, formatted_number);
1554         break;
1555       }
1556       // Otherwise, we need to remove the national prefix from our output.
1557       RepeatedPtrField<NumberFormat> number_formats;
1558       NumberFormat* number_format = number_formats.Add();
1559       number_format->MergeFrom(*format_rule);
1560       number_format->clear_national_prefix_formatting_rule();
1561       FormatByPattern(number, NATIONAL, number_formats, formatted_number);
1562       break;
1563   }
1564   // If no digit is inserted/removed/modified as a result of our formatting, we
1565   // return the formatted phone number; otherwise we return the raw input the
1566   // user entered.
1567   if (!formatted_number->empty() && !number.raw_input().empty()) {
1568     string normalized_formatted_number(*formatted_number);
1569     NormalizeDiallableCharsOnly(&normalized_formatted_number);
1570     string normalized_raw_input(number.raw_input());
1571     NormalizeDiallableCharsOnly(&normalized_raw_input);
1572     if (normalized_formatted_number != normalized_raw_input) {
1573       formatted_number->assign(number.raw_input());
1574     }
1575   }
1576 }
1577 
1578 // Check if raw_input, which is assumed to be in the national format, has a
1579 // national prefix. The national prefix is assumed to be in digits-only form.
RawInputContainsNationalPrefix(const string & raw_input,const string & national_prefix,const string & region_code) const1580 bool PhoneNumberUtil::RawInputContainsNationalPrefix(
1581     const string& raw_input,
1582     const string& national_prefix,
1583     const string& region_code) const {
1584   string normalized_national_number(raw_input);
1585   NormalizeDigitsOnly(&normalized_national_number);
1586   if (HasPrefixString(normalized_national_number, national_prefix)) {
1587     // Some Japanese numbers (e.g. 00777123) might be mistaken to contain
1588     // the national prefix when written without it (e.g. 0777123) if we just
1589     // do prefix matching. To tackle that, we check the validity of the
1590     // number if the assumed national prefix is removed (777123 won't be
1591     // valid in Japan).
1592     PhoneNumber number_without_national_prefix;
1593     if (Parse(normalized_national_number.substr(national_prefix.length()),
1594               region_code, &number_without_national_prefix)
1595         == NO_PARSING_ERROR) {
1596       return IsValidNumber(number_without_national_prefix);
1597     }
1598   }
1599   return false;
1600 }
1601 
HasFormattingPatternForNumber(const PhoneNumber & number) const1602 bool PhoneNumberUtil::HasFormattingPatternForNumber(
1603     const PhoneNumber& number) const {
1604   int country_calling_code = number.country_code();
1605   string region_code;
1606   GetRegionCodeForCountryCode(country_calling_code, &region_code);
1607   const PhoneMetadata* metadata =
1608       GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
1609   if (!metadata) {
1610     return false;
1611   }
1612   string national_number;
1613   GetNationalSignificantNumber(number, &national_number);
1614   const NumberFormat* format_rule =
1615       ChooseFormattingPatternForNumber(metadata->number_format(),
1616                                        national_number);
1617   return format_rule;
1618 }
1619 
FormatOutOfCountryKeepingAlphaChars(const PhoneNumber & number,const string & calling_from,string * formatted_number) const1620 void PhoneNumberUtil::FormatOutOfCountryKeepingAlphaChars(
1621     const PhoneNumber& number,
1622     const string& calling_from,
1623     string* formatted_number) const {
1624   // If there is no raw input, then we can't keep alpha characters because there
1625   // aren't any. In this case, we return FormatOutOfCountryCallingNumber.
1626   if (number.raw_input().empty()) {
1627     FormatOutOfCountryCallingNumber(number, calling_from, formatted_number);
1628     return;
1629   }
1630   int country_code = number.country_code();
1631   if (!HasValidCountryCallingCode(country_code)) {
1632     formatted_number->assign(number.raw_input());
1633     return;
1634   }
1635   // Strip any prefix such as country calling code, IDD, that was present. We do
1636   // this by comparing the number in raw_input with the parsed number.
1637   string raw_input_copy(number.raw_input());
1638   // Normalize punctuation. We retain number grouping symbols such as " " only.
1639   NormalizeHelper(reg_exps_->all_plus_number_grouping_symbols_, true,
1640                   &raw_input_copy);
1641   // Now we trim everything before the first three digits in the parsed number.
1642   // We choose three because all valid alpha numbers have 3 digits at the start
1643   // - if it does not, then we don't trim anything at all. Similarly, if the
1644   // national number was less than three digits, we don't trim anything at all.
1645   string national_number;
1646   GetNationalSignificantNumber(number, &national_number);
1647   if (national_number.length() > 3) {
1648     size_t first_national_number_digit =
1649         raw_input_copy.find(national_number.substr(0, 3));
1650     if (first_national_number_digit != string::npos) {
1651       raw_input_copy = raw_input_copy.substr(first_national_number_digit);
1652     }
1653   }
1654   const PhoneMetadata* metadata = GetMetadataForRegion(calling_from);
1655   if (country_code == kNanpaCountryCode) {
1656     if (IsNANPACountry(calling_from)) {
1657       StrAppend(formatted_number, country_code, " ", raw_input_copy);
1658       return;
1659     }
1660   } else if (metadata &&
1661              country_code == GetCountryCodeForValidRegion(calling_from)) {
1662     const NumberFormat* formatting_pattern =
1663         ChooseFormattingPatternForNumber(metadata->number_format(),
1664                                          national_number);
1665     if (!formatting_pattern) {
1666       // If no pattern above is matched, we format the original input.
1667       formatted_number->assign(raw_input_copy);
1668       return;
1669     }
1670     NumberFormat new_format;
1671     new_format.MergeFrom(*formatting_pattern);
1672     // The first group is the first group of digits that the user wrote
1673     // together.
1674     new_format.set_pattern("(\\d+)(.*)");
1675     // Here we just concatenate them back together after the national prefix
1676     // has been fixed.
1677     new_format.set_format("$1$2");
1678     // Now we format using this pattern instead of the default pattern, but
1679     // with the national prefix prefixed if necessary.
1680     // This will not work in the cases where the pattern (and not the
1681     // leading digits) decide whether a national prefix needs to be used, since
1682     // we have overridden the pattern to match anything, but that is not the
1683     // case in the metadata to date.
1684     FormatNsnUsingPattern(raw_input_copy, new_format, NATIONAL,
1685                           formatted_number);
1686     return;
1687   }
1688 
1689   string international_prefix_for_formatting;
1690   // If an unsupported region-calling-from is entered, or a country with
1691   // multiple international prefixes, the international format of the number is
1692   // returned, unless there is a preferred international prefix.
1693   if (metadata) {
1694     const string& international_prefix = metadata->international_prefix();
1695     international_prefix_for_formatting =
1696         reg_exps_->single_international_prefix_->FullMatch(international_prefix)
1697         ? international_prefix
1698         : metadata->preferred_international_prefix();
1699   }
1700   if (!international_prefix_for_formatting.empty()) {
1701     StrAppend(formatted_number, international_prefix_for_formatting, " ",
1702               country_code, " ", raw_input_copy);
1703   } else {
1704     // Invalid region entered as country-calling-from (so no metadata was found
1705     // for it) or the region chosen has multiple international dialling
1706     // prefixes.
1707     if (!IsValidRegionCode(calling_from)) {
1708       VLOG(1) << "Trying to format number from invalid region " << calling_from
1709               << ". International formatting applied.";
1710     }
1711     formatted_number->assign(raw_input_copy);
1712     PrefixNumberWithCountryCallingCode(country_code, INTERNATIONAL,
1713                                        formatted_number);
1714   }
1715 }
1716 
ChooseFormattingPatternForNumber(const RepeatedPtrField<NumberFormat> & available_formats,const string & national_number) const1717 const NumberFormat* PhoneNumberUtil::ChooseFormattingPatternForNumber(
1718     const RepeatedPtrField<NumberFormat>& available_formats,
1719     const string& national_number) const {
1720   for (RepeatedPtrField<NumberFormat>::const_iterator
1721        it = available_formats.begin(); it != available_formats.end(); ++it) {
1722     int size = it->leading_digits_pattern_size();
1723     if (size > 0) {
1724       const scoped_ptr<RegExpInput> number_copy(
1725           reg_exps_->regexp_factory_->CreateInput(national_number));
1726       // We always use the last leading_digits_pattern, as it is the most
1727       // detailed.
1728       if (!reg_exps_->regexp_cache_->GetRegExp(
1729               it->leading_digits_pattern(size - 1)).Consume(
1730                   number_copy.get())) {
1731         continue;
1732       }
1733     }
1734     const RegExp& pattern_to_match(
1735         reg_exps_->regexp_cache_->GetRegExp(it->pattern()));
1736     if (pattern_to_match.FullMatch(national_number)) {
1737       return &(*it);
1738     }
1739   }
1740   return NULL;
1741 }
1742 
1743 // Note that carrier_code is optional - if an empty string, no carrier code
1744 // replacement will take place.
FormatNsnUsingPatternWithCarrier(const string & national_number,const NumberFormat & formatting_pattern,PhoneNumberUtil::PhoneNumberFormat number_format,const string & carrier_code,string * formatted_number) const1745 void PhoneNumberUtil::FormatNsnUsingPatternWithCarrier(
1746     const string& national_number,
1747     const NumberFormat& formatting_pattern,
1748     PhoneNumberUtil::PhoneNumberFormat number_format,
1749     const string& carrier_code,
1750     string* formatted_number) const {
1751   DCHECK(formatted_number);
1752   string number_format_rule(formatting_pattern.format());
1753   if (number_format == PhoneNumberUtil::NATIONAL &&
1754       carrier_code.length() > 0 &&
1755       formatting_pattern.domestic_carrier_code_formatting_rule().length() > 0) {
1756     // Replace the $CC in the formatting rule with the desired carrier code.
1757     string carrier_code_formatting_rule =
1758         formatting_pattern.domestic_carrier_code_formatting_rule();
1759     reg_exps_->carrier_code_pattern_->Replace(&carrier_code_formatting_rule,
1760                                               carrier_code);
1761     reg_exps_->first_group_capturing_pattern_->
1762         Replace(&number_format_rule, carrier_code_formatting_rule);
1763   } else {
1764     // Use the national prefix formatting rule instead.
1765     string national_prefix_formatting_rule =
1766         formatting_pattern.national_prefix_formatting_rule();
1767     if (number_format == PhoneNumberUtil::NATIONAL &&
1768         national_prefix_formatting_rule.length() > 0) {
1769       // Apply the national_prefix_formatting_rule as the formatting_pattern
1770       // contains only information on how the national significant number
1771       // should be formatted at this point.
1772       reg_exps_->first_group_capturing_pattern_->Replace(
1773           &number_format_rule, national_prefix_formatting_rule);
1774     }
1775   }
1776   formatted_number->assign(national_number);
1777 
1778   const RegExp& pattern_to_match(
1779       reg_exps_->regexp_cache_->GetRegExp(formatting_pattern.pattern()));
1780   pattern_to_match.GlobalReplace(formatted_number, number_format_rule);
1781 
1782   if (number_format == RFC3966) {
1783     // First consume any leading punctuation, if any was present.
1784     const scoped_ptr<RegExpInput> number(
1785         reg_exps_->regexp_factory_->CreateInput(*formatted_number));
1786     if (reg_exps_->separator_pattern_->Consume(number.get())) {
1787       formatted_number->assign(number->ToString());
1788     }
1789     // Then replace all separators with a "-".
1790     reg_exps_->separator_pattern_->GlobalReplace(formatted_number, "-");
1791   }
1792 }
1793 
1794 // Simple wrapper of FormatNsnUsingPatternWithCarrier for the common case of
1795 // no carrier code.
FormatNsnUsingPattern(const string & national_number,const NumberFormat & formatting_pattern,PhoneNumberUtil::PhoneNumberFormat number_format,string * formatted_number) const1796 void PhoneNumberUtil::FormatNsnUsingPattern(
1797     const string& national_number,
1798     const NumberFormat& formatting_pattern,
1799     PhoneNumberUtil::PhoneNumberFormat number_format,
1800     string* formatted_number) const {
1801   DCHECK(formatted_number);
1802   FormatNsnUsingPatternWithCarrier(national_number, formatting_pattern,
1803                                    number_format, "", formatted_number);
1804 }
1805 
FormatNsn(const string & number,const PhoneMetadata & metadata,PhoneNumberFormat number_format,string * formatted_number) const1806 void PhoneNumberUtil::FormatNsn(const string& number,
1807                                 const PhoneMetadata& metadata,
1808                                 PhoneNumberFormat number_format,
1809                                 string* formatted_number) const {
1810   DCHECK(formatted_number);
1811   FormatNsnWithCarrier(number, metadata, number_format, "", formatted_number);
1812 }
1813 
1814 // Note in some regions, the national number can be written in two completely
1815 // different ways depending on whether it forms part of the NATIONAL format or
1816 // INTERNATIONAL format. The number_format parameter here is used to specify
1817 // which format to use for those cases. If a carrier_code is specified, this
1818 // will be inserted into the formatted string to replace $CC.
FormatNsnWithCarrier(const string & number,const PhoneMetadata & metadata,PhoneNumberFormat number_format,const string & carrier_code,string * formatted_number) const1819 void PhoneNumberUtil::FormatNsnWithCarrier(const string& number,
1820                                            const PhoneMetadata& metadata,
1821                                            PhoneNumberFormat number_format,
1822                                            const string& carrier_code,
1823                                            string* formatted_number) const {
1824   DCHECK(formatted_number);
1825   // When the intl_number_formats exists, we use that to format national number
1826   // for the INTERNATIONAL format instead of using the number_formats.
1827   const RepeatedPtrField<NumberFormat> available_formats =
1828       (metadata.intl_number_format_size() == 0 || number_format == NATIONAL)
1829       ? metadata.number_format()
1830       : metadata.intl_number_format();
1831   const NumberFormat* formatting_pattern =
1832       ChooseFormattingPatternForNumber(available_formats, number);
1833   if (!formatting_pattern) {
1834     formatted_number->assign(number);
1835   } else {
1836     FormatNsnUsingPatternWithCarrier(number, *formatting_pattern, number_format,
1837                                      carrier_code, formatted_number);
1838   }
1839 }
1840 
1841 // Appends the formatted extension of a phone number, if the phone number had an
1842 // extension specified.
MaybeAppendFormattedExtension(const PhoneNumber & number,const PhoneMetadata & metadata,PhoneNumberFormat number_format,string * formatted_number) const1843 void PhoneNumberUtil::MaybeAppendFormattedExtension(
1844     const PhoneNumber& number,
1845     const PhoneMetadata& metadata,
1846     PhoneNumberFormat number_format,
1847     string* formatted_number) const {
1848   DCHECK(formatted_number);
1849   if (number.has_extension() && number.extension().length() > 0) {
1850     if (number_format == RFC3966) {
1851       StrAppend(formatted_number, kRfc3966ExtnPrefix, number.extension());
1852     } else {
1853       if (metadata.has_preferred_extn_prefix()) {
1854         StrAppend(formatted_number, metadata.preferred_extn_prefix(),
1855                   number.extension());
1856       } else {
1857         StrAppend(formatted_number, kDefaultExtnPrefix, number.extension());
1858       }
1859     }
1860   }
1861 }
1862 
IsNANPACountry(const string & region_code) const1863 bool PhoneNumberUtil::IsNANPACountry(const string& region_code) const {
1864   return nanpa_regions_->find(region_code) != nanpa_regions_->end();
1865 }
1866 
1867 // Returns the region codes that matches the specific country calling code. In
1868 // the case of no region code being found, region_codes will be left empty.
GetRegionCodesForCountryCallingCode(int country_calling_code,std::list<string> * region_codes) const1869 void PhoneNumberUtil::GetRegionCodesForCountryCallingCode(
1870     int country_calling_code,
1871     std::list<string>* region_codes) const {
1872   DCHECK(region_codes);
1873   // Create a IntRegionsPair with the country_code passed in, and use it to
1874   // locate the pair with the same country_code in the sorted vector.
1875   IntRegionsPair target_pair;
1876   target_pair.first = country_calling_code;
1877   typedef std::vector<IntRegionsPair>::const_iterator ConstIterator;
1878   std::pair<ConstIterator, ConstIterator> range =
1879       std::equal_range(country_calling_code_to_region_code_map_->begin(),
1880                        country_calling_code_to_region_code_map_->end(),
1881                        target_pair, OrderByFirst());
1882   if (range.first != range.second) {
1883     region_codes->insert(region_codes->begin(),
1884                          range.first->second->begin(),
1885                          range.first->second->end());
1886   }
1887 }
1888 
1889 // Returns the region code that matches the specific country calling code. In
1890 // the case of no region code being found, the unknown region code will be
1891 // returned.
GetRegionCodeForCountryCode(int country_calling_code,string * region_code) const1892 void PhoneNumberUtil::GetRegionCodeForCountryCode(
1893     int country_calling_code,
1894     string* region_code) const {
1895   DCHECK(region_code);
1896   std::list<string> region_codes;
1897 
1898   GetRegionCodesForCountryCallingCode(country_calling_code, &region_codes);
1899   *region_code = (region_codes.size() > 0) ?
1900       region_codes.front() : RegionCode::GetUnknown();
1901 }
1902 
GetRegionCodeForNumber(const PhoneNumber & number,string * region_code) const1903 void PhoneNumberUtil::GetRegionCodeForNumber(const PhoneNumber& number,
1904                                              string* region_code) const {
1905   DCHECK(region_code);
1906   int country_calling_code = number.country_code();
1907   std::list<string> region_codes;
1908   GetRegionCodesForCountryCallingCode(country_calling_code, &region_codes);
1909   if (region_codes.size() == 0) {
1910     VLOG(1) << "Missing/invalid country calling code ("
1911             << country_calling_code << ")";
1912     *region_code = RegionCode::GetUnknown();
1913     return;
1914   }
1915   if (region_codes.size() == 1) {
1916     *region_code = region_codes.front();
1917   } else {
1918     GetRegionCodeForNumberFromRegionList(number, region_codes, region_code);
1919   }
1920 }
1921 
GetRegionCodeForNumberFromRegionList(const PhoneNumber & number,const std::list<string> & region_codes,string * region_code) const1922 void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList(
1923     const PhoneNumber& number, const std::list<string>& region_codes,
1924     string* region_code) const {
1925   DCHECK(region_code);
1926   string national_number;
1927   GetNationalSignificantNumber(number, &national_number);
1928   for (std::list<string>::const_iterator it = region_codes.begin();
1929        it != region_codes.end(); ++it) {
1930     // Metadata cannot be NULL because the region codes come from the country
1931     // calling code map.
1932     const PhoneMetadata* metadata = GetMetadataForRegion(*it);
1933     if (metadata->has_leading_digits()) {
1934       const scoped_ptr<RegExpInput> number(
1935           reg_exps_->regexp_factory_->CreateInput(national_number));
1936       if (reg_exps_->regexp_cache_->
1937               GetRegExp(metadata->leading_digits()).Consume(number.get())) {
1938         *region_code = *it;
1939         return;
1940       }
1941     } else if (GetNumberTypeHelper(national_number, *metadata) != UNKNOWN) {
1942       *region_code = *it;
1943       return;
1944     }
1945   }
1946   *region_code = RegionCode::GetUnknown();
1947 }
1948 
GetCountryCodeForRegion(const string & region_code) const1949 int PhoneNumberUtil::GetCountryCodeForRegion(const string& region_code) const {
1950   if (!IsValidRegionCode(region_code)) {
1951     LOG(WARNING) << "Invalid or unknown region code (" << region_code
1952                  << ") provided.";
1953     return 0;
1954   }
1955   return GetCountryCodeForValidRegion(region_code);
1956 }
1957 
GetCountryCodeForValidRegion(const string & region_code) const1958 int PhoneNumberUtil::GetCountryCodeForValidRegion(
1959     const string& region_code) const {
1960   const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
1961   return metadata->country_code();
1962 }
1963 
1964 // Gets a valid fixed-line number for the specified region_code. Returns false
1965 // if the region was unknown or 001 (representing non-geographical regions), or
1966 // if no number exists.
GetExampleNumber(const string & region_code,PhoneNumber * number) const1967 bool PhoneNumberUtil::GetExampleNumber(const string& region_code,
1968                                        PhoneNumber* number) const {
1969   DCHECK(number);
1970   return GetExampleNumberForType(region_code, FIXED_LINE, number);
1971 }
1972 
GetInvalidExampleNumber(const string & region_code,PhoneNumber * number) const1973 bool PhoneNumberUtil::GetInvalidExampleNumber(const string& region_code,
1974                                               PhoneNumber* number) const {
1975   DCHECK(number);
1976   if (!IsValidRegionCode(region_code)) {
1977     LOG(WARNING) << "Invalid or unknown region code (" << region_code
1978                  << ") provided.";
1979     return false;
1980   }
1981   // We start off with a valid fixed-line number since every country supports
1982   // this. Alternatively we could start with a different number type, since
1983   // fixed-line numbers typically have a wide breadth of valid number lengths
1984   // and we may have to make it very short before we get an invalid number.
1985   const PhoneMetadata* region_metadata = GetMetadataForRegion(region_code);
1986   const PhoneNumberDesc* desc =
1987       GetNumberDescByType(*region_metadata, FIXED_LINE);
1988   if (!desc->has_example_number()) {
1989     // This shouldn't happen - we have a test for this.
1990     return false;
1991   }
1992   const string& example_number = desc->example_number();
1993   // Try and make the number invalid. We do this by changing the length. We try
1994   // reducing the length of the number, since currently no region has a number
1995   // that is the same length as kMinLengthForNsn. This is probably quicker than
1996   // making the number longer, which is another alternative. We could also use
1997   // the possible number pattern to extract the possible lengths of the number
1998   // to make this faster, but this method is only for unit-testing so simplicity
1999   // is preferred to performance.
2000   // We don't want to return a number that can't be parsed, so we check the
2001   // number is long enough. We try all possible lengths because phone number
2002   // plans often have overlapping prefixes so the number 123456 might be valid
2003   // as a fixed-line number, and 12345 as a mobile number. It would be faster to
2004   // loop in a different order, but we prefer numbers that look closer to real
2005   // numbers (and it gives us a variety of different lengths for the resulting
2006   // phone numbers - otherwise they would all be kMinLengthForNsn digits long.)
2007   for (size_t phone_number_length = example_number.length() - 1;
2008        phone_number_length >= kMinLengthForNsn;
2009        phone_number_length--) {
2010     string number_to_try = example_number.substr(0, phone_number_length);
2011     PhoneNumber possibly_valid_number;
2012     Parse(number_to_try, region_code, &possibly_valid_number);
2013     // We don't check the return value since we have already checked the
2014     // length, we know example numbers have only valid digits, and we know the
2015     // region code is fine.
2016     if (!IsValidNumber(possibly_valid_number)) {
2017       number->MergeFrom(possibly_valid_number);
2018       return true;
2019     }
2020   }
2021   // We have a test to check that this doesn't happen for any of our supported
2022   // regions.
2023   return false;
2024 }
2025 
2026 // Gets a valid number for the specified region_code and type.  Returns false if
2027 // the country was unknown or 001 (representing non-geographical regions), or if
2028 // no number exists.
GetExampleNumberForType(const string & region_code,PhoneNumberUtil::PhoneNumberType type,PhoneNumber * number) const2029 bool PhoneNumberUtil::GetExampleNumberForType(
2030     const string& region_code,
2031     PhoneNumberUtil::PhoneNumberType type,
2032     PhoneNumber* number) const {
2033   DCHECK(number);
2034   if (!IsValidRegionCode(region_code)) {
2035     LOG(WARNING) << "Invalid or unknown region code (" << region_code
2036                  << ") provided.";
2037     return false;
2038   }
2039   const PhoneMetadata* region_metadata = GetMetadataForRegion(region_code);
2040   const PhoneNumberDesc* desc = GetNumberDescByType(*region_metadata, type);
2041   if (desc && desc->has_example_number()) {
2042     ErrorType success = Parse(desc->example_number(), region_code, number);
2043     if (success == NO_PARSING_ERROR) {
2044       return true;
2045     } else {
2046       LOG(ERROR) << "Error parsing example number ("
2047                  << static_cast<int>(success) << ")";
2048     }
2049   }
2050   return false;
2051 }
2052 
GetExampleNumberForType(PhoneNumberUtil::PhoneNumberType type,PhoneNumber * number) const2053 bool PhoneNumberUtil::GetExampleNumberForType(
2054     PhoneNumberUtil::PhoneNumberType type,
2055     PhoneNumber* number) const {
2056   DCHECK(number);
2057   std::set<string> regions;
2058   GetSupportedRegions(&regions);
2059   for (const string& region_code : regions) {
2060     if (GetExampleNumberForType(region_code, type, number)) {
2061       return true;
2062     }
2063   }
2064   // If there wasn't an example number for a region, try the non-geographical
2065   // entities.
2066   std::set<int> global_network_calling_codes;
2067   GetSupportedGlobalNetworkCallingCodes(&global_network_calling_codes);
2068   for (std::set<int>::const_iterator it = global_network_calling_codes.begin();
2069        it != global_network_calling_codes.end(); ++it) {
2070     int country_calling_code = *it;
2071     const PhoneMetadata* metadata =
2072         GetMetadataForNonGeographicalRegion(country_calling_code);
2073     const PhoneNumberDesc* desc = GetNumberDescByType(*metadata, type);
2074     if (desc->has_example_number()) {
2075       ErrorType success = Parse(StrCat(kPlusSign,
2076                                        country_calling_code,
2077                                        desc->example_number()),
2078                                 RegionCode::GetUnknown(), number);
2079       if (success == NO_PARSING_ERROR) {
2080         return true;
2081       } else {
2082         LOG(ERROR) << "Error parsing example number ("
2083                    << static_cast<int>(success) << ")";
2084       }
2085     }
2086   }
2087   // There are no example numbers of this type for any country in the library.
2088   return false;
2089 }
2090 
GetExampleNumberForNonGeoEntity(int country_calling_code,PhoneNumber * number) const2091 bool PhoneNumberUtil::GetExampleNumberForNonGeoEntity(
2092     int country_calling_code, PhoneNumber* number) const {
2093   DCHECK(number);
2094   const PhoneMetadata* metadata =
2095       GetMetadataForNonGeographicalRegion(country_calling_code);
2096   if (metadata) {
2097     // For geographical entities, fixed-line data is always present. However,
2098     // for non-geographical entities, this is not the case, so we have to go
2099     // through different types to find the example number. We don't check
2100     // fixed-line or personal number since they aren't used by non-geographical
2101     // entities (if this changes, a unit-test will catch this.)
2102     const int kNumberTypes = 7;
2103     PhoneNumberDesc types[kNumberTypes] = {
2104         metadata->mobile(), metadata->toll_free(), metadata->shared_cost(),
2105         metadata->voip(), metadata->voicemail(), metadata->uan(),
2106         metadata->premium_rate()};
2107     for (int i = 0; i < kNumberTypes; ++i) {
2108       if (types[i].has_example_number()) {
2109         ErrorType success = Parse(StrCat(kPlusSign,
2110                                          SimpleItoa(country_calling_code),
2111                                          types[i].example_number()),
2112                                   RegionCode::GetUnknown(), number);
2113         if (success == NO_PARSING_ERROR) {
2114           return true;
2115         } else {
2116           LOG(ERROR) << "Error parsing example number ("
2117                      << static_cast<int>(success) << ")";
2118         }
2119       }
2120     }
2121   } else {
2122     LOG(WARNING) << "Invalid or unknown country calling code provided: "
2123                  << country_calling_code;
2124   }
2125   return false;
2126 }
2127 
Parse(const string & number_to_parse,const string & default_region,PhoneNumber * number) const2128 PhoneNumberUtil::ErrorType PhoneNumberUtil::Parse(const string& number_to_parse,
2129                                                   const string& default_region,
2130                                                   PhoneNumber* number) const {
2131   DCHECK(number);
2132   return ParseHelper(number_to_parse, default_region, false, true, number);
2133 }
2134 
ParseAndKeepRawInput(const string & number_to_parse,const string & default_region,PhoneNumber * number) const2135 PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseAndKeepRawInput(
2136     const string& number_to_parse,
2137     const string& default_region,
2138     PhoneNumber* number) const {
2139   DCHECK(number);
2140   return ParseHelper(number_to_parse, default_region, true, true, number);
2141 }
2142 
2143 // Checks to see that the region code used is valid, or if it is not valid, that
2144 // the number to parse starts with a + symbol so that we can attempt to infer
2145 // the country from the number. Returns false if it cannot use the region
2146 // provided and the region cannot be inferred.
CheckRegionForParsing(const string & number_to_parse,const string & default_region) const2147 bool PhoneNumberUtil::CheckRegionForParsing(
2148     const string& number_to_parse,
2149     const string& default_region) const {
2150   if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) {
2151     const scoped_ptr<RegExpInput> number(
2152         reg_exps_->regexp_factory_->CreateInput(number_to_parse));
2153     if (!reg_exps_->plus_chars_pattern_->Consume(number.get())) {
2154       return false;
2155     }
2156   }
2157   return true;
2158 }
2159 
2160 // Converts number_to_parse to a form that we can parse and write it to
2161 // national_number if it is written in RFC3966; otherwise extract a possible
2162 // number out of it and write to national_number.
BuildNationalNumberForParsing(const string & number_to_parse,string * national_number) const2163 void PhoneNumberUtil::BuildNationalNumberForParsing(
2164     const string& number_to_parse, string* national_number) const {
2165   size_t index_of_phone_context = number_to_parse.find(kRfc3966PhoneContext);
2166   if (index_of_phone_context != string::npos) {
2167     size_t phone_context_start =
2168         index_of_phone_context + strlen(kRfc3966PhoneContext);
2169     // If the phone context contains a phone number prefix, we need to capture
2170     // it, whereas domains will be ignored.
2171     if (phone_context_start < (number_to_parse.length() - 1) &&
2172         number_to_parse.at(phone_context_start) == kPlusSign[0]) {
2173       // Additional parameters might follow the phone context. If so, we will
2174       // remove them here because the parameters after phone context are not
2175       // important for parsing the phone number.
2176       size_t phone_context_end = number_to_parse.find(';', phone_context_start);
2177       if (phone_context_end != string::npos) {
2178         StrAppend(
2179             national_number, number_to_parse.substr(
2180                 phone_context_start, phone_context_end - phone_context_start));
2181       } else {
2182         StrAppend(national_number, number_to_parse.substr(phone_context_start));
2183       }
2184     }
2185 
2186     // Now append everything between the "tel:" prefix and the phone-context.
2187     // This should include the national number, an optional extension or
2188     // isdn-subaddress component. Note we also handle the case when "tel:" is
2189     // missing, as we have seen in some of the phone number inputs. In that
2190     // case, we append everything from the beginning.
2191     size_t index_of_rfc_prefix = number_to_parse.find(kRfc3966Prefix);
2192     int index_of_national_number = (index_of_rfc_prefix != string::npos) ?
2193         static_cast<int>(index_of_rfc_prefix + strlen(kRfc3966Prefix)) : 0;
2194     StrAppend(
2195         national_number,
2196         number_to_parse.substr(
2197             index_of_national_number,
2198             index_of_phone_context - index_of_national_number));
2199   } else {
2200     // Extract a possible number from the string passed in (this strips leading
2201     // characters that could not be the start of a phone number.)
2202     ExtractPossibleNumber(number_to_parse, national_number);
2203   }
2204 
2205   // Delete the isdn-subaddress and everything after it if it is present. Note
2206   // extension won't appear at the same time with isdn-subaddress according to
2207   // paragraph 5.3 of the RFC3966 spec.
2208   size_t index_of_isdn = national_number->find(kRfc3966IsdnSubaddress);
2209   if (index_of_isdn != string::npos) {
2210     national_number->erase(index_of_isdn);
2211   }
2212   // If both phone context and isdn-subaddress are absent but other parameters
2213   // are present, the parameters are left in nationalNumber. This is because
2214   // we are concerned about deleting content from a potential number string
2215   // when there is no strong evidence that the number is actually written in
2216   // RFC3966.
2217 }
2218 
2219 // Note if any new field is added to this method that should always be filled
2220 // in, even when keepRawInput is false, it should also be handled in the
2221 // CopyCoreFieldsOnly() method.
ParseHelper(const string & number_to_parse,const string & default_region,bool keep_raw_input,bool check_region,PhoneNumber * phone_number) const2222 PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper(
2223     const string& number_to_parse,
2224     const string& default_region,
2225     bool keep_raw_input,
2226     bool check_region,
2227     PhoneNumber* phone_number) const {
2228   DCHECK(phone_number);
2229 
2230   string national_number;
2231   BuildNationalNumberForParsing(number_to_parse, &national_number);
2232 
2233   if (!IsViablePhoneNumber(national_number)) {
2234     VLOG(2) << "The string supplied did not seem to be a phone number.";
2235     return NOT_A_NUMBER;
2236   }
2237 
2238   if (check_region &&
2239       !CheckRegionForParsing(national_number, default_region)) {
2240     VLOG(1) << "Missing or invalid default country.";
2241     return INVALID_COUNTRY_CODE_ERROR;
2242   }
2243   PhoneNumber temp_number;
2244   if (keep_raw_input) {
2245     temp_number.set_raw_input(number_to_parse);
2246   }
2247   // Attempt to parse extension first, since it doesn't require country-specific
2248   // data and we want to have the non-normalised number here.
2249   string extension;
2250   MaybeStripExtension(&national_number, &extension);
2251   if (!extension.empty()) {
2252     temp_number.set_extension(extension);
2253   }
2254   const PhoneMetadata* country_metadata = GetMetadataForRegion(default_region);
2255   // Check to see if the number is given in international format so we know
2256   // whether this number is from the default country or not.
2257   string normalized_national_number(national_number);
2258   ErrorType country_code_error =
2259       MaybeExtractCountryCode(country_metadata, keep_raw_input,
2260                               &normalized_national_number, &temp_number);
2261   if (country_code_error != NO_PARSING_ERROR) {
2262     const scoped_ptr<RegExpInput> number_string_piece(
2263         reg_exps_->regexp_factory_->CreateInput(national_number));
2264     if ((country_code_error == INVALID_COUNTRY_CODE_ERROR) &&
2265         (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get()))) {
2266       normalized_national_number.assign(number_string_piece->ToString());
2267       // Strip the plus-char, and try again.
2268       MaybeExtractCountryCode(country_metadata,
2269                               keep_raw_input,
2270                               &normalized_national_number,
2271                               &temp_number);
2272       if (temp_number.country_code() == 0) {
2273         return INVALID_COUNTRY_CODE_ERROR;
2274       }
2275     } else {
2276       return country_code_error;
2277     }
2278   }
2279   int country_code = temp_number.country_code();
2280   if (country_code != 0) {
2281     string phone_number_region;
2282     GetRegionCodeForCountryCode(country_code, &phone_number_region);
2283     if (phone_number_region != default_region) {
2284       country_metadata =
2285           GetMetadataForRegionOrCallingCode(country_code, phone_number_region);
2286     }
2287   } else if (country_metadata) {
2288     // If no extracted country calling code, use the region supplied instead.
2289     // Note that the national number was already normalized by
2290     // MaybeExtractCountryCode.
2291     country_code = country_metadata->country_code();
2292   }
2293   if (normalized_national_number.length() < kMinLengthForNsn) {
2294     VLOG(2) << "The string supplied is too short to be a phone number.";
2295     return TOO_SHORT_NSN;
2296   }
2297   if (country_metadata) {
2298     string carrier_code;
2299     string potential_national_number(normalized_national_number);
2300     MaybeStripNationalPrefixAndCarrierCode(*country_metadata,
2301                                            &potential_national_number,
2302                                            &carrier_code);
2303     // We require that the NSN remaining after stripping the national prefix
2304     // and carrier code be long enough to be a possible length for the region.
2305     // Otherwise, we don't do the stripping, since the original number could be
2306     // a valid short number.
2307     ValidationResult validation_result =
2308         TestNumberLength(potential_national_number, *country_metadata);
2309     if (validation_result != TOO_SHORT &&
2310         validation_result != IS_POSSIBLE_LOCAL_ONLY &&
2311         validation_result != INVALID_LENGTH) {
2312       normalized_national_number.assign(potential_national_number);
2313       if (keep_raw_input && !carrier_code.empty()) {
2314         temp_number.set_preferred_domestic_carrier_code(carrier_code);
2315       }
2316     }
2317   }
2318   size_t normalized_national_number_length =
2319       normalized_national_number.length();
2320   if (normalized_national_number_length < kMinLengthForNsn) {
2321     VLOG(2) << "The string supplied is too short to be a phone number.";
2322     return TOO_SHORT_NSN;
2323   }
2324   if (normalized_national_number_length > kMaxLengthForNsn) {
2325     VLOG(2) << "The string supplied is too long to be a phone number.";
2326     return TOO_LONG_NSN;
2327   }
2328   temp_number.set_country_code(country_code);
2329   SetItalianLeadingZerosForPhoneNumber(normalized_national_number,
2330       &temp_number);
2331   uint64 number_as_int;
2332   safe_strtou64(normalized_national_number, &number_as_int);
2333   temp_number.set_national_number(number_as_int);
2334   phone_number->Swap(&temp_number);
2335   return NO_PARSING_ERROR;
2336 }
2337 
2338 // Attempts to extract a possible number from the string passed in. This
2339 // currently strips all leading characters that could not be used to start a
2340 // phone number. Characters that can be used to start a phone number are
2341 // defined in the valid_start_char_pattern. If none of these characters are
2342 // found in the number passed in, an empty string is returned. This function
2343 // also attempts to strip off any alternative extensions or endings if two or
2344 // more are present, such as in the case of: (530) 583-6985 x302/x2303. The
2345 // second extension here makes this actually two phone numbers, (530) 583-6985
2346 // x302 and (530) 583-6985 x2303. We remove the second extension so that the
2347 // first number is parsed correctly.
ExtractPossibleNumber(const string & number,string * extracted_number) const2348 void PhoneNumberUtil::ExtractPossibleNumber(const string& number,
2349                                             string* extracted_number) const {
2350   DCHECK(extracted_number);
2351 
2352   UnicodeText number_as_unicode;
2353   number_as_unicode.PointToUTF8(number.data(), static_cast<int>(number.size()));
2354   if (!number_as_unicode.UTF8WasValid()) {
2355     // The input wasn't valid UTF-8. Produce an empty string to indicate an error.
2356     extracted_number->clear();
2357     return;
2358   }
2359   char current_char[5];
2360   int len;
2361   UnicodeText::const_iterator it;
2362   for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) {
2363     len = it.get_utf8(current_char);
2364     current_char[len] = '\0';
2365     if (reg_exps_->valid_start_char_pattern_->FullMatch(current_char)) {
2366       break;
2367     }
2368   }
2369 
2370   if (it == number_as_unicode.end()) {
2371     // No valid start character was found. extracted_number should be set to
2372     // empty string.
2373     extracted_number->clear();
2374     return;
2375   }
2376 
2377   extracted_number->assign(
2378       UnicodeText::UTF8Substring(it, number_as_unicode.end()));
2379   TrimUnwantedEndChars(extracted_number);
2380   if (extracted_number->length() == 0) {
2381     return;
2382   }
2383 
2384   // Now remove any extra numbers at the end.
2385   reg_exps_->capture_up_to_second_number_start_pattern_->
2386       PartialMatch(*extracted_number, extracted_number);
2387 }
2388 
IsPossibleNumber(const PhoneNumber & number) const2389 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const {
2390   ValidationResult result = IsPossibleNumberWithReason(number);
2391   return result == IS_POSSIBLE || result == IS_POSSIBLE_LOCAL_ONLY;
2392 }
2393 
IsPossibleNumberForType(const PhoneNumber & number,const PhoneNumberType type) const2394 bool PhoneNumberUtil::IsPossibleNumberForType(
2395     const PhoneNumber& number, const PhoneNumberType type) const {
2396   ValidationResult result = IsPossibleNumberForTypeWithReason(number, type);
2397   return result == IS_POSSIBLE || result == IS_POSSIBLE_LOCAL_ONLY;
2398 }
2399 
IsPossibleNumberForString(const string & number,const string & region_dialing_from) const2400 bool PhoneNumberUtil::IsPossibleNumberForString(
2401     const string& number,
2402     const string& region_dialing_from) const {
2403   PhoneNumber number_proto;
2404   if (Parse(number, region_dialing_from, &number_proto) == NO_PARSING_ERROR) {
2405     return IsPossibleNumber(number_proto);
2406   } else {
2407     return false;
2408   }
2409 }
2410 
IsPossibleNumberWithReason(const PhoneNumber & number) const2411 PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason(
2412     const PhoneNumber& number) const {
2413   return IsPossibleNumberForTypeWithReason(number, PhoneNumberUtil::UNKNOWN);
2414 }
2415 
2416 PhoneNumberUtil::ValidationResult
IsPossibleNumberForTypeWithReason(const PhoneNumber & number,PhoneNumberType type) const2417 PhoneNumberUtil::IsPossibleNumberForTypeWithReason(const PhoneNumber& number,
2418                                                    PhoneNumberType type) const {
2419   string national_number;
2420   GetNationalSignificantNumber(number, &national_number);
2421   int country_code = number.country_code();
2422   // Note: For regions that share a country calling code, like NANPA numbers, we
2423   // just use the rules from the default region (US in this case) since the
2424   // GetRegionCodeForNumber will not work if the number is possible but not
2425   // valid. There is in fact one country calling code (290) where the possible
2426   // number pattern differs between various regions (Saint Helena and Tristan da
2427   // Cuñha), but this is handled by putting all possible lengths for any country
2428   // with this country calling code in the metadata for the default region in
2429   // this case.
2430   if (!HasValidCountryCallingCode(country_code)) {
2431     return INVALID_COUNTRY_CODE;
2432   }
2433   string region_code;
2434   GetRegionCodeForCountryCode(country_code, &region_code);
2435   // Metadata cannot be NULL because the country calling code is valid.
2436   const PhoneMetadata* metadata =
2437       GetMetadataForRegionOrCallingCode(country_code, region_code);
2438   return TestNumberLength(national_number, *metadata, type);
2439 }
2440 
TruncateTooLongNumber(PhoneNumber * number) const2441 bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const {
2442   if (IsValidNumber(*number)) {
2443     return true;
2444   }
2445   PhoneNumber number_copy(*number);
2446   uint64 national_number = number->national_number();
2447   do {
2448     national_number /= 10;
2449     number_copy.set_national_number(national_number);
2450     if (IsPossibleNumberWithReason(number_copy) == TOO_SHORT ||
2451         national_number == 0) {
2452       return false;
2453     }
2454   } while (!IsValidNumber(number_copy));
2455   number->set_national_number(national_number);
2456   return true;
2457 }
2458 
GetNumberType(const PhoneNumber & number) const2459 PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberType(
2460     const PhoneNumber& number) const {
2461   string region_code;
2462   GetRegionCodeForNumber(number, &region_code);
2463   const PhoneMetadata* metadata =
2464       GetMetadataForRegionOrCallingCode(number.country_code(), region_code);
2465   if (!metadata) {
2466     return UNKNOWN;
2467   }
2468   string national_significant_number;
2469   GetNationalSignificantNumber(number, &national_significant_number);
2470   return GetNumberTypeHelper(national_significant_number, *metadata);
2471 }
2472 
IsValidNumber(const PhoneNumber & number) const2473 bool PhoneNumberUtil::IsValidNumber(const PhoneNumber& number) const {
2474   string region_code;
2475   GetRegionCodeForNumber(number, &region_code);
2476   return IsValidNumberForRegion(number, region_code);
2477 }
2478 
IsValidNumberForRegion(const PhoneNumber & number,const string & region_code) const2479 bool PhoneNumberUtil::IsValidNumberForRegion(const PhoneNumber& number,
2480                                              const string& region_code) const {
2481   int country_code = number.country_code();
2482   const PhoneMetadata* metadata =
2483       GetMetadataForRegionOrCallingCode(country_code, region_code);
2484   if (!metadata ||
2485       ((kRegionCodeForNonGeoEntity != region_code) &&
2486        country_code != GetCountryCodeForValidRegion(region_code))) {
2487     // Either the region code was invalid, or the country calling code for this
2488     // number does not match that of the region code.
2489     return false;
2490   }
2491   string national_number;
2492   GetNationalSignificantNumber(number, &national_number);
2493 
2494   return GetNumberTypeHelper(national_number, *metadata) != UNKNOWN;
2495 }
2496 
IsNumberGeographical(const PhoneNumber & phone_number) const2497 bool PhoneNumberUtil::IsNumberGeographical(
2498     const PhoneNumber& phone_number) const {
2499   return IsNumberGeographical(GetNumberType(phone_number),
2500                               phone_number.country_code());
2501 }
2502 
IsNumberGeographical(PhoneNumberType phone_number_type,int country_calling_code) const2503 bool PhoneNumberUtil::IsNumberGeographical(
2504     PhoneNumberType phone_number_type, int country_calling_code) const {
2505   return phone_number_type == PhoneNumberUtil::FIXED_LINE ||
2506       phone_number_type == PhoneNumberUtil::FIXED_LINE_OR_MOBILE ||
2507       (reg_exps_->geo_mobile_countries_.find(country_calling_code)
2508            != reg_exps_->geo_mobile_countries_.end() &&
2509        phone_number_type == PhoneNumberUtil::MOBILE);
2510 }
2511 
2512 // A helper function to set the values related to leading zeros in a
2513 // PhoneNumber.
SetItalianLeadingZerosForPhoneNumber(const string & national_number,PhoneNumber * phone_number) const2514 void PhoneNumberUtil::SetItalianLeadingZerosForPhoneNumber(
2515     const string& national_number, PhoneNumber* phone_number) const {
2516   if (national_number.length() > 1 && national_number[0] == '0') {
2517     phone_number->set_italian_leading_zero(true);
2518     size_t number_of_leading_zeros = 1;
2519     // Note that if the national number is all "0"s, the last "0" is not
2520     // counted as a leading zero.
2521     while (number_of_leading_zeros < national_number.length() - 1 &&
2522         national_number[number_of_leading_zeros] == '0') {
2523       number_of_leading_zeros++;
2524     }
2525     if (number_of_leading_zeros != 1) {
2526       phone_number->set_number_of_leading_zeros(static_cast<int32_t>(number_of_leading_zeros));
2527     }
2528   }
2529 }
2530 
IsNumberMatchingDesc(const string & national_number,const PhoneNumberDesc & number_desc) const2531 bool PhoneNumberUtil::IsNumberMatchingDesc(
2532     const string& national_number, const PhoneNumberDesc& number_desc) const {
2533   // Check if any possible number lengths are present; if so, we use them to
2534   // avoid checking the validation pattern if they don't match. If they are
2535   // absent, this means they match the general description, which we have
2536   // already checked before checking a specific number type.
2537   int actual_length = static_cast<int>(national_number.length());
2538   if (number_desc.possible_length_size() > 0 &&
2539       std::find(number_desc.possible_length().begin(),
2540                 number_desc.possible_length().end(),
2541                 actual_length) == number_desc.possible_length().end()) {
2542     return false;
2543   }
2544   return IsMatch(*matcher_api_, national_number, number_desc);
2545 }
2546 
GetNumberTypeHelper(const string & national_number,const PhoneMetadata & metadata) const2547 PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberTypeHelper(
2548     const string& national_number, const PhoneMetadata& metadata) const {
2549   if (!IsNumberMatchingDesc(national_number, metadata.general_desc())) {
2550     VLOG(4) << "Number type unknown - doesn't match general national number"
2551             << " pattern.";
2552     return PhoneNumberUtil::UNKNOWN;
2553   }
2554   if (IsNumberMatchingDesc(national_number, metadata.premium_rate())) {
2555     VLOG(4) << "Number is a premium number.";
2556     return PhoneNumberUtil::PREMIUM_RATE;
2557   }
2558   if (IsNumberMatchingDesc(national_number, metadata.toll_free())) {
2559     VLOG(4) << "Number is a toll-free number.";
2560     return PhoneNumberUtil::TOLL_FREE;
2561   }
2562   if (IsNumberMatchingDesc(national_number, metadata.shared_cost())) {
2563     VLOG(4) << "Number is a shared cost number.";
2564     return PhoneNumberUtil::SHARED_COST;
2565   }
2566   if (IsNumberMatchingDesc(national_number, metadata.voip())) {
2567     VLOG(4) << "Number is a VOIP (Voice over IP) number.";
2568     return PhoneNumberUtil::VOIP;
2569   }
2570   if (IsNumberMatchingDesc(national_number, metadata.personal_number())) {
2571     VLOG(4) << "Number is a personal number.";
2572     return PhoneNumberUtil::PERSONAL_NUMBER;
2573   }
2574   if (IsNumberMatchingDesc(national_number, metadata.pager())) {
2575     VLOG(4) << "Number is a pager number.";
2576     return PhoneNumberUtil::PAGER;
2577   }
2578   if (IsNumberMatchingDesc(national_number, metadata.uan())) {
2579     VLOG(4) << "Number is a UAN.";
2580     return PhoneNumberUtil::UAN;
2581   }
2582   if (IsNumberMatchingDesc(national_number, metadata.voicemail())) {
2583     VLOG(4) << "Number is a voicemail number.";
2584     return PhoneNumberUtil::VOICEMAIL;
2585   }
2586 
2587   bool is_fixed_line =
2588       IsNumberMatchingDesc(national_number, metadata.fixed_line());
2589   if (is_fixed_line) {
2590     if (metadata.same_mobile_and_fixed_line_pattern()) {
2591       VLOG(4) << "Fixed-line and mobile patterns equal, number is fixed-line"
2592               << " or mobile";
2593       return PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
2594     } else if (IsNumberMatchingDesc(national_number, metadata.mobile())) {
2595       VLOG(4) << "Fixed-line and mobile patterns differ, but number is "
2596               << "still fixed-line or mobile";
2597       return PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
2598     }
2599     VLOG(4) << "Number is a fixed line number.";
2600     return PhoneNumberUtil::FIXED_LINE;
2601   }
2602   // Otherwise, test to see if the number is mobile. Only do this if certain
2603   // that the patterns for mobile and fixed line aren't the same.
2604   if (!metadata.same_mobile_and_fixed_line_pattern() &&
2605       IsNumberMatchingDesc(national_number, metadata.mobile())) {
2606     VLOG(4) << "Number is a mobile number.";
2607     return PhoneNumberUtil::MOBILE;
2608   }
2609   VLOG(4) << "Number type unknown - doesn\'t match any specific number type"
2610           << " pattern.";
2611   return PhoneNumberUtil::UNKNOWN;
2612 }
2613 
GetNationalSignificantNumber(const PhoneNumber & number,string * national_number) const2614 void PhoneNumberUtil::GetNationalSignificantNumber(
2615     const PhoneNumber& number,
2616     string* national_number) const {
2617   DCHECK(national_number);
2618   // If leading zero(s) have been set, we prefix this now. Note this is not a
2619   // national prefix. Ensure the number of leading zeros is at least 0 so we
2620   // don't crash in the case of malicious input.
2621   StrAppend(national_number, number.italian_leading_zero() ?
2622       string(std::max(number.number_of_leading_zeros(), 0), '0') : "");
2623   StrAppend(national_number, number.national_number());
2624 }
2625 
GetLengthOfGeographicalAreaCode(const PhoneNumber & number) const2626 int PhoneNumberUtil::GetLengthOfGeographicalAreaCode(
2627     const PhoneNumber& number) const {
2628   string region_code;
2629   GetRegionCodeForNumber(number, &region_code);
2630   const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
2631   if (!metadata) {
2632     return 0;
2633   }
2634   // If a country doesn't use a national prefix, and this number doesn't have an
2635   // Italian leading zero, we assume it is a closed dialling plan with no area
2636   // codes.
2637   if (!metadata->has_national_prefix() && !number.italian_leading_zero()) {
2638     return 0;
2639   }
2640 
2641   PhoneNumberType type = GetNumberType(number);
2642   int country_calling_code = number.country_code();
2643   if (type == PhoneNumberUtil::MOBILE &&
2644       reg_exps_->geo_mobile_countries_without_mobile_area_codes_.find(
2645           country_calling_code) !=
2646           reg_exps_->geo_mobile_countries_without_mobile_area_codes_.end()) {
2647     return 0;
2648   }
2649 
2650   if (!IsNumberGeographical(type, country_calling_code)) {
2651     return 0;
2652   }
2653 
2654   return GetLengthOfNationalDestinationCode(number);
2655 }
2656 
GetLengthOfNationalDestinationCode(const PhoneNumber & number) const2657 int PhoneNumberUtil::GetLengthOfNationalDestinationCode(
2658     const PhoneNumber& number) const {
2659   PhoneNumber copied_proto(number);
2660   if (number.has_extension()) {
2661     // Clear the extension so it's not included when formatting.
2662     copied_proto.clear_extension();
2663   }
2664 
2665   string formatted_number;
2666   Format(copied_proto, INTERNATIONAL, &formatted_number);
2667   const scoped_ptr<RegExpInput> i18n_number(
2668       reg_exps_->regexp_factory_->CreateInput(formatted_number));
2669   string digit_group;
2670   string ndc;
2671   string third_group;
2672   for (int i = 0; i < 3; ++i) {
2673     if (!reg_exps_->capturing_ascii_digits_pattern_->FindAndConsume(
2674             i18n_number.get(), &digit_group)) {
2675       // We should find at least three groups.
2676       return 0;
2677     }
2678     if (i == 1) {
2679       ndc = digit_group;
2680     } else if (i == 2) {
2681       third_group = digit_group;
2682     }
2683   }
2684 
2685   if (GetNumberType(number) == MOBILE) {
2686     // For example Argentinian mobile numbers, when formatted in the
2687     // international format, are in the form of +54 9 NDC XXXX.... As a result,
2688     // we take the length of the third group (NDC) and add the length of the
2689     // mobile token, which also forms part of the national significant number.
2690     // This assumes that the mobile token is always formatted separately from
2691     // the rest of the phone number.
2692     string mobile_token;
2693     GetCountryMobileToken(number.country_code(), &mobile_token);
2694     if (!mobile_token.empty()) {
2695       return static_cast<int>(third_group.size() + mobile_token.size());
2696     }
2697   }
2698   return static_cast<int>(ndc.size());
2699 }
2700 
GetCountryMobileToken(int country_calling_code,string * mobile_token) const2701 void PhoneNumberUtil::GetCountryMobileToken(int country_calling_code,
2702                                             string* mobile_token) const {
2703   DCHECK(mobile_token);
2704   std::map<int, char>::iterator it = reg_exps_->mobile_token_mappings_.find(
2705       country_calling_code);
2706   if (it != reg_exps_->mobile_token_mappings_.end()) {
2707     *mobile_token = it->second;
2708   } else {
2709     mobile_token->assign("");
2710   }
2711 }
2712 
NormalizeDigitsOnly(string * number) const2713 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const {
2714   DCHECK(number);
2715   const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp(
2716       StrCat("[^", kDigits, "]"));
2717   // Delete everything that isn't valid digits.
2718   non_digits_pattern.GlobalReplace(number, "");
2719   // Normalize all decimal digits to ASCII digits.
2720   number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
2721 }
2722 
NormalizeDiallableCharsOnly(string * number) const2723 void PhoneNumberUtil::NormalizeDiallableCharsOnly(string* number) const {
2724   DCHECK(number);
2725   NormalizeHelper(reg_exps_->diallable_char_mappings_,
2726                   true /* remove non matches */, number);
2727 }
2728 
IsAlphaNumber(const string & number) const2729 bool PhoneNumberUtil::IsAlphaNumber(const string& number) const {
2730   if (!IsViablePhoneNumber(number)) {
2731     // Number is too short, or doesn't match the basic phone number pattern.
2732     return false;
2733   }
2734   // Copy the number, since we are going to try and strip the extension from it.
2735   string number_copy(number);
2736   string extension;
2737   MaybeStripExtension(&number_copy, &extension);
2738   return reg_exps_->valid_alpha_phone_pattern_->FullMatch(number_copy);
2739 }
2740 
ConvertAlphaCharactersInNumber(string * number) const2741 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const {
2742   DCHECK(number);
2743   NormalizeHelper(reg_exps_->alpha_phone_mappings_, false, number);
2744 }
2745 
2746 // Normalizes a string of characters representing a phone number. This performs
2747 // the following conversions:
2748 //   - Punctuation is stripped.
2749 //   For ALPHA/VANITY numbers:
2750 //   - Letters are converted to their numeric representation on a telephone
2751 //     keypad. The keypad used here is the one defined in ITU Recommendation
2752 //     E.161. This is only done if there are 3 or more letters in the number, to
2753 //     lessen the risk that such letters are typos.
2754 //   For other numbers:
2755 //   - Wide-ascii digits are converted to normal ASCII (European) digits.
2756 //   - Arabic-Indic numerals are converted to European numerals.
2757 //   - Spurious alpha characters are stripped.
Normalize(string * number) const2758 void PhoneNumberUtil::Normalize(string* number) const {
2759   DCHECK(number);
2760   if (reg_exps_->valid_alpha_phone_pattern_->PartialMatch(*number)) {
2761     NormalizeHelper(reg_exps_->alpha_phone_mappings_, true, number);
2762   }
2763   NormalizeDigitsOnly(number);
2764 }
2765 
2766 // Checks to see if the string of characters could possibly be a phone number at
2767 // all. At the moment, checks to see that the string begins with at least 3
2768 // digits, ignoring any punctuation commonly found in phone numbers.  This
2769 // method does not require the number to be normalized in advance - but does
2770 // assume that leading non-number symbols have been removed, such as by the
2771 // method ExtractPossibleNumber.
IsViablePhoneNumber(const string & number) const2772 bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) const {
2773   if (number.length() < kMinLengthForNsn) {
2774     return false;
2775   }
2776   return reg_exps_->valid_phone_number_pattern_->FullMatch(number);
2777 }
2778 
2779 // Strips the IDD from the start of the number if present. Helper function used
2780 // by MaybeStripInternationalPrefixAndNormalize.
ParsePrefixAsIdd(const RegExp & idd_pattern,string * number) const2781 bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern,
2782                                        string* number) const {
2783   DCHECK(number);
2784   const scoped_ptr<RegExpInput> number_copy(
2785       reg_exps_->regexp_factory_->CreateInput(*number));
2786   // First attempt to strip the idd_pattern at the start, if present. We make a
2787   // copy so that we can revert to the original string if necessary.
2788   if (idd_pattern.Consume(number_copy.get())) {
2789     // Only strip this if the first digit after the match is not a 0, since
2790     // country calling codes cannot begin with 0.
2791     string extracted_digit;
2792     if (reg_exps_->capturing_digit_pattern_->PartialMatch(
2793             number_copy->ToString(), &extracted_digit)) {
2794       NormalizeDigitsOnly(&extracted_digit);
2795       if (extracted_digit == "0") {
2796         return false;
2797       }
2798     }
2799     number->assign(number_copy->ToString());
2800     return true;
2801   }
2802   return false;
2803 }
2804 
2805 // Strips any international prefix (such as +, 00, 011) present in the number
2806 // provided, normalizes the resulting number, and indicates if an international
2807 // prefix was present.
2808 //
2809 // possible_idd_prefix represents the international direct dialing prefix from
2810 // the region we think this number may be dialed in.
2811 // Returns true if an international dialing prefix could be removed from the
2812 // number, otherwise false if the number did not seem to be in international
2813 // format.
2814 PhoneNumber::CountryCodeSource
MaybeStripInternationalPrefixAndNormalize(const string & possible_idd_prefix,string * number) const2815 PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize(
2816     const string& possible_idd_prefix,
2817     string* number) const {
2818   DCHECK(number);
2819   if (number->empty()) {
2820     return PhoneNumber::FROM_DEFAULT_COUNTRY;
2821   }
2822   const scoped_ptr<RegExpInput> number_string_piece(
2823       reg_exps_->regexp_factory_->CreateInput(*number));
2824   if (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get())) {
2825     number->assign(number_string_piece->ToString());
2826     // Can now normalize the rest of the number since we've consumed the "+"
2827     // sign at the start.
2828     Normalize(number);
2829     return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN;
2830   }
2831   // Attempt to parse the first digits as an international prefix.
2832   const RegExp& idd_pattern =
2833       reg_exps_->regexp_cache_->GetRegExp(possible_idd_prefix);
2834   Normalize(number);
2835   return ParsePrefixAsIdd(idd_pattern, number)
2836       ? PhoneNumber::FROM_NUMBER_WITH_IDD
2837       : PhoneNumber::FROM_DEFAULT_COUNTRY;
2838 }
2839 
2840 // Strips any national prefix (such as 0, 1) present in the number provided.
2841 // The number passed in should be the normalized telephone number that we wish
2842 // to strip any national dialing prefix from. The metadata should be for the
2843 // region that we think this number is from. Returns true if a national prefix
2844 // and/or carrier code was stripped.
MaybeStripNationalPrefixAndCarrierCode(const PhoneMetadata & metadata,string * number,string * carrier_code) const2845 bool PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode(
2846     const PhoneMetadata& metadata,
2847     string* number,
2848     string* carrier_code) const {
2849   DCHECK(number);
2850   string carrier_code_temp;
2851   const string& possible_national_prefix =
2852       metadata.national_prefix_for_parsing();
2853   if (number->empty() || possible_national_prefix.empty()) {
2854     // Early return for numbers of zero length or with no national prefix
2855     // possible.
2856     return false;
2857   }
2858   // We use two copies here since Consume modifies the phone number, and if the
2859   // first if-clause fails the number will already be changed.
2860   const scoped_ptr<RegExpInput> number_copy(
2861       reg_exps_->regexp_factory_->CreateInput(*number));
2862   const scoped_ptr<RegExpInput> number_copy_without_transform(
2863       reg_exps_->regexp_factory_->CreateInput(*number));
2864   string number_string_copy(*number);
2865   string captured_part_of_prefix;
2866   const PhoneNumberDesc& general_desc = metadata.general_desc();
2867   // Check if the original number is viable.
2868   bool is_viable_original_number =
2869       IsMatch(*matcher_api_, *number, general_desc);
2870   // Attempt to parse the first digits as a national prefix. We make a
2871   // copy so that we can revert to the original string if necessary.
2872   const string& transform_rule = metadata.national_prefix_transform_rule();
2873   const RegExp& possible_national_prefix_pattern =
2874       reg_exps_->regexp_cache_->GetRegExp(possible_national_prefix);
2875   if (!transform_rule.empty() &&
2876       (possible_national_prefix_pattern.Consume(
2877           number_copy.get(), &carrier_code_temp, &captured_part_of_prefix) ||
2878        possible_national_prefix_pattern.Consume(
2879            number_copy.get(), &captured_part_of_prefix)) &&
2880       !captured_part_of_prefix.empty()) {
2881     // If this succeeded, then we must have had a transform rule and there must
2882     // have been some part of the prefix that we captured.
2883     // We make the transformation and check that the resultant number is still
2884     // viable. If so, replace the number and return.
2885     possible_national_prefix_pattern.Replace(&number_string_copy,
2886                                              transform_rule);
2887     if (is_viable_original_number &&
2888         !IsMatch(*matcher_api_, number_string_copy, general_desc)) {
2889       return false;
2890     }
2891     number->assign(number_string_copy);
2892     if (carrier_code) {
2893       carrier_code->assign(carrier_code_temp);
2894     }
2895   } else if (possible_national_prefix_pattern.Consume(
2896                  number_copy_without_transform.get(), &carrier_code_temp) ||
2897              possible_national_prefix_pattern.Consume(
2898                  number_copy_without_transform.get())) {
2899     VLOG(4) << "Parsed the first digits as a national prefix.";
2900     // If captured_part_of_prefix is empty, this implies nothing was captured by
2901     // the capturing groups in possible_national_prefix; therefore, no
2902     // transformation is necessary, and we just remove the national prefix.
2903     const string number_copy_as_string =
2904         number_copy_without_transform->ToString();
2905     if (is_viable_original_number &&
2906         !IsMatch(*matcher_api_, number_copy_as_string, general_desc)) {
2907       return false;
2908     }
2909     number->assign(number_copy_as_string);
2910     if (carrier_code) {
2911       carrier_code->assign(carrier_code_temp);
2912     }
2913   } else {
2914     return false;
2915     VLOG(4) << "The first digits did not match the national prefix.";
2916   }
2917   return true;
2918 }
2919 
2920 // Strips any extension (as in, the part of the number dialled after the call is
2921 // connected, usually indicated with extn, ext, x or similar) from the end of
2922 // the number, and returns it. The number passed in should be non-normalized.
MaybeStripExtension(string * number,std::string * extension) const2923 bool PhoneNumberUtil::MaybeStripExtension(string* number,  std::string* extension)
2924     const {
2925   DCHECK(number);
2926   DCHECK(extension);
2927   // There are six extension capturing groups in the regular expression.
2928   string possible_extension_one;
2929   string possible_extension_two;
2930   string possible_extension_three;
2931   string possible_extension_four;
2932   string possible_extension_five;
2933   string possible_extension_six;
2934   string number_copy(*number);
2935   const scoped_ptr<RegExpInput> number_copy_as_regexp_input(
2936       reg_exps_->regexp_factory_->CreateInput(number_copy));
2937   if (reg_exps_->extn_pattern_->Consume(
2938           number_copy_as_regexp_input.get(), false, &possible_extension_one,
2939           &possible_extension_two, &possible_extension_three,
2940           &possible_extension_four, &possible_extension_five,
2941           &possible_extension_six)) {
2942     // Replace the extensions in the original string here.
2943     reg_exps_->extn_pattern_->Replace(&number_copy, "");
2944     // If we find a potential extension, and the number preceding this is a
2945     // viable number, we assume it is an extension.
2946     if ((!possible_extension_one.empty() || !possible_extension_two.empty() ||
2947          !possible_extension_three.empty() ||
2948          !possible_extension_four.empty() || !possible_extension_five.empty() ||
2949          !possible_extension_six.empty()) &&
2950         IsViablePhoneNumber(number_copy)) {
2951       number->assign(number_copy);
2952       if (!possible_extension_one.empty()) {
2953         extension->assign(possible_extension_one);
2954       } else if (!possible_extension_two.empty()) {
2955         extension->assign(possible_extension_two);
2956       } else if (!possible_extension_three.empty()) {
2957         extension->assign(possible_extension_three);
2958       } else if (!possible_extension_four.empty()) {
2959         extension->assign(possible_extension_four);
2960       } else if (!possible_extension_five.empty()) {
2961         extension->assign(possible_extension_five);
2962       } else if (!possible_extension_six.empty()) {
2963         extension->assign(possible_extension_six);
2964       }
2965       return true;
2966     }
2967   }
2968   return false;
2969 }
2970 
2971 // Extracts country calling code from national_number, and returns it. It
2972 // assumes that the leading plus sign or IDD has already been removed. Returns 0
2973 // if national_number doesn't start with a valid country calling code, and
2974 // leaves national_number unmodified. Assumes the national_number is at least 3
2975 // characters long.
ExtractCountryCode(string * national_number) const2976 int PhoneNumberUtil::ExtractCountryCode(string* national_number) const {
2977   int potential_country_code;
2978   if (national_number->empty() || (national_number->at(0) == '0')) {
2979     // Country codes do not begin with a '0'.
2980     return 0;
2981   }
2982   for (size_t i = 1; i <= kMaxLengthCountryCode; ++i) {
2983     safe_strto32(national_number->substr(0, i), &potential_country_code);
2984     string region_code;
2985     GetRegionCodeForCountryCode(potential_country_code, &region_code);
2986     if (region_code != RegionCode::GetUnknown()) {
2987       national_number->erase(0, i);
2988       return potential_country_code;
2989     }
2990   }
2991   return 0;
2992 }
2993 
2994 // Tries to extract a country calling code from a number. Country calling codes
2995 // are extracted in the following ways:
2996 //   - by stripping the international dialing prefix of the region the person
2997 //   is dialing from, if this is present in the number, and looking at the next
2998 //   digits
2999 //   - by stripping the '+' sign if present and then looking at the next digits
3000 //   - by comparing the start of the number and the country calling code of the
3001 //   default region. If the number is not considered possible for the numbering
3002 //   plan of the default region initially, but starts with the country calling
3003 //   code of this region, validation will be reattempted after stripping this
3004 //   country calling code. If this number is considered a possible number, then
3005 //   the first digits will be considered the country calling code and removed as
3006 //   such.
3007 //
3008 //   Returns NO_PARSING_ERROR if a country calling code was successfully
3009 //   extracted or none was present, or the appropriate error otherwise, such as
3010 //   if a + was present but it was not followed by a valid country calling code.
3011 //   If NO_PARSING_ERROR is returned, the national_number without the country
3012 //   calling code is populated, and the country_code of the phone_number passed
3013 //   in is set to the country calling code if found, otherwise to 0.
MaybeExtractCountryCode(const PhoneMetadata * default_region_metadata,bool keep_raw_input,string * national_number,PhoneNumber * phone_number) const3014 PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode(
3015     const PhoneMetadata* default_region_metadata,
3016     bool keep_raw_input,
3017     string* national_number,
3018     PhoneNumber* phone_number) const {
3019   DCHECK(national_number);
3020   DCHECK(phone_number);
3021   // Set the default prefix to be something that will never match if there is no
3022   // default region.
3023   string possible_country_idd_prefix = default_region_metadata
3024       ?  default_region_metadata->international_prefix()
3025       : "NonMatch";
3026   PhoneNumber::CountryCodeSource country_code_source =
3027       MaybeStripInternationalPrefixAndNormalize(possible_country_idd_prefix,
3028                                                 national_number);
3029   if (keep_raw_input) {
3030     phone_number->set_country_code_source(country_code_source);
3031   }
3032   if (country_code_source != PhoneNumber::FROM_DEFAULT_COUNTRY) {
3033     if (national_number->length() <= kMinLengthForNsn) {
3034       VLOG(2) << "Phone number had an IDD, but after this was not "
3035               << "long enough to be a viable phone number.";
3036       return TOO_SHORT_AFTER_IDD;
3037     }
3038     int potential_country_code = ExtractCountryCode(national_number);
3039     if (potential_country_code != 0) {
3040       phone_number->set_country_code(potential_country_code);
3041       return NO_PARSING_ERROR;
3042     }
3043     // If this fails, they must be using a strange country calling code that we
3044     // don't recognize, or that doesn't exist.
3045     return INVALID_COUNTRY_CODE_ERROR;
3046   } else if (default_region_metadata) {
3047     // Check to see if the number starts with the country calling code for the
3048     // default region. If so, we remove the country calling code, and do some
3049     // checks on the validity of the number before and after.
3050     int default_country_code = default_region_metadata->country_code();
3051     string default_country_code_string(SimpleItoa(default_country_code));
3052     VLOG(4) << "Possible country calling code: " << default_country_code_string;
3053     string potential_national_number;
3054     if (TryStripPrefixString(*national_number,
3055                              default_country_code_string,
3056                              &potential_national_number)) {
3057       const PhoneNumberDesc& general_num_desc =
3058           default_region_metadata->general_desc();
3059       MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata,
3060                                              &potential_national_number,
3061                                              NULL);
3062       VLOG(4) << "Number without country calling code prefix";
3063       // If the number was not valid before but is valid now, or if it was too
3064       // long before, we consider the number with the country code stripped to
3065       // be a better result and keep that instead.
3066       if ((!IsMatch(*matcher_api_, *national_number, general_num_desc) &&
3067           IsMatch(
3068               *matcher_api_, potential_national_number, general_num_desc)) ||
3069           TestNumberLength(*national_number, *default_region_metadata) ==
3070               TOO_LONG) {
3071         national_number->assign(potential_national_number);
3072         if (keep_raw_input) {
3073           phone_number->set_country_code_source(
3074               PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
3075         }
3076         phone_number->set_country_code(default_country_code);
3077         return NO_PARSING_ERROR;
3078       }
3079     }
3080   }
3081   // No country calling code present. Set the country_code to 0.
3082   phone_number->set_country_code(0);
3083   return NO_PARSING_ERROR;
3084 }
3085 
IsNumberMatch(const PhoneNumber & first_number_in,const PhoneNumber & second_number_in) const3086 PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatch(
3087     const PhoneNumber& first_number_in,
3088     const PhoneNumber& second_number_in) const {
3089   // We only are about the fields that uniquely define a number, so we copy
3090   // these across explicitly.
3091   PhoneNumber first_number;
3092   CopyCoreFieldsOnly(first_number_in, &first_number);
3093   PhoneNumber second_number;
3094   CopyCoreFieldsOnly(second_number_in, &second_number);
3095   // Early exit if both had extensions and these are different.
3096   if (first_number.has_extension() && second_number.has_extension() &&
3097       first_number.extension() != second_number.extension()) {
3098     return NO_MATCH;
3099   }
3100   int first_number_country_code = first_number.country_code();
3101   int second_number_country_code = second_number.country_code();
3102   // Both had country calling code specified.
3103   if (first_number_country_code != 0 && second_number_country_code != 0) {
3104     if (ExactlySameAs(first_number, second_number)) {
3105       return EXACT_MATCH;
3106     } else if (first_number_country_code == second_number_country_code &&
3107                IsNationalNumberSuffixOfTheOther(first_number, second_number)) {
3108       // A SHORT_NSN_MATCH occurs if there is a difference because of the
3109       // presence or absence of an 'Italian leading zero', the presence or
3110       // absence of an extension, or one NSN being a shorter variant of the
3111       // other.
3112       return SHORT_NSN_MATCH;
3113     }
3114     // This is not a match.
3115     return NO_MATCH;
3116   }
3117   // Checks cases where one or both country calling codes were not specified. To
3118   // make equality checks easier, we first set the country_code fields to be
3119   // equal.
3120   first_number.set_country_code(second_number_country_code);
3121   // If all else was the same, then this is an NSN_MATCH.
3122   if (ExactlySameAs(first_number, second_number)) {
3123     return NSN_MATCH;
3124   }
3125   if (IsNationalNumberSuffixOfTheOther(first_number, second_number)) {
3126     return SHORT_NSN_MATCH;
3127   }
3128   return NO_MATCH;
3129 }
3130 
IsNumberMatchWithTwoStrings(const string & first_number,const string & second_number) const3131 PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithTwoStrings(
3132     const string& first_number,
3133     const string& second_number) const {
3134   PhoneNumber first_number_as_proto;
3135   ErrorType error_type =
3136       Parse(first_number, RegionCode::GetUnknown(), &first_number_as_proto);
3137   if (error_type == NO_PARSING_ERROR) {
3138     return IsNumberMatchWithOneString(first_number_as_proto, second_number);
3139   }
3140   if (error_type == INVALID_COUNTRY_CODE_ERROR) {
3141     PhoneNumber second_number_as_proto;
3142     ErrorType error_type = Parse(second_number, RegionCode::GetUnknown(),
3143                                  &second_number_as_proto);
3144     if (error_type == NO_PARSING_ERROR) {
3145       return IsNumberMatchWithOneString(second_number_as_proto, first_number);
3146     }
3147     if (error_type == INVALID_COUNTRY_CODE_ERROR) {
3148       error_type  = ParseHelper(first_number, RegionCode::GetUnknown(), false,
3149                                 false, &first_number_as_proto);
3150       if (error_type == NO_PARSING_ERROR) {
3151         error_type = ParseHelper(second_number, RegionCode::GetUnknown(), false,
3152                                  false, &second_number_as_proto);
3153         if (error_type == NO_PARSING_ERROR) {
3154           return IsNumberMatch(first_number_as_proto, second_number_as_proto);
3155         }
3156       }
3157     }
3158   }
3159   // One or more of the phone numbers we are trying to match is not a viable
3160   // phone number.
3161   return INVALID_NUMBER;
3162 }
3163 
IsNumberMatchWithOneString(const PhoneNumber & first_number,const string & second_number) const3164 PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithOneString(
3165     const PhoneNumber& first_number,
3166     const string& second_number) const {
3167   // First see if the second number has an implicit country calling code, by
3168   // attempting to parse it.
3169   PhoneNumber second_number_as_proto;
3170   ErrorType error_type =
3171       Parse(second_number, RegionCode::GetUnknown(), &second_number_as_proto);
3172   if (error_type == NO_PARSING_ERROR) {
3173     return IsNumberMatch(first_number, second_number_as_proto);
3174   }
3175   if (error_type == INVALID_COUNTRY_CODE_ERROR) {
3176     // The second number has no country calling code. EXACT_MATCH is no longer
3177     // possible.  We parse it as if the region was the same as that for the
3178     // first number, and if EXACT_MATCH is returned, we replace this with
3179     // NSN_MATCH.
3180     string first_number_region;
3181     GetRegionCodeForCountryCode(first_number.country_code(),
3182                                 &first_number_region);
3183     if (first_number_region != RegionCode::GetUnknown()) {
3184       PhoneNumber second_number_with_first_number_region;
3185       Parse(second_number, first_number_region,
3186             &second_number_with_first_number_region);
3187       MatchType match = IsNumberMatch(first_number,
3188                                       second_number_with_first_number_region);
3189       if (match == EXACT_MATCH) {
3190         return NSN_MATCH;
3191       }
3192       return match;
3193     } else {
3194       // If the first number didn't have a valid country calling code, then we
3195       // parse the second number without one as well.
3196       error_type = ParseHelper(second_number, RegionCode::GetUnknown(), false,
3197                                false, &second_number_as_proto);
3198       if (error_type == NO_PARSING_ERROR) {
3199         return IsNumberMatch(first_number, second_number_as_proto);
3200       }
3201     }
3202   }
3203   // One or more of the phone numbers we are trying to match is not a viable
3204   // phone number.
3205   return INVALID_NUMBER;
3206 }
3207 
GetAsYouTypeFormatter(const string & region_code) const3208 AsYouTypeFormatter* PhoneNumberUtil::GetAsYouTypeFormatter(
3209     const string& region_code) const {
3210   return new AsYouTypeFormatter(region_code);
3211 }
3212 
CanBeInternationallyDialled(const PhoneNumber & number) const3213 bool PhoneNumberUtil::CanBeInternationallyDialled(
3214     const PhoneNumber& number) const {
3215   string region_code;
3216   GetRegionCodeForNumber(number, &region_code);
3217   const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
3218   if (!metadata) {
3219     // Note numbers belonging to non-geographical entities (e.g. +800 numbers)
3220     // are always internationally diallable, and will be caught here.
3221     return true;
3222   }
3223   string national_significant_number;
3224   GetNationalSignificantNumber(number, &national_significant_number);
3225   return !IsNumberMatchingDesc(
3226       national_significant_number, metadata->no_international_dialling());
3227 }
3228 
3229 }  // namespace phonenumbers
3230 }  // namespace i18n
3231