1 // Copyright (C) 2009 The Libphonenumber Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Utility for international phone numbers. 16 17 #ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ 18 #define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ 19 20 #include <stddef.h> 21 #include <list> 22 #include <map> 23 #include <set> 24 #include <string> 25 #include <utility> 26 #include <vector> 27 28 #include "phonenumbers/base/basictypes.h" 29 #include "phonenumbers/base/memory/scoped_ptr.h" 30 #include "phonenumbers/base/memory/singleton.h" 31 #include "phonenumbers/phonenumber.pb.h" 32 33 class TelephoneNumber; 34 35 namespace i18n { 36 namespace phonenumbers { 37 38 using google::protobuf::RepeatedPtrField; 39 using std::string; 40 41 class AsYouTypeFormatter; 42 class Logger; 43 class MatcherApi; 44 class NumberFormat; 45 class PhoneMetadata; 46 class PhoneNumberDesc; 47 class PhoneNumberRegExpsAndMappings; 48 class RegExp; 49 50 // NOTE: A lot of methods in this class require Region Code strings. These must 51 // be provided using CLDR two-letter region-code format. These should be in 52 // upper-case. The list of the codes can be found here: 53 // http://www.unicode.org/cldr/charts/30/supplemental/territory_information.html 54 55 class PhoneNumberUtil : public Singleton<PhoneNumberUtil> { 56 private: 57 friend class AsYouTypeFormatter; 58 friend class PhoneNumberMatcher; 59 friend class PhoneNumberMatcherRegExps; 60 friend class PhoneNumberMatcherTest; 61 friend class PhoneNumberRegExpsAndMappings; 62 friend class PhoneNumberUtilTest; 63 friend class ShortNumberInfo; 64 friend class ShortNumberInfoTest; 65 friend class Singleton<PhoneNumberUtil>; 66 67 public: 68 ~PhoneNumberUtil(); 69 static const char kRegionCodeForNonGeoEntity[]; 70 71 // INTERNATIONAL and NATIONAL formats are consistent with the definition 72 // in ITU-T Recommendation E.123. However we follow local conventions such as 73 // using '-' instead of whitespace as separators. For example, the number of 74 // the Google Switzerland office will be written as "+41 44 668 1800" in 75 // INTERNATIONAL format, and as "044 668 1800" in NATIONAL format. E164 76 // format is as per INTERNATIONAL format but with no formatting applied e.g. 77 // "+41446681800". RFC3966 is as per INTERNATIONAL format, but with all spaces 78 // and other separating symbols replaced with a hyphen, and with any phone 79 // number extension appended with ";ext=". It also will have a prefix of 80 // "tel:" added, e.g. "tel:+41-44-668-1800". 81 enum PhoneNumberFormat { 82 E164, 83 INTERNATIONAL, 84 NATIONAL, 85 RFC3966 86 }; 87 88 static const PhoneNumberFormat kMaxNumberFormat = RFC3966; 89 90 // Type of phone numbers. 91 enum PhoneNumberType { 92 FIXED_LINE, 93 MOBILE, 94 // In some regions (e.g. the USA), it is impossible to distinguish between 95 // fixed-line and mobile numbers by looking at the phone number itself. 96 FIXED_LINE_OR_MOBILE, 97 // Freephone lines 98 TOLL_FREE, 99 PREMIUM_RATE, 100 // The cost of this call is shared between the caller and the recipient, and 101 // is hence typically less than PREMIUM_RATE calls. See 102 // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information. 103 SHARED_COST, 104 // Voice over IP numbers. This includes TSoIP (Telephony Service over IP). 105 VOIP, 106 // A personal number is associated with a particular person, and may be 107 // routed to either a MOBILE or FIXED_LINE number. Some more information can 108 // be found here: http://en.wikipedia.org/wiki/Personal_Numbers 109 PERSONAL_NUMBER, 110 PAGER, 111 // Used for "Universal Access Numbers" or "Company Numbers". They may be 112 // further routed to specific offices, but allow one number to be used for a 113 // company. 114 UAN, 115 // Used for "Voice Mail Access Numbers". 116 VOICEMAIL, 117 // A phone number is of type UNKNOWN when it does not fit any of the known 118 // patterns for a specific region. 119 UNKNOWN 120 }; 121 122 static const PhoneNumberType kMaxNumberType = UNKNOWN; 123 124 // Types of phone number matches. See detailed description beside the 125 // IsNumberMatch() method. 126 enum MatchType { 127 INVALID_NUMBER, // NOT_A_NUMBER in the java version. 128 NO_MATCH, 129 SHORT_NSN_MATCH, 130 NSN_MATCH, 131 EXACT_MATCH, 132 }; 133 134 static const MatchType kMaxMatchType = EXACT_MATCH; 135 136 enum ErrorType { 137 NO_PARSING_ERROR, 138 INVALID_COUNTRY_CODE_ERROR, // INVALID_COUNTRY_CODE in the java version. 139 NOT_A_NUMBER, 140 TOO_SHORT_AFTER_IDD, 141 TOO_SHORT_NSN, 142 TOO_LONG_NSN, // TOO_LONG in the java version. 143 }; 144 145 static const ErrorType kMaxErrorType = TOO_LONG_NSN; 146 147 // Possible outcomes when testing if a PhoneNumber is possible. 148 enum ValidationResult { 149 // The number length matches that of valid numbers for this region. 150 IS_POSSIBLE, 151 // The number length matches that of local numbers for this region only 152 // (i.e. numbers that may be able to be dialled within an area, but do not 153 // have all the information to be dialled from anywhere inside or outside 154 // the country). 155 IS_POSSIBLE_LOCAL_ONLY, 156 // The number has an invalid country calling code. 157 INVALID_COUNTRY_CODE, 158 // The number is shorter than all valid numbers for this region. 159 TOO_SHORT, 160 // The number is longer than the shortest valid numbers for this region, 161 // shorter than the longest valid numbers for this region, and does not 162 // itself have a number length that matches valid numbers for this region. 163 // This can also be returned in the case where 164 // IsPossibleNumberForTypeWithReason was called, and there are no numbers of 165 // this type at all for this region. 166 INVALID_LENGTH, 167 // The number is longer than all valid numbers for this region. 168 TOO_LONG, 169 }; 170 171 static const ValidationResult kMaxValidationResult = TOO_LONG; 172 173 // Returns all regions the library has metadata for. 174 // @returns an unordered set of the two-letter region codes for every 175 // geographical region the library supports 176 void GetSupportedRegions( 177 std::set<string>* regions) const; 178 179 // Returns all global network calling codes the library has metadata for. 180 // @returns an unordered set of the country calling codes for every 181 // non-geographical entity the library supports 182 void GetSupportedGlobalNetworkCallingCodes( 183 std::set<int>* calling_codes) const; 184 185 // Returns all country calling codes the library has metadata for, covering 186 // both non-geographical entities (global network calling codes) and those 187 // used for geographical entities. This could be used to populate a drop-down 188 // box of country calling codes for a phone-number widget, for instance. 189 void GetSupportedCallingCodes(std::set<int>* calling_codes) const; 190 191 // Returns the types for a given region which the library has metadata for. 192 // Will not include FIXED_LINE_OR_MOBILE (if numbers for this non-geographical 193 // entity could be classified as FIXED_LINE_OR_MOBILE, both FIXED_LINE and 194 // MOBILE would be present) and UNKNOWN. 195 // 196 // No types will be returned for invalid or unknown region codes. 197 void GetSupportedTypesForRegion( 198 const string& region_code, 199 std::set<PhoneNumberType>* types) const; 200 201 // Returns the types for a country-code belonging to a non-geographical entity 202 // which the library has metadata for. Will not include FIXED_LINE_OR_MOBILE 203 // (instead both FIXED_LINE and FIXED_LINE_OR_MOBILE (if numbers for this 204 // non-geographical entity could be classified as FIXED_LINE_OR_MOBILE, both 205 // FIXED_LINE and MOBILE would be present) and UNKNOWN. 206 // 207 // No types will be returned for country calling codes that do not map to a 208 // known non-geographical entity. 209 void GetSupportedTypesForNonGeoEntity( 210 int country_calling_code, 211 std::set<PhoneNumberType>* types) const; 212 213 // Gets a PhoneNumberUtil instance to carry out international phone number 214 // formatting, parsing, or validation. The instance is loaded with phone 215 // number metadata for a number of most commonly used regions, as specified by 216 // DEFAULT_REGIONS_. 217 // 218 // The PhoneNumberUtil is implemented as a singleton. Therefore, calling 219 // GetInstance multiple times will only result in one instance being created. 220 static PhoneNumberUtil* GetInstance(); 221 222 // Returns true if the number is a valid vanity (alpha) number such as 800 223 // MICROSOFT. A valid vanity number will start with at least 3 digits and will 224 // have three or more alpha characters. This does not do region-specific 225 // checks - to work out if this number is actually valid for a region, it 226 // should be parsed and methods such as IsPossibleNumberWithReason or 227 // IsValidNumber should be used. 228 bool IsAlphaNumber(const string& number) const; 229 230 // Converts all alpha characters in a number to their respective digits on 231 // a keypad, but retains existing formatting. 232 void ConvertAlphaCharactersInNumber(string* number) const; 233 234 // Normalizes a string of characters representing a phone number. This 235 // converts wide-ascii and arabic-indic numerals to European numerals, and 236 // strips punctuation and alpha characters. 237 void NormalizeDigitsOnly(string* number) const; 238 239 // Normalizes a string of characters representing a phone number. This strips 240 // all characters which are not diallable on a mobile phone keypad (including 241 // all non-ASCII digits). 242 void NormalizeDiallableCharsOnly(string* number) const; 243 244 // Gets the national significant number of a phone number. Note a national 245 // significant number doesn't contain a national prefix or any formatting. 246 void GetNationalSignificantNumber(const PhoneNumber& number, 247 string* national_significant_num) const; 248 249 // Gets the length of the geographical area code from the PhoneNumber object 250 // passed in, so that clients could use it to split a national significant 251 // number into geographical area code and subscriber number. It works in such 252 // a way that the resultant subscriber number should be diallable, at least on 253 // some devices. An example of how this could be used: 254 // 255 // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance()); 256 // PhoneNumber number; 257 // phone_util.Parse("16502530000", "US", &number); 258 // string national_significant_number; 259 // phone_util.GetNationalSignificantNumber(number, 260 // &national_significant_number); 261 // string area_code; 262 // string subscriber_number; 263 // 264 // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number); 265 // if (area_code_length > 0) { 266 // area_code = national_significant_number.substr(0, area_code_length); 267 // subscriber_number = national_significant_number.substr( 268 // area_code_length, string::npos); 269 // } else { 270 // area_code = ""; 271 // subscriber_number = national_significant_number; 272 // } 273 // 274 // N.B.: area code is a very ambiguous concept, so the authors generally 275 // recommend against using it for most purposes, but recommend using the 276 // more general national_number instead. Read the following carefully before 277 // deciding to use this method: 278 // 279 // - geographical area codes change over time, and this method honors those 280 // changes; therefore, it doesn't guarantee the stability of the result it 281 // produces. 282 // - subscriber numbers may not be diallable from all devices (notably mobile 283 // devices, which typically requires the full national_number to be dialled 284 // in most regions). 285 // - most non-geographical numbers have no area codes, including numbers 286 // from non-geographical entities. 287 // - some geographical numbers have no area codes. 288 int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const; 289 290 // Gets the length of the national destination code (NDC) from the PhoneNumber 291 // object passed in, so that clients could use it to split a national 292 // significant number into NDC and subscriber number. The NDC of a phone 293 // number is normally the first group of digit(s) right after the country 294 // calling code when the number is formatted in the international format, if 295 // there is a subscriber number part that follows. 296 // 297 // N.B.: similar to an area code, not all numbers have an NDC! 298 // 299 // An example of how this could be used: 300 // 301 // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance()); 302 // PhoneNumber number; 303 // phone_util.Parse("16502530000", "US", &number); 304 // string national_significant_number; 305 // phone_util.GetNationalSignificantNumber(number, 306 // &national_significant_number); 307 // string national_destination_code; 308 // string subscriber_number; 309 // 310 // int national_destination_code_length = 311 // phone_util.GetLengthOfNationalDestinationCode(number); 312 // if (national_destination_code_length > 0) { 313 // national_destination_code = national_significant_number.substr( 314 // 0, national_destination_code_length); 315 // subscriber_number = national_significant_number.substr( 316 // national_destination_code_length, string::npos); 317 // } else { 318 // national_destination_code = ""; 319 // subscriber_number = national_significant_number; 320 // } 321 // 322 // Refer to the unittests to see the difference between this function and 323 // GetLengthOfGeographicalAreaCode(). 324 int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const; 325 326 // Returns the mobile token for the provided country calling code if it has 327 // one, otherwise returns an empty string. A mobile token is a number inserted 328 // before the area code when dialing a mobile number from that country from 329 // abroad. 330 void GetCountryMobileToken(int country_calling_code, 331 string* mobile_token) const; 332 333 // Formats a phone number in the specified format using default rules. Note 334 // that this does not promise to produce a phone number that the user can 335 // dial from where they are - although we do format in either NATIONAL or 336 // INTERNATIONAL format depending on what the client asks for, we do not 337 // currently support a more abbreviated format, such as for users in the 338 // same area who could potentially dial the number without area code. 339 void Format(const PhoneNumber& number, 340 PhoneNumberFormat number_format, 341 string* formatted_number) const; 342 343 // Formats a phone number in the specified format using client-defined 344 // formatting rules. 345 void FormatByPattern( 346 const PhoneNumber& number, 347 PhoneNumberFormat number_format, 348 const RepeatedPtrField<NumberFormat>& user_defined_formats, 349 string* formatted_number) const; 350 351 // Formats a phone number in national format for dialing using the carrier as 352 // specified in the carrier_code. The carrier_code will always be used 353 // regardless of whether the phone number already has a preferred domestic 354 // carrier code stored. If carrier_code contains an empty string, return the 355 // number in national format without any carrier code. 356 void FormatNationalNumberWithCarrierCode(const PhoneNumber& number, 357 const string& carrier_code, 358 string* formatted_number) const; 359 360 // Formats a phone number in national format for dialing using the carrier as 361 // specified in the preferred_domestic_carrier_code field of the PhoneNumber 362 // object passed in. If that is missing, use the fallback_carrier_code passed 363 // in instead. If there is no preferred_domestic_carrier_code, and the 364 // fallback_carrier_code contains an empty string, return the number in 365 // national format without any carrier code. 366 // 367 // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed 368 // in should take precedence over the number's preferred_domestic_carrier_code 369 // when formatting. 370 void FormatNationalNumberWithPreferredCarrierCode( 371 const PhoneNumber& number, 372 const string& fallback_carrier_code, 373 string* formatted_number) const; 374 375 // Returns a number formatted in such a way that it can be dialed from a 376 // mobile phone in a specific region. If the number cannot be reached from 377 // the region (e.g. some countries block toll-free numbers from being called 378 // outside of the country), the method returns an empty string. 379 void FormatNumberForMobileDialing( 380 const PhoneNumber& number, 381 const string& region_calling_from, 382 bool with_formatting, 383 string* formatted_number) const; 384 385 // Formats a phone number for out-of-country dialing purposes. 386 // 387 // Note this function takes care of the case for calling inside of NANPA 388 // and between Russia and Kazakhstan (who share the same country calling 389 // code). In those cases, no international prefix is used. For regions which 390 // have multiple international prefixes, the number in its INTERNATIONAL 391 // format will be returned instead. 392 void FormatOutOfCountryCallingNumber( 393 const PhoneNumber& number, 394 const string& calling_from, 395 string* formatted_number) const; 396 397 // Formats a phone number using the original phone number format that the 398 // number is parsed from. The original format is embedded in the 399 // country_code_source field of the PhoneNumber object passed in. If such 400 // information is missing, the number will be formatted into the NATIONAL 401 // format by default. When we don't have a formatting pattern for the number, 402 // the method returns the raw input when it is available. 403 // 404 // Note this method guarantees no digit will be inserted, removed or modified 405 // as a result of formatting. 406 void FormatInOriginalFormat(const PhoneNumber& number, 407 const string& region_calling_from, 408 string* formatted_number) const; 409 410 // Formats a phone number for out-of-country dialing purposes. 411 // 412 // Note that in this version, if the number was entered originally using alpha 413 // characters and this version of the number is stored in raw_input, this 414 // representation of the number will be used rather than the digit 415 // representation. Grouping information, as specified by characters such as 416 // "-" and " ", will be retained. 417 // 418 // Caveats: 419 // 1) This will not produce good results if the country calling code is both 420 // present in the raw input _and_ is the start of the national number. This 421 // is not a problem in the regions which typically use alpha numbers. 422 // 2) This will also not produce good results if the raw input has any 423 // grouping information within the first three digits of the national number, 424 // and if the function needs to strip preceding digits/words in the raw input 425 // before these digits. Normally people group the first three digits together 426 // so this is not a huge problem - and will be fixed if it proves to be so. 427 void FormatOutOfCountryKeepingAlphaChars( 428 const PhoneNumber& number, 429 const string& calling_from, 430 string* formatted_number) const; 431 432 // Attempts to extract a valid number from a phone number that is too long to 433 // be valid, and resets the PhoneNumber object passed in to that valid 434 // version. If no valid number could be extracted, the PhoneNumber object 435 // passed in will not be modified. It returns true if a valid phone number can 436 // be successfully extracted. 437 bool TruncateTooLongNumber(PhoneNumber* number) const; 438 439 // Gets the type of a valid phone number, or UNKNOWN if it is invalid. 440 PhoneNumberType GetNumberType(const PhoneNumber& number) const; 441 442 // Tests whether a phone number matches a valid pattern. Note this doesn't 443 // verify the number is actually in use, which is impossible to tell by just 444 // looking at a number itself. 445 // It only verifies whether the parsed, canonicalised number is valid: not 446 // whether a particular series of digits entered by the user is diallable from 447 // the region provided when parsing. For example, the number +41 (0) 78 927 448 // 2696 can be parsed into a number with country code "41" and national 449 // significant number "789272696". This is valid, while the original string 450 // is not diallable. 451 bool IsValidNumber(const PhoneNumber& number) const; 452 453 // Tests whether a phone number is valid for a certain region. Note this 454 // doesn't verify the number is actually in use, which is impossible to tell 455 // by just looking at a number itself. If the country calling code is not the 456 // same as the country calling code for the region, this immediately exits 457 // with false. After this, the specific number pattern rules for the region 458 // are examined. 459 // This is useful for determining for example whether a particular number is 460 // valid for Canada, rather than just a valid NANPA number. 461 // Warning: In most cases, you want to use IsValidNumber instead. For 462 // example, this method will mark numbers from British Crown dependencies 463 // such as the Isle of Man as invalid for the region "GB" (United Kingdom), 464 // since it has its own region code, "IM", which may be undesirable. 465 bool IsValidNumberForRegion( 466 const PhoneNumber& number, 467 const string& region_code) const; 468 469 // Returns the region where a phone number is from. This could be used for 470 // geocoding at the region level. Only guarantees correct results for valid, 471 // full numbers (not short-codes, or invalid numbers). 472 void GetRegionCodeForNumber(const PhoneNumber& number, 473 string* region_code) const; 474 475 // Returns the country calling code for a specific region. For example, 476 // this would be 1 for the United States, and 64 for New Zealand. 477 int GetCountryCodeForRegion(const string& region_code) const; 478 479 // Returns the region code that matches the specific country code. Note that 480 // it is possible that several regions share the same country calling code 481 // (e.g. US and Canada), and in that case, only one of the regions (normally 482 // the one with the largest population) is returned. If the 483 // countryCallingCode entered is valid but doesn't match a specific region 484 // (such as in the case of non-geographical calling codes like 800) the 485 // RegionCode 001 will be returned (corresponding to the value for World in 486 // the UN M.49 schema). 487 void GetRegionCodeForCountryCode(int country_code, string* region_code) const; 488 489 // Populates a list with the region codes that match the specific country 490 // calling code. For non-geographical country calling codes, the region code 491 // 001 is returned. Also, in the case of no region code being found, the list 492 // is left unchanged. 493 void GetRegionCodesForCountryCallingCode( 494 int country_calling_code, 495 std::list<string>* region_codes) const; 496 497 // Checks if this is a region under the North American Numbering Plan 498 // Administration (NANPA). 499 bool IsNANPACountry(const string& region_code) const; 500 501 // Returns the national dialling prefix for a specific region. For example, 502 // this would be 1 for the United States, and 0 for New Zealand. Set 503 // strip_non_digits to true to strip symbols like "~" (which indicates a wait 504 // for a dialling tone) from the prefix returned. If no national prefix is 505 // present, we return an empty string. 506 void GetNddPrefixForRegion(const string& region_code, 507 bool strip_non_digits, 508 string* national_prefix) const; 509 510 // Checks whether a phone number is a possible number. It provides a more 511 // lenient check than IsValidNumber() in the following sense: 512 // 1. It only checks the length of phone numbers. In particular, it doesn't 513 // check starting digits of the number. 514 // 2. It doesn't attempt to figure out the type of the number, but uses 515 // general rules which applies to all types of phone numbers in a 516 // region. Therefore, it is much faster than IsValidNumber(). 517 // 3. For some numbers (particularly fixed-line), many regions have the 518 // concept of area code, which together with subscriber number constitute 519 // the national significant number. It is sometimes okay to dial only the 520 // subscriber number when dialing in the same area. This function will 521 // return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is 522 // passed in. On the other hand, because IsValidNumber() validates using 523 // information on both starting digits (for fixed line numbers, that 524 // would most likely be area codes) and length (obviously includes the 525 // length of area codes for fixed line numbers), it will return false for 526 // the subscriber-number-only version. 527 ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const; 528 529 // Convenience wrapper around IsPossibleNumberWithReason(). Instead of 530 // returning the reason for failure, this method returns true if the number is 531 // either a possible fully-qualified number (containing the area code and 532 // country code), or if the number could be a possible local number (with a 533 // country code, but missing an area code). Local numbers are considered 534 // possible if they could be possibly dialled in this format: if the area code 535 // is needed for a call to connect, the number is not considered possible 536 // without it. 537 bool IsPossibleNumber(const PhoneNumber& number) const; 538 539 // Check whether a phone number is a possible number of a particular type. For 540 // types that don't exist in a particular region, this will return a result 541 // that isn't so useful; it is recommended that you use 542 // GetSupportedTypesForRegion() or GetSupportedTypesForNonGeoEntity() 543 // respectively before calling this method to determine whether you should 544 // call it for this number at all. 545 // 546 // This provides a more lenient check than IsValidNumber() in the following 547 // sense: 548 // 549 // 1. It only checks the length of phone numbers. In particular, it doesn't 550 // check starting digits of the number. 551 // 2. For some numbers (particularly fixed-line), many regions have the 552 // concept of area code, which together with subscriber number constitute 553 // the national significant number. It is sometimes okay to dial only the 554 // subscriber number when dialing in the same area. This function will 555 // return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is 556 // passed in. On the other hand, because IsValidNumber() validates using 557 // information on both starting digits (for fixed line numbers, that 558 // would most likely be area codes) and length (obviously includes the 559 // length of area codes for fixed line numbers), it will return false for 560 // the subscriber-number-only version. 561 ValidationResult IsPossibleNumberForTypeWithReason( 562 const PhoneNumber& number, PhoneNumberType type) const; 563 564 // Convenience wrapper around IsPossibleNumberForTypeWithReason(). Instead of 565 // returning the reason for failure, this method returns true if the number is 566 // either a possible fully-qualified number (containing the area code and 567 // country code), or if the number could be a possible local number (with a 568 // country code, but missing an area code). Local numbers are considered 569 // possible if they could be possibly dialled in this format: if the area code 570 // is needed for a call to connect, the number is not considered possible 571 // without it. 572 bool IsPossibleNumberForType(const PhoneNumber& number, 573 PhoneNumberType type) const; 574 575 // Checks whether a phone number is a possible number given a number in the 576 // form of a string, and the country where the number could be dialed from. 577 // It provides a more lenient check than IsValidNumber(). See 578 // IsPossibleNumber(const PhoneNumber& number) for details. 579 // 580 // This method first parses the number, then invokes 581 // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber 582 // object. 583 // 584 // region_dialing_from represents the region that we are expecting the number 585 // to be dialed from. Note this is different from the region where the number 586 // belongs. For example, the number +1 650 253 0000 is a number that belongs 587 // to US. When written in this form, it could be dialed from any region. When 588 // it is written as 00 1 650 253 0000, it could be dialed from any region 589 // which uses an international dialling prefix of 00. When it is written as 590 // 650 253 0000, it could only be dialed from within the US, and when written 591 // as 253 0000, it could only be dialed from within a smaller area in the US 592 // (Mountain View, CA, to be more specific). 593 bool IsPossibleNumberForString( 594 const string& number, 595 const string& region_dialing_from) const; 596 597 // Returns true if the number can be dialled from outside the region, or 598 // unknown. If the number can only be dialled from within the region, returns 599 // false. Does not check the number is a valid number. Note that, at the 600 // moment, this method does not handle short numbers (which are currently all 601 // presumed to not be diallable from outside their country). 602 bool CanBeInternationallyDialled(const PhoneNumber& number) const; 603 604 // Tests whether a phone number has a geographical association. It checks if 605 // the number is associated with a certain region in the country to which it 606 // belongs. Note that this doesn't verify if the number is actually in use. 607 bool IsNumberGeographical(const PhoneNumber& phone_number) const; 608 609 // Overload of IsNumberGeographical(PhoneNumber), since calculating the phone 610 // number type is expensive; if we have already done this, we don't want to do 611 // it again. 612 bool IsNumberGeographical(PhoneNumberType phone_number_type, 613 int country_calling_code) const; 614 615 // Gets a valid fixed-line number for the specified region. Returns false if 616 // the region was unknown, or the region 001 is passed in. For 001 617 // (representing non-geographical numbers), call 618 // GetExampleNumberForNonGeoEntity instead. 619 bool GetExampleNumber(const string& region_code, 620 PhoneNumber* number) const; 621 622 // Gets an invalid number for the specified region. This is useful for 623 // unit-testing purposes, where you want to test that will happen with an 624 // invalid number. Note that the number that is returned will always be able 625 // to be parsed and will have the correct country code. It may also be a valid 626 // *short* number/code for this region. Validity checking such 627 // numbers is handled with ShortNumberInfo. 628 // 629 // Returns false when an unsupported region or the region 001 (Earth) is 630 // passed in. 631 bool GetInvalidExampleNumber(const string& region_code, 632 PhoneNumber* number) const; 633 634 // Gets a valid number of the specified type for the specified region. 635 // Returns false if the region was unknown or 001, or if no example number of 636 // that type could be found. For 001 (representing non-geographical numbers), 637 // call GetExampleNumberForNonGeoEntity instead. 638 bool GetExampleNumberForType(const string& region_code, 639 PhoneNumberType type, 640 PhoneNumber* number) const; 641 642 // Gets a valid number for the specified type (it may belong to any country). 643 // Returns false when the metadata does not contain such information. This 644 // should only happen when no numbers of this type are allocated anywhere in 645 // the world anymore. 646 bool GetExampleNumberForType(PhoneNumberType type, 647 PhoneNumber* number) const; 648 649 // Gets a valid number for the specified country calling code for a 650 // non-geographical entity. Returns false if the metadata does not contain 651 // such information, or the country calling code passed in does not belong to 652 // a non-geographical entity. 653 bool GetExampleNumberForNonGeoEntity( 654 int country_calling_code, PhoneNumber* number) const; 655 656 // Parses a string and returns it as a phone number in proto buffer format. 657 // The method is quite lenient and looks for a number in the input text 658 // (raw input) and does not check whether the string is definitely only a 659 // phone number. To do this, it ignores punctuation and white-space, as well 660 // as any text before the number (e.g. a leading “Tel: ”) and trims the 661 // non-number bits. It will accept a number in any format (E164, national, 662 // international etc), assuming it can be interpreted with the defaultRegion 663 // supplied. It also attempts to convert any alpha characters into digits 664 // if it thinks this is a vanity number of the type "1800 MICROSOFT". 665 // 666 // This method will return an error if the number is not considered to be a 667 // possible number, and NO_PARSING_ERROR if it is parsed correctly. 668 // Note that validation of whether the number is actually a valid number for 669 // a particular region is not performed. This can be done separately with 670 // IsValidNumber(). 671 // 672 // Note this method canonicalizes the phone number such that different 673 // representations can be easily compared, no matter what form it was 674 // originally entered in (e.g. national, international). If you want to record 675 // context about the number being parsed, such as the raw input that was 676 // entered, how the country code was derived etc. then call 677 // ParseAndKeepRawInput() instead. 678 // 679 // number_to_parse can contain formatting such as +, ( and -, as well as a 680 // phone number extension. It can also be provided in RFC3966 format. 681 // 682 // default_region represents the country that we are expecting the number to 683 // be from. This is only used if the number being parsed is not written in 684 // international format. The country_code for the number in this case would be 685 // stored as that of the default country supplied. If the number is guaranteed 686 // to start with a '+' followed by the country calling code, then 687 // "ZZ" can be supplied. 688 // 689 // Returns an error if the string is not considered to be a viable phone 690 // number (e.g.too few or too many digits) or if no default region was 691 // supplied and the number is not in international format (does not start with 692 // +). 693 ErrorType Parse(const string& number_to_parse, 694 const string& default_region, 695 PhoneNumber* number) const; 696 // Parses a string and returns it in proto buffer format. This method differs 697 // from Parse() in that it always populates the raw_input field of the 698 // protocol buffer with number_to_parse as well as the country_code_source 699 // field. 700 ErrorType ParseAndKeepRawInput(const string& number_to_parse, 701 const string& default_region, 702 PhoneNumber* number) const; 703 704 // Takes two phone numbers and compares them for equality. 705 // 706 // Returns EXACT_MATCH if the country calling code, NSN, presence of a leading 707 // zero for Italian numbers and any extension present are the same. 708 // Returns NSN_MATCH if either or both has no country calling code specified, 709 // and the NSNs and extensions are the same. 710 // Returns SHORT_NSN_MATCH if either or both has no country calling code 711 // specified, or the country calling code specified is the same, and one NSN 712 // could be a shorter version of the other number. This includes the case 713 // where one has an extension specified, and the other does not. 714 // Returns NO_MATCH otherwise. 715 // For example, the numbers +1 345 657 1234 and 657 1234 are a 716 // SHORT_NSN_MATCH. The numbers +1 345 657 1234 and 345 657 are a NO_MATCH. 717 MatchType IsNumberMatch(const PhoneNumber& first_number, 718 const PhoneNumber& second_number) const; 719 720 // Takes two phone numbers as strings and compares them for equality. This 721 // is a convenience wrapper for IsNumberMatch(PhoneNumber firstNumber, 722 // PhoneNumber secondNumber). No default region is known. 723 // Returns INVALID_NUMBER if either number cannot be parsed into a phone 724 // number. 725 MatchType IsNumberMatchWithTwoStrings(const string& first_number, 726 const string& second_number) const; 727 728 // Takes two phone numbers and compares them for equality. This is a 729 // convenience wrapper for IsNumberMatch(PhoneNumber firstNumber, 730 // PhoneNumber secondNumber). No default region is known. 731 // Returns INVALID_NUMBER if second_number cannot be parsed into a phone 732 // number. 733 MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number, 734 const string& second_number) const; 735 736 // Overrides the default logging system. This takes ownership of the provided 737 // logger. 738 void SetLogger(Logger* logger); 739 740 // Gets an AsYouTypeFormatter for the specific region. 741 // Returns an AsYouTypeFormatter object, which could be used to format phone 742 // numbers in the specific region "as you type". 743 // The deletion of the returned instance is under the responsibility of the 744 // caller. 745 AsYouTypeFormatter* GetAsYouTypeFormatter(const string& region_code) const; 746 747 friend bool ConvertFromTelephoneNumberProto( 748 const TelephoneNumber& proto_to_convert, 749 PhoneNumber* new_proto); 750 friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert, 751 TelephoneNumber* resulting_proto); 752 753 protected: 754 bool IsNumberMatchingDesc(const string& national_number, 755 const PhoneNumberDesc& number_desc) const; 756 757 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper( 758 const string& national_number, const PhoneMetadata& metadata) const; 759 760 private: 761 scoped_ptr<Logger> logger_; 762 763 typedef std::pair<int, std::list<string>*> IntRegionsPair; 764 765 // The minimum and maximum length of the national significant number. 766 static const size_t kMinLengthForNsn = 2; 767 // The ITU says the maximum length should be 15, but we have found longer 768 // numbers in Germany. 769 static const size_t kMaxLengthForNsn = 17; 770 // The maximum length of the country calling code. 771 static const size_t kMaxLengthCountryCode = 3; 772 773 static const char kPlusChars[]; 774 // Regular expression of acceptable punctuation found in phone numbers. This 775 // excludes punctuation found as a leading character only. This consists of 776 // dash characters, white space characters, full stops, slashes, square 777 // brackets, parentheses and tildes. It also includes the letter 'x' as that 778 // is found as a placeholder for carrier information in some phone numbers. 779 // Full-width variants are also present. 780 static const char kValidPunctuation[]; 781 782 // Regular expression of characters typically used to start a second phone 783 // number for the purposes of parsing. This allows us to strip off parts of 784 // the number that are actually the start of another number, such as for: 785 // (530) 583-6985 x302/x2303 -> the second extension here makes this actually 786 // two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove 787 // the second extension so that the first number is parsed correctly. The 788 // string preceding this is captured. 789 // This corresponds to SECOND_NUMBER_START in the java version. 790 static const char kCaptureUpToSecondNumberStart[]; 791 792 // An API for validation checking. 793 scoped_ptr<MatcherApi> matcher_api_; 794 795 // Helper class holding useful regular expressions and character mappings. 796 scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_; 797 798 // A mapping from a country calling code to a RegionCode object which denotes 799 // the region represented by that country calling code. Note regions under 800 // NANPA share the country calling code 1 and Russia and Kazakhstan share the 801 // country calling code 7. Under this map, 1 is mapped to region code "US" and 802 // 7 is mapped to region code "RU". This is implemented as a sorted vector to 803 // achieve better performance. 804 scoped_ptr<std::vector<IntRegionsPair> > 805 country_calling_code_to_region_code_map_; 806 807 // The set of regions that share country calling code 1. 808 scoped_ptr<std::set<string> > nanpa_regions_; 809 static const int kNanpaCountryCode = 1; 810 811 // A mapping from a region code to a PhoneMetadata for that region. 812 scoped_ptr<std::map<string, PhoneMetadata> > region_to_metadata_map_; 813 814 // A mapping from a country calling code for a non-geographical entity to the 815 // PhoneMetadata for that country calling code. Examples of the country 816 // calling codes include 800 (International Toll Free Service) and 808 817 // (International Shared Cost Service). 818 scoped_ptr<std::map<int, PhoneMetadata> > 819 country_code_to_non_geographical_metadata_map_; 820 821 PhoneNumberUtil(); 822 823 // Returns a regular expression for the possible extensions that may be found 824 // in a number, for use when matching. 825 const string& GetExtnPatternsForMatching() const; 826 827 // Checks if a number matches the plus chars pattern. 828 bool StartsWithPlusCharsPattern(const string& number) const; 829 830 void SetItalianLeadingZerosForPhoneNumber( 831 const string& national_number, PhoneNumber* phone_number) const; 832 833 // Checks whether a string contains only valid digits. 834 bool ContainsOnlyValidDigits(const string& s) const; 835 836 // Checks if a format is eligible to be used by the AsYouTypeFormatter. This 837 // method is here rather than in asyoutypeformatter.h since it depends on the 838 // valid punctuation declared by the phone number util. 839 bool IsFormatEligibleForAsYouTypeFormatter(const string& format) const; 840 841 // Helper function to check if the national prefix formatting rule has the 842 // first group only, i.e., does not start with the national prefix. 843 bool FormattingRuleHasFirstGroupOnly( 844 const string& national_prefix_formatting_rule) const; 845 846 // Trims unwanted end characters from a phone number string. 847 void TrimUnwantedEndChars(string* number) const; 848 849 // Helper function to check region code is not unknown or null. 850 bool IsValidRegionCode(const string& region_code) const; 851 852 // Helper function to check the country calling code is valid. 853 bool HasValidCountryCallingCode(int country_calling_code) const; 854 855 const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion( 856 const string& region_code) const; 857 858 const i18n::phonenumbers::PhoneMetadata* GetMetadataForNonGeographicalRegion( 859 int country_calling_code) const; 860 861 const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegionOrCallingCode( 862 int country_calling_code, 863 const string& region_code) const; 864 865 // As per GetCountryCodeForRegion, but assumes the validity of the region_code 866 // has already been checked. 867 int GetCountryCodeForValidRegion(const string& region_code) const; 868 869 const NumberFormat* ChooseFormattingPatternForNumber( 870 const RepeatedPtrField<NumberFormat>& available_formats, 871 const string& national_number) const; 872 873 void FormatNsnUsingPatternWithCarrier( 874 const string& national_number, 875 const NumberFormat& formatting_pattern, 876 PhoneNumberUtil::PhoneNumberFormat number_format, 877 const string& carrier_code, 878 string* formatted_number) const; 879 880 void FormatNsnUsingPattern( 881 const string& national_number, 882 const NumberFormat& formatting_pattern, 883 PhoneNumberUtil::PhoneNumberFormat number_format, 884 string* formatted_number) const; 885 886 // Check if raw_input, which is assumed to be in the national format, has a 887 // national prefix. The national prefix is assumed to be in digits-only form. 888 bool RawInputContainsNationalPrefix( 889 const string& raw_input, 890 const string& national_prefix, 891 const string& region_code) const; 892 893 bool HasFormattingPatternForNumber(const PhoneNumber& number) const; 894 895 // Simple wrapper of FormatNsnWithCarrier for the common case of 896 // no carrier code. 897 void FormatNsn(const string& number, 898 const PhoneMetadata& metadata, 899 PhoneNumberFormat number_format, 900 string* formatted_number) const; 901 902 void FormatNsnWithCarrier(const string& number, 903 const PhoneMetadata& metadata, 904 PhoneNumberFormat number_format, 905 const string& carrier_code, 906 string* formatted_number) const; 907 908 void MaybeAppendFormattedExtension( 909 const PhoneNumber& number, 910 const PhoneMetadata& metadata, 911 PhoneNumberFormat number_format, 912 string* extension) const; 913 914 void GetRegionCodeForNumberFromRegionList( 915 const PhoneNumber& number, 916 const std::list<string>& region_codes, 917 string* region_code) const; 918 919 // Strips the IDD from the start of the number if present. Helper function 920 // used by MaybeStripInternationalPrefixAndNormalize. 921 bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const; 922 923 void Normalize(string* number) const; 924 925 PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize( 926 const string& possible_idd_prefix, 927 string* number) const; 928 929 bool MaybeStripNationalPrefixAndCarrierCode( 930 const PhoneMetadata& metadata, 931 string* number, 932 string* carrier_code) const; 933 934 void ExtractPossibleNumber(const string& number, 935 string* extracted_number) const; 936 937 bool IsViablePhoneNumber(const string& number) const; 938 939 bool MaybeStripExtension(string* number, string* extension) const; 940 941 int ExtractCountryCode(string* national_number) const; 942 ErrorType MaybeExtractCountryCode( 943 const PhoneMetadata* default_region_metadata, 944 bool keepRawInput, 945 string* national_number, 946 PhoneNumber* phone_number) const; 947 948 bool CheckRegionForParsing( 949 const string& number_to_parse, 950 const string& default_region) const; 951 952 ErrorType ParseHelper(const string& number_to_parse, 953 const string& default_region, 954 bool keep_raw_input, 955 bool check_region, 956 PhoneNumber* phone_number) const; 957 958 void BuildNationalNumberForParsing(const string& number_to_parse, 959 string* national_number) const; 960 961 bool IsShorterThanPossibleNormalNumber(const PhoneMetadata* country_metadata, 962 const string& number) const; 963 964 DISALLOW_COPY_AND_ASSIGN(PhoneNumberUtil); 965 }; 966 967 } // namespace phonenumbers 968 } // namespace i18n 969 970 #endif // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ 971