• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2009 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Utility for international phone numbers.
16 //
17 // Author: Shaopeng Jia
18 // Open-sourced by: Philippe Liard
19 
20 #ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
21 #define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
22 
23 #include <stddef.h>
24 #include <list>
25 #include <map>
26 #include <set>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include "phonenumbers/base/basictypes.h"
32 #include "phonenumbers/base/memory/scoped_ptr.h"
33 #include "phonenumbers/base/memory/singleton.h"
34 #include "phonenumbers/phonenumber.pb.h"
35 
36 class TelephoneNumber;
37 
38 namespace i18n {
39 namespace phonenumbers {
40 
41 using std::list;
42 using std::map;
43 using std::pair;
44 using std::set;
45 using std::string;
46 using std::vector;
47 
48 using google::protobuf::RepeatedPtrField;
49 
50 class AsYouTypeFormatter;
51 class Logger;
52 class NumberFormat;
53 class PhoneMetadata;
54 class PhoneNumberRegExpsAndMappings;
55 class RegExp;
56 
57 // NOTE: A lot of methods in this class require Region Code strings. These must
58 // be provided using ISO 3166-1 two-letter country-code format. The list of the
59 // codes can be found here:
60 // http://www.iso.org/iso/english_country_names_and_code_elements
61 
62 class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
63  private:
64   friend class AsYouTypeFormatter;
65   friend class PhoneNumberMatcher;
66   friend class PhoneNumberMatcherRegExps;
67   friend class PhoneNumberMatcherTest;
68   friend class PhoneNumberRegExpsAndMappings;
69   friend class PhoneNumberUtilTest;
70   friend class ShortNumberInfo;
71   friend class ShortNumberInfoTest;
72   friend class Singleton<PhoneNumberUtil>;
73 
74  public:
75   ~PhoneNumberUtil();
76   static const char kRegionCodeForNonGeoEntity[];
77 
78   // INTERNATIONAL and NATIONAL formats are consistent with the definition
79   // in ITU-T Recommendation E.123. For example, the number of the Google
80   // Zürich office will be written as "+41 44 668 1800" in INTERNATIONAL
81   // format, and as "044 668 1800" in NATIONAL format. E164 format is as per
82   // INTERNATIONAL format but with no formatting applied e.g. "+41446681800".
83   // RFC3966 is as per INTERNATIONAL format, but with all spaces and other
84   // separating symbols replaced with a hyphen, and with any phone number
85   // extension appended with ";ext=". It also will have a prefix of "tel:"
86   // added, e.g. "tel:+41-44-668-1800".
87   enum PhoneNumberFormat {
88     E164,           // e.g. "+41446681800"
89     INTERNATIONAL,  // e.g. "+41 44 668 1800"
90     NATIONAL,       // e.g. "044 668 1800"
91     RFC3966         // e.g. "tel:+41-44-668-1800"
92   };
93 
94   // Type of a phone number, as returned by GetNumberType().
95   enum PhoneNumberType {
96     FIXED_LINE,
97     MOBILE,
98     // In some regions (e.g. the USA), it is impossible to distinguish between
99     // fixed-line and mobile numbers by looking at the phone number itself.
100     FIXED_LINE_OR_MOBILE,
101     // Freephone (toll-free) lines.
102     TOLL_FREE,
103     PREMIUM_RATE,
104     // The cost of this call is shared between the caller and the recipient, and
105     // is hence typically less than PREMIUM_RATE calls. See
106     // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
107     SHARED_COST,
108     // Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
109     VOIP,
110     // A personal number is associated with a particular person, and may be
111     // routed to either a MOBILE or FIXED_LINE number. Some more information can
112     // be found here: http://en.wikipedia.org/wiki/Personal_Numbers
113     PERSONAL_NUMBER,
114     PAGER,
115     // Used for "Universal Access Numbers" or "Company Numbers". They may be
116     // further routed to specific offices, but allow one number to be used for a
117     // company.
118     UAN,
119     // Used for "Voice Mail Access Numbers".
120     VOICEMAIL,
121     // A phone number is of type UNKNOWN when it does not fit any of the known
122     // patterns for a specific region.
123     UNKNOWN
124   };
125 
126   // Types of phone number matches. See detailed description beside the
127   // IsNumberMatch() method.
128   enum MatchType {
129     INVALID_NUMBER,  // NOT_A_NUMBER in the java version.
130     NO_MATCH,
131     SHORT_NSN_MATCH,  // One NSN could be a shorter version of the other.
132     NSN_MATCH,  // NSNs and extensions match; a country calling code is missing.
133     EXACT_MATCH,  // Country code, NSN, leading zero and extension all match.
134   };
135 
136   enum ErrorType {  // Returned by Parse() and ParseAndKeepRawInput().
137     NO_PARSING_ERROR,  // The number was parsed correctly.
138     INVALID_COUNTRY_CODE_ERROR,  // INVALID_COUNTRY_CODE in the java version.
139     NOT_A_NUMBER,
140     TOO_SHORT_AFTER_IDD,
141     TOO_SHORT_NSN,
142     TOO_LONG_NSN,  // TOO_LONG in the java version.
143   };
144 
145   // Possible outcomes when testing if a PhoneNumber is possible.
146   enum ValidationResult {  // Returned by IsPossibleNumberWithReason().
147     IS_POSSIBLE,
148     INVALID_COUNTRY_CODE,
149     TOO_SHORT,
150     TOO_LONG,
151   };
152 
153   // Convenience method to get a list of what regions the library has metadata
154   // for.
155   void GetSupportedRegions(set<string>* regions) const;
156 
157   // Populates a list with the region codes that match the specific country
158   // calling code. For non-geographical country calling codes, the region code
159   // 001 is returned. Also, in the case of no region code being found, the list
160   // is left unchanged.
161   void GetRegionCodesForCountryCallingCode(
162       int country_calling_code,
163       list<string>* region_codes) const;
164 
165   // Gets a PhoneNumberUtil instance to carry out international phone number
166   // formatting, parsing, or validation. The instance is loaded with phone
167   // number metadata for a number of most commonly used regions, as specified by
168   // DEFAULT_REGIONS_.
169   //
170   // The PhoneNumberUtil is implemented as a singleton. Therefore, calling
171   // GetInstance multiple times will only result in one instance being created.
172   static PhoneNumberUtil* GetInstance();
173 
174   // Returns true if the number is a valid vanity (alpha) number such as 800
175   // MICROSOFT. A valid vanity number will start with at least 3 digits and will
176   // have three or more alpha characters. This does not do region-specific
177   // checks - to work out if this number is actually valid for a region, it
178   // should be parsed and methods such as IsPossibleNumberWithReason or
179   // IsValidNumber should be used.
180   bool IsAlphaNumber(const string& number) const;
181 
182   // Converts all alpha characters in a number to their respective digits on
183   // a keypad, but retains existing formatting.
184   void ConvertAlphaCharactersInNumber(string* number) const;
185 
186   // Normalizes a string of characters representing a phone number. This
187   // converts wide-ascii and arabic-indic numerals to European numerals, and
188   // strips punctuation and alpha characters.
189   void NormalizeDigitsOnly(string* number) const;
190 
191   // Normalizes a string of characters representing a phone number. This strips
192   // all characters which are not diallable on a mobile phone keypad (including
193   // all non-ASCII digits).
194   void NormalizeDiallableCharsOnly(string* number) const;
195 
196   // Gets the national significant number of a phone number. Note a national
197   // significant number doesn't contain a national prefix or any formatting.
198   void GetNationalSignificantNumber(const PhoneNumber& number,
199                                     string* national_significant_num) const;
200 
201   // Gets the length of the geographical area code from the PhoneNumber object
202   // passed in, so that clients could use it to split a national significant
203   // number into geographical area code and subscriber number. It works in such
204   // a way that the resultant subscriber number should be diallable, at least on
205   // some devices. An example of how this could be used:
206   //
207   // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
208   // PhoneNumber number;
209   // phone_util.Parse("16502530000", "US", &number);
210   // string national_significant_number;
211   // phone_util.GetNationalSignificantNumber(number,
212   //                                         &national_significant_number);
213   // string area_code;
214   // string subscriber_number;
215   //
216   // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number);
217   // if (area_code_length > 0) {
218   //   area_code = national_significant_number.substr(0, area_code_length);
219   //   subscriber_number = national_significant_number.substr(
220   //       area_code_length, string::npos);
221   // } else {
222   //   area_code = "";
223   //   subscriber_number = national_significant_number;
224   // }
225   //
226   // N.B.: area code is a very ambiguous concept, so the authors generally
227   // recommend against using it for most purposes, but recommend using the
228   // more general national_number instead. Read the following carefully before
229   // deciding to use this method:
230   //
231   //  - geographical area codes change over time, and this method honors those
232   //    changes; therefore, it doesn't guarantee the stability of the result it
233   //    produces.
234   //  - subscriber numbers may not be diallable from all devices (notably mobile
235   //    devices, which typically require the full national_number to be dialled
236   //    in most regions).
237   //  - most non-geographical numbers have no area codes, including numbers
238   //    from non-geographical entities.
239   //  - some geographical numbers have no area codes.
240   int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const;
241 
242   // Gets the length of the national destination code (NDC) from the PhoneNumber
243   // object passed in, so that clients could use it to split a national
244   // significant number into NDC and subscriber number. The NDC of a phone
245   // number is normally the first group of digit(s) right after the country
246   // calling code when the number is formatted in the international format, if
247   // there is a subscriber number part that follows. An example of how this
248   // could be used:
249   //
250   // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
251   // PhoneNumber number;
252   // phone_util.Parse("16502530000", "US", &number);
253   // string national_significant_number;
254   // phone_util.GetNationalSignificantNumber(number,
255   //                                         &national_significant_number);
256   // string national_destination_code;
257   // string subscriber_number;
258   //
259   // int national_destination_code_length =
260   //     phone_util.GetLengthOfNationalDestinationCode(number);
261   // if (national_destination_code_length > 0) {
262   //   national_destination_code = national_significant_number.substr(
263   //       0, national_destination_code_length);
264   //   subscriber_number = national_significant_number.substr(
265   //       national_destination_code_length, string::npos);
266   // } else {
267   //   national_destination_code = "";
268   //   subscriber_number = national_significant_number;
269   // }
270   //
271   // Refer to the unittests to see the difference between this function and
272   // GetLengthOfGeographicalAreaCode().
273   int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const;
274 
275   // Returns the mobile token for the provided country calling code if it has
276   // one, otherwise returns an empty string. A mobile token is a number inserted
277   // before the area code when dialing a mobile number from that country from
278   // abroad.
279   void GetCountryMobileToken(int country_calling_code,
280                              string* mobile_token) const;
281 
282   // Formats a phone number in the specified format using default rules. Note
283   // that this does not promise to produce a phone number that the user can
284   // dial from where they are - although we do format in either NATIONAL or
285   // INTERNATIONAL format depending on what the client asks for, we do not
286   // currently support a more abbreviated format, such as for users in the
287   // same area who could potentially dial the number without area code.
288   void Format(const PhoneNumber& number,
289               PhoneNumberFormat number_format,
290               string* formatted_number) const;
291 
292   // Formats a phone number in the specified format using client-defined
293   // formatting rules.
294   void FormatByPattern(
295       const PhoneNumber& number,
296       PhoneNumberFormat number_format,
297       const RepeatedPtrField<NumberFormat>& user_defined_formats,
298       string* formatted_number) const;
299 
300   // Formats a phone number in national format for dialing using the carrier as
301   // specified in the carrier_code. The carrier_code will always be used
302   // regardless of whether the phone number already has a preferred domestic
303   // carrier code stored. If carrier_code contains an empty string, return the
304   // number in national format without any carrier code.
305   void FormatNationalNumberWithCarrierCode(const PhoneNumber& number,
306                                            const string& carrier_code,
307                                            string* formatted_number) const;
308 
309   // Formats a phone number in national format for dialing using the carrier as
310   // specified in the preferred_domestic_carrier_code field of the PhoneNumber
311   // object passed in. If that is missing, use the fallback_carrier_code passed
312   // in instead. If there is no preferred_domestic_carrier_code, and the
313   // fallback_carrier_code contains an empty string, return the number in
314   // national format without any carrier code.
315   //
316   // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed
317   // in should take precedence over the number's preferred_domestic_carrier_code
318   // when formatting.
319   void FormatNationalNumberWithPreferredCarrierCode(
320       const PhoneNumber& number,
321       const string& fallback_carrier_code,
322       string* formatted_number) const;
323 
324   // Returns a number formatted in such a way that it can be dialed from a
325   // mobile phone in a specific region. If the number cannot be reached from
326   // the region (e.g. some countries block toll-free numbers from being called
327   // outside of the country), the method returns an empty string.
328   void FormatNumberForMobileDialing(
329       const PhoneNumber& number,
330       const string& region_calling_from,
331       bool with_formatting,
332       string* formatted_number) const;
333 
334   // Formats a phone number for out-of-country dialing purposes.
335   //
336   // Note this function takes care of the case for calling inside of NANPA
337   // and between Russia and Kazakhstan (who share the same country calling
338   // code). In those cases, no international prefix is used. For regions which
339   // have multiple international prefixes, the number in its INTERNATIONAL
340   // format will be returned instead.
341   void FormatOutOfCountryCallingNumber(
342       const PhoneNumber& number,
343       const string& calling_from,
344       string* formatted_number) const;
345 
346   // Formats a phone number using the original phone number format that the
347   // number is parsed from. The original format is embedded in the
348   // country_code_source field of the PhoneNumber object passed in. If such
349   // information is missing, the number will be formatted into the NATIONAL
350   // format by default. When the number is an invalid number, the method returns
351   // the raw input when it is available.
352   void FormatInOriginalFormat(const PhoneNumber& number,
353                               const string& region_calling_from,
354                               string* formatted_number) const;
355 
356   // Formats a phone number for out-of-country dialing purposes.
357   //
358   // Note that in this version, if the number was entered originally using alpha
359   // characters and this version of the number is stored in raw_input, this
360   // representation of the number will be used rather than the digit
361   // representation. Grouping information, as specified by characters such as
362   // "-" and " ", will be retained.
363   //
364   // Caveats:
365   // 1) This will not produce good results if the country calling code is both
366   // present in the raw input _and_ is the start of the national number. This
367   // is not a problem in the regions which typically use alpha numbers.
368   // 2) This will also not produce good results if the raw input has any
369   // grouping information within the first three digits of the national number,
370   // and if the function needs to strip preceding digits/words in the raw input
371   // before these digits. Normally people group the first three digits together
372   // so this is not a huge problem - and will be fixed if it proves to be so.
373   void FormatOutOfCountryKeepingAlphaChars(
374       const PhoneNumber& number,
375       const string& calling_from,
376       string* formatted_number) const;
377 
378   // Attempts to extract a valid number from a phone number that is too long to
379   // be valid, and resets the PhoneNumber object passed in to that valid
380   // version. If no valid number could be extracted, the PhoneNumber object
381   // passed in will not be modified. It returns true if a valid phone number can
382   // be successfully extracted.
383   bool TruncateTooLongNumber(PhoneNumber* number) const;
384 
385   // Gets the type of a phone number.
386   PhoneNumberType GetNumberType(const PhoneNumber& number) const;
387 
388   // Tests whether a phone number matches a valid pattern. Note this doesn't
389   // verify the number is actually in use, which is impossible to tell by just
390   // looking at a number itself.
391   bool IsValidNumber(const PhoneNumber& number) const;
392 
393   // Tests whether a phone number is valid for a certain region. Note this
394   // doesn't verify the number is actually in use, which is impossible to tell
395   // by just looking at a number itself. If the country calling code is not the
396   // same as the country calling code for the region, this immediately exits
397   // with false.  After this, the specific number pattern rules for the region
398   // are examined.
399   // This is useful for determining for example whether a particular number is
400   // valid for Canada, rather than just a valid NANPA number.
401   // Warning: In most cases, you want to use IsValidNumber instead. For
402   // example, this method will mark numbers from British Crown dependencies
403   // such as the Isle of Man as invalid for the region "GB" (United Kingdom),
404   // since it has its own region code, "IM", which may be undesirable.
405   bool IsValidNumberForRegion(
406       const PhoneNumber& number,
407       const string& region_code) const;
408 
409   // Returns the region where a phone number is from. This could be used for
410   // geo-coding at the region level.
411   void GetRegionCodeForNumber(const PhoneNumber& number,
412                               string* region_code) const;
413 
414   // Returns the country calling code for a specific region. For example,
415   // this would be 1 for the United States, and 64 for New Zealand.
416   int GetCountryCodeForRegion(const string& region_code) const;
417 
418   // Returns the region code that matches the specific country code. Note that
419   // it is possible that several regions share the same country calling code
420   // (e.g. US and Canada), and in that case, only one of the regions (normally
421   // the one with the largest population) is returned.
422   void GetRegionCodeForCountryCode(int country_code, string* region_code) const;
423 
424   // Checks if this is a region under the North American Numbering Plan
425   // Administration (NANPA).
426   bool IsNANPACountry(const string& region_code) const;
427 
428   // Returns the national dialling prefix for a specific region. For example,
429   // this would be 1 for the United States, and 0 for New Zealand. Set
430   // strip_non_digits to true to strip symbols like "~" (which indicates a wait
431   // for a dialling tone) from the prefix returned. If no national prefix is
432   // present, we return an empty string.
433   void GetNddPrefixForRegion(const string& region_code,
434                              bool strip_non_digits,
435                              string* national_prefix) const;
436 
437   // Checks whether a phone number is a possible number. It provides a more
438   // lenient check than IsValidNumber() in the following sense:
439   //   1. It only checks the length of phone numbers. In particular, it doesn't
440   //      check starting digits of the number.
441   //   2. It doesn't attempt to figure out the type of the number, but uses
442   //      general rules which apply to all types of phone numbers in a
443   //      region. Therefore, it is much faster than IsValidNumber().
444   //   3. For fixed line numbers, many regions have the concept of area code,
445   //      which together with subscriber number constitute the national
446   //      significant number. It is sometimes okay to dial the subscriber
447   //      number only when dialing in the same area. This function will return
448   //      true if the subscriber-number-only version is passed in. On the other
449   //      hand, because IsValidNumber() validates using information on both
450   //      starting digits (for fixed line numbers, that would most likely be
451   //      area codes) and length (obviously includes the length of area codes
452   //      for fixed line numbers), it will return false for the
453   //      subscriber-number-only version.
454   ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const;
455 
456   // Convenience wrapper around IsPossibleNumberWithReason. Instead of returning
457   // the reason for failure, this method returns a boolean value.
458   bool IsPossibleNumber(const PhoneNumber& number) const;
459 
460   // Checks whether a phone number is a possible number given a number in the
461   // form of a string, and the country where the number could be dialed from.
462   // It provides a more lenient check than IsValidNumber(). See
463   // IsPossibleNumber(const PhoneNumber& number) for details.
464   //
465   // This method first parses the number, then invokes
466   // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber
467   // object.
468   //
469   // region_dialing_from represents the region that we are expecting the number
470   // to be dialed from. Note this is different from the region where the number
471   // belongs. For example, the number +1 650 253 0000 is a number that belongs
472   // to US. When written in this form, it could be dialed from any region. When
473   // it is written as 00 1 650 253 0000, it could be dialed from any region
474   // which uses an international dialling prefix of 00. When it is written as
475   // 650 253 0000, it could only be dialed from within the US, and when written
476   // as 253 0000, it could only be dialed from within a smaller area in the US
477   // (Mountain View, CA, to be more specific).
478   bool IsPossibleNumberForString(
479       const string& number,
480       const string& region_dialing_from) const;
481 
482   // Gets a valid fixed-line number for the specified region. Returns false if
483   // the region was unknown, or the region 001 is passed in. For 001
484   // (representing non-geographical numbers), call
485   // GetExampleNumberForNonGeoEntity instead.
486   bool GetExampleNumber(const string& region_code,
487                         PhoneNumber* number) const;
488 
489   // Gets a valid number of the specified type for the specified region.
490   // Returns false if the region was unknown or 001, or if no example number of
491   // that type could be found. For 001 (representing non-geographical numbers),
492   // call GetExampleNumberForNonGeoEntity instead.
493   bool GetExampleNumberForType(const string& region_code,
494                                PhoneNumberType type,
495                                PhoneNumber* number) const;
496 
497   // Gets a valid number for the specified country calling code for a
498   // non-geographical entity. Returns false if the metadata does not contain
499   // such information, or the country calling code passed in does not belong to
500   // a non-geographical entity.
501   bool GetExampleNumberForNonGeoEntity(
502       int country_calling_code, PhoneNumber* number) const;
503 
504   // Parses a string and returns it in proto buffer format. This method will
505   // return an error like INVALID_COUNTRY_CODE_ERROR if the number is not
506   // considered to be a possible number, and NO_PARSING_ERROR if it parsed
507   // correctly. Note that validation of whether the number is actually a valid
508   // number for a particular region is not performed. This can be done
509   // separately with IsValidNumber().
510   //
511   // number_to_parse can also be provided in RFC3966 format.
512   //
513   // default_region represents the country that we are expecting the number to
514   // be from. This is only used if the number being parsed is not written in
515   // international format. The country_code for the number in this case would be
516   // stored as that of the default country supplied. If the number is guaranteed
517   // to start with a '+' followed by the country calling code, then
518   // "ZZ" can be supplied.
519   ErrorType Parse(const string& number_to_parse,
520                   const string& default_region,
521                   PhoneNumber* number) const;
522   // Parses a string and returns it in proto buffer format. This method differs
523   // from Parse() in that it always populates the raw_input field of the
524   // protocol buffer with number_to_parse as well as the country_code_source
525   // field.
526   ErrorType ParseAndKeepRawInput(const string& number_to_parse,
527                                  const string& default_region,
528                                  PhoneNumber* number) const;
529 
530   // Takes two phone numbers and compares them for equality.
531   //
532   // Returns EXACT_MATCH if the country calling code, NSN, presence of a leading
533   // zero for Italian numbers and any extension present are the same.
534   // Returns NSN_MATCH if either or both has no country calling code specified,
535   // and the NSNs and extensions are the same.
536   // Returns SHORT_NSN_MATCH if either or both has no country calling code
537   // specified, or the country calling code specified is the same, and one NSN
538   // could be a shorter version of the other number. This includes the case
539   // where one has an extension specified, and the other does not.
540   // Returns NO_MATCH otherwise.
541   // For example, the numbers +1 345 657 1234 and 657 1234 are a
542   // SHORT_NSN_MATCH. The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.
543   MatchType IsNumberMatch(const PhoneNumber& first_number,
544                           const PhoneNumber& second_number) const;
545 
546   // Takes two phone numbers as strings and compares them for equality. This
547   // is a convenience wrapper for the IsNumberMatch() method that takes two
548   // PhoneNumber objects. No default region is known.
549   // Returns INVALID_NUMBER if either number cannot be parsed into a phone
550   // number.
551   MatchType IsNumberMatchWithTwoStrings(const string& first_number,
552                                         const string& second_number) const;
553 
554   // Takes a PhoneNumber and a number string and compares them for equality.
555   // This is a convenience wrapper for the IsNumberMatch() method taking two
556   // PhoneNumber objects. No default region is known.
557   // Returns INVALID_NUMBER if second_number cannot be parsed into a phone
558   // number.
559   MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number,
560                                        const string& second_number) const;
561 
562   // Overrides the default logging system. This takes ownership of the provided
563   // logger.
564   void SetLogger(Logger* logger);
565 
566   // Gets an AsYouTypeFormatter for the specific region.
567   // Returns an AsYouTypeFormatter object, which could be used to format phone
568   // numbers in the specific region "as you type".
569   // The caller takes ownership of the returned instance and is responsible for
570   // deleting it.
571   AsYouTypeFormatter* GetAsYouTypeFormatter(const string& region_code) const;
572 
573   friend bool ConvertFromTelephoneNumberProto(
574       const TelephoneNumber& proto_to_convert,
575       PhoneNumber* new_proto);
576   friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert,
577                                             TelephoneNumber* resulting_proto);
578 
579  protected:
580   // Check whether the country_calling_code is from a country whose national
581   // significant number could contain a leading zero. An example of such a
582   // country is Italy.
583   bool IsLeadingZeroPossible(int country_calling_code) const;
584 
585  private:
586   scoped_ptr<Logger> logger_;
587 
588   typedef pair<int, list<string>*> IntRegionsPair;
589 
590   // The minimum and maximum length of the national significant number.
591   static const size_t kMinLengthForNsn = 2;
592   // The ITU says the maximum length should be 15, but we have found longer
593   // numbers in Germany.
594   static const size_t kMaxLengthForNsn = 16;
595   // The maximum length of the country calling code.
596   static const size_t kMaxLengthCountryCode = 3;
597 
598   static const char kPlusChars[];
599   // Regular expression of acceptable punctuation found in phone numbers. This
600   // excludes punctuation found as a leading character only. This consists of
601   // dash characters, white space characters, full stops, slashes, square
602   // brackets, parentheses and tildes. It also includes the letter 'x' as that
603   // is found as a placeholder for carrier information in some phone numbers.
604   // Full-width variants are also present.
605   static const char kValidPunctuation[];
606 
607   // Regular expression of characters typically used to start a second phone
608   // number for the purposes of parsing. This allows us to strip off parts of
609   // the number that are actually the start of another number, such as for:
610   // (530) 583-6985 x302/x2303 -> the second extension here makes this actually
611   // two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove
612   // the second extension so that the first number is parsed correctly. The
613   // string preceding this is captured.
614   // This corresponds to SECOND_NUMBER_START in the java version.
615   static const char kCaptureUpToSecondNumberStart[];
616 
617   // Helper class holding useful regular expressions and character mappings.
618   scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
619 
620   // A mapping from a country calling code to a RegionCode object which denotes
621   // the region represented by that country calling code. Note regions under
622   // NANPA share the country calling code 1 and Russia and Kazakhstan share the
623   // country calling code 7. Under this map, 1 is mapped to region code "US" and
624   // 7 is mapped to region code "RU". This is implemented as a sorted vector to
625   // achieve better performance.
      // NOTE(review): each element is an IntRegionsPair, i.e. the int is paired
      // with a pointer to a list of region-code strings rather than with a
      // single code; "US" and "RU" above are presumably the main entries of
      // those lists, and the vector is presumably sorted by the int key -
      // confirm both in the implementation.
626   scoped_ptr<vector<IntRegionsPair> > country_calling_code_to_region_code_map_;
627 
628   // The set of regions that share country calling code 1.
      // Stored as region-code strings in a set held by scoped_ptr.
629   scoped_ptr<set<string> > nanpa_regions_;
      // Country calling code 1, the code shared by the NANPA regions above.
630   static const int kNanpaCountryCode = 1;
631 
632   // A mapping from a region code to a PhoneMetadata for that region.
      // Keys are region-code strings; the PhoneMetadata objects are stored by
      // value in the map.
633   scoped_ptr<map<string, PhoneMetadata> > region_to_metadata_map_;
634 
635   // A mapping from a country calling code for a non-geographical entity to the
636   // PhoneMetadata for that country calling code. Examples of the country
637   // calling codes include 800 (International Toll Free Service) and 808
638   // (International Shared Cost Service).
      // Keys are int country calling codes; the PhoneMetadata objects are stored
      // by value in the map.
639   scoped_ptr<map<int, PhoneMetadata> >
640       country_code_to_non_geographical_metadata_map_;
641 
      // Default constructor, declared in the private section so that code
      // without access to private members cannot construct the class directly.
      // NOTE(review): the class derives from Singleton<PhoneNumberUtil>, so
      // instances are presumably handed out by that base - confirm how.
642   PhoneNumberUtil();
643 
644   // Returns a regular expression for the possible extensions that may be found
645   // in a number, for use when matching.
      // The pattern comes back as a const reference to a string, so the string
      // has to be stored somewhere that outlives the call.
      // NOTE(review): presumably inside reg_exps_ - confirm in the
      // implementation.
646   const string& GetExtnPatternsForMatching() const;
647 
648   // Checks if a number matches the plus chars pattern.
      // |number| is only read. NOTE(review): going by the method name the match
      // is anchored at the start of |number| - confirm in the implementation.
649   bool StartsWithPlusCharsPattern(const string& number) const;
650 
651   // Checks whether a string contains only valid digits.
      // |s| is only read. NOTE(review): what counts as a valid digit, and the
      // result for an empty string, are defined by the implementation and not
      // visible here.
652   bool ContainsOnlyValidDigits(const string& s) const;
653 
654   // Checks if a format is eligible to be used by the AsYouTypeFormatter. This
655   // method is here rather than in asyoutypeformatter.h since it depends on the
656   // valid punctuation declared by the phone number util.
      // |format| is the format string under test; it is only read.
657   bool IsFormatEligibleForAsYouTypeFormatter(const string& format) const;
658 
659   // Helper function to check if the national prefix formatting rule has the
660   // first group only, i.e., does not start with the national prefix.
      // |national_prefix_formatting_rule| is the rule string under test; it is
      // only read.
661   bool FormattingRuleHasFirstGroupOnly(
662       const string& national_prefix_formatting_rule) const;
663 
664   // Trims unwanted end characters from a phone number string.
      // |number| is passed by non-const pointer and nothing is returned, so the
      // trimming is presumably done in place on |*number|.
      // NOTE(review): which trailing characters count as unwanted is defined in
      // the implementation.
665   void TrimUnwantedEndChars(string* number) const;
666 
667   // Tests whether a phone number has a geographical association. It checks if
668   // the number is associated with a certain region in the country it belongs
669   // to. Note that this doesn't verify if the number is actually in use.
670   bool IsNumberGeographical(const PhoneNumber& phone_number) const;
671 
672   // Helper function to check that the region code is not unknown or null.
      // NOTE(review): |region_code| is a reference and so cannot itself be null
      // in C++; the "null" wording presumably comes from the Java version that
      // this header mentions elsewhere - confirm what is actually rejected.
673   bool IsValidRegionCode(const string& region_code) const;
674 
675   // Helper function to check the country calling code is valid.
      // NOTE(review): "valid" presumably means known to the loaded metadata -
      // confirm in the implementation.
676   bool HasValidCountryCallingCode(int country_calling_code) const;
677 
      // Looks up the PhoneMetadata for |region_code| and returns it as a pointer
      // to const metadata.
      // NOTE(review): presumably backed by region_to_metadata_map_, returning
      // NULL for an unknown region - confirm in the implementation.
678   const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion(
679       const string& region_code) const;
680 
      // Looks up the PhoneMetadata for the non-geographical entity identified by
      // |country_calling_code| and returns it as a pointer to const metadata.
      // NOTE(review): presumably backed by
      // country_code_to_non_geographical_metadata_map_, returning NULL for an
      // unknown code - confirm in the implementation.
681   const i18n::phonenumbers::PhoneMetadata* GetMetadataForNonGeographicalRegion(
682       int country_calling_code) const;
683 
      // Metadata lookup that takes both a country calling code and a region
      // code and returns a pointer to const metadata.
      // NOTE(review): presumably picks between GetMetadataForRegion() and
      // GetMetadataForNonGeographicalRegion() depending on whether the region
      // is a non-geographical one - confirm in the implementation.
684   const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegionOrCallingCode(
685       int country_calling_code,
686       const string& region_code) const;
687 
688   // As per GetCountryCodeForRegion, but assumes the validity of the region_code
689   // has already been checked.
      // The country calling code is returned as an int. NOTE(review): behaviour
      // for a region code that was not validated is not visible here.
690   int GetCountryCodeForValidRegion(const string& region_code) const;
691 
      // Selects a NumberFormat for |national_number| and returns it as a pointer
      // to const.
      // NOTE(review): presumably the result is one of the entries of
      // |available_formats|, or NULL when none applies - confirm in the
      // implementation.
692   const NumberFormat* ChooseFormattingPatternForNumber(
693       const RepeatedPtrField<NumberFormat>& available_formats,
694       const string& national_number) const;
695 
      // Formatting helper for a national significant number. Inputs are the
      // number string, the NumberFormat to apply, the requested output
      // PhoneNumberFormat and a carrier code; |formatted_number| is the only
      // non-const parameter and the method returns void, so it is presumably
      // where the result is written.
      // NOTE(review): how |carrier_code| enters the result is defined in the
      // implementation.
696   void FormatNsnUsingPatternWithCarrier(
697       const string& national_number,
698       const NumberFormat& formatting_pattern,
699       PhoneNumberUtil::PhoneNumberFormat number_format,
700       const string& carrier_code,
701       string* formatted_number) const;
702 
      // Same parameter list as FormatNsnUsingPatternWithCarrier() minus the
      // carrier code. |formatted_number| is the only non-const parameter and
      // the method returns void, so it is presumably where the result is
      // written.
      // NOTE(review): presumably equivalent to the carrier variant with no
      // carrier code - confirm in the implementation.
703   void FormatNsnUsingPattern(
704       const string& national_number,
705       const NumberFormat& formatting_pattern,
706       PhoneNumberUtil::PhoneNumberFormat number_format,
707       string* formatted_number) const;
708 
709   // Check if raw_input, which is assumed to be in the national format, has a
710   // national prefix. The national prefix is assumed to be in digits-only form.
      // All three arguments are only read. NOTE(review): |region_code|
      // presumably selects the metadata used for the check - confirm in the
      // implementation.
711   bool RawInputContainsNationalPrefix(
712       const string& raw_input,
713       const string& national_prefix,
714       const string& region_code) const;
715 
716   // Returns true if a number is from a region whose national significant number
717   // couldn't contain a leading zero, but has the italian_leading_zero field set
718   // to true.
      // |number| is only read.
719   bool HasUnexpectedItalianLeadingZero(const PhoneNumber& number) const;
720 
      // Going by the name, reports whether a formatting pattern is available for
      // |number|, which is only read.
      // NOTE(review): which set of formats is consulted is defined in the
      // implementation.
721   bool HasFormattingPatternForNumber(const PhoneNumber& number) const;
722 
723   // Simple wrapper of FormatNsnWithCarrier for the common case of
724   // no carrier code.
      // |formatted_number| is the only non-const parameter and the method
      // returns void, so it is presumably where the result is written.
725   void FormatNsn(const string& number,
726                  const PhoneMetadata& metadata,
727                  PhoneNumberFormat number_format,
728                  string* formatted_number) const;
729 
      // Formatting helper that takes the number string, the PhoneMetadata to
      // use, the requested PhoneNumberFormat and a carrier code.
      // |formatted_number| is the only non-const parameter and the method
      // returns void, so it is presumably where the result is written.
      // NOTE(review): how a format is chosen from |metadata| and how
      // |carrier_code| is applied are defined in the implementation.
730   void FormatNsnWithCarrier(const string& number,
731                             const PhoneMetadata& metadata,
732                             PhoneNumberFormat number_format,
733                             const string& carrier_code,
734                             string* formatted_number) const;
735 
      // Going by the name, appends the formatted extension of |number|, when it
      // has one, to the string passed by pointer.
      // NOTE(review): that string parameter is called |extension| here although
      // the method name suggests it receives already formatted text - confirm
      // against the definition.
736   void MaybeAppendFormattedExtension(
737       const PhoneNumber& number,
738       const PhoneMetadata& metadata,
739       PhoneNumberFormat number_format,
740       string* extension) const;
741 
      // Takes a number and a list of candidate region codes. |region_code| is
      // the only non-const parameter and the method returns void, so it is
      // presumably where the chosen region code is written.
      // NOTE(review): the selection rule, and what is written when no candidate
      // fits, are defined in the implementation.
742   void GetRegionCodeForNumberFromRegionList(
743       const PhoneNumber& number,
744       const list<string>& region_codes,
745       string* region_code) const;
746 
747   // Strips the IDD from the start of the number if present. Helper function
748   // used by MaybeStripInternationalPrefixAndNormalize.
      // |idd_pattern| is the pattern to look for; |number| is passed by
      // non-const pointer, presumably so it can be edited in place.
      // NOTE(review): the bool result presumably tells whether a prefix was
      // stripped - confirm in the implementation.
749   bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;
750 
      // Going by the name, normalizes the number string. |number| is passed by
      // non-const pointer and nothing is returned, so this presumably happens
      // in place.
      // NOTE(review): the normalization rules are defined in the
      // implementation.
751   void Normalize(string* number) const;
752 
      // Going by the name, strips an international prefix from |number| when one
      // is present and normalizes the string; |number| is passed by non-const
      // pointer. Returns a PhoneNumber::CountryCodeSource value.
      // NOTE(review): the comment on ParsePrefixAsIdd() names this method as its
      // caller; which CountryCodeSource value is returned in which case is
      // defined in the implementation.
753   PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
754       const string& possible_idd_prefix,
755       string* number) const;
756 
      // Going by the name, removes a national prefix and carrier code from
      // |number| when present, using |metadata|. Both |number| and
      // |carrier_code| are passed by non-const pointer.
      // NOTE(review): |carrier_code| presumably receives the stripped carrier
      // code and the bool result presumably tells whether anything was removed;
      // whether |carrier_code| may be NULL is not visible here - confirm.
757   bool MaybeStripNationalPrefixAndCarrierCode(
758       const PhoneMetadata& metadata,
759       string* number,
760       string* carrier_code) const;
761 
      // Going by the name, pulls a possible phone number out of |number|, which
      // is only read. |extracted_number| is the only non-const parameter and
      // the method returns void, so it is presumably where the result is
      // written.
      // NOTE(review): what is written when nothing can be extracted is defined
      // in the implementation.
762   void ExtractPossibleNumber(const string& number,
763                              string* extracted_number) const;
764 
      // Going by the name, reports whether |number| is viable as a phone number.
      // |number| is only read.
      // NOTE(review): the viability criteria are defined in the implementation.
765   bool IsViablePhoneNumber(const string& number) const;
766 
      // Going by the name, removes an extension from |number| when one is
      // present. Both parameters are passed by non-const pointer.
      // NOTE(review): |extension| presumably receives the removed extension and
      // the bool result presumably tells whether one was found - confirm.
767   bool MaybeStripExtension(string* number, string* extension) const;
768 
      // Going by the name, extracts a country calling code from
      // |national_number| and returns it as an int. The string is passed by
      // non-const pointer.
      // NOTE(review): whether the code is removed from the string, and how "no
      // code found" is reported, are defined in the implementation.
769   int ExtractCountryCode(string* national_number) const;
      // Variant that reports an ErrorType and also receives the default
      // region's metadata (by pointer) and the PhoneNumber being filled in.
      // |national_number| and |phone_number| are passed by non-const pointer.
      // The flag is spelled keep_raw_input, matching the parameter of
      // ParseHelper() and the snake_case naming of the other parameters in this
      // class.
      // NOTE(review): whether |default_region_metadata| may be NULL is not
      // visible here - confirm.
770   ErrorType MaybeExtractCountryCode(
771       const PhoneMetadata* default_region_metadata,
772       bool keep_raw_input,
773       string* national_number,
774       PhoneNumber* phone_number) const;
775 
      // Going by the name, checks whether |default_region| can be used to parse
      // |number_to_parse|. Both arguments are only read.
      // NOTE(review): the acceptance rule is defined in the implementation.
776   bool CheckRegionForParsing(
777       const string& number_to_parse,
778       const string& default_region) const;
779 
      // Parsing helper that returns an ErrorType. It takes the string to parse,
      // a default region, two bool switches and the PhoneNumber to work on,
      // passed by non-const pointer.
      // NOTE(review): |keep_raw_input| presumably controls whether the original
      // input is kept on |phone_number|, and |check_region| whether
      // CheckRegionForParsing() is applied - confirm both.
780   ErrorType ParseHelper(const string& number_to_parse,
781                         const string& default_region,
782                         bool keep_raw_input,
783                         bool check_region,
784                         PhoneNumber* phone_number) const;
785 
      // Going by the name, derives the national-number string that parsing then
      // works on from |number_to_parse|, which is only read. |national_number|
      // is the only non-const parameter and the method returns void, so it is
      // presumably where the result is written.
      // NOTE(review): the exact transformation is defined in the
      // implementation.
786   void BuildNationalNumberForParsing(const string& number_to_parse,
787                                      string* national_number) const;
788 
789   // Returns true if the number can be dialled from outside the region, or if
790   // that is unknown. If the number can only be dialled from within the region,
791   // returns false. Does not check the number is a valid number.
792   bool CanBeInternationallyDialled(const PhoneNumber& number) const;
793 
      // Macro that is not defined in this header. NOTE(review): going by its
      // name it disables copying and assignment of PhoneNumberUtil, and it
      // presumably comes from one of the base headers included at the top of
      // this file - confirm.
794   DISALLOW_COPY_AND_ASSIGN(PhoneNumberUtil);
795 };
796 
797 }  // namespace phonenumbers
798 }  // namespace i18n
799 
800 #endif  // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
801