• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Author: Lara Rennie
16 // Author: Tao Huang
17 //
18 // This is a direct port from PhoneNumberMatcher.java.
19 // Changes to this class should also happen to the Java version, whenever it
20 // makes sense.
21 
22 #ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
23 #define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
24 
25 #include <string>
26 #include <vector>
27 
28 #include "phonenumbers/base/basictypes.h"
29 #include "phonenumbers/base/memory/scoped_ptr.h"
30 #include "phonenumbers/callback.h"
31 #include "phonenumbers/regexp_adapter.h"
32 
33 namespace i18n {
34 namespace phonenumbers {
35 
36 template <class R, class A1, class A2, class A3, class A4>
37     class ResultCallback4;
38 
39 using std::string;
40 using std::vector;
41 
42 class AlternateFormats;
43 class NumberFormat;
44 class PhoneNumber;
45 class PhoneNumberMatch;
46 class PhoneNumberMatcherRegExps;
47 class PhoneNumberUtil;
48 
49 class PhoneNumberMatcher {
     // Iterates over the phone numbers found in a piece of text: HasNext()
     // reports whether another match exists and Next() retrieves it.
50   friend class PhoneNumberMatcherTest;
51  public:
52   // Leniency when finding potential phone numbers in text segments. The levels
53   // here are ordered in increasing strictness.
54   enum Leniency {
55     // Phone numbers accepted are possible, but not necessarily valid.
56     POSSIBLE,
57     // Phone numbers accepted are possible and valid.
58     VALID,
59     // Phone numbers accepted are valid and are grouped in a possible way for
60     // this locale. For example, a US number written as "65 02 53 00 00" is not
61     // accepted at this leniency level, whereas "650 253 0000" or "6502530000"
62     // are. Numbers with more than one '/' symbol are also dropped at this
63     // level.
64     // Warning: This level and EXACT_GROUPING might result in lower coverage,
65     // especially for regions outside of country code "+1". If you are not sure
66     // about which level to use, you can send an e-mail to the discussion group
67     // http://groups.google.com/group/libphonenumber-discuss/
68     STRICT_GROUPING,
69     // Phone numbers accepted are valid and are grouped in the same way that we
70     // would have formatted it, or as a single block. For example, a US number
71     // written as "650 2530000" is not accepted at this leniency level, whereas
72     // "650 253 0000" or "6502530000" are.
73     EXACT_GROUPING,
74   };
75 
76   // Constructs a phone number matcher.
77   PhoneNumberMatcher(const PhoneNumberUtil& util,
78                      const string& text,
79                      const string& region_code,
80                      Leniency leniency,
81                      int max_tries);
82 
83   // Wrapper to construct a phone number matcher, with no limitation on the
84   // number of retries and VALID Leniency.
85   PhoneNumberMatcher(const string& text,
86                      const string& region_code);
87 
88   ~PhoneNumberMatcher();
89 
90   // Returns true if the text sequence has another match, false otherwise.
91   // Always returns false when the input contains non-UTF-8 characters.
92   bool HasNext();
93 
94   // Gets the next match from the text sequence and stores it in match.
     // NOTE(review): presumably returns false when no match is left - confirm.
95   bool Next(PhoneNumberMatch* match);
96 
97  private:
98   // The potential states of a PhoneNumberMatcher.
99   enum State {
100     NOT_READY,
101     READY,
102     DONE,
103   };
104 
105   // Checks whether the provided text_ is valid UTF-8.
106   bool IsInputUtf8();
107 
108   // Attempts to find a match in the text being searched. Returns true if a
109   // match is found, otherwise returns false.
110   // NOTE(review): "index" is presumably where the search starts in text_.
111   bool Find(int index, PhoneNumberMatch* match);
112 
113   // Checks that a number was formatted with a national prefix, if the number
114   // was found in national format, and a national prefix is required for that
115   // number. Returns false if the number needed to have a national prefix and
116   // none was found.
117   bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const;
118 
119   // Attempts to extract a match from candidate, which starts at offset within
120   // the overall text. Returns true if a match was found, otherwise false.
121   bool ExtractMatch(const string& candidate, int offset,
122                     PhoneNumberMatch* match);
123 
124   // Attempts to extract a match from a candidate string if the whole candidate
125   // does not qualify as a match. Returns true if a match is found, otherwise
126   // returns false.
127   bool ExtractInnerMatch(const string& candidate, int offset,
128                          PhoneNumberMatch* match);
129 
130   // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and
131   // verifies it matches the requested leniency. If parsing and verification
132   // succeed, returns true, otherwise this method returns false.
133   bool ParseAndVerify(const string& candidate, int offset,
134                       PhoneNumberMatch* match);
135 
      // NOTE(review): presumably validates how the digits of candidate are
      // grouped for phone_number, delegating the comparison to checker. Which
      // groupings are tried, and who owns checker, is not visible from this
      // declaration - confirm against the implementation.
136   bool CheckNumberGroupingIsValid(
137     const PhoneNumber& phone_number,
138     const string& candidate,
139     ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&,
140                     const string&, const vector<string>&>* checker) const;
141 
142   // Helper method to get the national-number part of a number, formatted
143   // without any national prefix, and return it as a set of digit blocks that
144   // would be formatted together following standard formatting rules.
145   void GetNationalNumberGroups(
146       const PhoneNumber& number,
147       vector<string>* digit_blocks) const;
148 
149   // Helper method to get the national-number part of a number, formatted
150   // without any national prefix, and return it as a set of digit blocks that
151   // should be formatted together according to the formatting pattern passed in.
152   void GetNationalNumberGroupsForPattern(
153       const PhoneNumber& number,
154       const NumberFormat* formatting_pattern,
155       vector<string>* digit_blocks) const;
156 
      // Has the same parameter list as the checker callback accepted by
      // CheckNumberGroupingIsValid().
      // NOTE(review): presumably tests whether every entry of
      // formatted_number_groups occurs in normalized_candidate - confirm.
157   bool AllNumberGroupsAreExactlyPresent(
158       const PhoneNumberUtil& util,
159       const PhoneNumber& phone_number,
160       const string& normalized_candidate,
161       const vector<string>& formatted_number_groups) const;
162 
      // NOTE(review): presumably checks number, as written in candidate, against
      // the given leniency level - confirm against the implementation.
163   bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number,
164                                  const string& candidate) const;
165 
166   // Declared in the class interface for testing purposes.
167   static bool ContainsMoreThanOneSlashInNationalNumber(
168       const PhoneNumber& number,
169       const string& candidate,
170       const PhoneNumberUtil& util);
171 
172   // Helper method to determine if a character is a Latin-script letter or not.
173   // For our purposes, combining marks should also return true since we assume
174   // they have been added to a preceding Latin character.
175   static bool IsLatinLetter(char32 letter);
176 
177   // Helper class holding useful regular expressions.
178   const PhoneNumberMatcherRegExps* reg_exps_;
179 
180   // Helper class holding loaded data containing alternate ways phone numbers
181   // might be formatted for certain regions.
182   const AlternateFormats* alternate_formats_;
183 
184   // The phone number utility.
185   const PhoneNumberUtil& phone_util_;
186 
187   // The text searched for phone numbers.
188   const string text_;
189 
190   // The region (country) to assume for phone numbers without an international
191   // prefix.
192   const string preferred_region_;
193 
194   // The degree of validation requested.
195   Leniency leniency_;
196 
197   // The maximum number of retries after matching an invalid number.
198   int max_tries_;
199 
200   // The iteration tristate.
201   State state_;
202 
203   // The last successful match, NULL unless state_ is READY.
204   scoped_ptr<PhoneNumberMatch> last_match_;
205 
206   // The next index to start searching at. Undefined when state_ is DONE.
207   int search_index_;
208 
209   // Whether the input text is valid UTF-8.
210   bool is_input_valid_utf8_;
211 
212   DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
213 };
214 
215 }  // namespace phonenumbers
216 }  // namespace i18n
217 
218 #endif  // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
219