1 // Copyright (C) 2011 The Libphonenumber Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Author: Lara Rennie 16 // Author: Tao Huang 17 // 18 // This is a direct port from PhoneNumberMatcher.java. 19 // Changes to this class should also happen to the Java version, whenever it 20 // makes sense. 21 22 #ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ 23 #define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ 24 25 #include <string> 26 #include <vector> 27 28 #include "phonenumbers/base/basictypes.h" 29 #include "phonenumbers/base/memory/scoped_ptr.h" 30 #include "phonenumbers/callback.h" 31 #include "phonenumbers/regexp_adapter.h" 32 33 namespace i18n { 34 namespace phonenumbers { 35 36 template <class R, class A1, class A2, class A3, class A4> 37 class ResultCallback4; 38 39 using std::string; 40 using std::vector; 41 42 class AlternateFormats; 43 class NumberFormat; 44 class PhoneNumber; 45 class PhoneNumberMatch; 46 class PhoneNumberMatcherRegExps; 47 class PhoneNumberUtil; 48 49 class PhoneNumberMatcher { 50 friend class PhoneNumberMatcherTest; 51 public: 52 // Leniency when finding potential phone numbers in text segments. The levels 53 // here are ordered in increasing strictness. 54 enum Leniency { 55 // Phone numbers accepted are possible, but not necessarily valid. 56 POSSIBLE, 57 // Phone numbers accepted are possible and valid. 58 VALID, 59 // Phone numbers accepted are valid and are grouped in a possible way for 60 // this locale. For example, a US number written as "65 02 53 00 00" is not 61 // accepted at this leniency level, whereas "650 253 0000" or "6502530000" 62 // are. Numbers with more than one '/' symbol are also dropped at this 63 // level. 64 // Warning: The next two levels might result in lower coverage especially 65 // for regions outside of country code "+1". If you are not sure about which 66 // level to use, you can send an e-mail to the discussion group 67 // http://groups.google.com/group/libphonenumber-discuss/ 68 STRICT_GROUPING, 69 // Phone numbers accepted are valid and are grouped in the same way that we 70 // would have formatted it, or as a single block. For example, a US number 71 // written as "650 2530000" is not accepted at this leniency level, whereas 72 // "650 253 0000" or "6502530000" are. 73 EXACT_GROUPING, 74 }; 75 76 // Constructs a phone number matcher. 77 PhoneNumberMatcher(const PhoneNumberUtil& util, 78 const string& text, 79 const string& region_code, 80 Leniency leniency, 81 int max_tries); 82 83 // Wrapper to construct a phone number matcher, with no limitation on the 84 // number of retries and VALID Leniency. 85 PhoneNumberMatcher(const string& text, 86 const string& region_code); 87 88 ~PhoneNumberMatcher(); 89 90 // Returns true if the text sequence has another match. Return false if not. 91 // Always returns false when input contains non UTF-8 characters. 92 bool HasNext(); 93 94 // Gets next match from text sequence. 95 bool Next(PhoneNumberMatch* match); 96 97 private: 98 // The potential states of a PhoneNumberMatcher. 99 enum State { 100 NOT_READY, 101 READY, 102 DONE, 103 }; 104 105 // Checks if the to check if the provided text_ is in UTF-8 or not. 106 bool IsInputUtf8(); 107 108 // Attempts to extract a match from a candidate string. Returns true if a 109 // match is found, otherwise returns false. The value "offset" refers to the 110 // start index of the candidate string within the overall text. 111 bool Find(int index, PhoneNumberMatch* match); 112 113 // Checks a number was formatted with a national prefix, if the number was 114 // found in national format, and a national prefix is required for that 115 // number. Returns false if the number needed to have a national prefix and 116 // none was found. 117 bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const; 118 119 // Attempts to extract a match from candidate. Returns true if the match was 120 // found, otherwise returns false. 121 bool ExtractMatch(const string& candidate, int offset, 122 PhoneNumberMatch* match); 123 124 // Attempts to extract a match from a candidate string if the whole candidate 125 // does not qualify as a match. Returns true if a match is found, otherwise 126 // returns false. 127 bool ExtractInnerMatch(const string& candidate, int offset, 128 PhoneNumberMatch* match); 129 130 // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and 131 // verifies it matches the requested leniency. If parsing and verification 132 // succeed, returns true, otherwise this method returns false; 133 bool ParseAndVerify(const string& candidate, int offset, 134 PhoneNumberMatch* match); 135 136 bool CheckNumberGroupingIsValid( 137 const PhoneNumber& phone_number, 138 const string& candidate, 139 ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&, 140 const string&, const vector<string>&>* checker) const; 141 142 // Helper method to get the national-number part of a number, formatted 143 // without any national prefix, and return it as a set of digit blocks that 144 // would be formatted together following standard formatting rules. 145 void GetNationalNumberGroups( 146 const PhoneNumber& number, 147 vector<string>* digit_blocks) const; 148 149 // Helper method to get the national-number part of a number, formatted 150 // without any national prefix, and return it as a set of digit blocks that 151 // should be formatted together according to the formatting pattern passed in. 152 void GetNationalNumberGroupsForPattern( 153 const PhoneNumber& number, 154 const NumberFormat* formatting_pattern, 155 vector<string>* digit_blocks) const; 156 157 bool AllNumberGroupsAreExactlyPresent( 158 const PhoneNumberUtil& util, 159 const PhoneNumber& phone_number, 160 const string& normalized_candidate, 161 const vector<string>& formatted_number_groups) const; 162 163 bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number, 164 const string& candidate) const; 165 166 // In interface for testing purposes. 167 static bool ContainsMoreThanOneSlashInNationalNumber( 168 const PhoneNumber& number, 169 const string& candidate, 170 const PhoneNumberUtil& util); 171 172 // Helper method to determine if a character is a Latin-script letter or not. 173 // For our purposes, combining marks should also return true since we assume 174 // they have been added to a preceding Latin character. 175 static bool IsLatinLetter(char32 letter); 176 177 // Helper class holding useful regular expressions. 178 const PhoneNumberMatcherRegExps* reg_exps_; 179 180 // Helper class holding loaded data containing alternate ways phone numbers 181 // might be formatted for certain regions. 182 const AlternateFormats* alternate_formats_; 183 184 // The phone number utility; 185 const PhoneNumberUtil& phone_util_; 186 187 // The text searched for phone numbers; 188 const string text_; 189 190 // The region(country) to assume for phone numbers without an international 191 // prefix. 192 const string preferred_region_; 193 194 // The degree of validation requested. 195 Leniency leniency_; 196 197 // The maximum number of retries after matching an invalid number. 198 int max_tries_; 199 200 // The iteration tristate. 201 State state_; 202 203 // The last successful match, NULL unless in State.READY. 204 scoped_ptr<PhoneNumberMatch> last_match_; 205 206 // The next index to start searching at. Undefined in State.DONE. 207 int search_index_; 208 209 // Flag to set or check if input text is in UTF-8 or not. 210 bool is_input_valid_utf8_; 211 212 DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher); 213 }; 214 215 } // namespace phonenumbers 216 } // namespace i18n 217 218 #endif // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ 219