// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
// Author: George Yakovlev
//         Philippe Liard
//
// RegExp adapter to allow a pluggable regexp engine. It was introduced during
// the integration of the open-source version of this library into Chromium, to
// be able to use the ICU Regex engine instead of RE2, which is not officially
// supported on Windows.
// Since RE2 was initially used in this library, the interface of this adapter
// is very close to the subset of the RE2 API used in phonenumberutil.cc.
24 
25 #ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
26 #define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
27 
28 #include <cstddef>
29 #include <string>
30 
namespace i18n {
namespace phonenumbers {

using std::string;

// RegExpInput is the interface that abstracts the input that feeds the
// Consume() method of RegExp, which may differ depending on the regexp engine
// behind the implementation (StringPiece for RE2, UnicodeString for ICU Regex).
class RegExpInput {
 public:
  virtual ~RegExpInput() {}

  // Converts to a C++ string.
  virtual string ToString() const = 0;
};

// The regular expression abstract class. It supports only the functions used
// in phonenumberutil.cc. Implementations must provide the three pure virtual
// methods: Consume(), Match() and Replace(). Every other method of this class
// is a non-virtual convenience wrapper that forwards to one of those three.
//
// Note for implementers: a subclass that declares Consume() or Replace() hides
// the helper overloads of the same name for callers that use the subclass type
// directly. Call the helpers through a RegExp pointer/reference, or re-expose
// them in the subclass with "using RegExp::Consume;" / "using RegExp::Replace;".
class RegExp {
 public:
  virtual ~RegExp() {}

  // Matches string to regular expression, returns true if expression was
  // matched, false otherwise, advances position in the match.
  // input_string - string to be searched.
  // anchor_at_start - if true, match would be successful only if it appears at
  // the beginning of the tested region of the string.
  // matched_string1..6 - strings extracted from the match in sequential order.
  // Each of them can be NULL.
  virtual bool Consume(RegExpInput* input_string,
                       bool anchor_at_start,
                       string* matched_string1,
                       string* matched_string2,
                       string* matched_string3,
                       string* matched_string4,
                       string* matched_string5,
                       string* matched_string6) const = 0;

  // Helper methods calling the Consume method that assume the match must start
  // at the beginning (anchor_at_start is true). The overloads taking fewer than
  // six output strings pass NULL for the remaining ones.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3,
               string* matched_string4,
               string* matched_string5,
               string* matched_string6) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, matched_string5,
                   matched_string6);
  }

  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3,
               string* matched_string4,
               string* matched_string5) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, matched_string5, NULL);
  }

  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3,
               string* matched_string4) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, NULL, NULL);
  }

  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, NULL, NULL, NULL);
  }

  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2) const {
    return Consume(input_string, true, matched_string1, matched_string2, NULL,
                   NULL, NULL, NULL);
  }

  bool Consume(RegExpInput* input_string, string* matched_string) const {
    return Consume(input_string, true, matched_string, NULL, NULL, NULL, NULL,
                   NULL);
  }

  bool Consume(RegExpInput* input_string) const {
    return Consume(input_string, true, NULL, NULL, NULL, NULL, NULL, NULL);
  }

  // Helper method calling the Consume method that assumes the match can start
  // at any place in the string (anchor_at_start is false).
  bool FindAndConsume(RegExpInput* input_string,
                      string* matched_string) const {
    return Consume(input_string, false, matched_string, NULL, NULL, NULL, NULL,
                   NULL);
  }

  // Matches string to regular expression, returns true if the expression was
  // matched, false otherwise.
  // input_string - string to be searched.
  // full_match - if true, match would be successful only if it matches the
  // complete string.
  // matched_string - the string extracted from the match. Can be NULL.
  virtual bool Match(const string& input_string,
                     bool full_match,
                     string* matched_string) const = 0;

  // Helper methods calling the Match method with the right arguments:
  // PartialMatch() passes full_match = false, FullMatch() passes
  // full_match = true. The single-argument forms pass NULL as matched_string.
  bool PartialMatch(const string& input_string,
                    string* matched_string) const {
    return Match(input_string, false, matched_string);
  }

  bool PartialMatch(const string& input_string) const {
    return Match(input_string, false, NULL);
  }

  bool FullMatch(const string& input_string,
                 string* matched_string) const {
    return Match(input_string, true, matched_string);
  }

  bool FullMatch(const string& input_string) const {
    return Match(input_string, true, NULL);
  }

  // Replaces match(es) in 'string_to_process'. If 'global' is true,
  // replaces all the matches, otherwise only the first match.
  // replacement_string - text the matches are replaced with. The groups in the
  // replacement string are referenced with the $[0-9] notation.
  // Returns true if the pattern matches and a replacement occurs, false
  // otherwise.
  virtual bool Replace(string* string_to_process,
                       bool global,
                       const string& replacement_string) const = 0;

  // Helper methods calling the Replace method with the right arguments:
  // Replace() passes global = false, GlobalReplace() passes global = true.
  bool Replace(string* string_to_process,
               const string& replacement_string) const {
    return Replace(string_to_process, false, replacement_string);
  }

  bool GlobalReplace(string* string_to_process,
                     const string& replacement_string) const {
    return Replace(string_to_process, true, replacement_string);
  }
};

// Abstract factory class that lets its subclasses instantiate the classes
// implementing RegExp and RegExpInput. Both methods return raw pointers that
// the caller owns.
class AbstractRegExpFactory {
 public:
  virtual ~AbstractRegExpFactory() {}

  // Creates a new instance of RegExpInput. The deletion of the returned
  // instance is under the responsibility of the caller.
  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;

  // Creates a new instance of RegExp. The deletion of the returned instance is
  // under the responsibility of the caller.
  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
};

}  // namespace phonenumbers
}  // namespace i18n
203 
204 #endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
205