1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Author: George Yakovlev
16 // Philippe Liard
17
18 #include "phonenumbers/regexp_adapter.h"
19
20 #include <string>
21 #include <vector>
22
23 #include <gtest/gtest.h>
24
25 #include "phonenumbers/base/memory/scoped_ptr.h"
26 #include "phonenumbers/stl_util.h"
27 #include "phonenumbers/stringutil.h"
28
29 #ifdef I18N_PHONENUMBERS_USE_RE2
30 #include "phonenumbers/regexp_adapter_re2.h"
31 #else
32 #include "phonenumbers/regexp_adapter_icu.h"
33 #endif // I18N_PHONENUMBERS_USE_RE2
34
35 namespace i18n {
36 namespace phonenumbers {
37
38 using std::vector;
39
40 // Structure that contains the attributes used to test an implementation of the
41 // regexp adapter.
42 struct RegExpTestContext {
RegExpTestContexti18n::phonenumbers::RegExpTestContext43 explicit RegExpTestContext(const string& name,
44 const AbstractRegExpFactory* factory)
45 : name(name),
46 factory(factory),
47 digits(factory->CreateRegExp("\\d+")),
48 parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
49 single_digit(factory->CreateRegExp("\\d")),
50 two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")),
51 six_digit_groups(factory->CreateRegExp(
52 "(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)")) {}
53
54 const string name;
55 const scoped_ptr<const AbstractRegExpFactory> factory;
56 const scoped_ptr<const RegExp> digits;
57 const scoped_ptr<const RegExp> parentheses_digits;
58 const scoped_ptr<const RegExp> single_digit;
59 const scoped_ptr<const RegExp> two_digit_groups;
60 const scoped_ptr<const RegExp> six_digit_groups;
61 };
62
63 class RegExpAdapterTest : public testing::Test {
64 protected:
RegExpAdapterTest()65 RegExpAdapterTest() {
66 #ifdef I18N_PHONENUMBERS_USE_RE2
67 contexts_.push_back(
68 new RegExpTestContext("RE2", new RE2RegExpFactory()));
69 #else
70 contexts_.push_back(
71 new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
72 #endif // I18N_PHONENUMBERS_USE_RE2
73 }
74
~RegExpAdapterTest()75 ~RegExpAdapterTest() { gtl::STLDeleteElements(&contexts_); }
76
ErrorMessage(const RegExpTestContext & context)77 static string ErrorMessage(const RegExpTestContext& context) {
78 return StrCat("Test failed with ", context.name, " implementation.");
79 }
80
81 typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
82 vector<const RegExpTestContext*> contexts_;
83 };
84
// A Consume() call that is anchored at the start and does not match must
// return false and leave both the input and the output string untouched.
TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
  const string original_text("+1-123-456-789");

  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<RegExpInput> text(ctx.factory->CreateInput(original_text));

    // When 'true' is passed to Consume(), the match occurs from the beginning
    // of the input. The input starts with '+', so the digits pattern fails.
    ASSERT_FALSE(ctx.digits->Consume(
        text.get(), true, NULL, NULL, NULL, NULL, NULL, NULL))
        << failure;
    ASSERT_EQ(original_text, text->ToString()) << failure;

    // Same expectation when an output string is supplied: it must stay empty.
    string captured;
    ASSERT_FALSE(ctx.parentheses_digits->Consume(
        text.get(), true, &captured, NULL, NULL, NULL, NULL, NULL))
        << failure;
    ASSERT_EQ(original_text, text->ToString()) << failure;
    ASSERT_EQ("", captured) << failure;
  }
}
108
109
// Consume() must accept NULL for every output argument even though the
// pattern contains a capturing group, and must still advance the input.
TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);

    const scoped_ptr<RegExpInput> text(ctx.factory->CreateInput("+123"));
    const scoped_ptr<const RegExp> plus_sign(
        ctx.factory->CreateRegExp("(\\+)"));

    ASSERT_TRUE(plus_sign->Consume(
        text.get(), true, NULL, NULL, NULL, NULL, NULL, NULL))
        << failure;
    // Only the leading '+' has been consumed.
    ASSERT_EQ("123", text->ToString()) << failure;
  }
}
124
// A successful anchored Consume() must fill the supplied output strings with
// the captured groups and strip the matched prefix from the input.
TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<RegExpInput> text(
        ctx.factory->CreateInput("1-123-456-789"));

    string first_group;
    string second_group;
    ASSERT_TRUE(ctx.two_digit_groups->Consume(
        text.get(), true, &first_group, &second_group, NULL, NULL, NULL, NULL))
        << failure;

    // "1-123" was matched; the rest of the input is left in place.
    ASSERT_EQ("-456-789", text->ToString()) << failure;
    ASSERT_EQ("1", first_group) << failure;
    ASSERT_EQ("123", second_group) << failure;
  }
}
141
// Exercises Consume() in its unanchored mode: successive calls find the next
// match anywhere in the remaining input and consume everything up to the end
// of that match, a failed call leaves the input untouched, and up to six
// captured groups can be returned.
TEST_F(RegExpAdapterTest, TestFindAndConsume) {
  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
       ++it) {
    const RegExpTestContext& context = **it;
    const scoped_ptr<RegExpInput> input(
        context.factory->CreateInput("+1-123-456-789"));
    const scoped_ptr<RegExpInput> input_with_six_digit_groups(
        context.factory->CreateInput("111-222-333-444-555-666"));

    // When 'false' is passed to Consume(), the match can occur from any place
    // in the input.
    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL,
                                        NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context);

    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL,
                                        NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);

    // No parenthesized digits remain, so nothing may be consumed.
    ASSERT_FALSE(context.parentheses_digits->Consume(
        input.get(), false, NULL, NULL, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);

    string group1, group2;
    ASSERT_TRUE(context.two_digit_groups->Consume(
        input.get(), false, &group1, &group2, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    // No debug printing is needed here: on failure ASSERT_EQ itself reports
    // the remaining input.
    ASSERT_EQ("", input->ToString()) << ErrorMessage(context);
    ASSERT_EQ("456", group1) << ErrorMessage(context);
    ASSERT_EQ("789", group2) << ErrorMessage(context);

    // Six is the largest number of groups the Consume() call used here can
    // return, so check that all six output strings are filled.
    string six1, six2, six3, six4, six5, six6;
    ASSERT_TRUE(context.six_digit_groups->Consume(
        input_with_six_digit_groups.get(), false, &six1, &six2, &six3, &six4,
        &six5, &six6))
        << ErrorMessage(context);
    ASSERT_EQ("", input_with_six_digit_groups->ToString())
        << ErrorMessage(context);
    ASSERT_EQ("111", six1) << ErrorMessage(context);
    ASSERT_EQ("222", six2) << ErrorMessage(context);
    ASSERT_EQ("333", six3) << ErrorMessage(context);
    ASSERT_EQ("444", six4) << ErrorMessage(context);
    ASSERT_EQ("555", six5) << ErrorMessage(context);
    ASSERT_EQ("666", six6) << ErrorMessage(context);
  }
}
195
// PartialMatch() must succeed when the pattern matches any part of the text,
// return the captured group when an output string is given, tolerate a NULL
// output, and leave the output untouched when nothing matches.
TEST_F(RegExpAdapterTest, TestPartialMatch) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<const RegExp> alnum_run(
        ctx.factory->CreateRegExp("([\\da-z]+)"));
    string captured;

    // The whole text matches.
    EXPECT_TRUE(alnum_run->PartialMatch("12345af", &captured)) << failure;
    EXPECT_EQ("12345af", captured) << failure;

    // A NULL output string is accepted.
    EXPECT_TRUE(alnum_run->PartialMatch("12345af", NULL)) << failure;

    // Only the part between the brackets matches.
    EXPECT_TRUE(alnum_run->PartialMatch("[12]", &captured)) << failure;
    EXPECT_EQ("12", captured) << failure;

    // Nothing matches; the (cleared) output must stay empty.
    captured.clear();
    EXPECT_FALSE(alnum_run->PartialMatch("[]", &captured)) << failure;
    EXPECT_EQ("", captured) << failure;
  }
}
221
// FullMatch() must succeed only when the pattern covers the entire text,
// tolerate a NULL output, and leave the output untouched on failure.
TEST_F(RegExpAdapterTest, TestFullMatch) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<const RegExp> alnum_run(
        ctx.factory->CreateRegExp("([\\da-z]+)"));
    string captured;

    // The whole text matches.
    EXPECT_TRUE(alnum_run->FullMatch("12345af", &captured)) << failure;
    EXPECT_EQ("12345af", captured) << failure;

    // A NULL output string is accepted.
    EXPECT_TRUE(alnum_run->FullMatch("12345af", NULL)) << failure;

    // The brackets are not part of the pattern, so a full match fails.
    captured.clear();
    EXPECT_FALSE(alnum_run->FullMatch("[12]", &captured)) << failure;
    EXPECT_EQ("", captured) << failure;

    // Nothing matches at all.
    captured.clear();
    EXPECT_FALSE(alnum_run->FullMatch("[]", &captured)) << failure;
    EXPECT_EQ("", captured) << failure;
  }
}
245
// Replace() must substitute only the first match on each call, and must
// return false and leave the text unchanged when there is no match.
TEST_F(RegExpAdapterTest, TestReplace) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    string text("123-4567 ");

    // First call replaces the first digit only.
    ASSERT_TRUE(ctx.single_digit->Replace(&text, "+")) << failure;
    ASSERT_EQ("+23-4567 ", text) << failure;

    // Second call replaces the next remaining digit.
    ASSERT_TRUE(ctx.single_digit->Replace(&text, "+")) << failure;
    ASSERT_EQ("++3-4567 ", text) << failure;

    // The text contains no lowercase letter, so nothing is replaced.
    const scoped_ptr<const RegExp> single_letter(
        ctx.factory->CreateRegExp("[a-z]"));
    ASSERT_FALSE(single_letter->Replace(&text, "+")) << failure;
    ASSERT_EQ("++3-4567 ", text) << failure;
  }
}
267
// Captured groups can be referenced from the replacement string with the
// $[0-9] notation; text outside the match is preserved.
TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    const RegExp& two_groups = *ctx.two_digit_groups;

    // The unmatched suffix " abc" stays in place.
    string text("123-4567 abc");
    ASSERT_TRUE(two_groups.Replace(&text, "$2")) << failure;
    ASSERT_EQ("4567 abc", text) << failure;

    // First group only.
    text = "123-4567";
    ASSERT_TRUE(two_groups.Replace(&text, "$1")) << failure;
    ASSERT_EQ("123", text) << failure;

    // Second group only.
    text = "123-4567";
    ASSERT_TRUE(two_groups.Replace(&text, "$2")) << failure;
    ASSERT_EQ("4567", text) << failure;

    // Both groups combined with literal text.
    text = "123-4567";
    ASSERT_TRUE(two_groups.Replace(&text, "$1 $2")) << failure;
    ASSERT_EQ("123 4567", text) << failure;
  }
}
296
// An escaped '$' in the replacement string must be emitted literally instead
// of being treated as a group reference.
TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);

    string text("123-4567");
    const bool replaced = ctx.two_digit_groups->Replace(&text, "\\$1 \\$2");
    ASSERT_TRUE(replaced) << failure;

    ASSERT_EQ("$1 $2", text) << failure;
  }
}
310
// GlobalReplace() must substitute every match in one call, and must return
// false and leave the text unchanged once no match remains.
TEST_F(RegExpAdapterTest, TestGlobalReplace) {
  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);
    const RegExp& digit = *ctx.single_digit;

    string text("123-4567 ");

    // Every digit is replaced at once.
    ASSERT_TRUE(digit.GlobalReplace(&text, "*")) << failure;
    ASSERT_EQ("***-**** ", text) << failure;

    // No digit is left, so the second call reports that nothing changed.
    ASSERT_FALSE(digit.GlobalReplace(&text, "*")) << failure;
    ASSERT_EQ("***-**** ", text) << failure;
  }
}
327
// Patterns and inputs containing multi-byte UTF-8 sequences must be handled:
// the Greek-letter range matches Greek letters only, and the captured group
// is returned as UTF-8.
TEST_F(RegExpAdapterTest, TestUtf8) {
  // "℡⊏([α-ω]*)⊐"
  const string pattern(
      "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90");
  // "℡⊏123⊐" -- the literal is split after \x8F so that the hex escape does
  // not absorb the digits that follow it.
  const string digits_text("\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90");
  // "℡⊏αβ⊐"
  const string greek_text(
      "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90");
  // "αβ"
  const string greek_letters("\xCE\xB1\xCE\xB2");

  for (TestContextIterator iter = contexts_.begin(); iter != contexts_.end();
       ++iter) {
    const RegExpTestContext& ctx = **iter;
    const string failure = ErrorMessage(ctx);

    const scoped_ptr<const RegExp> reg_exp(ctx.factory->CreateRegExp(pattern));
    string captured;

    // Digits are outside the [α-ω] range, so the match fails.
    EXPECT_FALSE(reg_exp->Match(digits_text, true, &captured)) << failure;
    // Greek letters are inside the range, so the match succeeds.
    EXPECT_TRUE(reg_exp->Match(greek_text, true, &captured)) << failure;

    EXPECT_EQ(greek_letters, captured) << failure;
  }
}
349
350 } // namespace phonenumbers
351 } // namespace i18n
352