1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Author: George Yakovlev
16 // Philippe Liard
17
18 #include "phonenumbers/regexp_adapter.h"
19
20 #include <string>
21 #include <vector>
22
23 #include <gtest/gtest.h>
24
25 #include "phonenumbers/base/memory/scoped_ptr.h"
26 #include "phonenumbers/stl_util.h"
27 #include "phonenumbers/stringutil.h"
28
29 #ifdef I18N_PHONENUMBERS_USE_RE2
30 #include "phonenumbers/regexp_adapter_re2.h"
31 #else
32 #include "phonenumbers/regexp_adapter_icu.h"
33 #endif // I18N_PHONENUMBERS_USE_RE2
34
35 namespace i18n {
36 namespace phonenumbers {
37
38 using std::vector;
39
40 // Structure that contains the attributes used to test an implementation of the
41 // regexp adapter.
42 struct RegExpTestContext {
RegExpTestContexti18n::phonenumbers::RegExpTestContext43 explicit RegExpTestContext(const string& name,
44 const AbstractRegExpFactory* factory)
45 : name(name),
46 factory(factory),
47 digits(factory->CreateRegExp("\\d+")),
48 parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
49 single_digit(factory->CreateRegExp("\\d")),
50 two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")) {}
51
52 const string name;
53 const scoped_ptr<const AbstractRegExpFactory> factory;
54 const scoped_ptr<const RegExp> digits;
55 const scoped_ptr<const RegExp> parentheses_digits;
56 const scoped_ptr<const RegExp> single_digit;
57 const scoped_ptr<const RegExp> two_digit_groups;
58 };
59
60 class RegExpAdapterTest : public testing::Test {
61 protected:
RegExpAdapterTest()62 RegExpAdapterTest() {
63 #ifdef I18N_PHONENUMBERS_USE_RE2
64 contexts_.push_back(
65 new RegExpTestContext("RE2", new RE2RegExpFactory()));
66 #else
67 contexts_.push_back(
68 new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
69 #endif // I18N_PHONENUMBERS_USE_RE2
70 }
71
~RegExpAdapterTest()72 ~RegExpAdapterTest() { gtl::STLDeleteElements(&contexts_); }
73
ErrorMessage(const RegExpTestContext & context)74 static string ErrorMessage(const RegExpTestContext& context) {
75 return StrCat("Test failed with ", context.name, " implementation.");
76 }
77
78 typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
79 vector<const RegExpTestContext*> contexts_;
80 };
81
// Checks that an anchored Consume() call which does not match returns false,
// leaves the input untouched and does not write to the capture string.
TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
  // Use the fixture's iterator typedef, like the other tests in this file.
  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
       ++it) {
    const RegExpTestContext& context = **it;
    const scoped_ptr<RegExpInput> input(
        context.factory->CreateInput("+1-123-456-789"));

    // When 'true' is passed to Consume(), the match occurs from the beginning
    // of the input. The input starts with '+', so "\d+" must not match.
    ASSERT_FALSE(context.digits->Consume(input.get(), true, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);

    // Same check with a capture target: the string must stay empty.
    string res1;
    ASSERT_FALSE(context.parentheses_digits->Consume(
        input.get(), true, &res1, NULL, NULL)) << ErrorMessage(context);
    ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
    ASSERT_EQ("", res1) << ErrorMessage(context);
  }
}
103
// Checks that Consume() succeeds when every capture output is NULL even
// though the pattern contains a capturing group, and that the input then
// reads "123".
TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
  TestContextIterator ctx_it = contexts_.begin();
  while (ctx_it != contexts_.end()) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);

    const scoped_ptr<RegExpInput> remaining(ctx.factory->CreateInput("+123"));
    const scoped_ptr<const RegExp> plus_pattern(
        ctx.factory->CreateRegExp("(\\+)"));

    ASSERT_TRUE(plus_pattern->Consume(remaining.get(), true, NULL, NULL, NULL))
        << failure;
    ASSERT_EQ("123", remaining->ToString()) << failure;

    ++ctx_it;
  }
}
117
// Checks that an anchored Consume() with two capturing groups fills both
// output strings and that only the unmatched tail remains in the input.
TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<RegExpInput> remaining(
        ctx.factory->CreateInput("1-123-456-789"));

    string first_group;
    string second_group;
    ASSERT_TRUE(ctx.two_digit_groups->Consume(
        remaining.get(), true, &first_group, &second_group, NULL)) << failure;

    // "1-123" was consumed; the groups hold the digits around the dash.
    ASSERT_EQ("-456-789", remaining->ToString()) << failure;
    ASSERT_EQ("1", first_group) << failure;
    ASSERT_EQ("123", second_group) << failure;
  }
}
133
// Exercises Consume() in non-anchored mode: repeated calls are expected to
// find the next match anywhere in the remaining input and drop everything up
// to and including it.
TEST_F(RegExpAdapterTest, TestFindAndConsume) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<RegExpInput> remaining(
        ctx.factory->CreateInput("+1-123-456-789"));

    // Passing 'false' to Consume() lets the match start at any position in
    // the input rather than only at its beginning.
    ASSERT_TRUE(ctx.digits->Consume(remaining.get(), false, NULL, NULL, NULL))
        << failure;
    ASSERT_EQ("-123-456-789", remaining->ToString()) << failure;

    ASSERT_TRUE(ctx.digits->Consume(remaining.get(), false, NULL, NULL, NULL))
        << failure;
    ASSERT_EQ("-456-789", remaining->ToString()) << failure;

    // No parenthesised digits are present, so nothing may be consumed.
    ASSERT_FALSE(ctx.parentheses_digits->Consume(
        remaining.get(), false, NULL, NULL, NULL)) << failure;
    ASSERT_EQ("-456-789", remaining->ToString()) << failure;

    // The final call matches "456-789" and exhausts the input.
    string first_group;
    string second_group;
    ASSERT_TRUE(ctx.two_digit_groups->Consume(
        remaining.get(), false, &first_group, &second_group, NULL)) << failure;
    ASSERT_EQ("", remaining->ToString()) << failure;
    ASSERT_EQ("456", first_group) << failure;
    ASSERT_EQ("789", second_group) << failure;
  }
}
163
// Checks PartialMatch(): the pattern may match a substring of the input, the
// capture output may be NULL, and a failed match leaves the capture empty.
TEST_F(RegExpAdapterTest, TestPartialMatch) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<const RegExp> alnum_run(
        ctx.factory->CreateRegExp("([\\da-z]+)"));
    string captured;

    // The entire input matches.
    EXPECT_TRUE(alnum_run->PartialMatch("12345af", &captured)) << failure;
    EXPECT_EQ("12345af", captured) << failure;

    // A NULL capture output is accepted.
    EXPECT_TRUE(alnum_run->PartialMatch("12345af", NULL)) << failure;

    // Only the part between the brackets matches.
    EXPECT_TRUE(alnum_run->PartialMatch("[12]", &captured)) << failure;
    EXPECT_EQ("12", captured) << failure;

    // Nothing matches; the cleared capture string must stay empty.
    captured.clear();
    EXPECT_FALSE(alnum_run->PartialMatch("[]", &captured)) << failure;
    EXPECT_EQ("", captured) << failure;
  }
}
189
// Checks FullMatch(): it succeeds only when the whole input matches the
// pattern, accepts a NULL capture output, and leaves the capture empty on
// failure.
TEST_F(RegExpAdapterTest, TestFullMatch) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<const RegExp> alnum_run(
        ctx.factory->CreateRegExp("([\\da-z]+)"));
    string captured;

    // The entire input matches.
    EXPECT_TRUE(alnum_run->FullMatch("12345af", &captured)) << failure;
    EXPECT_EQ("12345af", captured) << failure;

    // A NULL capture output is accepted.
    EXPECT_TRUE(alnum_run->FullMatch("12345af", NULL)) << failure;

    // The surrounding brackets prevent a full match.
    captured.clear();
    EXPECT_FALSE(alnum_run->FullMatch("[12]", &captured)) << failure;
    EXPECT_EQ("", captured) << failure;

    // Nothing in the input matches at all.
    captured.clear();
    EXPECT_FALSE(alnum_run->FullMatch("[]", &captured)) << failure;
    EXPECT_EQ("", captured) << failure;
  }
}
213
// Checks that Replace() substitutes only the first match on each call,
// returns true when it replaced something, and returns false while leaving
// the string unchanged when the pattern does not match.
TEST_F(RegExpAdapterTest, TestReplace) {
  // Use the fixture's iterator typedef, like the other tests in this file.
  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
       ++it) {
    const RegExpTestContext& context = **it;
    string input("123-4567 ");

    // First call replaces the first digit only.
    ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context);

    // Second call replaces the next remaining digit.
    ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);

    // The input holds no lowercase letters, so nothing may be replaced.
    const scoped_ptr<const RegExp> single_letter(
        context.factory->CreateRegExp("[a-z]"));
    ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context);
    ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
  }
}
235
// Checks that the replacement string passed to Replace() may refer to the
// pattern's capturing groups using the $[0-9] notation.
TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    const RegExp& groups = *ctx.two_digit_groups;

    // Text after the match ("abc") is kept; the match becomes group 2.
    string text = "123-4567 abc";
    ASSERT_TRUE(groups.Replace(&text, "$2")) << failure;
    ASSERT_EQ("4567 abc", text) << failure;

    // Keep only the first group.
    text = "123-4567";
    ASSERT_TRUE(groups.Replace(&text, "$1")) << failure;
    ASSERT_EQ("123", text) << failure;

    // Keep only the second group.
    text = "123-4567";
    ASSERT_TRUE(groups.Replace(&text, "$2")) << failure;
    ASSERT_EQ("4567", text) << failure;

    // Both groups combined with literal text in between.
    text = "123-4567";
    ASSERT_TRUE(groups.Replace(&text, "$1 $2")) << failure;
    ASSERT_EQ("123 4567", text) << failure;
  }
}
264
// Checks that a backslash-escaped '$' in the replacement string is emitted
// literally instead of being treated as a group reference.
TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
  TestContextIterator ctx_it = contexts_.begin();
  while (ctx_it != contexts_.end()) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);

    string text = "123-4567";
    ASSERT_TRUE(ctx.two_digit_groups->Replace(&text, "\\$1 \\$2")) << failure;
    ASSERT_EQ("$1 $2", text) << failure;

    ++ctx_it;
  }
}
278
// Checks that GlobalReplace() substitutes every match in a single call and
// returns false, leaving the string unchanged, once no match is left.
TEST_F(RegExpAdapterTest, TestGlobalReplace) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    string text("123-4567 ");

    // All seven digits are replaced at once.
    ASSERT_TRUE(ctx.single_digit->GlobalReplace(&text, "*")) << failure;
    ASSERT_EQ("***-**** ", text) << failure;

    // No digits remain, so the second call must report no replacement.
    ASSERT_FALSE(ctx.single_digit->GlobalReplace(&text, "*")) << failure;
    ASSERT_EQ("***-**** ", text) << failure;
  }
}
295
// Checks that patterns and inputs containing multi-byte UTF-8 sequences are
// handled: a character class spanning Greek letters must reject ASCII digits
// and capture Greek letters. All strings are spelled as escaped UTF-8 bytes.
TEST_F(RegExpAdapterTest, TestUtf8) {
  // "℡⊏([α-ω]*)⊐"
  const char kPattern[] =
      "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90";
  // "℡⊏123⊐" -- the literal is split so that '1' is not read as part of the
  // preceding hex escape.
  const char kDigitsInside[] = "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90";
  // "℡⊏αβ⊐"
  const char kGreekInside[] =
      "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90";
  // "αβ"
  const char kGreekLetters[] = "\xCE\xB1\xCE\xB2";

  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& ctx = **ctx_it;
    const string failure = ErrorMessage(ctx);
    const scoped_ptr<const RegExp> utf8_pattern(
        ctx.factory->CreateRegExp(kPattern));
    string captured;

    EXPECT_FALSE(utf8_pattern->Match(kDigitsInside, true, &captured))
        << failure;
    EXPECT_TRUE(utf8_pattern->Match(kGreekInside, true, &captured)) << failure;

    EXPECT_EQ(kGreekLetters, captured) << failure;
  }
}
317
318 } // namespace phonenumbers
319 } // namespace i18n
320