// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard
17 
18 #include "phonenumbers/regexp_adapter.h"
19 
20 #include <string>
21 #include <vector>
22 
23 #include <gtest/gtest.h>
24 
25 #include "phonenumbers/base/memory/scoped_ptr.h"
26 #include "phonenumbers/stl_util.h"
27 #include "phonenumbers/stringutil.h"
28 
29 #ifdef I18N_PHONENUMBERS_USE_RE2
30 #include "phonenumbers/regexp_adapter_re2.h"
31 #else
32 #include "phonenumbers/regexp_adapter_icu.h"
33 #endif  // I18N_PHONENUMBERS_USE_RE2
34 
35 namespace i18n {
36 namespace phonenumbers {
37 
38 using std::vector;
39 
40 // Structure that contains the attributes used to test an implementation of the
41 // regexp adapter.
42 struct RegExpTestContext {
RegExpTestContexti18n::phonenumbers::RegExpTestContext43   explicit RegExpTestContext(const string& name,
44                              const AbstractRegExpFactory* factory)
45       : name(name),
46         factory(factory),
47         digits(factory->CreateRegExp("\\d+")),
48         parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
49         single_digit(factory->CreateRegExp("\\d")),
50         two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")),
51         six_digit_groups(factory->CreateRegExp(
52             "(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)")) {}
53 
54   const string name;
55   const scoped_ptr<const AbstractRegExpFactory> factory;
56   const scoped_ptr<const RegExp> digits;
57   const scoped_ptr<const RegExp> parentheses_digits;
58   const scoped_ptr<const RegExp> single_digit;
59   const scoped_ptr<const RegExp> two_digit_groups;
60   const scoped_ptr<const RegExp> six_digit_groups;
61 };
62 
63 class RegExpAdapterTest : public testing::Test {
64  protected:
RegExpAdapterTest()65   RegExpAdapterTest() {
66 #ifdef I18N_PHONENUMBERS_USE_RE2
67     contexts_.push_back(
68         new RegExpTestContext("RE2", new RE2RegExpFactory()));
69 #else
70     contexts_.push_back(
71         new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
72 #endif  // I18N_PHONENUMBERS_USE_RE2
73   }
74 
~RegExpAdapterTest()75   ~RegExpAdapterTest() { gtl::STLDeleteElements(&contexts_); }
76 
ErrorMessage(const RegExpTestContext & context)77   static string ErrorMessage(const RegExpTestContext& context) {
78     return StrCat("Test failed with ", context.name, " implementation.");
79   }
80 
81   typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
82   vector<const RegExpTestContext*> contexts_;
83 };
84 
// An anchored Consume() that does not match must return false and leave both
// the input and the output string unchanged.
TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    const scoped_ptr<RegExpInput> number(
        context.factory->CreateInput("+1-123-456-789"));

    // Passing 'true' to Consume() anchors the match at the beginning of the
    // input. The input starts with '+', so a digits-only pattern fails there.
    ASSERT_FALSE(context.digits->Consume(
        number.get(), true, NULL, NULL, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("+1-123-456-789", number->ToString()) << ErrorMessage(context);

    // Same check with an output argument supplied: it must stay empty.
    string captured;
    ASSERT_FALSE(context.parentheses_digits->Consume(
        number.get(), true, &captured, NULL, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("+1-123-456-789", number->ToString()) << ErrorMessage(context);
    ASSERT_EQ("", captured) << ErrorMessage(context);
  }
}
108 
109 
// Consume() must succeed when every output argument is NULL, even though the
// regexp contains a capturing group.
TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    const scoped_ptr<RegExpInput> number(
        context.factory->CreateInput("+123"));
    const scoped_ptr<const RegExp> leading_plus(
        context.factory->CreateRegExp("(\\+)"));

    ASSERT_TRUE(leading_plus->Consume(
        number.get(), true, NULL, NULL, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    // Only the leading '+' has been consumed.
    ASSERT_EQ("123", number->ToString()) << ErrorMessage(context);
  }
}
124 
// A successful anchored Consume() must strip the matched prefix from the input
// and store each captured group in the matching output string.
TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    const scoped_ptr<RegExpInput> number(
        context.factory->CreateInput("1-123-456-789"));

    string first_group;
    string second_group;
    ASSERT_TRUE(context.two_digit_groups->Consume(
        number.get(), true, &first_group, &second_group, NULL, NULL, NULL,
        NULL))
        << ErrorMessage(context);
    // "1-123" was consumed; the remainder starts at the following dash.
    ASSERT_EQ("-456-789", number->ToString()) << ErrorMessage(context);
    ASSERT_EQ("1", first_group) << ErrorMessage(context);
    ASSERT_EQ("123", second_group) << ErrorMessage(context);
  }
}
141 
// Exercises unanchored Consume(): the match may start anywhere in the input,
// and everything up to and including the match is removed from the input.
// Also covers a regexp with six capturing groups, filling all six outputs.
TEST_F(RegExpAdapterTest, TestFindAndConsume) {
  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
       ++it) {
    const RegExpTestContext& context = **it;
    const scoped_ptr<RegExpInput> input(
        context.factory->CreateInput("+1-123-456-789"));
    const scoped_ptr<RegExpInput> input_with_six_digit_groups(
        context.factory->CreateInput("111-222-333-444-555-666"));

    // When 'false' is passed to Consume(), the match can occur from any place
    // in the input.
    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL,
                                        NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context);

    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL,
                                        NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);

    // No parenthesised digits remain, so the input must be left untouched.
    ASSERT_FALSE(context.parentheses_digits->Consume(
        input.get(), false, NULL, NULL, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);

    string res1, res2;
    ASSERT_TRUE(context.two_digit_groups->Consume(
        input.get(), false, &res1, &res2, NULL, NULL, NULL, NULL))
        << ErrorMessage(context);
    // On failure ASSERT_EQ reports the remaining input itself.
    ASSERT_EQ("", input->ToString()) << ErrorMessage(context);
    ASSERT_EQ("456", res1) << ErrorMessage(context);
    ASSERT_EQ("789", res2) << ErrorMessage(context);

    // Testing maximum no of substrings that can be matched presently, six.
    string group1, group2, group3, group4, group5, group6;
    ASSERT_TRUE(context.six_digit_groups->Consume(
        input_with_six_digit_groups.get(), false, &group1, &group2, &group3,
        &group4, &group5, &group6))
        << ErrorMessage(context);
    ASSERT_EQ("", input_with_six_digit_groups->ToString())
        << ErrorMessage(context);
    ASSERT_EQ("111", group1) << ErrorMessage(context);
    ASSERT_EQ("222", group2) << ErrorMessage(context);
    ASSERT_EQ("333", group3) << ErrorMessage(context);
    ASSERT_EQ("444", group4) << ErrorMessage(context);
    ASSERT_EQ("555", group5) << ErrorMessage(context);
    ASSERT_EQ("666", group6) << ErrorMessage(context);
  }
}
195 
// PartialMatch() must succeed when the pattern matches any part of the input,
// must accept a NULL output argument, and must leave the output untouched when
// nothing matches.
TEST_F(RegExpAdapterTest, TestPartialMatch) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    const scoped_ptr<const RegExp> alnum_run(
        context.factory->CreateRegExp("([\\da-z]+)"));
    string captured;

    // The whole input matches.
    EXPECT_TRUE(alnum_run->PartialMatch("12345af", &captured))
        << ErrorMessage(context);
    EXPECT_EQ("12345af", captured) << ErrorMessage(context);

    // Same input, no output argument.
    EXPECT_TRUE(alnum_run->PartialMatch("12345af", NULL))
        << ErrorMessage(context);

    // Only the part between the brackets matches.
    EXPECT_TRUE(alnum_run->PartialMatch("[12]", &captured))
        << ErrorMessage(context);
    EXPECT_EQ("12", captured) << ErrorMessage(context);

    // Nothing in the input matches.
    captured.clear();
    EXPECT_FALSE(alnum_run->PartialMatch("[]", &captured))
        << ErrorMessage(context);
    EXPECT_EQ("", captured) << ErrorMessage(context);
  }
}
221 
// FullMatch() must succeed only when the pattern covers the entire input, must
// accept a NULL output argument, and must leave the output untouched on
// failure.
TEST_F(RegExpAdapterTest, TestFullMatch) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    const scoped_ptr<const RegExp> alnum_run(
        context.factory->CreateRegExp("([\\da-z]+)"));
    string captured;

    // The whole input matches.
    EXPECT_TRUE(alnum_run->FullMatch("12345af", &captured))
        << ErrorMessage(context);
    EXPECT_EQ("12345af", captured) << ErrorMessage(context);

    // Same input, no output argument.
    EXPECT_TRUE(alnum_run->FullMatch("12345af", NULL))
        << ErrorMessage(context);

    // The brackets are not part of the character class, so a full match fails.
    captured.clear();
    EXPECT_FALSE(alnum_run->FullMatch("[12]", &captured))
        << ErrorMessage(context);
    EXPECT_EQ("", captured) << ErrorMessage(context);

    // Nothing in the input matches.
    captured.clear();
    EXPECT_FALSE(alnum_run->FullMatch("[]", &captured))
        << ErrorMessage(context);
    EXPECT_EQ("", captured) << ErrorMessage(context);
  }
}
245 
// Replace() must substitute only the first match per call and must return
// false, leaving the string unchanged, when there is no match.
TEST_F(RegExpAdapterTest, TestReplace) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    string text("123-4567 ");

    // First call replaces the first digit.
    ASSERT_TRUE(context.single_digit->Replace(&text, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("+23-4567 ", text) << ErrorMessage(context);

    // Second call replaces the next remaining digit.
    ASSERT_TRUE(context.single_digit->Replace(&text, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("++3-4567 ", text) << ErrorMessage(context);

    // The string holds no lower-case letter, so nothing is replaced.
    const scoped_ptr<const RegExp> lowercase_letter(
        context.factory->CreateRegExp("[a-z]"));
    ASSERT_FALSE(lowercase_letter->Replace(&text, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("++3-4567 ", text) << ErrorMessage(context);
  }
}
267 
// Captured groups can be referenced from the replacement string using the
// $[0-9] notation.
TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;

    // Text after the match is preserved.
    string with_suffix("123-4567 abc");
    ASSERT_TRUE(context.two_digit_groups->Replace(&with_suffix, "$2"))
        << ErrorMessage(context);
    ASSERT_EQ("4567 abc", with_suffix) << ErrorMessage(context);

    // Keep only the first group.
    string first_only("123-4567");
    ASSERT_TRUE(context.two_digit_groups->Replace(&first_only, "$1"))
        << ErrorMessage(context);
    ASSERT_EQ("123", first_only) << ErrorMessage(context);

    // Keep only the second group.
    string second_only("123-4567");
    ASSERT_TRUE(context.two_digit_groups->Replace(&second_only, "$2"))
        << ErrorMessage(context);
    ASSERT_EQ("4567", second_only) << ErrorMessage(context);

    // Both groups, separated by a space instead of the dash.
    string both_groups("123-4567");
    ASSERT_TRUE(context.two_digit_groups->Replace(&both_groups, "$1 $2"))
        << ErrorMessage(context);
    ASSERT_EQ("123 4567", both_groups) << ErrorMessage(context);
  }
}
296 
// An escaped '$' in the replacement string must be emitted literally instead
// of being treated as a group reference.
TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;

    string text("123-4567");
    ASSERT_TRUE(context.two_digit_groups->Replace(&text, "\\$1 \\$2"))
        << ErrorMessage(context);

    // The whole match is replaced by the literal text "$1 $2".
    ASSERT_EQ("$1 $2", text) << ErrorMessage(context);
  }
}
310 
// GlobalReplace() must substitute every match in one call and return false
// when nothing is left to replace.
TEST_F(RegExpAdapterTest, TestGlobalReplace) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;
    string text("123-4567 ");

    // All seven digits are replaced at once.
    ASSERT_TRUE(context.single_digit->GlobalReplace(&text, "*"))
        << ErrorMessage(context);
    ASSERT_EQ("***-**** ", text) << ErrorMessage(context);

    // No digit remains, so the second call changes nothing.
    ASSERT_FALSE(context.single_digit->GlobalReplace(&text, "*"))
        << ErrorMessage(context);
    ASSERT_EQ("***-**** ", text) << ErrorMessage(context);
  }
}
327 
// Patterns and inputs containing multi-byte UTF-8 sequences must be handled,
// including a character class whose bounds are non-ASCII characters.
TEST_F(RegExpAdapterTest, TestUtf8) {
  for (TestContextIterator ctx_it = contexts_.begin();
       ctx_it != contexts_.end(); ++ctx_it) {
    const RegExpTestContext& context = **ctx_it;

    const scoped_ptr<const RegExp> greek_in_brackets(
        context.factory->CreateRegExp(
            "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
            /* "℡⊏([α-ω]*)⊐" */));
    string captured;

    // ASCII digits are outside the α-ω range, so the match must fail. The
    // literal is split so that "\x8F" is not read together with "123".
    EXPECT_FALSE(greek_in_brackets->Match(
        "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
        &captured)) << ErrorMessage(context);

    // Greek letters inside the brackets match and are captured.
    EXPECT_TRUE(greek_in_brackets->Match(
        "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
        /* "℡⊏αβ⊐" */, true, &captured)) << ErrorMessage(context);

    EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, captured)
        << ErrorMessage(context);
  }
}
349 
350 }  // namespace phonenumbers
351 }  // namespace i18n
352