• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Author: George Yakovlev
16 //         Philippe Liard
17 
18 #include "phonenumbers/regexp_adapter.h"
19 
20 #include <string>
21 #include <vector>
22 
23 #include <gtest/gtest.h>
24 
25 #include "phonenumbers/base/memory/scoped_ptr.h"
26 #include "phonenumbers/stl_util.h"
27 #include "phonenumbers/stringutil.h"
28 
29 #ifdef I18N_PHONENUMBERS_USE_RE2
30 #include "phonenumbers/regexp_adapter_re2.h"
31 #else
32 #include "phonenumbers/regexp_adapter_icu.h"
33 #endif  // I18N_PHONENUMBERS_USE_RE2
34 
35 namespace i18n {
36 namespace phonenumbers {
37 
38 using std::vector;
39 
40 // Structure that contains the attributes used to test an implementation of the
41 // regexp adapter.
42 struct RegExpTestContext {
RegExpTestContexti18n::phonenumbers::RegExpTestContext43   explicit RegExpTestContext(const string& name,
44                              const AbstractRegExpFactory* factory)
45       : name(name),
46         factory(factory),
47         digits(factory->CreateRegExp("\\d+")),
48         parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
49         single_digit(factory->CreateRegExp("\\d")),
50         two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")) {}
51 
52   const string name;
53   const scoped_ptr<const AbstractRegExpFactory> factory;
54   const scoped_ptr<const RegExp> digits;
55   const scoped_ptr<const RegExp> parentheses_digits;
56   const scoped_ptr<const RegExp> single_digit;
57   const scoped_ptr<const RegExp> two_digit_groups;
58 };
59 
60 class RegExpAdapterTest : public testing::Test {
61  protected:
RegExpAdapterTest()62   RegExpAdapterTest() {
63 #ifdef I18N_PHONENUMBERS_USE_RE2
64     contexts_.push_back(
65         new RegExpTestContext("RE2", new RE2RegExpFactory()));
66 #else
67     contexts_.push_back(
68         new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
69 #endif  // I18N_PHONENUMBERS_USE_RE2
70   }
71 
~RegExpAdapterTest()72   ~RegExpAdapterTest() { gtl::STLDeleteElements(&contexts_); }
73 
ErrorMessage(const RegExpTestContext & context)74   static string ErrorMessage(const RegExpTestContext& context) {
75     return StrCat("Test failed with ", context.name, " implementation.");
76   }
77 
78   typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
79   vector<const RegExpTestContext*> contexts_;
80 };
81 
// Verifies that an anchored Consume() which does not match returns false and
// leaves both the input and the capture output string unchanged.
TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
  // Uses the fixture's TestContextIterator typedef, like the other tests.
  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
       ++it) {
    const RegExpTestContext& context = **it;
    const scoped_ptr<RegExpInput> input(
        context.factory->CreateInput("+1-123-456-789"));

    // When 'true' is passed to Consume(), the match occurs from the beginning
    // of the input. The input starts with '+', so the digit pattern fails.
    ASSERT_FALSE(context.digits->Consume(input.get(), true, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);

    // A failed match must not write anything into the capture output.
    string res1;
    ASSERT_FALSE(context.parentheses_digits->Consume(
        input.get(), true, &res1, NULL, NULL)) << ErrorMessage(context);
    ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
    ASSERT_EQ("", res1) << ErrorMessage(context);
  }
}
103 
// Verifies that Consume() still matches and advances the input when every
// capture output pointer is NULL, even though the pattern contains a group.
TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;
    const scoped_ptr<RegExpInput> input(context.factory->CreateInput("+123"));
    const scoped_ptr<const RegExp> plus_sign(
        context.factory->CreateRegExp("(\\+)"));

    // The leading '+' is consumed; only the digits remain in the input.
    ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("123", input->ToString()) << ErrorMessage(context);
  }
}
117 
// Verifies that a successful anchored Consume() fills the supplied capture
// strings and strips the matched prefix from the input.
TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;
    const AbstractRegExpFactory& factory = *context.factory;
    const scoped_ptr<RegExpInput> input(factory.CreateInput("1-123-456-789"));

    // "1-123" is matched from the start; each digit run lands in its own
    // output string and the remainder stays in the input.
    string res1;
    string res2;
    ASSERT_TRUE(context.two_digit_groups->Consume(
        input.get(), true, &res1, &res2, NULL)) << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
    ASSERT_EQ("1", res1) << ErrorMessage(context);
    ASSERT_EQ("123", res2) << ErrorMessage(context);
  }
}
133 
// Verifies unanchored Consume(): each successful call drops everything up to
// and including the match, and a failed call leaves the input alone.
TEST_F(RegExpAdapterTest, TestFindAndConsume) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;
    const AbstractRegExpFactory& factory = *context.factory;
    const scoped_ptr<RegExpInput> input(factory.CreateInput("+1-123-456-789"));

    // Passing 'false' to Consume() lets the match start anywhere in the
    // input, so the leading '+' is skipped along with the first digit run.
    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context);

    // A second call continues from what is left of the input.
    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL))
        << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);

    // No parenthesised digits remain, so nothing is consumed.
    ASSERT_FALSE(context.parentheses_digits->Consume(
        input.get(), false, NULL, NULL, NULL)) << ErrorMessage(context);
    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);

    // The final match uses up the rest of the input and reports both groups.
    string res1;
    string res2;
    ASSERT_TRUE(context.two_digit_groups->Consume(
        input.get(), false, &res1, &res2, NULL)) << ErrorMessage(context);
    ASSERT_EQ("", input->ToString()) << ErrorMessage(context);
    ASSERT_EQ("456", res1) << ErrorMessage(context);
    ASSERT_EQ("789", res2) << ErrorMessage(context);
  }
}
163 
// Verifies PartialMatch(): the pattern may match a substring of the text, the
// output pointer may be NULL, and a failed match leaves the output untouched.
TEST_F(RegExpAdapterTest, TestPartialMatch) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;
    const scoped_ptr<const RegExp> reg_exp(
        context.factory->CreateRegExp("([\\da-z]+)"));
    string matched;

    // The whole text matches and is captured.
    EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched))
        << ErrorMessage(context);
    EXPECT_EQ("12345af", matched) << ErrorMessage(context);

    // Same text, but without asking for the captured group.
    EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL))
        << ErrorMessage(context);

    // Only the part between the brackets matches.
    EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched))
        << ErrorMessage(context);
    EXPECT_EQ("12", matched) << ErrorMessage(context);

    // Nothing in the text matches; the cleared output must stay empty.
    matched.clear();
    EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched))
        << ErrorMessage(context);
    EXPECT_EQ("", matched) << ErrorMessage(context);
  }
}
189 
// Verifies FullMatch(): the pattern must cover the entire text, the output
// pointer may be NULL, and a failed match leaves the output untouched.
TEST_F(RegExpAdapterTest, TestFullMatch) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;
    const scoped_ptr<const RegExp> reg_exp(
        context.factory->CreateRegExp("([\\da-z]+)"));
    string matched;

    // The whole text matches and is captured.
    EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched))
        << ErrorMessage(context);
    EXPECT_EQ("12345af", matched) << ErrorMessage(context);

    // Same text, but without asking for the captured group.
    EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)) << ErrorMessage(context);

    // The brackets are not part of the pattern, so a full match fails even
    // though the text between them would match on its own.
    matched.clear();
    EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)) << ErrorMessage(context);
    EXPECT_EQ("", matched) << ErrorMessage(context);

    // Nothing in the text matches at all.
    matched.clear();
    EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)) << ErrorMessage(context);
    EXPECT_EQ("", matched) << ErrorMessage(context);
  }
}
213 
// Verifies that Replace() substitutes only the first match per call, and that
// it returns false and leaves the string unchanged when nothing matches.
TEST_F(RegExpAdapterTest, TestReplace) {
  // Uses the fixture's TestContextIterator typedef, like the other tests.
  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
       ++it) {
    const RegExpTestContext& context = **it;
    string input("123-4567 ");

    // First call replaces the first digit only.
    ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context);

    // Second call replaces the next digit that is still present.
    ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
        << ErrorMessage(context);
    ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);

    // The string contains no lowercase letter, so nothing is replaced.
    const scoped_ptr<const RegExp> single_letter(
        context.factory->CreateRegExp("[a-z]"));
    ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context);
    ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
  }
}
235 
// Verifies that captured groups can be referenced from the replacement string
// passed to Replace(), using the $[0-9] notation.
TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;

    // Only the matched span is rewritten; the trailing " abc" is kept.
    string input("123-4567 abc");
    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
        << ErrorMessage(context);
    ASSERT_EQ("4567 abc", input) << ErrorMessage(context);

    // First group alone.
    input = "123-4567";
    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1"))
        << ErrorMessage(context);
    ASSERT_EQ("123", input) << ErrorMessage(context);

    // Second group alone.
    input = "123-4567";
    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
        << ErrorMessage(context);
    ASSERT_EQ("4567", input) << ErrorMessage(context);

    // Both groups, separated by a literal space.
    input = "123-4567";
    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1 $2"))
        << ErrorMessage(context);
    ASSERT_EQ("123 4567", input) << ErrorMessage(context);
  }
}
264 
// Verifies that an escaped '$' in the replacement string is emitted literally
// instead of being treated as a group reference.
TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;

    // The replacement is backslash-dollar-1, space, backslash-dollar-2.
    string input("123-4567");
    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "\\$1 \\$2"))
        << ErrorMessage(context);

    ASSERT_EQ("$1 $2", input) << ErrorMessage(context);
  }
}
278 
// Verifies that GlobalReplace() rewrites every match in one call, and that it
// returns false and leaves the string unchanged when nothing matches.
TEST_F(RegExpAdapterTest, TestGlobalReplace) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;

    string input = "123-4567 ";

    // Every digit becomes '*'; the hyphen and trailing space are kept.
    ASSERT_TRUE(context.single_digit->GlobalReplace(&input, "*"))
        << ErrorMessage(context);
    ASSERT_EQ("***-**** ", input) << ErrorMessage(context);

    // No digits are left, so the second pass has nothing to replace.
    ASSERT_FALSE(context.single_digit->GlobalReplace(&input, "*"))
        << ErrorMessage(context);
    ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
  }
}
295 
// Verifies that patterns and inputs containing multi-byte UTF-8 sequences,
// including a character class range over non-ASCII letters, are handled.
TEST_F(RegExpAdapterTest, TestUtf8) {
  const TestContextIterator last = contexts_.end();
  for (TestContextIterator cursor = contexts_.begin(); cursor != last;
       ++cursor) {
    const RegExpTestContext& context = **cursor;

    // Literals are spelled as hex escapes; the readable form is given in the
    // adjacent comment.
    const scoped_ptr<const RegExp> reg_exp(context.factory->CreateRegExp(
        "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
        /* "℡⊏([α-ω]*)⊐" */));
    string matched;

    // ASCII digits are outside the Greek range, so this must not match. The
    // literal is split in two so that "123" is not read as part of the
    // preceding hex escape.
    EXPECT_FALSE(reg_exp->Match(
        "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
        &matched)) << ErrorMessage(context);
    // Two Greek letters between the delimiters match and are captured.
    EXPECT_TRUE(reg_exp->Match(
        "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
        /* "℡⊏αβ⊐" */, true, &matched)) << ErrorMessage(context);

    EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched) << ErrorMessage(context);
  }
}
317 
318 }  // namespace phonenumbers
319 }  // namespace i18n
320