1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Author: George Yakovlev
16 // Philippe Liard
17
18 #include "phonenumbers/regexp_adapter_re2.h"
19
20 #include <cstddef>
21 #include <string>
22
23 #include <re2/re2.h>
24 #include <re2/stringpiece.h>
25
26 #include "phonenumbers/base/basictypes.h"
27 #include "phonenumbers/base/logging.h"
28 #include "phonenumbers/stringutil.h"
29
30 namespace i18n {
31 namespace phonenumbers {
32
33 // Implementation of RegExpInput abstract class.
34 class RE2RegExpInput : public RegExpInput {
35 public:
RE2RegExpInput(const string & utf8_input)36 explicit RE2RegExpInput(const string& utf8_input)
37 : string_(utf8_input),
38 utf8_input_(string_) {}
39
ToString() const40 virtual string ToString() const {
41 return utf8_input_.ToString();
42 }
43
Data()44 StringPiece* Data() {
45 return &utf8_input_;
46 }
47
48 private:
49 // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
50 // copy the string passed in.
51 const string string_;
52 StringPiece utf8_input_;
53 };
54
55 namespace {
56
57 template <typename Function, typename Input>
DispatchRE2Call(Function regex_function,Input input,const RE2 & regexp,string * out1,string * out2,string * out3)58 bool DispatchRE2Call(Function regex_function,
59 Input input,
60 const RE2& regexp,
61 string* out1,
62 string* out2,
63 string* out3) {
64 const RE2::Arg outs[] = { out1, out2, out3, };
65 const RE2::Arg* const args[] = { &outs[0], &outs[1], &outs[2], };
66 const int argc = out3 ? 3 : out2 ? 2 : out1 ? 1 : 0;
67 return regex_function(input, regexp, args, argc);
68 }
69
// Rewrites a replacement string that marks capture groups with dollar signs
// (e.g. "$1") into the backslash form (e.g. "\1").
//
// The input is scanned once, left to right:
//   - "\$" (an escaped dollar sign) becomes a literal "$";
//   - "\\" (an escaped backslash) is copied through unchanged, so it is never
//     mistaken for an escape of a following dollar sign;
//   - any remaining "$" becomes "\";
//   - every other character is copied as-is.
//
// A single pass is used rather than two global substitutions ("$" -> "\",
// then "\\" -> "$") because the latter cannot tell a backslash that was
// produced from a dollar sign apart from one that was already in the input,
// and therefore corrupts strings such as "a\\b$1".
string TransformRegularExpressionToRE2Syntax(const string& regex) {
  string re2_regex;
  re2_regex.reserve(regex.size());
  const string::size_type length = regex.size();
  for (string::size_type i = 0; i < length; ++i) {
    const char current = regex[i];
    if (current == '\\' && i + 1 < length) {
      const char next = regex[i + 1];
      if (next == '$') {
        // Escaped dollar sign: drop the backslash, keep the dollar sign.
        re2_regex += '$';
        ++i;
        continue;
      }
      if (next == '\\') {
        // Escaped backslash: keep the pair intact and skip past it so the
        // second backslash cannot be read as escaping what follows.
        re2_regex += "\\\\";
        ++i;
        continue;
      }
    }
    // Unescaped dollar signs turn into backslashes; anything else is copied.
    re2_regex += (current == '$') ? '\\' : current;
  }
  return re2_regex;
}
84
85 } // namespace
86
87 // Implementation of RegExp abstract class.
88 class RE2RegExp : public RegExp {
89 public:
RE2RegExp(const string & utf8_regexp)90 explicit RE2RegExp(const string& utf8_regexp)
91 : utf8_regexp_(utf8_regexp) {}
92
Consume(RegExpInput * input_string,bool anchor_at_start,string * matched_string1,string * matched_string2,string * matched_string3) const93 virtual bool Consume(RegExpInput* input_string,
94 bool anchor_at_start,
95 string* matched_string1,
96 string* matched_string2,
97 string* matched_string3) const {
98 DCHECK(input_string);
99 StringPiece* utf8_input =
100 static_cast<RE2RegExpInput*>(input_string)->Data();
101
102 if (anchor_at_start) {
103 return DispatchRE2Call(RE2::ConsumeN, utf8_input, utf8_regexp_,
104 matched_string1, matched_string2,
105 matched_string3);
106 } else {
107 return DispatchRE2Call(RE2::FindAndConsumeN, utf8_input, utf8_regexp_,
108 matched_string1, matched_string2,
109 matched_string3);
110 }
111 }
112
Match(const string & input_string,bool full_match,string * matched_string) const113 virtual bool Match(const string& input_string,
114 bool full_match,
115 string* matched_string) const {
116 if (full_match) {
117 return DispatchRE2Call(RE2::FullMatchN, input_string, utf8_regexp_,
118 matched_string, NULL, NULL);
119 } else {
120 return DispatchRE2Call(RE2::PartialMatchN, input_string, utf8_regexp_,
121 matched_string, NULL, NULL);
122 }
123 }
124
Replace(string * string_to_process,bool global,const string & replacement_string) const125 virtual bool Replace(string* string_to_process,
126 bool global,
127 const string& replacement_string) const {
128 DCHECK(string_to_process);
129 const string re2_replacement_string =
130 TransformRegularExpressionToRE2Syntax(replacement_string);
131 if (global) {
132 return RE2::GlobalReplace(string_to_process, utf8_regexp_,
133 re2_replacement_string);
134 } else {
135 return RE2::Replace(string_to_process, utf8_regexp_,
136 re2_replacement_string);
137 }
138 }
139
140 private:
141 RE2 utf8_regexp_;
142 };
143
CreateInput(const string & utf8_input) const144 RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
145 return new RE2RegExpInput(utf8_input);
146 }
147
CreateRegExp(const string & utf8_regexp) const148 RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
149 return new RE2RegExp(utf8_regexp);
150 }
151
152 } // namespace phonenumbers
153 } // namespace i18n
154