• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Author: George Yakovlev
16 //         Philippe Liard
17 
18 #include "phonenumbers/regexp_adapter_re2.h"
19 
20 #include <cstddef>
21 #include <string>
22 
23 #include <re2/re2.h>
24 #include <re2/stringpiece.h>
25 
26 #include "phonenumbers/base/basictypes.h"
27 #include "phonenumbers/base/logging.h"
28 #include "phonenumbers/stringutil.h"
29 
30 #include "absl/strings/string_view.h"
31 namespace i18n {
32 namespace phonenumbers {
33 
34 // Implementation of RegExpInput abstract class.
35 class RE2RegExpInput : public RegExpInput {
36  public:
RE2RegExpInput(const string & utf8_input)37   explicit RE2RegExpInput(const string& utf8_input)
38       : string_(utf8_input),
39         utf8_input_(string_) {}
40 
ToString() const41   virtual string ToString() const {
42     return utf8_input_.ToString();
43   }
44 
Data()45   StringPiece* Data() {
46     return &utf8_input_;
47   }
48 
49  private:
50   // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
51   // copy the string passed in.
52   const string string_;
53   StringPiece utf8_input_;
54 };
55 
56 namespace {
57 
58 template <typename Function, typename Input>
DispatchRE2Call(Function regex_function,Input input,const RE2 & regexp,string * out1,string * out2,string * out3,string * out4,string * out5,string * out6)59 bool DispatchRE2Call(Function regex_function,
60                      Input input,
61                      const RE2& regexp,
62                      string* out1,
63                      string* out2,
64                      string* out3,
65                      string* out4,
66                      string* out5,
67                      string* out6) {
68   const RE2::Arg outs[] = { out1, out2, out3, out4, out5, out6};
69   const RE2::Arg* const args[] = {&outs[0], &outs[1], &outs[2],
70                                   &outs[3], &outs[4], &outs[5]};
71   const int argc =
72       out6 ? 6 : out5 ? 5 : out4 ? 4 : out3 ? 3 : out2 ? 2 : out1 ? 1 : 0;
73   return regex_function(input, regexp, args, argc);
74 }
75 
76 // Replaces unescaped dollar-signs with backslashes. Backslashes are deleted
77 // when they escape dollar-signs.
TransformRegularExpressionToRE2Syntax(const string & regex)78 string TransformRegularExpressionToRE2Syntax(const string& regex) {
79   string re2_regex(regex);
80   if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) {
81     return regex;
82   }
83   // If we replaced a dollar sign with a backslash and there are now two
84   // backslashes in the string, we assume that the dollar-sign was previously
85   // escaped and that we need to retain it. To do this, we replace pairs of
86   // backslashes with a dollar sign.
87   GlobalReplaceSubstring("\\\\", "$", &re2_regex);
88   return re2_regex;
89 }
90 
91 }  // namespace
92 
93 // Implementation of RegExp abstract class.
94 class RE2RegExp : public RegExp {
95  public:
RE2RegExp(const string & utf8_regexp)96   explicit RE2RegExp(const string& utf8_regexp)
97       : utf8_regexp_(utf8_regexp) {}
98 
Consume(RegExpInput * input_string,bool anchor_at_start,string * matched_string1,string * matched_string2,string * matched_string3,string * matched_string4,string * matched_string5,string * matched_string6) const99   virtual bool Consume(RegExpInput* input_string,
100                        bool anchor_at_start,
101                        string* matched_string1,
102                        string* matched_string2,
103                        string* matched_string3,
104                        string* matched_string4,
105                        string* matched_string5,
106                        string* matched_string6) const {
107     DCHECK(input_string);
108     StringPiece* utf8_input =
109         static_cast<RE2RegExpInput*>(input_string)->Data();
110 
111     if (anchor_at_start) {
112       return DispatchRE2Call(RE2::ConsumeN, utf8_input, utf8_regexp_,
113                              matched_string1, matched_string2,
114                              matched_string3, matched_string4,
115                              matched_string5, matched_string6);
116     } else {
117       return DispatchRE2Call(RE2::FindAndConsumeN, utf8_input, utf8_regexp_,
118                              matched_string1, matched_string2,
119                              matched_string3, matched_string4,
120                              matched_string5, matched_string6);
121     }
122   }
123 
Match(const string & input_string,bool full_match,string * matched_string) const124   virtual bool Match(const string& input_string,
125                      bool full_match,
126                      string* matched_string) const {
127     if (full_match) {
128       return DispatchRE2Call(RE2::FullMatchN, input_string, utf8_regexp_,
129                              matched_string, NULL, NULL, NULL, NULL, NULL);
130     } else {
131       return DispatchRE2Call(RE2::PartialMatchN, input_string, utf8_regexp_,
132                              matched_string, NULL, NULL), NULL, NULL, NULL);
133     }
134   }
135 
Replace(string * string_to_process,bool global,const string & replacement_string) const136   virtual bool Replace(string* string_to_process,
137                        bool global,
138                        const string& replacement_string) const {
139     DCHECK(string_to_process);
140     const string re2_replacement_string =
141         TransformRegularExpressionToRE2Syntax(replacement_string);
142     if (global) {
143       return RE2::GlobalReplace(string_to_process, utf8_regexp_,
144                                 re2_replacement_string);
145     } else {
146       return RE2::Replace(string_to_process, utf8_regexp_,
147                           re2_replacement_string);
148     }
149   }
150 
151  private:
152   RE2 utf8_regexp_;
153 };
154 
CreateInput(const string & utf8_input) const155 RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
156   return new RE2RegExpInput(utf8_input);
157 }
158 
CreateRegExp(const string & utf8_regexp) const159 RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
160   return new RE2RegExp(utf8_regexp);
161 }
162 
163 }  // namespace phonenumbers
164 }  // namespace i18n
165