• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/regexp/regexp-ast.h"
6 #include "src/utils/ostreams.h"
7 
8 namespace v8 {
9 namespace internal {
10 
11 #define MAKE_ACCEPT(Name)                                          \
12   void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13     return visitor->Visit##Name(this, data);                       \
14   }
15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 #undef MAKE_ACCEPT
17 
18 #define MAKE_TYPE_CASE(Name)                               \
19   RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
20   bool RegExpTree::Is##Name() { return false; }
21 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22 #undef MAKE_TYPE_CASE
23 
24 #define MAKE_TYPE_CASE(Name)                              \
25   RegExp##Name* RegExp##Name::As##Name() { return this; } \
26   bool RegExp##Name::Is##Name() { return true; }
27 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28 #undef MAKE_TYPE_CASE
29 
30 namespace {
31 
ListCaptureRegisters(ZoneList<RegExpTree * > * children)32 Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
33   Interval result = Interval::Empty();
34   for (int i = 0; i < children->length(); i++)
35     result = result.Union(children->at(i)->CaptureRegisters());
36   return result;
37 }
38 
39 }  // namespace
40 
CaptureRegisters()41 Interval RegExpAlternative::CaptureRegisters() {
42   return ListCaptureRegisters(nodes());
43 }
44 
45 
CaptureRegisters()46 Interval RegExpDisjunction::CaptureRegisters() {
47   return ListCaptureRegisters(alternatives());
48 }
49 
50 
CaptureRegisters()51 Interval RegExpLookaround::CaptureRegisters() {
52   return body()->CaptureRegisters();
53 }
54 
55 
CaptureRegisters()56 Interval RegExpCapture::CaptureRegisters() {
57   Interval self(StartRegister(index()), EndRegister(index()));
58   return self.Union(body()->CaptureRegisters());
59 }
60 
61 
CaptureRegisters()62 Interval RegExpQuantifier::CaptureRegisters() {
63   return body()->CaptureRegisters();
64 }
65 
66 
IsAnchoredAtStart()67 bool RegExpAssertion::IsAnchoredAtStart() {
68   return assertion_type() == RegExpAssertion::Type::START_OF_INPUT;
69 }
70 
71 
IsAnchoredAtEnd()72 bool RegExpAssertion::IsAnchoredAtEnd() {
73   return assertion_type() == RegExpAssertion::Type::END_OF_INPUT;
74 }
75 
76 
IsAnchoredAtStart()77 bool RegExpAlternative::IsAnchoredAtStart() {
78   ZoneList<RegExpTree*>* nodes = this->nodes();
79   for (int i = 0; i < nodes->length(); i++) {
80     RegExpTree* node = nodes->at(i);
81     if (node->IsAnchoredAtStart()) {
82       return true;
83     }
84     if (node->max_match() > 0) {
85       return false;
86     }
87   }
88   return false;
89 }
90 
91 
IsAnchoredAtEnd()92 bool RegExpAlternative::IsAnchoredAtEnd() {
93   ZoneList<RegExpTree*>* nodes = this->nodes();
94   for (int i = nodes->length() - 1; i >= 0; i--) {
95     RegExpTree* node = nodes->at(i);
96     if (node->IsAnchoredAtEnd()) {
97       return true;
98     }
99     if (node->max_match() > 0) {
100       return false;
101     }
102   }
103   return false;
104 }
105 
106 
IsAnchoredAtStart()107 bool RegExpDisjunction::IsAnchoredAtStart() {
108   ZoneList<RegExpTree*>* alternatives = this->alternatives();
109   for (int i = 0; i < alternatives->length(); i++) {
110     if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
111   }
112   return true;
113 }
114 
115 
IsAnchoredAtEnd()116 bool RegExpDisjunction::IsAnchoredAtEnd() {
117   ZoneList<RegExpTree*>* alternatives = this->alternatives();
118   for (int i = 0; i < alternatives->length(); i++) {
119     if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
120   }
121   return true;
122 }
123 
124 
IsAnchoredAtStart()125 bool RegExpLookaround::IsAnchoredAtStart() {
126   return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
127 }
128 
129 
IsAnchoredAtStart()130 bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
131 
132 
IsAnchoredAtEnd()133 bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
134 
135 namespace {
136 
137 // Convert regular expression trees to a simple sexp representation.
138 // This representation should be different from the input grammar
139 // in as many cases as possible, to make it more difficult for incorrect
140 // parses to look as correct ones which is likely if the input and
141 // output formats are alike.
142 class RegExpUnparser final : public RegExpVisitor {
143  public:
RegExpUnparser(std::ostream & os,Zone * zone)144   RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
145   void VisitCharacterRange(CharacterRange that);
146 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
147   FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
148 #undef MAKE_CASE
149  private:
150   std::ostream& os_;
151   Zone* zone_;
152 };
153 
154 }  // namespace
155 
VisitDisjunction(RegExpDisjunction * that,void * data)156 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
157   os_ << "(|";
158   for (int i = 0; i < that->alternatives()->length(); i++) {
159     os_ << " ";
160     that->alternatives()->at(i)->Accept(this, data);
161   }
162   os_ << ")";
163   return nullptr;
164 }
165 
166 
VisitAlternative(RegExpAlternative * that,void * data)167 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
168   os_ << "(:";
169   for (int i = 0; i < that->nodes()->length(); i++) {
170     os_ << " ";
171     that->nodes()->at(i)->Accept(this, data);
172   }
173   os_ << ")";
174   return nullptr;
175 }
176 
177 
VisitCharacterRange(CharacterRange that)178 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
179   os_ << AsUC32(that.from());
180   if (!that.IsSingleton()) {
181     os_ << "-" << AsUC32(that.to());
182   }
183 }
184 
185 
VisitCharacterClass(RegExpCharacterClass * that,void * data)186 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
187                                           void* data) {
188   if (that->is_negated()) os_ << "^";
189   os_ << "[";
190   for (int i = 0; i < that->ranges(zone_)->length(); i++) {
191     if (i > 0) os_ << " ";
192     VisitCharacterRange(that->ranges(zone_)->at(i));
193   }
194   os_ << "]";
195   return nullptr;
196 }
197 
198 
VisitAssertion(RegExpAssertion * that,void * data)199 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
200   switch (that->assertion_type()) {
201     case RegExpAssertion::Type::START_OF_INPUT:
202       os_ << "@^i";
203       break;
204     case RegExpAssertion::Type::END_OF_INPUT:
205       os_ << "@$i";
206       break;
207     case RegExpAssertion::Type::START_OF_LINE:
208       os_ << "@^l";
209       break;
210     case RegExpAssertion::Type::END_OF_LINE:
211       os_ << "@$l";
212       break;
213     case RegExpAssertion::Type::BOUNDARY:
214       os_ << "@b";
215       break;
216     case RegExpAssertion::Type::NON_BOUNDARY:
217       os_ << "@B";
218       break;
219   }
220   return nullptr;
221 }
222 
223 
VisitAtom(RegExpAtom * that,void * data)224 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
225   os_ << "'";
226   base::Vector<const base::uc16> chardata = that->data();
227   for (int i = 0; i < chardata.length(); i++) {
228     os_ << AsUC16(chardata[i]);
229   }
230   os_ << "'";
231   return nullptr;
232 }
233 
234 
VisitText(RegExpText * that,void * data)235 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
236   if (that->elements()->length() == 1) {
237     that->elements()->at(0).tree()->Accept(this, data);
238   } else {
239     os_ << "(!";
240     for (int i = 0; i < that->elements()->length(); i++) {
241       os_ << " ";
242       that->elements()->at(i).tree()->Accept(this, data);
243     }
244     os_ << ")";
245   }
246   return nullptr;
247 }
248 
249 
VisitQuantifier(RegExpQuantifier * that,void * data)250 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
251   os_ << "(# " << that->min() << " ";
252   if (that->max() == RegExpTree::kInfinity) {
253     os_ << "- ";
254   } else {
255     os_ << that->max() << " ";
256   }
257   os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
258   that->body()->Accept(this, data);
259   os_ << ")";
260   return nullptr;
261 }
262 
263 
VisitCapture(RegExpCapture * that,void * data)264 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
265   os_ << "(^ ";
266   that->body()->Accept(this, data);
267   os_ << ")";
268   return nullptr;
269 }
270 
VisitGroup(RegExpGroup * that,void * data)271 void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
272   os_ << "(?: ";
273   that->body()->Accept(this, data);
274   os_ << ")";
275   return nullptr;
276 }
277 
VisitLookaround(RegExpLookaround * that,void * data)278 void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
279   os_ << "(";
280   os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
281   os_ << (that->is_positive() ? " + " : " - ");
282   that->body()->Accept(this, data);
283   os_ << ")";
284   return nullptr;
285 }
286 
287 
VisitBackReference(RegExpBackReference * that,void * data)288 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
289                                          void* data) {
290   os_ << "(<- " << that->index() << ")";
291   return nullptr;
292 }
293 
294 
VisitEmpty(RegExpEmpty * that,void * data)295 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
296   os_ << '%';
297   return nullptr;
298 }
299 
Print(std::ostream & os,Zone * zone)300 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) {
301   RegExpUnparser unparser(os, zone);
302   Accept(&unparser, nullptr);
303   return os;
304 }
305 
RegExpDisjunction(ZoneList<RegExpTree * > * alternatives)306 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
307     : alternatives_(alternatives) {
308   DCHECK_LT(1, alternatives->length());
309   RegExpTree* first_alternative = alternatives->at(0);
310   min_match_ = first_alternative->min_match();
311   max_match_ = first_alternative->max_match();
312   for (int i = 1; i < alternatives->length(); i++) {
313     RegExpTree* alternative = alternatives->at(i);
314     min_match_ = std::min(min_match_, alternative->min_match());
315     max_match_ = std::max(max_match_, alternative->max_match());
316   }
317 }
318 
319 namespace {
320 
IncreaseBy(int previous,int increase)321 int IncreaseBy(int previous, int increase) {
322   if (RegExpTree::kInfinity - previous < increase) {
323     return RegExpTree::kInfinity;
324   } else {
325     return previous + increase;
326   }
327 }
328 
329 }  // namespace
330 
RegExpAlternative(ZoneList<RegExpTree * > * nodes)331 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
332     : nodes_(nodes) {
333   DCHECK_LT(1, nodes->length());
334   min_match_ = 0;
335   max_match_ = 0;
336   for (int i = 0; i < nodes->length(); i++) {
337     RegExpTree* node = nodes->at(i);
338     int node_min_match = node->min_match();
339     min_match_ = IncreaseBy(min_match_, node_min_match);
340     int node_max_match = node->max_match();
341     max_match_ = IncreaseBy(max_match_, node_max_match);
342   }
343 }
344 
345 
346 }  // namespace internal
347 }  // namespace v8
348