1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/ostreams.h"
6 #include "src/regexp/regexp-ast.h"
7
8 namespace v8 {
9 namespace internal {
10
11 #define MAKE_ACCEPT(Name) \
12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13 return visitor->Visit##Name(this, data); \
14 }
15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 #undef MAKE_ACCEPT
17
// RegExpTree-level definitions of As<Type>()/Is<Type>() for each type listed
// by FOR_EACH_REG_EXP_TREE_TYPE: the cast yields nullptr and the predicate
// yields false.
#define DEFINE_DEFAULT_TYPE_QUERIES(Type)                  \
  RegExp##Type* RegExpTree::As##Type() { return nullptr; } \
  bool RegExpTree::Is##Type() { return false; }
FOR_EACH_REG_EXP_TREE_TYPE(DEFINE_DEFAULT_TYPE_QUERIES)
#undef DEFINE_DEFAULT_TYPE_QUERIES
23
24 #define MAKE_TYPE_CASE(Name) \
25 RegExp##Name* RegExp##Name::As##Name() { return this; } \
26 bool RegExp##Name::Is##Name() { return true; }
27 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28 #undef MAKE_TYPE_CASE
29
30
31 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
32 Interval result = Interval::Empty();
33 for (int i = 0; i < children->length(); i++)
34 result = result.Union(children->at(i)->CaptureRegisters());
35 return result;
36 }
37
38
CaptureRegisters()39 Interval RegExpAlternative::CaptureRegisters() {
40 return ListCaptureRegisters(nodes());
41 }
42
43
CaptureRegisters()44 Interval RegExpDisjunction::CaptureRegisters() {
45 return ListCaptureRegisters(alternatives());
46 }
47
48
CaptureRegisters()49 Interval RegExpLookaround::CaptureRegisters() {
50 return body()->CaptureRegisters();
51 }
52
53
CaptureRegisters()54 Interval RegExpCapture::CaptureRegisters() {
55 Interval self(StartRegister(index()), EndRegister(index()));
56 return self.Union(body()->CaptureRegisters());
57 }
58
59
CaptureRegisters()60 Interval RegExpQuantifier::CaptureRegisters() {
61 return body()->CaptureRegisters();
62 }
63
64
IsAnchoredAtStart()65 bool RegExpAssertion::IsAnchoredAtStart() {
66 return assertion_type() == RegExpAssertion::START_OF_INPUT;
67 }
68
69
IsAnchoredAtEnd()70 bool RegExpAssertion::IsAnchoredAtEnd() {
71 return assertion_type() == RegExpAssertion::END_OF_INPUT;
72 }
73
74
IsAnchoredAtStart()75 bool RegExpAlternative::IsAnchoredAtStart() {
76 ZoneList<RegExpTree*>* nodes = this->nodes();
77 for (int i = 0; i < nodes->length(); i++) {
78 RegExpTree* node = nodes->at(i);
79 if (node->IsAnchoredAtStart()) {
80 return true;
81 }
82 if (node->max_match() > 0) {
83 return false;
84 }
85 }
86 return false;
87 }
88
89
IsAnchoredAtEnd()90 bool RegExpAlternative::IsAnchoredAtEnd() {
91 ZoneList<RegExpTree*>* nodes = this->nodes();
92 for (int i = nodes->length() - 1; i >= 0; i--) {
93 RegExpTree* node = nodes->at(i);
94 if (node->IsAnchoredAtEnd()) {
95 return true;
96 }
97 if (node->max_match() > 0) {
98 return false;
99 }
100 }
101 return false;
102 }
103
104
IsAnchoredAtStart()105 bool RegExpDisjunction::IsAnchoredAtStart() {
106 ZoneList<RegExpTree*>* alternatives = this->alternatives();
107 for (int i = 0; i < alternatives->length(); i++) {
108 if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
109 }
110 return true;
111 }
112
113
IsAnchoredAtEnd()114 bool RegExpDisjunction::IsAnchoredAtEnd() {
115 ZoneList<RegExpTree*>* alternatives = this->alternatives();
116 for (int i = 0; i < alternatives->length(); i++) {
117 if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
118 }
119 return true;
120 }
121
122
IsAnchoredAtStart()123 bool RegExpLookaround::IsAnchoredAtStart() {
124 return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
125 }
126
127
IsAnchoredAtStart()128 bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
129
130
IsAnchoredAtEnd()131 bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
132
133
134 // Convert regular expression trees to a simple sexp representation.
135 // This representation should be different from the input grammar
136 // in as many cases as possible, to make it more difficult for incorrect
137 // parses to look as correct ones which is likely if the input and
138 // output formats are alike.
139 class RegExpUnparser final : public RegExpVisitor {
140 public:
RegExpUnparser(std::ostream & os,Zone * zone)141 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
142 void VisitCharacterRange(CharacterRange that);
143 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
144 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
145 #undef MAKE_CASE
146 private:
147 std::ostream& os_;
148 Zone* zone_;
149 };
150
151
VisitDisjunction(RegExpDisjunction * that,void * data)152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153 os_ << "(|";
154 for (int i = 0; i < that->alternatives()->length(); i++) {
155 os_ << " ";
156 that->alternatives()->at(i)->Accept(this, data);
157 }
158 os_ << ")";
159 return nullptr;
160 }
161
162
VisitAlternative(RegExpAlternative * that,void * data)163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164 os_ << "(:";
165 for (int i = 0; i < that->nodes()->length(); i++) {
166 os_ << " ";
167 that->nodes()->at(i)->Accept(this, data);
168 }
169 os_ << ")";
170 return nullptr;
171 }
172
173
VisitCharacterRange(CharacterRange that)174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175 os_ << AsUC32(that.from());
176 if (!that.IsSingleton()) {
177 os_ << "-" << AsUC32(that.to());
178 }
179 }
180
181
VisitCharacterClass(RegExpCharacterClass * that,void * data)182 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
183 void* data) {
184 if (that->is_negated()) os_ << "^";
185 os_ << "[";
186 for (int i = 0; i < that->ranges(zone_)->length(); i++) {
187 if (i > 0) os_ << " ";
188 VisitCharacterRange(that->ranges(zone_)->at(i));
189 }
190 os_ << "]";
191 return nullptr;
192 }
193
194
VisitAssertion(RegExpAssertion * that,void * data)195 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
196 switch (that->assertion_type()) {
197 case RegExpAssertion::START_OF_INPUT:
198 os_ << "@^i";
199 break;
200 case RegExpAssertion::END_OF_INPUT:
201 os_ << "@$i";
202 break;
203 case RegExpAssertion::START_OF_LINE:
204 os_ << "@^l";
205 break;
206 case RegExpAssertion::END_OF_LINE:
207 os_ << "@$l";
208 break;
209 case RegExpAssertion::BOUNDARY:
210 os_ << "@b";
211 break;
212 case RegExpAssertion::NON_BOUNDARY:
213 os_ << "@B";
214 break;
215 }
216 return nullptr;
217 }
218
219
VisitAtom(RegExpAtom * that,void * data)220 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
221 os_ << "'";
222 Vector<const uc16> chardata = that->data();
223 for (int i = 0; i < chardata.length(); i++) {
224 os_ << AsUC16(chardata[i]);
225 }
226 os_ << "'";
227 return nullptr;
228 }
229
230
VisitText(RegExpText * that,void * data)231 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
232 if (that->elements()->length() == 1) {
233 that->elements()->at(0).tree()->Accept(this, data);
234 } else {
235 os_ << "(!";
236 for (int i = 0; i < that->elements()->length(); i++) {
237 os_ << " ";
238 that->elements()->at(i).tree()->Accept(this, data);
239 }
240 os_ << ")";
241 }
242 return nullptr;
243 }
244
245
VisitQuantifier(RegExpQuantifier * that,void * data)246 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
247 os_ << "(# " << that->min() << " ";
248 if (that->max() == RegExpTree::kInfinity) {
249 os_ << "- ";
250 } else {
251 os_ << that->max() << " ";
252 }
253 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
254 that->body()->Accept(this, data);
255 os_ << ")";
256 return nullptr;
257 }
258
259
VisitCapture(RegExpCapture * that,void * data)260 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
261 os_ << "(^ ";
262 that->body()->Accept(this, data);
263 os_ << ")";
264 return nullptr;
265 }
266
VisitGroup(RegExpGroup * that,void * data)267 void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
268 os_ << "(?: ";
269 that->body()->Accept(this, data);
270 os_ << ")";
271 return nullptr;
272 }
273
VisitLookaround(RegExpLookaround * that,void * data)274 void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
275 os_ << "(";
276 os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
277 os_ << (that->is_positive() ? " + " : " - ");
278 that->body()->Accept(this, data);
279 os_ << ")";
280 return nullptr;
281 }
282
283
VisitBackReference(RegExpBackReference * that,void * data)284 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
285 void* data) {
286 os_ << "(<- " << that->index() << ")";
287 return nullptr;
288 }
289
290
VisitEmpty(RegExpEmpty * that,void * data)291 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
292 os_ << '%';
293 return nullptr;
294 }
295
296
Print(std::ostream & os,Zone * zone)297 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
298 RegExpUnparser unparser(os, zone);
299 Accept(&unparser, nullptr);
300 return os;
301 }
302
303
RegExpDisjunction(ZoneList<RegExpTree * > * alternatives)304 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
305 : alternatives_(alternatives) {
306 DCHECK_LT(1, alternatives->length());
307 RegExpTree* first_alternative = alternatives->at(0);
308 min_match_ = first_alternative->min_match();
309 max_match_ = first_alternative->max_match();
310 for (int i = 1; i < alternatives->length(); i++) {
311 RegExpTree* alternative = alternatives->at(i);
312 min_match_ = Min(min_match_, alternative->min_match());
313 max_match_ = Max(max_match_, alternative->max_match());
314 }
315 }
316
317
IncreaseBy(int previous,int increase)318 static int IncreaseBy(int previous, int increase) {
319 if (RegExpTree::kInfinity - previous < increase) {
320 return RegExpTree::kInfinity;
321 } else {
322 return previous + increase;
323 }
324 }
325
326
RegExpAlternative(ZoneList<RegExpTree * > * nodes)327 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
328 : nodes_(nodes) {
329 DCHECK_LT(1, nodes->length());
330 min_match_ = 0;
331 max_match_ = 0;
332 for (int i = 0; i < nodes->length(); i++) {
333 RegExpTree* node = nodes->at(i);
334 int node_min_match = node->min_match();
335 min_match_ = IncreaseBy(min_match_, node_min_match);
336 int node_max_match = node->max_match();
337 max_match_ = IncreaseBy(max_match_, node_max_match);
338 }
339 }
340
341
342 } // namespace internal
343 } // namespace v8
344