1 // Copyright 2016 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_AST_H_ 6 #define V8_REGEXP_REGEXP_AST_H_ 7 8 #include "src/objects/js-regexp.h" 9 #include "src/objects/objects.h" 10 #include "src/objects/string.h" 11 #include "src/utils/utils.h" 12 #include "src/zone/zone-containers.h" 13 #include "src/zone/zone-list.h" 14 #include "src/zone/zone.h" 15 16 namespace v8 { 17 namespace internal { 18 19 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ 20 VISIT(Disjunction) \ 21 VISIT(Alternative) \ 22 VISIT(Assertion) \ 23 VISIT(CharacterClass) \ 24 VISIT(Atom) \ 25 VISIT(Quantifier) \ 26 VISIT(Capture) \ 27 VISIT(Group) \ 28 VISIT(Lookaround) \ 29 VISIT(BackReference) \ 30 VISIT(Empty) \ 31 VISIT(Text) 32 33 #define FORWARD_DECLARE(Name) class RegExp##Name; 34 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) 35 #undef FORWARD_DECLARE 36 37 class RegExpCompiler; 38 class RegExpNode; 39 class RegExpTree; 40 41 class RegExpVisitor { 42 public: 43 virtual ~RegExpVisitor() = default; 44 #define MAKE_CASE(Name) \ 45 virtual void* Visit##Name(RegExp##Name*, void* data) = 0; 46 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 47 #undef MAKE_CASE 48 }; 49 50 51 // A simple closed interval. 52 class Interval { 53 public: Interval()54 Interval() : from_(kNone), to_(kNone - 1) {} // '- 1' for branchless size(). Interval(int from,int to)55 Interval(int from, int to) : from_(from), to_(to) {} Union(Interval that)56 Interval Union(Interval that) { 57 if (that.from_ == kNone) 58 return *this; 59 else if (from_ == kNone) 60 return that; 61 else 62 return Interval(Min(from_, that.from_), Max(to_, that.to_)); 63 } 64 Contains(int value)65 bool Contains(int value) { return (from_ <= value) && (value <= to_); } is_empty()66 bool is_empty() { return from_ == kNone; } from()67 int from() const { return from_; } to()68 int to() const { return to_; } size()69 int size() const { return to_ - from_ + 1; } 70 Empty()71 static Interval Empty() { return Interval(); } 72 73 static constexpr int kNone = -1; 74 75 private: 76 int from_; 77 int to_; 78 }; 79 80 // Represents code points (with values up to 0x10FFFF) in the range from from_ 81 // to to_, both ends are inclusive. 82 class CharacterRange { 83 public: CharacterRange()84 CharacterRange() : from_(0), to_(0) {} 85 // For compatibility with the CHECK_OK macro CharacterRange(void * null)86 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT 87 V8_EXPORT_PRIVATE static void AddClassEscape(char type, 88 ZoneList<CharacterRange>* ranges, 89 Zone* zone); 90 // Add class escapes. Add case equivalent closure for \w and \W if necessary. 91 V8_EXPORT_PRIVATE static void AddClassEscape( 92 char type, ZoneList<CharacterRange>* ranges, 93 bool add_unicode_case_equivalents, Zone* zone); 94 static Vector<const int> GetWordBounds(); Singleton(uc32 value)95 static inline CharacterRange Singleton(uc32 value) { 96 return CharacterRange(value, value); 97 } Range(uc32 from,uc32 to)98 static inline CharacterRange Range(uc32 from, uc32 to) { 99 DCHECK(0 <= from && to <= String::kMaxCodePoint); 100 DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to)); 101 return CharacterRange(from, to); 102 } Everything()103 static inline CharacterRange Everything() { 104 return CharacterRange(0, String::kMaxCodePoint); 105 } List(Zone * zone,CharacterRange range)106 static inline ZoneList<CharacterRange>* List(Zone* zone, 107 CharacterRange range) { 108 ZoneList<CharacterRange>* list = 109 zone->New<ZoneList<CharacterRange>>(1, zone); 110 list->Add(range, zone); 111 return list; 112 } Contains(uc32 i)113 bool Contains(uc32 i) { return from_ <= i && i <= to_; } from()114 uc32 from() const { return from_; } set_from(uc32 value)115 void set_from(uc32 value) { from_ = value; } to()116 uc32 to() const { return to_; } set_to(uc32 value)117 void set_to(uc32 value) { to_ = value; } is_valid()118 bool is_valid() { return from_ <= to_; } IsEverything(uc32 max)119 bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; } IsSingleton()120 bool IsSingleton() { return (from_ == to_); } 121 V8_EXPORT_PRIVATE static void AddCaseEquivalents( 122 Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges, 123 bool is_one_byte); 124 // Whether a range list is in canonical form: Ranges ordered by from value, 125 // and ranges non-overlapping and non-adjacent. 126 V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges); 127 // Convert range list to canonical form. The characters covered by the ranges 128 // will still be the same, but no character is in more than one range, and 129 // adjacent ranges are merged. The resulting list may be shorter than the 130 // original, but cannot be longer. 131 static void Canonicalize(ZoneList<CharacterRange>* ranges); 132 // Negate the contents of a character range in canonical form. 133 static void Negate(ZoneList<CharacterRange>* src, 134 ZoneList<CharacterRange>* dst, Zone* zone); 135 static const int kStartMarker = (1 << 24); 136 static const int kPayloadMask = (1 << 24) - 1; 137 138 private: CharacterRange(uc32 from,uc32 to)139 CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {} 140 141 uc32 from_; 142 uc32 to_; 143 }; 144 145 class CharacterSet final { 146 public: CharacterSet(uc16 standard_set_type)147 explicit CharacterSet(uc16 standard_set_type) 148 : ranges_(nullptr), standard_set_type_(standard_set_type) {} CharacterSet(ZoneList<CharacterRange> * ranges)149 explicit CharacterSet(ZoneList<CharacterRange>* ranges) 150 : ranges_(ranges), standard_set_type_(0) {} 151 ZoneList<CharacterRange>* ranges(Zone* zone); standard_set_type()152 uc16 standard_set_type() const { return standard_set_type_; } set_standard_set_type(uc16 special_set_type)153 void set_standard_set_type(uc16 special_set_type) { 154 standard_set_type_ = special_set_type; 155 } is_standard()156 bool is_standard() { return standard_set_type_ != 0; } 157 V8_EXPORT_PRIVATE void Canonicalize(); 158 159 private: 160 ZoneList<CharacterRange>* ranges_; 161 // If non-zero, the value represents a standard set (e.g., all whitespace 162 // characters) without having to expand the ranges. 163 uc16 standard_set_type_; 164 }; 165 166 class TextElement final { 167 public: 168 enum TextType { ATOM, CHAR_CLASS }; 169 170 static TextElement Atom(RegExpAtom* atom); 171 static TextElement CharClass(RegExpCharacterClass* char_class); 172 cp_offset()173 int cp_offset() const { return cp_offset_; } set_cp_offset(int cp_offset)174 void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } 175 int length() const; 176 text_type()177 TextType text_type() const { return text_type_; } 178 tree()179 RegExpTree* tree() const { return tree_; } 180 atom()181 RegExpAtom* atom() const { 182 DCHECK(text_type() == ATOM); 183 return reinterpret_cast<RegExpAtom*>(tree()); 184 } 185 char_class()186 RegExpCharacterClass* char_class() const { 187 DCHECK(text_type() == CHAR_CLASS); 188 return reinterpret_cast<RegExpCharacterClass*>(tree()); 189 } 190 191 private: TextElement(TextType text_type,RegExpTree * tree)192 TextElement(TextType text_type, RegExpTree* tree) 193 : cp_offset_(-1), text_type_(text_type), tree_(tree) {} 194 195 int cp_offset_; 196 TextType text_type_; 197 RegExpTree* tree_; 198 }; 199 200 201 class RegExpTree : public ZoneObject { 202 public: 203 static const int kInfinity = kMaxInt; 204 virtual ~RegExpTree() = default; 205 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; 206 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 207 RegExpNode* on_success) = 0; IsTextElement()208 virtual bool IsTextElement() { return false; } IsAnchoredAtStart()209 virtual bool IsAnchoredAtStart() { return false; } IsAnchoredAtEnd()210 virtual bool IsAnchoredAtEnd() { return false; } 211 virtual int min_match() = 0; 212 virtual int max_match() = 0; 213 // Returns the interval of registers used for captures within this 214 // expression. CaptureRegisters()215 virtual Interval CaptureRegisters() { return Interval::Empty(); } 216 virtual void AppendToText(RegExpText* text, Zone* zone); 217 V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, 218 Zone* zone); // NOLINT 219 #define MAKE_ASTYPE(Name) \ 220 virtual RegExp##Name* As##Name(); \ 221 virtual bool Is##Name(); 222 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) 223 #undef MAKE_ASTYPE 224 }; 225 226 227 class RegExpDisjunction final : public RegExpTree { 228 public: 229 explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); 230 void* Accept(RegExpVisitor* visitor, void* data) override; 231 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 232 RegExpDisjunction* AsDisjunction() override; 233 Interval CaptureRegisters() override; 234 bool IsDisjunction() override; 235 bool IsAnchoredAtStart() override; 236 bool IsAnchoredAtEnd() override; min_match()237 int min_match() override { return min_match_; } max_match()238 int max_match() override { return max_match_; } alternatives()239 ZoneList<RegExpTree*>* alternatives() { return alternatives_; } 240 241 private: 242 bool SortConsecutiveAtoms(RegExpCompiler* compiler); 243 void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); 244 void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); 245 ZoneList<RegExpTree*>* alternatives_; 246 int min_match_; 247 int max_match_; 248 }; 249 250 251 class RegExpAlternative final : public RegExpTree { 252 public: 253 explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); 254 void* Accept(RegExpVisitor* visitor, void* data) override; 255 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 256 RegExpAlternative* AsAlternative() override; 257 Interval CaptureRegisters() override; 258 bool IsAlternative() override; 259 bool IsAnchoredAtStart() override; 260 bool IsAnchoredAtEnd() override; min_match()261 int min_match() override { return min_match_; } max_match()262 int max_match() override { return max_match_; } nodes()263 ZoneList<RegExpTree*>* nodes() { return nodes_; } 264 265 private: 266 ZoneList<RegExpTree*>* nodes_; 267 int min_match_; 268 int max_match_; 269 }; 270 271 272 class RegExpAssertion final : public RegExpTree { 273 public: 274 enum AssertionType { 275 START_OF_LINE = 0, 276 START_OF_INPUT = 1, 277 END_OF_LINE = 2, 278 END_OF_INPUT = 3, 279 BOUNDARY = 4, 280 NON_BOUNDARY = 5, 281 LAST_TYPE = NON_BOUNDARY, 282 }; RegExpAssertion(AssertionType type,JSRegExp::Flags flags)283 RegExpAssertion(AssertionType type, JSRegExp::Flags flags) 284 : assertion_type_(type), flags_(flags) {} 285 void* Accept(RegExpVisitor* visitor, void* data) override; 286 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 287 RegExpAssertion* AsAssertion() override; 288 bool IsAssertion() override; 289 bool IsAnchoredAtStart() override; 290 bool IsAnchoredAtEnd() override; min_match()291 int min_match() override { return 0; } max_match()292 int max_match() override { return 0; } assertion_type()293 AssertionType assertion_type() const { return assertion_type_; } flags()294 JSRegExp::Flags flags() const { return flags_; } 295 296 private: 297 const AssertionType assertion_type_; 298 const JSRegExp::Flags flags_; 299 }; 300 301 302 class RegExpCharacterClass final : public RegExpTree { 303 public: 304 // NEGATED: The character class is negated and should match everything but 305 // the specified ranges. 306 // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split 307 // surrogate and should not be unicode-desugared (crbug.com/641091). 308 enum Flag { 309 NEGATED = 1 << 0, 310 CONTAINS_SPLIT_SURROGATE = 1 << 1, 311 }; 312 using CharacterClassFlags = base::Flags<Flag>; 313 314 RegExpCharacterClass( 315 Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags, 316 CharacterClassFlags character_class_flags = CharacterClassFlags()) set_(ranges)317 : set_(ranges), 318 flags_(flags), 319 character_class_flags_(character_class_flags) { 320 // Convert the empty set of ranges to the negated Everything() range. 321 if (ranges->is_empty()) { 322 ranges->Add(CharacterRange::Everything(), zone); 323 character_class_flags_ ^= NEGATED; 324 } 325 } RegExpCharacterClass(uc16 type,JSRegExp::Flags flags)326 RegExpCharacterClass(uc16 type, JSRegExp::Flags flags) 327 : set_(type), 328 flags_(flags), 329 character_class_flags_(CharacterClassFlags()) {} 330 void* Accept(RegExpVisitor* visitor, void* data) override; 331 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 332 RegExpCharacterClass* AsCharacterClass() override; 333 bool IsCharacterClass() override; IsTextElement()334 bool IsTextElement() override { return true; } min_match()335 int min_match() override { return 1; } 336 // The character class may match two code units for unicode regexps. 337 // TODO(yangguo): we should split this class for usage in TextElement, and 338 // make max_match() dependent on the character class content. max_match()339 int max_match() override { return 2; } 340 void AppendToText(RegExpText* text, Zone* zone) override; character_set()341 CharacterSet character_set() { return set_; } 342 // TODO(lrn): Remove need for complex version if is_standard that 343 // recognizes a mangled standard set and just do { return set_.is_special(); } 344 bool is_standard(Zone* zone); 345 // Returns a value representing the standard character set if is_standard() 346 // returns true. 347 // Currently used values are: 348 // s : unicode whitespace 349 // S : unicode non-whitespace 350 // w : ASCII word character (digit, letter, underscore) 351 // W : non-ASCII word character 352 // d : ASCII digit 353 // D : non-ASCII digit 354 // . : non-newline 355 // * : All characters, for advancing unanchored regexp standard_type()356 uc16 standard_type() const { return set_.standard_set_type(); } ranges(Zone * zone)357 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } is_negated()358 bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; } flags()359 JSRegExp::Flags flags() const { return flags_; } contains_split_surrogate()360 bool contains_split_surrogate() const { 361 return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0; 362 } 363 364 private: 365 CharacterSet set_; 366 const JSRegExp::Flags flags_; 367 CharacterClassFlags character_class_flags_; 368 }; 369 370 371 class RegExpAtom final : public RegExpTree { 372 public: RegExpAtom(Vector<const uc16> data,JSRegExp::Flags flags)373 explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags) 374 : data_(data), flags_(flags) {} 375 void* Accept(RegExpVisitor* visitor, void* data) override; 376 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 377 RegExpAtom* AsAtom() override; 378 bool IsAtom() override; IsTextElement()379 bool IsTextElement() override { return true; } min_match()380 int min_match() override { return data_.length(); } max_match()381 int max_match() override { return data_.length(); } 382 void AppendToText(RegExpText* text, Zone* zone) override; data()383 Vector<const uc16> data() { return data_; } length()384 int length() { return data_.length(); } flags()385 JSRegExp::Flags flags() const { return flags_; } ignore_case()386 bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } 387 388 private: 389 Vector<const uc16> data_; 390 const JSRegExp::Flags flags_; 391 }; 392 393 394 class RegExpText final : public RegExpTree { 395 public: RegExpText(Zone * zone)396 explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {} 397 void* Accept(RegExpVisitor* visitor, void* data) override; 398 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 399 RegExpText* AsText() override; 400 bool IsText() override; IsTextElement()401 bool IsTextElement() override { return true; } min_match()402 int min_match() override { return length_; } max_match()403 int max_match() override { return length_; } 404 void AppendToText(RegExpText* text, Zone* zone) override; AddElement(TextElement elm,Zone * zone)405 void AddElement(TextElement elm, Zone* zone) { 406 elements_.Add(elm, zone); 407 length_ += elm.length(); 408 } elements()409 ZoneList<TextElement>* elements() { return &elements_; } 410 411 private: 412 ZoneList<TextElement> elements_; 413 int length_; 414 }; 415 416 417 class RegExpQuantifier final : public RegExpTree { 418 public: 419 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; RegExpQuantifier(int min,int max,QuantifierType type,RegExpTree * body)420 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) 421 : body_(body), 422 min_(min), 423 max_(max), 424 quantifier_type_(type) { 425 if (min > 0 && body->min_match() > kInfinity / min) { 426 min_match_ = kInfinity; 427 } else { 428 min_match_ = min * body->min_match(); 429 } 430 if (max > 0 && body->max_match() > kInfinity / max) { 431 max_match_ = kInfinity; 432 } else { 433 max_match_ = max * body->max_match(); 434 } 435 } 436 void* Accept(RegExpVisitor* visitor, void* data) override; 437 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 438 static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, 439 RegExpCompiler* compiler, RegExpNode* on_success, 440 bool not_at_start = false); 441 RegExpQuantifier* AsQuantifier() override; 442 Interval CaptureRegisters() override; 443 bool IsQuantifier() override; min_match()444 int min_match() override { return min_match_; } max_match()445 int max_match() override { return max_match_; } min()446 int min() const { return min_; } max()447 int max() const { return max_; } quantifier_type()448 QuantifierType quantifier_type() const { return quantifier_type_; } is_possessive()449 bool is_possessive() const { return quantifier_type_ == POSSESSIVE; } is_non_greedy()450 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } is_greedy()451 bool is_greedy() const { return quantifier_type_ == GREEDY; } body()452 RegExpTree* body() { return body_; } 453 454 private: 455 RegExpTree* body_; 456 int min_; 457 int max_; 458 int min_match_; 459 int max_match_; 460 QuantifierType quantifier_type_; 461 }; 462 463 464 class RegExpCapture final : public RegExpTree { 465 public: RegExpCapture(int index)466 explicit RegExpCapture(int index) 467 : body_(nullptr), 468 index_(index), 469 min_match_(0), 470 max_match_(0), 471 name_(nullptr) {} 472 void* Accept(RegExpVisitor* visitor, void* data) override; 473 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 474 static RegExpNode* ToNode(RegExpTree* body, int index, 475 RegExpCompiler* compiler, RegExpNode* on_success); 476 RegExpCapture* AsCapture() override; 477 bool IsAnchoredAtStart() override; 478 bool IsAnchoredAtEnd() override; 479 Interval CaptureRegisters() override; 480 bool IsCapture() override; min_match()481 int min_match() override { return min_match_; } max_match()482 int max_match() override { return max_match_; } body()483 RegExpTree* body() { return body_; } set_body(RegExpTree * body)484 void set_body(RegExpTree* body) { 485 body_ = body; 486 min_match_ = body->min_match(); 487 max_match_ = body->max_match(); 488 } index()489 int index() const { return index_; } name()490 const ZoneVector<uc16>* name() const { return name_; } set_name(const ZoneVector<uc16> * name)491 void set_name(const ZoneVector<uc16>* name) { name_ = name; } StartRegister(int index)492 static int StartRegister(int index) { return index * 2; } EndRegister(int index)493 static int EndRegister(int index) { return index * 2 + 1; } 494 495 private: 496 RegExpTree* body_; 497 int index_; 498 int min_match_; 499 int max_match_; 500 const ZoneVector<uc16>* name_; 501 }; 502 503 class RegExpGroup final : public RegExpTree { 504 public: RegExpGroup(RegExpTree * body)505 explicit RegExpGroup(RegExpTree* body) 506 : body_(body), 507 min_match_(body->min_match()), 508 max_match_(body->max_match()) {} 509 void* Accept(RegExpVisitor* visitor, void* data) override; ToNode(RegExpCompiler * compiler,RegExpNode * on_success)510 RegExpNode* ToNode(RegExpCompiler* compiler, 511 RegExpNode* on_success) override { 512 return body_->ToNode(compiler, on_success); 513 } 514 RegExpGroup* AsGroup() override; IsAnchoredAtStart()515 bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); } IsAnchoredAtEnd()516 bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); } 517 bool IsGroup() override; min_match()518 int min_match() override { return min_match_; } max_match()519 int max_match() override { return max_match_; } CaptureRegisters()520 Interval CaptureRegisters() override { return body_->CaptureRegisters(); } body()521 RegExpTree* body() { return body_; } 522 523 private: 524 RegExpTree* body_; 525 int min_match_; 526 int max_match_; 527 }; 528 529 class RegExpLookaround final : public RegExpTree { 530 public: 531 enum Type { LOOKAHEAD, LOOKBEHIND }; 532 RegExpLookaround(RegExpTree * body,bool is_positive,int capture_count,int capture_from,Type type)533 RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count, 534 int capture_from, Type type) 535 : body_(body), 536 is_positive_(is_positive), 537 capture_count_(capture_count), 538 capture_from_(capture_from), 539 type_(type) {} 540 541 void* Accept(RegExpVisitor* visitor, void* data) override; 542 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 543 RegExpLookaround* AsLookaround() override; 544 Interval CaptureRegisters() override; 545 bool IsLookaround() override; 546 bool IsAnchoredAtStart() override; min_match()547 int min_match() override { return 0; } max_match()548 int max_match() override { return 0; } body()549 RegExpTree* body() { return body_; } is_positive()550 bool is_positive() { return is_positive_; } capture_count()551 int capture_count() { return capture_count_; } capture_from()552 int capture_from() { return capture_from_; } type()553 Type type() { return type_; } 554 555 class Builder { 556 public: 557 Builder(bool is_positive, RegExpNode* on_success, 558 int stack_pointer_register, int position_register, 559 int capture_register_count = 0, int capture_register_start = 0); on_match_success()560 RegExpNode* on_match_success() { return on_match_success_; } 561 RegExpNode* ForMatch(RegExpNode* match); 562 563 private: 564 bool is_positive_; 565 RegExpNode* on_match_success_; 566 RegExpNode* on_success_; 567 int stack_pointer_register_; 568 int position_register_; 569 }; 570 571 private: 572 RegExpTree* body_; 573 bool is_positive_; 574 int capture_count_; 575 int capture_from_; 576 Type type_; 577 }; 578 579 580 class RegExpBackReference final : public RegExpTree { 581 public: RegExpBackReference(JSRegExp::Flags flags)582 explicit RegExpBackReference(JSRegExp::Flags flags) 583 : capture_(nullptr), name_(nullptr), flags_(flags) {} RegExpBackReference(RegExpCapture * capture,JSRegExp::Flags flags)584 RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags) 585 : capture_(capture), name_(nullptr), flags_(flags) {} 586 void* Accept(RegExpVisitor* visitor, void* data) override; 587 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 588 RegExpBackReference* AsBackReference() override; 589 bool IsBackReference() override; min_match()590 int min_match() override { return 0; } 591 // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite 592 // recursion, we give up. Ignorance is bliss. max_match()593 int max_match() override { return kInfinity; } index()594 int index() { return capture_->index(); } capture()595 RegExpCapture* capture() { return capture_; } set_capture(RegExpCapture * capture)596 void set_capture(RegExpCapture* capture) { capture_ = capture; } name()597 const ZoneVector<uc16>* name() const { return name_; } set_name(const ZoneVector<uc16> * name)598 void set_name(const ZoneVector<uc16>* name) { name_ = name; } 599 600 private: 601 RegExpCapture* capture_; 602 const ZoneVector<uc16>* name_; 603 const JSRegExp::Flags flags_; 604 }; 605 606 607 class RegExpEmpty final : public RegExpTree { 608 public: 609 RegExpEmpty() = default; 610 void* Accept(RegExpVisitor* visitor, void* data) override; 611 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 612 RegExpEmpty* AsEmpty() override; 613 bool IsEmpty() override; min_match()614 int min_match() override { return 0; } max_match()615 int max_match() override { return 0; } 616 }; 617 618 } // namespace internal 619 } // namespace v8 620 621 #endif // V8_REGEXP_REGEXP_AST_H_ 622