• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_REGEXP_REGEXP_AST_H_
6 #define V8_REGEXP_REGEXP_AST_H_
7 
#include <algorithm>

#include "src/objects/js-regexp.h"
#include "src/objects/objects.h"
#include "src/objects/string.h"
#include "src/utils/utils.h"
#include "src/zone/zone-containers.h"
#include "src/zone/zone-list.h"
#include "src/zone/zone.h"
15 
16 namespace v8 {
17 namespace internal {
18 
// X-macro listing every concrete regexp AST node kind. VISIT is expanded once
// per kind with the class-name suffix; the node class itself is RegExp<Name>.
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
  VISIT(Disjunction)                      \
  VISIT(Alternative)                      \
  VISIT(Assertion)                        \
  VISIT(CharacterClass)                   \
  VISIT(Atom)                             \
  VISIT(Quantifier)                       \
  VISIT(Capture)                          \
  VISIT(Group)                            \
  VISIT(Lookaround)                       \
  VISIT(BackReference)                    \
  VISIT(Empty)                            \
  VISIT(Text)

// Forward-declare RegExp<Name> for each node kind so the classes below can
// refer to each other by pointer before their definitions.
#define DECLARE_NODE_CLASS(Name) class RegExp##Name;
FOR_EACH_REG_EXP_TREE_TYPE(DECLARE_NODE_CLASS)
#undef DECLARE_NODE_CLASS

// Further types that are only used by pointer in this header.
class RegExpCompiler;
class RegExpNode;
class RegExpTree;
40 
41 class RegExpVisitor {
42  public:
43   virtual ~RegExpVisitor() = default;
44 #define MAKE_CASE(Name) \
45   virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
46   FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
47 #undef MAKE_CASE
48 };
49 
50 
// A simple closed interval [from, to] of ints. The empty interval is encoded
// by from == kNone.
class Interval {
 public:
  // Constructs the empty interval. to_ is set to kNone - 1 so that size()
  // evaluates to 0 without needing a branch.
  Interval() : from_(kNone), to_(kNone - 1) {}
  Interval(int from, int to) : from_(from), to_(to) {}

  // Returns the smallest interval covering both this and that. An empty
  // operand is ignored, so the union with Empty() is the other operand.
  Interval Union(Interval that) const {
    if (that.from_ == kNone) return *this;
    if (from_ == kNone) return that;
    return Interval(std::min(from_, that.from_), std::max(to_, that.to_));
  }

  // True if value lies within [from, to], both ends inclusive.
  bool Contains(int value) const {
    return (from_ <= value) && (value <= to_);
  }
  bool is_empty() const { return from_ == kNone; }
  int from() const { return from_; }
  int to() const { return to_; }
  // Number of ints covered by the interval; 0 for the empty interval.
  int size() const { return to_ - from_ + 1; }

  static Interval Empty() { return Interval(); }

  // Marker stored in from_ to denote the empty interval.
  static constexpr int kNone = -1;

 private:
  int from_;
  int to_;
};
79 
80 // Represents code points (with values up to 0x10FFFF) in the range from from_
81 // to to_, both ends are inclusive.
82 class CharacterRange {
83  public:
CharacterRange()84   CharacterRange() : from_(0), to_(0) {}
85   // For compatibility with the CHECK_OK macro
CharacterRange(void * null)86   CharacterRange(void* null) { DCHECK_NULL(null); }  // NOLINT
87   V8_EXPORT_PRIVATE static void AddClassEscape(char type,
88                                                ZoneList<CharacterRange>* ranges,
89                                                Zone* zone);
90   // Add class escapes. Add case equivalent closure for \w and \W if necessary.
91   V8_EXPORT_PRIVATE static void AddClassEscape(
92       char type, ZoneList<CharacterRange>* ranges,
93       bool add_unicode_case_equivalents, Zone* zone);
94   static Vector<const int> GetWordBounds();
Singleton(uc32 value)95   static inline CharacterRange Singleton(uc32 value) {
96     return CharacterRange(value, value);
97   }
Range(uc32 from,uc32 to)98   static inline CharacterRange Range(uc32 from, uc32 to) {
99     DCHECK(0 <= from && to <= String::kMaxCodePoint);
100     DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to));
101     return CharacterRange(from, to);
102   }
Everything()103   static inline CharacterRange Everything() {
104     return CharacterRange(0, String::kMaxCodePoint);
105   }
List(Zone * zone,CharacterRange range)106   static inline ZoneList<CharacterRange>* List(Zone* zone,
107                                                CharacterRange range) {
108     ZoneList<CharacterRange>* list =
109         zone->New<ZoneList<CharacterRange>>(1, zone);
110     list->Add(range, zone);
111     return list;
112   }
Contains(uc32 i)113   bool Contains(uc32 i) { return from_ <= i && i <= to_; }
from()114   uc32 from() const { return from_; }
set_from(uc32 value)115   void set_from(uc32 value) { from_ = value; }
to()116   uc32 to() const { return to_; }
set_to(uc32 value)117   void set_to(uc32 value) { to_ = value; }
is_valid()118   bool is_valid() { return from_ <= to_; }
IsEverything(uc32 max)119   bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; }
IsSingleton()120   bool IsSingleton() { return (from_ == to_); }
121   V8_EXPORT_PRIVATE static void AddCaseEquivalents(
122       Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
123       bool is_one_byte);
124   // Whether a range list is in canonical form: Ranges ordered by from value,
125   // and ranges non-overlapping and non-adjacent.
126   V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges);
127   // Convert range list to canonical form. The characters covered by the ranges
128   // will still be the same, but no character is in more than one range, and
129   // adjacent ranges are merged. The resulting list may be shorter than the
130   // original, but cannot be longer.
131   static void Canonicalize(ZoneList<CharacterRange>* ranges);
132   // Negate the contents of a character range in canonical form.
133   static void Negate(ZoneList<CharacterRange>* src,
134                      ZoneList<CharacterRange>* dst, Zone* zone);
135   static const int kStartMarker = (1 << 24);
136   static const int kPayloadMask = (1 << 24) - 1;
137 
138  private:
CharacterRange(uc32 from,uc32 to)139   CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {}
140 
141   uc32 from_;
142   uc32 to_;
143 };
144 
145 class CharacterSet final {
146  public:
CharacterSet(uc16 standard_set_type)147   explicit CharacterSet(uc16 standard_set_type)
148       : ranges_(nullptr), standard_set_type_(standard_set_type) {}
CharacterSet(ZoneList<CharacterRange> * ranges)149   explicit CharacterSet(ZoneList<CharacterRange>* ranges)
150       : ranges_(ranges), standard_set_type_(0) {}
151   ZoneList<CharacterRange>* ranges(Zone* zone);
standard_set_type()152   uc16 standard_set_type() const { return standard_set_type_; }
set_standard_set_type(uc16 special_set_type)153   void set_standard_set_type(uc16 special_set_type) {
154     standard_set_type_ = special_set_type;
155   }
is_standard()156   bool is_standard() { return standard_set_type_ != 0; }
157   V8_EXPORT_PRIVATE void Canonicalize();
158 
159  private:
160   ZoneList<CharacterRange>* ranges_;
161   // If non-zero, the value represents a standard set (e.g., all whitespace
162   // characters) without having to expand the ranges.
163   uc16 standard_set_type_;
164 };
165 
166 class TextElement final {
167  public:
168   enum TextType { ATOM, CHAR_CLASS };
169 
170   static TextElement Atom(RegExpAtom* atom);
171   static TextElement CharClass(RegExpCharacterClass* char_class);
172 
cp_offset()173   int cp_offset() const { return cp_offset_; }
set_cp_offset(int cp_offset)174   void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
175   int length() const;
176 
text_type()177   TextType text_type() const { return text_type_; }
178 
tree()179   RegExpTree* tree() const { return tree_; }
180 
atom()181   RegExpAtom* atom() const {
182     DCHECK(text_type() == ATOM);
183     return reinterpret_cast<RegExpAtom*>(tree());
184   }
185 
char_class()186   RegExpCharacterClass* char_class() const {
187     DCHECK(text_type() == CHAR_CLASS);
188     return reinterpret_cast<RegExpCharacterClass*>(tree());
189   }
190 
191  private:
TextElement(TextType text_type,RegExpTree * tree)192   TextElement(TextType text_type, RegExpTree* tree)
193       : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
194 
195   int cp_offset_;
196   TextType text_type_;
197   RegExpTree* tree_;
198 };
199 
200 
201 class RegExpTree : public ZoneObject {
202  public:
203   static const int kInfinity = kMaxInt;
204   virtual ~RegExpTree() = default;
205   virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
206   virtual RegExpNode* ToNode(RegExpCompiler* compiler,
207                              RegExpNode* on_success) = 0;
IsTextElement()208   virtual bool IsTextElement() { return false; }
IsAnchoredAtStart()209   virtual bool IsAnchoredAtStart() { return false; }
IsAnchoredAtEnd()210   virtual bool IsAnchoredAtEnd() { return false; }
211   virtual int min_match() = 0;
212   virtual int max_match() = 0;
213   // Returns the interval of registers used for captures within this
214   // expression.
CaptureRegisters()215   virtual Interval CaptureRegisters() { return Interval::Empty(); }
216   virtual void AppendToText(RegExpText* text, Zone* zone);
217   V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os,
218                                         Zone* zone);  // NOLINT
219 #define MAKE_ASTYPE(Name)           \
220   virtual RegExp##Name* As##Name(); \
221   virtual bool Is##Name();
222   FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
223 #undef MAKE_ASTYPE
224 };
225 
226 
227 class RegExpDisjunction final : public RegExpTree {
228  public:
229   explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
230   void* Accept(RegExpVisitor* visitor, void* data) override;
231   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
232   RegExpDisjunction* AsDisjunction() override;
233   Interval CaptureRegisters() override;
234   bool IsDisjunction() override;
235   bool IsAnchoredAtStart() override;
236   bool IsAnchoredAtEnd() override;
min_match()237   int min_match() override { return min_match_; }
max_match()238   int max_match() override { return max_match_; }
alternatives()239   ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
240 
241  private:
242   bool SortConsecutiveAtoms(RegExpCompiler* compiler);
243   void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
244   void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
245   ZoneList<RegExpTree*>* alternatives_;
246   int min_match_;
247   int max_match_;
248 };
249 
250 
251 class RegExpAlternative final : public RegExpTree {
252  public:
253   explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
254   void* Accept(RegExpVisitor* visitor, void* data) override;
255   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
256   RegExpAlternative* AsAlternative() override;
257   Interval CaptureRegisters() override;
258   bool IsAlternative() override;
259   bool IsAnchoredAtStart() override;
260   bool IsAnchoredAtEnd() override;
min_match()261   int min_match() override { return min_match_; }
max_match()262   int max_match() override { return max_match_; }
nodes()263   ZoneList<RegExpTree*>* nodes() { return nodes_; }
264 
265  private:
266   ZoneList<RegExpTree*>* nodes_;
267   int min_match_;
268   int max_match_;
269 };
270 
271 
272 class RegExpAssertion final : public RegExpTree {
273  public:
274   enum AssertionType {
275     START_OF_LINE = 0,
276     START_OF_INPUT = 1,
277     END_OF_LINE = 2,
278     END_OF_INPUT = 3,
279     BOUNDARY = 4,
280     NON_BOUNDARY = 5,
281     LAST_TYPE = NON_BOUNDARY,
282   };
RegExpAssertion(AssertionType type,JSRegExp::Flags flags)283   RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
284       : assertion_type_(type), flags_(flags) {}
285   void* Accept(RegExpVisitor* visitor, void* data) override;
286   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
287   RegExpAssertion* AsAssertion() override;
288   bool IsAssertion() override;
289   bool IsAnchoredAtStart() override;
290   bool IsAnchoredAtEnd() override;
min_match()291   int min_match() override { return 0; }
max_match()292   int max_match() override { return 0; }
assertion_type()293   AssertionType assertion_type() const { return assertion_type_; }
flags()294   JSRegExp::Flags flags() const { return flags_; }
295 
296  private:
297   const AssertionType assertion_type_;
298   const JSRegExp::Flags flags_;
299 };
300 
301 
302 class RegExpCharacterClass final : public RegExpTree {
303  public:
304   // NEGATED: The character class is negated and should match everything but
305   //     the specified ranges.
306   // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
307   //     surrogate and should not be unicode-desugared (crbug.com/641091).
308   enum Flag {
309     NEGATED = 1 << 0,
310     CONTAINS_SPLIT_SURROGATE = 1 << 1,
311   };
312   using CharacterClassFlags = base::Flags<Flag>;
313 
314   RegExpCharacterClass(
315       Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
316       CharacterClassFlags character_class_flags = CharacterClassFlags())
set_(ranges)317       : set_(ranges),
318         flags_(flags),
319         character_class_flags_(character_class_flags) {
320     // Convert the empty set of ranges to the negated Everything() range.
321     if (ranges->is_empty()) {
322       ranges->Add(CharacterRange::Everything(), zone);
323       character_class_flags_ ^= NEGATED;
324     }
325   }
RegExpCharacterClass(uc16 type,JSRegExp::Flags flags)326   RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
327       : set_(type),
328         flags_(flags),
329         character_class_flags_(CharacterClassFlags()) {}
330   void* Accept(RegExpVisitor* visitor, void* data) override;
331   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
332   RegExpCharacterClass* AsCharacterClass() override;
333   bool IsCharacterClass() override;
IsTextElement()334   bool IsTextElement() override { return true; }
min_match()335   int min_match() override { return 1; }
336   // The character class may match two code units for unicode regexps.
337   // TODO(yangguo): we should split this class for usage in TextElement, and
338   //                make max_match() dependent on the character class content.
max_match()339   int max_match() override { return 2; }
340   void AppendToText(RegExpText* text, Zone* zone) override;
character_set()341   CharacterSet character_set() { return set_; }
342   // TODO(lrn): Remove need for complex version if is_standard that
343   // recognizes a mangled standard set and just do { return set_.is_special(); }
344   bool is_standard(Zone* zone);
345   // Returns a value representing the standard character set if is_standard()
346   // returns true.
347   // Currently used values are:
348   // s : unicode whitespace
349   // S : unicode non-whitespace
350   // w : ASCII word character (digit, letter, underscore)
351   // W : non-ASCII word character
352   // d : ASCII digit
353   // D : non-ASCII digit
354   // . : non-newline
355   // * : All characters, for advancing unanchored regexp
standard_type()356   uc16 standard_type() const { return set_.standard_set_type(); }
ranges(Zone * zone)357   ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
is_negated()358   bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
flags()359   JSRegExp::Flags flags() const { return flags_; }
contains_split_surrogate()360   bool contains_split_surrogate() const {
361     return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
362   }
363 
364  private:
365   CharacterSet set_;
366   const JSRegExp::Flags flags_;
367   CharacterClassFlags character_class_flags_;
368 };
369 
370 
371 class RegExpAtom final : public RegExpTree {
372  public:
RegExpAtom(Vector<const uc16> data,JSRegExp::Flags flags)373   explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
374       : data_(data), flags_(flags) {}
375   void* Accept(RegExpVisitor* visitor, void* data) override;
376   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
377   RegExpAtom* AsAtom() override;
378   bool IsAtom() override;
IsTextElement()379   bool IsTextElement() override { return true; }
min_match()380   int min_match() override { return data_.length(); }
max_match()381   int max_match() override { return data_.length(); }
382   void AppendToText(RegExpText* text, Zone* zone) override;
data()383   Vector<const uc16> data() { return data_; }
length()384   int length() { return data_.length(); }
flags()385   JSRegExp::Flags flags() const { return flags_; }
ignore_case()386   bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
387 
388  private:
389   Vector<const uc16> data_;
390   const JSRegExp::Flags flags_;
391 };
392 
393 
394 class RegExpText final : public RegExpTree {
395  public:
RegExpText(Zone * zone)396   explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}
397   void* Accept(RegExpVisitor* visitor, void* data) override;
398   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
399   RegExpText* AsText() override;
400   bool IsText() override;
IsTextElement()401   bool IsTextElement() override { return true; }
min_match()402   int min_match() override { return length_; }
max_match()403   int max_match() override { return length_; }
404   void AppendToText(RegExpText* text, Zone* zone) override;
AddElement(TextElement elm,Zone * zone)405   void AddElement(TextElement elm, Zone* zone) {
406     elements_.Add(elm, zone);
407     length_ += elm.length();
408   }
elements()409   ZoneList<TextElement>* elements() { return &elements_; }
410 
411  private:
412   ZoneList<TextElement> elements_;
413   int length_;
414 };
415 
416 
417 class RegExpQuantifier final : public RegExpTree {
418  public:
419   enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
RegExpQuantifier(int min,int max,QuantifierType type,RegExpTree * body)420   RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
421       : body_(body),
422         min_(min),
423         max_(max),
424         quantifier_type_(type) {
425     if (min > 0 && body->min_match() > kInfinity / min) {
426       min_match_ = kInfinity;
427     } else {
428       min_match_ = min * body->min_match();
429     }
430     if (max > 0 && body->max_match() > kInfinity / max) {
431       max_match_ = kInfinity;
432     } else {
433       max_match_ = max * body->max_match();
434     }
435   }
436   void* Accept(RegExpVisitor* visitor, void* data) override;
437   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
438   static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
439                             RegExpCompiler* compiler, RegExpNode* on_success,
440                             bool not_at_start = false);
441   RegExpQuantifier* AsQuantifier() override;
442   Interval CaptureRegisters() override;
443   bool IsQuantifier() override;
min_match()444   int min_match() override { return min_match_; }
max_match()445   int max_match() override { return max_match_; }
min()446   int min() const { return min_; }
max()447   int max() const { return max_; }
quantifier_type()448   QuantifierType quantifier_type() const { return quantifier_type_; }
is_possessive()449   bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
is_non_greedy()450   bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
is_greedy()451   bool is_greedy() const { return quantifier_type_ == GREEDY; }
body()452   RegExpTree* body() { return body_; }
453 
454  private:
455   RegExpTree* body_;
456   int min_;
457   int max_;
458   int min_match_;
459   int max_match_;
460   QuantifierType quantifier_type_;
461 };
462 
463 
464 class RegExpCapture final : public RegExpTree {
465  public:
RegExpCapture(int index)466   explicit RegExpCapture(int index)
467       : body_(nullptr),
468         index_(index),
469         min_match_(0),
470         max_match_(0),
471         name_(nullptr) {}
472   void* Accept(RegExpVisitor* visitor, void* data) override;
473   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
474   static RegExpNode* ToNode(RegExpTree* body, int index,
475                             RegExpCompiler* compiler, RegExpNode* on_success);
476   RegExpCapture* AsCapture() override;
477   bool IsAnchoredAtStart() override;
478   bool IsAnchoredAtEnd() override;
479   Interval CaptureRegisters() override;
480   bool IsCapture() override;
min_match()481   int min_match() override { return min_match_; }
max_match()482   int max_match() override { return max_match_; }
body()483   RegExpTree* body() { return body_; }
set_body(RegExpTree * body)484   void set_body(RegExpTree* body) {
485     body_ = body;
486     min_match_ = body->min_match();
487     max_match_ = body->max_match();
488   }
index()489   int index() const { return index_; }
name()490   const ZoneVector<uc16>* name() const { return name_; }
set_name(const ZoneVector<uc16> * name)491   void set_name(const ZoneVector<uc16>* name) { name_ = name; }
StartRegister(int index)492   static int StartRegister(int index) { return index * 2; }
EndRegister(int index)493   static int EndRegister(int index) { return index * 2 + 1; }
494 
495  private:
496   RegExpTree* body_;
497   int index_;
498   int min_match_;
499   int max_match_;
500   const ZoneVector<uc16>* name_;
501 };
502 
503 class RegExpGroup final : public RegExpTree {
504  public:
RegExpGroup(RegExpTree * body)505   explicit RegExpGroup(RegExpTree* body)
506       : body_(body),
507         min_match_(body->min_match()),
508         max_match_(body->max_match()) {}
509   void* Accept(RegExpVisitor* visitor, void* data) override;
ToNode(RegExpCompiler * compiler,RegExpNode * on_success)510   RegExpNode* ToNode(RegExpCompiler* compiler,
511                      RegExpNode* on_success) override {
512     return body_->ToNode(compiler, on_success);
513   }
514   RegExpGroup* AsGroup() override;
IsAnchoredAtStart()515   bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); }
IsAnchoredAtEnd()516   bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); }
517   bool IsGroup() override;
min_match()518   int min_match() override { return min_match_; }
max_match()519   int max_match() override { return max_match_; }
CaptureRegisters()520   Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
body()521   RegExpTree* body() { return body_; }
522 
523  private:
524   RegExpTree* body_;
525   int min_match_;
526   int max_match_;
527 };
528 
529 class RegExpLookaround final : public RegExpTree {
530  public:
531   enum Type { LOOKAHEAD, LOOKBEHIND };
532 
RegExpLookaround(RegExpTree * body,bool is_positive,int capture_count,int capture_from,Type type)533   RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
534                    int capture_from, Type type)
535       : body_(body),
536         is_positive_(is_positive),
537         capture_count_(capture_count),
538         capture_from_(capture_from),
539         type_(type) {}
540 
541   void* Accept(RegExpVisitor* visitor, void* data) override;
542   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
543   RegExpLookaround* AsLookaround() override;
544   Interval CaptureRegisters() override;
545   bool IsLookaround() override;
546   bool IsAnchoredAtStart() override;
min_match()547   int min_match() override { return 0; }
max_match()548   int max_match() override { return 0; }
body()549   RegExpTree* body() { return body_; }
is_positive()550   bool is_positive() { return is_positive_; }
capture_count()551   int capture_count() { return capture_count_; }
capture_from()552   int capture_from() { return capture_from_; }
type()553   Type type() { return type_; }
554 
555   class Builder {
556    public:
557     Builder(bool is_positive, RegExpNode* on_success,
558             int stack_pointer_register, int position_register,
559             int capture_register_count = 0, int capture_register_start = 0);
on_match_success()560     RegExpNode* on_match_success() { return on_match_success_; }
561     RegExpNode* ForMatch(RegExpNode* match);
562 
563    private:
564     bool is_positive_;
565     RegExpNode* on_match_success_;
566     RegExpNode* on_success_;
567     int stack_pointer_register_;
568     int position_register_;
569   };
570 
571  private:
572   RegExpTree* body_;
573   bool is_positive_;
574   int capture_count_;
575   int capture_from_;
576   Type type_;
577 };
578 
579 
580 class RegExpBackReference final : public RegExpTree {
581  public:
RegExpBackReference(JSRegExp::Flags flags)582   explicit RegExpBackReference(JSRegExp::Flags flags)
583       : capture_(nullptr), name_(nullptr), flags_(flags) {}
RegExpBackReference(RegExpCapture * capture,JSRegExp::Flags flags)584   RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
585       : capture_(capture), name_(nullptr), flags_(flags) {}
586   void* Accept(RegExpVisitor* visitor, void* data) override;
587   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
588   RegExpBackReference* AsBackReference() override;
589   bool IsBackReference() override;
min_match()590   int min_match() override { return 0; }
591   // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
592   // recursion, we give up. Ignorance is bliss.
max_match()593   int max_match() override { return kInfinity; }
index()594   int index() { return capture_->index(); }
capture()595   RegExpCapture* capture() { return capture_; }
set_capture(RegExpCapture * capture)596   void set_capture(RegExpCapture* capture) { capture_ = capture; }
name()597   const ZoneVector<uc16>* name() const { return name_; }
set_name(const ZoneVector<uc16> * name)598   void set_name(const ZoneVector<uc16>* name) { name_ = name; }
599 
600  private:
601   RegExpCapture* capture_;
602   const ZoneVector<uc16>* name_;
603   const JSRegExp::Flags flags_;
604 };
605 
606 
607 class RegExpEmpty final : public RegExpTree {
608  public:
609   RegExpEmpty() = default;
610   void* Accept(RegExpVisitor* visitor, void* data) override;
611   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
612   RegExpEmpty* AsEmpty() override;
613   bool IsEmpty() override;
min_match()614   int min_match() override { return 0; }
max_match()615   int max_match() override { return 0; }
616 };
617 
618 }  // namespace internal
619 }  // namespace v8
620 
621 #endif  // V8_REGEXP_REGEXP_AST_H_
622