• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_OBJECTS_JS_REGEXP_H_
6 #define V8_OBJECTS_JS_REGEXP_H_
7 
8 #include "src/objects/js-array.h"
9 #include "torque-generated/bit-fields.h"
10 
11 // Has to be the last include (doesn't have include guards):
12 #include "src/objects/object-macros.h"
13 
14 namespace v8 {
15 namespace internal {
16 
17 #include "torque-generated/src/objects/js-regexp-tq.inc"
18 
19 // Regular expressions
20 // The regular expression holds a single reference to a FixedArray in
21 // the kDataOffset field.
22 // The FixedArray contains the following data:
23 // - tag : type of regexp implementation (not compiled yet, atom or irregexp)
24 // - reference to the original source string
25 // - reference to the original flag string
26 // If it is an atom regexp
27 // - a reference to a literal string to search for
28 // If it is an irregexp regexp:
29 // - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
30 // used for tracking the last usage (used for regexp code flushing).
31 // - a reference to code for UC16 inputs (bytecode or compiled), or a smi
32 // used for tracking the last usage (used for regexp code flushing).
33 // - max number of registers used by irregexp implementations.
34 // - number of capture registers (output values) of the regexp.
35 class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
36  public:
37   // Meaning of Type:
38   // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
39   // ATOM: A simple string to match against using an indexOf operation.
40   // IRREGEXP: Compiled with Irregexp.
41   // EXPERIMENTAL: Compiled to use the new linear time engine.
42   enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL };
DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()43   DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
44 
45   static base::Optional<Flag> FlagFromChar(char c) {
46     STATIC_ASSERT(kFlagCount == 7);
47     // clang-format off
48     return c == 'g' ? base::Optional<Flag>(kGlobal)
49          : c == 'i' ? base::Optional<Flag>(kIgnoreCase)
50          : c == 'm' ? base::Optional<Flag>(kMultiline)
51          : c == 'y' ? base::Optional<Flag>(kSticky)
52          : c == 'u' ? base::Optional<Flag>(kUnicode)
53          : c == 's' ? base::Optional<Flag>(kDotAll)
54          : (FLAG_enable_experimental_regexp_engine && c == 'l')
55            ? base::Optional<Flag>(kLinear)
56          : base::Optional<Flag>();
57     // clang-format on
58   }
59 
60   STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
61   STATIC_ASSERT(static_cast<int>(kGlobal) == v8::RegExp::kGlobal);
62   STATIC_ASSERT(static_cast<int>(kIgnoreCase) == v8::RegExp::kIgnoreCase);
63   STATIC_ASSERT(static_cast<int>(kMultiline) == v8::RegExp::kMultiline);
64   STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky);
65   STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode);
66   STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll);
67   STATIC_ASSERT(static_cast<int>(kLinear) == v8::RegExp::kLinear);
68   STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
69 
70   DECL_ACCESSORS(last_index, Object)
71 
72   // If the backtrack limit is set to this marker value, no limit is applied.
73   static constexpr uint32_t kNoBacktrackLimit = 0;
74 
75   V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
76       Isolate* isolate, Handle<String> source, Flags flags,
77       uint32_t backtrack_limit = kNoBacktrackLimit);
78   static Handle<JSRegExp> Copy(Handle<JSRegExp> regexp);
79 
80   static MaybeHandle<JSRegExp> Initialize(
81       Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
82       uint32_t backtrack_limit = kNoBacktrackLimit);
83   static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
84                                           Handle<String> source,
85                                           Handle<String> flags_string);
86 
87   static Flags FlagsFromString(Isolate* isolate, Handle<String> flags,
88                                bool* success);
89 
90   bool CanTierUp();
91   bool MarkedForTierUp();
92   void ResetLastTierUpTick();
93   void TierUpTick();
94   void MarkTierUpForNextExec();
95 
96   inline Type TypeTag() const;
TypeSupportsCaptures(Type t)97   static bool TypeSupportsCaptures(Type t) {
98     return t == IRREGEXP || t == EXPERIMENTAL;
99   }
100 
101   // Maximum number of captures allowed.
102   static constexpr int kMaxCaptures = 1 << 16;
103 
104   // Number of captures (without the match itself).
105   inline int CaptureCount() const;
106   // Each capture (including the match itself) needs two registers.
RegistersForCaptureCount(int count)107   static int RegistersForCaptureCount(int count) { return (count + 1) * 2; }
108 
109   inline int MaxRegisterCount() const;
110   inline Flags GetFlags();
111   inline String Pattern();
112   inline Object CaptureNameMap();
113   inline Object DataAt(int index) const;
114   // Set implementation data after the object has been prepared.
115   inline void SetDataAt(int index, Object value);
116   inline void SetCaptureNameMap(Handle<FixedArray> capture_name_map);
117 
code_index(bool is_latin1)118   static constexpr int code_index(bool is_latin1) {
119     return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
120   }
121 
bytecode_index(bool is_latin1)122   static constexpr int bytecode_index(bool is_latin1) {
123     return is_latin1 ? kIrregexpLatin1BytecodeIndex
124                      : kIrregexpUC16BytecodeIndex;
125   }
126 
127   // This could be a Smi kUninitializedValue or Code.
128   V8_EXPORT_PRIVATE Object Code(bool is_latin1) const;
129   // This could be a Smi kUninitializedValue or ByteArray.
130   V8_EXPORT_PRIVATE Object Bytecode(bool is_latin1) const;
131 
132   bool ShouldProduceBytecode();
133   inline bool HasCompiledCode() const;
134   inline void DiscardCompiledCodeForSerialization();
135 
136   uint32_t BacktrackLimit() const;
137 
138   // Dispatched behavior.
139   DECL_PRINTER(JSRegExp)
140   DECL_VERIFIER(JSRegExp)
141 
142   /* This is already an in-object field. */
143   // TODO(v8:8944): improve handling of in-object fields
144   static constexpr int kLastIndexOffset = kHeaderSize;
145 
146   // Indices in the data array.
147   static const int kTagIndex = 0;
148   static const int kSourceIndex = kTagIndex + 1;
149   static const int kFlagsIndex = kSourceIndex + 1;
150   static const int kDataIndex = kFlagsIndex + 1;
151 
152   // TODO(jgruber): Rename kDataIndex to something more appropriate.
153   // There is no 'data' field, kDataIndex is just a marker for the
154   // first non-generic index.
155   static constexpr int kMinDataArrayLength = kDataIndex;
156 
157   // The data fields are used in different ways depending on the
158   // value of the tag.
159   // Atom regexps (literal strings).
160   static const int kAtomPatternIndex = kDataIndex;
161 
162   static const int kAtomDataSize = kAtomPatternIndex + 1;
163 
164   // Irregexp compiled code or trampoline to interpreter for Latin1. If
165   // compilation fails, this fields hold an exception object that should be
166   // thrown if the regexp is used again.
167   static const int kIrregexpLatin1CodeIndex = kDataIndex;
168   // Irregexp compiled code or trampoline to interpreter for UC16.  If
169   // compilation fails, this fields hold an exception object that should be
170   // thrown if the regexp is used again.
171   static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
172   // Bytecode to interpret the regexp for Latin1. Contains kUninitializedValue
173   // if we haven't compiled the regexp yet, regexp are always compiled or if
174   // tier-up has happened (i.e. when kIrregexpLatin1CodeIndex contains native
175   // irregexp code).
176   static const int kIrregexpLatin1BytecodeIndex = kDataIndex + 2;
177   // Bytecode to interpret the regexp for UC16. Contains kUninitializedValue if
178   // we haven't compiled the regxp yet, regexp are always compiled or if tier-up
179   // has happened (i.e. when kIrregexpUC16CodeIndex contains native irregexp
180   // code).
181   static const int kIrregexpUC16BytecodeIndex = kDataIndex + 3;
182   // Maximal number of registers used by either Latin1 or UC16.
183   // Only used to check that there is enough stack space
184   static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 4;
185   // Number of captures in the compiled regexp.
186   static const int kIrregexpCaptureCountIndex = kDataIndex + 5;
187   // Maps names of named capture groups (at indices 2i) to their corresponding
188   // (1-based) capture group indices (at indices 2i + 1).
189   static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
190   // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
191   // decremented on each execution of the bytecode, so that the tier-up
192   // happens once the ticks reach zero.
193   // This value is ignored if the regexp-tier-up flag isn't turned on.
194   static const int kIrregexpTicksUntilTierUpIndex = kDataIndex + 7;
195   // A smi containing either the backtracking limit or kNoBacktrackLimit.
196   // TODO(jgruber): If needed, this limit could be packed into other fields
197   // above to save space.
198   static const int kIrregexpBacktrackLimit = kDataIndex + 8;
199   static const int kIrregexpDataSize = kDataIndex + 9;
200 
201   // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
202   // to the format of an IRREGEXP data array, with most fields set to some
203   // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
204   // regexps take the same code path in `RegExpExecInternal`, which reads off
205   // various fields from the data array. `RegExpExecInternal` should probably
206   // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
207   // all the IRREGEXP only fields.
208   static constexpr int kExperimentalDataSize = kIrregexpDataSize;
209 
210   // In-object fields.
211   static const int kLastIndexFieldIndex = 0;
212   static const int kInObjectFieldCount = 1;
213 
214   // Descriptor array index to important methods in the prototype.
215   static const int kExecFunctionDescriptorIndex = 1;
216   static const int kSymbolMatchFunctionDescriptorIndex = 13;
217   static const int kSymbolMatchAllFunctionDescriptorIndex = 14;
218   static const int kSymbolReplaceFunctionDescriptorIndex = 15;
219   static const int kSymbolSearchFunctionDescriptorIndex = 16;
220   static const int kSymbolSplitFunctionDescriptorIndex = 17;
221 
222   // The uninitialized value for a regexp code object.
223   static const int kUninitializedValue = -1;
224 
225   // The heuristic value for the length of the subject string for which we
226   // tier-up to the compiler immediately, instead of using the interpreter.
227   static constexpr int kTierUpForSubjectLengthValue = 1000;
228 
229   TQ_OBJECT_CONSTRUCTORS(JSRegExp)
230 };
231 
// Operator definitions for the JSRegExp::Flags type, generated by the
// DEFINE_OPERATORS_FOR_FLAGS macro (defined outside this header).
DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)
233 
234 // JSRegExpResult is just a JSArray with a specific initial map.
235 // This initial map adds in-object properties for "index" and "input"
236 // properties, as assigned by RegExp.prototype.exec, which allows
237 // faster creation of RegExp exec results.
238 // This class just holds constants used when creating the result.
239 // After creation the result must be treated as a JSArray in all regards.
240 class JSRegExpResult : public JSArray {
241  public:
242   DECL_CAST(JSRegExpResult)
243 
244   // TODO(joshualitt): We would like to add printers and verifiers to
245   // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
246   // instance type as JSArray.
247 
248   // Layout description.
249   DEFINE_FIELD_OFFSET_CONSTANTS(JSArray::kHeaderSize,
250                                 TORQUE_GENERATED_JS_REG_EXP_RESULT_FIELDS)
251 
252   static MaybeHandle<JSArray> GetAndCacheIndices(
253       Isolate* isolate, Handle<JSRegExpResult> regexp_result);
254 
255   // Indices of in-object properties.
256   static const int kIndexIndex = 0;
257   static const int kInputIndex = 1;
258   static const int kGroupsIndex = 2;
259 
260   // Private internal only fields.
261   static const int kCachedIndicesOrRegExpIndex = 3;
262   static const int kNamesIndex = 4;
263   static const int kRegExpInputIndex = 5;
264   static const int kRegExpLastIndex = 6;
265   static const int kInObjectPropertyCount = 7;
266 
267   OBJECT_CONSTRUCTORS(JSRegExpResult, JSArray);
268 };
269 
270 // JSRegExpResultIndices is just a JSArray with a specific initial map.
271 // This initial map adds in-object properties for "group"
272 // properties, as assigned by RegExp.prototype.exec, which allows
273 // faster creation of RegExp exec results.
274 // This class just holds constants used when creating the result.
275 // After creation the result must be treated as a JSArray in all regards.
276 class JSRegExpResultIndices : public JSArray {
277  public:
278   DECL_CAST(JSRegExpResultIndices)
279 
280   // Layout description.
281   DEFINE_FIELD_OFFSET_CONSTANTS(
282       JSArray::kHeaderSize, TORQUE_GENERATED_JS_REG_EXP_RESULT_INDICES_FIELDS)
283 
284   static Handle<JSRegExpResultIndices> BuildIndices(
285       Isolate* isolate, Handle<RegExpMatchInfo> match_info,
286       Handle<Object> maybe_names);
287 
288   // Indices of in-object properties.
289   static const int kGroupsIndex = 0;
290   static const int kInObjectPropertyCount = 1;
291 
292   // Descriptor index of groups.
293   static const int kGroupsDescriptorIndex = 1;
294 
295   OBJECT_CONSTRUCTORS(JSRegExpResultIndices, JSArray);
296 };
297 
298 }  // namespace internal
299 }  // namespace v8
300 
301 #include "src/objects/object-macros-undef.h"
302 
303 #endif  // V8_OBJECTS_JS_REGEXP_H_
304