// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5 #ifndef V8_OBJECTS_JS_REGEXP_H_
6 #define V8_OBJECTS_JS_REGEXP_H_
7 
8 #include "include/v8-regexp.h"
9 #include "src/objects/contexts.h"
10 #include "src/objects/js-array.h"
11 #include "src/regexp/regexp-flags.h"
12 #include "torque-generated/bit-fields.h"
13 
14 // Has to be the last include (doesn't have include guards):
15 #include "src/objects/object-macros.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 #include "torque-generated/src/objects/js-regexp-tq.inc"
21 
22 // Regular expressions
23 // The regular expression holds a single reference to a FixedArray in
24 // the kDataOffset field.
25 // The FixedArray contains the following data:
26 // - tag : type of regexp implementation (not compiled yet, atom or irregexp)
27 // - reference to the original source string
28 // - reference to the original flag string
29 // If it is an atom regexp
30 // - a reference to a literal string to search for
31 // If it is an irregexp regexp:
32 // - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
33 // used for tracking the last usage (used for regexp code flushing).
34 // - a reference to code for UC16 inputs (bytecode or compiled), or a smi
35 // used for tracking the last usage (used for regexp code flushing).
36 // - max number of registers used by irregexp implementations.
37 // - number of capture registers (output values) of the regexp.
38 class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
39  public:
40   enum Type {
41     NOT_COMPILED,  // Initial value. No data array has been set yet.
42     ATOM,          // A simple string match.
43     IRREGEXP,      // Compiled with Irregexp (code or bytecode).
44     EXPERIMENTAL,  // Compiled to use the experimental linear time engine.
45   };
46   DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
47 
48   V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
49       Isolate* isolate, Handle<String> source, Flags flags,
50       uint32_t backtrack_limit = kNoBacktrackLimit);
51 
52   static MaybeHandle<JSRegExp> Initialize(
53       Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
54       uint32_t backtrack_limit = kNoBacktrackLimit);
55   static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
56                                           Handle<String> source,
57                                           Handle<String> flags_string);
58 
59   DECL_ACCESSORS(last_index, Object)
60 
61   // Instance fields accessors.
62   inline String source() const;
63   inline Flags flags() const;
64 
65   // Data array field accessors.
66 
67   inline Type type_tag() const;
68   inline String atom_pattern() const;
69   // This could be a Smi kUninitializedValue or Code.
70   V8_EXPORT_PRIVATE Object code(bool is_latin1) const;
71   V8_EXPORT_PRIVATE void set_code(bool is_unicode, Handle<Code> code);
72   // This could be a Smi kUninitializedValue or ByteArray.
73   V8_EXPORT_PRIVATE Object bytecode(bool is_latin1) const;
74   // Sets the bytecode as well as initializing trampoline slots to the
75   // RegExpInterpreterTrampoline.
76   void set_bytecode_and_trampoline(Isolate* isolate,
77                                    Handle<ByteArray> bytecode);
78   inline int max_register_count() const;
79   // Number of captures (without the match itself).
80   inline int capture_count() const;
81   inline Object capture_name_map();
82   inline void set_capture_name_map(Handle<FixedArray> capture_name_map);
83   uint32_t backtrack_limit() const;
84 
AsJSRegExpFlag(RegExpFlag f)85   static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
86     return static_cast<Flag>(f);
87   }
AsJSRegExpFlags(RegExpFlags f)88   static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
89     return Flags{static_cast<int>(f)};
90   }
AsRegExpFlags(Flags f)91   static constexpr RegExpFlags AsRegExpFlags(Flags f) {
92     return RegExpFlags{static_cast<int>(f)};
93   }
94 
FlagFromChar(char c)95   static base::Optional<RegExpFlag> FlagFromChar(char c) {
96     base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
97     if (!f.has_value()) return f;
98     if (f.value() == RegExpFlag::kLinear &&
99         !FLAG_enable_experimental_regexp_engine) {
100       return {};
101     }
102     return f;
103   }
104 
105   STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
106 #define V(_, Camel, ...)                                             \
107   STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
108   STATIC_ASSERT(static_cast<int>(k##Camel) ==                        \
109                 static_cast<int>(RegExpFlag::k##Camel));
110   REGEXP_FLAG_LIST(V)
111 #undef V
112   STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
113   STATIC_ASSERT(kFlagCount == kRegExpFlagCount);
114 
115   static base::Optional<Flags> FlagsFromString(Isolate* isolate,
116                                                Handle<String> flags);
117 
118   V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
119                                                           Flags flags);
120 
121   inline String EscapedPattern();
122 
123   bool CanTierUp();
124   bool MarkedForTierUp();
125   void ResetLastTierUpTick();
126   void TierUpTick();
127   void MarkTierUpForNextExec();
128 
129   bool ShouldProduceBytecode();
130   inline bool HasCompiledCode() const;
131   inline void DiscardCompiledCodeForSerialization();
132 
TypeSupportsCaptures(Type t)133   static constexpr bool TypeSupportsCaptures(Type t) {
134     return t == IRREGEXP || t == EXPERIMENTAL;
135   }
136 
137   // Each capture (including the match itself) needs two registers.
RegistersForCaptureCount(int count)138   static constexpr int RegistersForCaptureCount(int count) {
139     return (count + 1) * 2;
140   }
141 
code_index(bool is_latin1)142   static constexpr int code_index(bool is_latin1) {
143     return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
144   }
145 
bytecode_index(bool is_latin1)146   static constexpr int bytecode_index(bool is_latin1) {
147     return is_latin1 ? kIrregexpLatin1BytecodeIndex
148                      : kIrregexpUC16BytecodeIndex;
149   }
150 
151   // Dispatched behavior.
152   DECL_PRINTER(JSRegExp)
153   DECL_VERIFIER(JSRegExp)
154 
155   /* This is already an in-object field. */
156   // TODO(v8:8944): improve handling of in-object fields
157   static constexpr int kLastIndexOffset = kHeaderSize;
158 
159   // The initial value of the last_index field on a new JSRegExp instance.
160   static constexpr int kInitialLastIndexValue = 0;
161 
162   // Indices in the data array.
163   static constexpr int kTagIndex = 0;
164   static constexpr int kSourceIndex = kTagIndex + 1;
165   static constexpr int kFlagsIndex = kSourceIndex + 1;
166   static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1;
167   static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex;
168 
169   // The data fields are used in different ways depending on the
170   // value of the tag.
171   // Atom regexps (literal strings).
172   static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex;
173   static constexpr int kAtomDataSize = kAtomPatternIndex + 1;
174 
175   // A Code object or a Smi marker value equal to kUninitializedValue.
176   static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex;
177   static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1;
178   // A ByteArray object or a Smi marker value equal to kUninitializedValue.
179   static constexpr int kIrregexpLatin1BytecodeIndex =
180       kIrregexpUC16CodeIndex + 1;
181   static constexpr int kIrregexpUC16BytecodeIndex =
182       kIrregexpLatin1BytecodeIndex + 1;
183   // Maximal number of registers used by either Latin1 or UC16.
184   // Only used to check that there is enough stack space
185   static constexpr int kIrregexpMaxRegisterCountIndex =
186       kIrregexpUC16BytecodeIndex + 1;
187   // Number of captures in the compiled regexp.
188   static constexpr int kIrregexpCaptureCountIndex =
189       kIrregexpMaxRegisterCountIndex + 1;
190   // Maps names of named capture groups (at indices 2i) to their corresponding
191   // (1-based) capture group indices (at indices 2i + 1).
192   static constexpr int kIrregexpCaptureNameMapIndex =
193       kIrregexpCaptureCountIndex + 1;
194   // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
195   // decremented on each execution of the bytecode, so that the tier-up
196   // happens once the ticks reach zero.
197   // This value is ignored if the regexp-tier-up flag isn't turned on.
198   static constexpr int kIrregexpTicksUntilTierUpIndex =
199       kIrregexpCaptureNameMapIndex + 1;
200   // A smi containing either the backtracking limit or kNoBacktrackLimit.
201   // TODO(jgruber): If needed, this limit could be packed into other fields
202   // above to save space.
203   static constexpr int kIrregexpBacktrackLimit =
204       kIrregexpTicksUntilTierUpIndex + 1;
205   static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1;
206 
207   // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
208   // to the format of an IRREGEXP data array, with most fields set to some
209   // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
210   // regexps take the same code path in `RegExpExecInternal`, which reads off
211   // various fields from the data array. `RegExpExecInternal` should probably
212   // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
213   // all the IRREGEXP only fields.
214   static constexpr int kExperimentalDataSize = kIrregexpDataSize;
215 
216   // In-object fields.
217   static constexpr int kLastIndexFieldIndex = 0;
218   static constexpr int kInObjectFieldCount = 1;
219 
220   // The actual object size including in-object fields.
Size()221   static constexpr int Size() {
222     return kHeaderSize + kInObjectFieldCount * kTaggedSize;
223   }
224 
225   // Descriptor array index to important methods in the prototype.
226   static constexpr int kExecFunctionDescriptorIndex = 1;
227   static constexpr int kSymbolMatchFunctionDescriptorIndex = 14;
228   static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 15;
229   static constexpr int kSymbolReplaceFunctionDescriptorIndex = 16;
230   static constexpr int kSymbolSearchFunctionDescriptorIndex = 17;
231   static constexpr int kSymbolSplitFunctionDescriptorIndex = 18;
232 
233   // The uninitialized value for a regexp code object.
234   static constexpr int kUninitializedValue = -1;
235 
236   // If the backtrack limit is set to this marker value, no limit is applied.
237   static constexpr uint32_t kNoBacktrackLimit = 0;
238 
239   // The heuristic value for the length of the subject string for which we
240   // tier-up to the compiler immediately, instead of using the interpreter.
241   static constexpr int kTierUpForSubjectLengthValue = 1000;
242 
243   // Maximum number of captures allowed.
244   static constexpr int kMaxCaptures = 1 << 16;
245 
246  private:
247   inline Object DataAt(int index) const;
248   inline void SetDataAt(int index, Object value);
249 
250   TQ_OBJECT_CONSTRUCTORS(JSRegExp)
251 };
252 
// Defines the operators for the JSRegExp::Flags type (must appear exactly
// once, after the class definition).
DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)

255 // JSRegExpResult is just a JSArray with a specific initial map.
256 // This initial map adds in-object properties for "index" and "input"
257 // properties, as assigned by RegExp.prototype.exec, which allows
258 // faster creation of RegExp exec results.
259 // This class just holds constants used when creating the result.
260 // After creation the result must be treated as a JSArray in all regards.
261 class JSRegExpResult
262     : public TorqueGeneratedJSRegExpResult<JSRegExpResult, JSArray> {
263  public:
264   // TODO(joshualitt): We would like to add printers and verifiers to
265   // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
266   // instance type as JSArray.
267 
268   // Indices of in-object properties.
269   static constexpr int kIndexIndex = 0;
270   static constexpr int kInputIndex = 1;
271   static constexpr int kGroupsIndex = 2;
272 
273   // Private internal only fields.
274   static constexpr int kNamesIndex = 3;
275   static constexpr int kRegExpInputIndex = 4;
276   static constexpr int kRegExpLastIndex = 5;
277   static constexpr int kInObjectPropertyCount = 6;
278 
279   static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX;
280 
281   TQ_OBJECT_CONSTRUCTORS(JSRegExpResult)
282 };
283 
284 class JSRegExpResultWithIndices
285     : public TorqueGeneratedJSRegExpResultWithIndices<JSRegExpResultWithIndices,
286                                                       JSRegExpResult> {
287  public:
288   static_assert(
289       JSRegExpResult::kInObjectPropertyCount == 6,
290       "JSRegExpResultWithIndices must be a subclass of JSRegExpResult");
291   static constexpr int kIndicesIndex = 6;
292   static constexpr int kInObjectPropertyCount = 7;
293 
294   TQ_OBJECT_CONSTRUCTORS(JSRegExpResultWithIndices)
295 };
296 
297 // JSRegExpResultIndices is just a JSArray with a specific initial map.
298 // This initial map adds in-object properties for "group"
299 // properties, as assigned by RegExp.prototype.exec, which allows
300 // faster creation of RegExp exec results.
301 // This class just holds constants used when creating the result.
302 // After creation the result must be treated as a JSArray in all regards.
303 class JSRegExpResultIndices
304     : public TorqueGeneratedJSRegExpResultIndices<JSRegExpResultIndices,
305                                                   JSArray> {
306  public:
307   static Handle<JSRegExpResultIndices> BuildIndices(
308       Isolate* isolate, Handle<RegExpMatchInfo> match_info,
309       Handle<Object> maybe_names);
310 
311   // Indices of in-object properties.
312   static constexpr int kGroupsIndex = 0;
313   static constexpr int kInObjectPropertyCount = 1;
314 
315   // Descriptor index of groups.
316   static constexpr int kGroupsDescriptorIndex = 1;
317 
318   TQ_OBJECT_CONSTRUCTORS(JSRegExpResultIndices)
319 };
320 
321 }  // namespace internal
322 }  // namespace v8
323 
324 #include "src/objects/object-macros-undef.h"
325 
326 #endif  // V8_OBJECTS_JS_REGEXP_H_
327