// Copyright 2017 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_OBJECTS_JS_REGEXP_H_ #define V8_OBJECTS_JS_REGEXP_H_ #include "include/v8-regexp.h" #include "src/objects/contexts.h" #include "src/objects/js-array.h" #include "src/regexp/regexp-flags.h" #include "torque-generated/bit-fields.h" // Has to be the last include (doesn't have include guards): #include "src/objects/object-macros.h" namespace v8 { namespace internal { #include "torque-generated/src/objects/js-regexp-tq.inc" // Regular expressions // The regular expression holds a single reference to a FixedArray in // the kDataOffset field. // The FixedArray contains the following data: // - tag : type of regexp implementation (not compiled yet, atom or irregexp) // - reference to the original source string // - reference to the original flag string // If it is an atom regexp // - a reference to a literal string to search for // If it is an irregexp regexp: // - a reference to code for Latin1 inputs (bytecode or compiled), or a smi // used for tracking the last usage (used for regexp code flushing). // - a reference to code for UC16 inputs (bytecode or compiled), or a smi // used for tracking the last usage (used for regexp code flushing). // - max number of registers used by irregexp implementations. // - number of capture registers (output values) of the regexp. class JSRegExp : public TorqueGeneratedJSRegExp { public: enum Type { NOT_COMPILED, // Initial value. No data array has been set yet. ATOM, // A simple string match. IRREGEXP, // Compiled with Irregexp (code or bytecode). EXPERIMENTAL, // Compiled to use the experimental linear time engine. }; DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS() V8_EXPORT_PRIVATE static MaybeHandle New( Isolate* isolate, Handle source, Flags flags, uint32_t backtrack_limit = kNoBacktrackLimit); static MaybeHandle Initialize( Handle regexp, Handle source, Flags flags, uint32_t backtrack_limit = kNoBacktrackLimit); static MaybeHandle Initialize(Handle regexp, Handle source, Handle flags_string); DECL_ACCESSORS(last_index, Object) // Instance fields accessors. inline String source() const; inline Flags flags() const; // Data array field accessors. inline Type type_tag() const; inline String atom_pattern() const; // This could be a Smi kUninitializedValue or Code. V8_EXPORT_PRIVATE Object code(bool is_latin1) const; V8_EXPORT_PRIVATE void set_code(bool is_unicode, Handle code); // This could be a Smi kUninitializedValue or ByteArray. V8_EXPORT_PRIVATE Object bytecode(bool is_latin1) const; // Sets the bytecode as well as initializing trampoline slots to the // RegExpInterpreterTrampoline. void set_bytecode_and_trampoline(Isolate* isolate, Handle bytecode); inline int max_register_count() const; // Number of captures (without the match itself). inline int capture_count() const; inline Object capture_name_map(); inline void set_capture_name_map(Handle capture_name_map); uint32_t backtrack_limit() const; static constexpr Flag AsJSRegExpFlag(RegExpFlag f) { return static_cast(f); } static constexpr Flags AsJSRegExpFlags(RegExpFlags f) { return Flags{static_cast(f)}; } static constexpr RegExpFlags AsRegExpFlags(Flags f) { return RegExpFlags{static_cast(f)}; } static base::Optional FlagFromChar(char c) { base::Optional f = TryRegExpFlagFromChar(c); if (!f.has_value()) return f; if (f.value() == RegExpFlag::kLinear && !FLAG_enable_experimental_regexp_engine) { return {}; } return f; } STATIC_ASSERT(static_cast(kNone) == v8::RegExp::kNone); #define V(_, Camel, ...) \ STATIC_ASSERT(static_cast(k##Camel) == v8::RegExp::k##Camel); \ STATIC_ASSERT(static_cast(k##Camel) == \ static_cast(RegExpFlag::k##Camel)); REGEXP_FLAG_LIST(V) #undef V STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount); STATIC_ASSERT(kFlagCount == kRegExpFlagCount); static base::Optional FlagsFromString(Isolate* isolate, Handle flags); V8_EXPORT_PRIVATE static Handle StringFromFlags(Isolate* isolate, Flags flags); inline String EscapedPattern(); bool CanTierUp(); bool MarkedForTierUp(); void ResetLastTierUpTick(); void TierUpTick(); void MarkTierUpForNextExec(); bool ShouldProduceBytecode(); inline bool HasCompiledCode() const; inline void DiscardCompiledCodeForSerialization(); static constexpr bool TypeSupportsCaptures(Type t) { return t == IRREGEXP || t == EXPERIMENTAL; } // Each capture (including the match itself) needs two registers. static constexpr int RegistersForCaptureCount(int count) { return (count + 1) * 2; } static constexpr int code_index(bool is_latin1) { return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex; } static constexpr int bytecode_index(bool is_latin1) { return is_latin1 ? kIrregexpLatin1BytecodeIndex : kIrregexpUC16BytecodeIndex; } // Dispatched behavior. DECL_PRINTER(JSRegExp) DECL_VERIFIER(JSRegExp) /* This is already an in-object field. */ // TODO(v8:8944): improve handling of in-object fields static constexpr int kLastIndexOffset = kHeaderSize; // The initial value of the last_index field on a new JSRegExp instance. static constexpr int kInitialLastIndexValue = 0; // Indices in the data array. static constexpr int kTagIndex = 0; static constexpr int kSourceIndex = kTagIndex + 1; static constexpr int kFlagsIndex = kSourceIndex + 1; static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1; static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex; // The data fields are used in different ways depending on the // value of the tag. // Atom regexps (literal strings). static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex; static constexpr int kAtomDataSize = kAtomPatternIndex + 1; // A Code object or a Smi marker value equal to kUninitializedValue. static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex; static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1; // A ByteArray object or a Smi marker value equal to kUninitializedValue. static constexpr int kIrregexpLatin1BytecodeIndex = kIrregexpUC16CodeIndex + 1; static constexpr int kIrregexpUC16BytecodeIndex = kIrregexpLatin1BytecodeIndex + 1; // Maximal number of registers used by either Latin1 or UC16. // Only used to check that there is enough stack space static constexpr int kIrregexpMaxRegisterCountIndex = kIrregexpUC16BytecodeIndex + 1; // Number of captures in the compiled regexp. static constexpr int kIrregexpCaptureCountIndex = kIrregexpMaxRegisterCountIndex + 1; // Maps names of named capture groups (at indices 2i) to their corresponding // (1-based) capture group indices (at indices 2i + 1). static constexpr int kIrregexpCaptureNameMapIndex = kIrregexpCaptureCountIndex + 1; // Tier-up ticks are set to the value of the tier-up ticks flag. The value is // decremented on each execution of the bytecode, so that the tier-up // happens once the ticks reach zero. // This value is ignored if the regexp-tier-up flag isn't turned on. static constexpr int kIrregexpTicksUntilTierUpIndex = kIrregexpCaptureNameMapIndex + 1; // A smi containing either the backtracking limit or kNoBacktrackLimit. // TODO(jgruber): If needed, this limit could be packed into other fields // above to save space. static constexpr int kIrregexpBacktrackLimit = kIrregexpTicksUntilTierUpIndex + 1; static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1; // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms // to the format of an IRREGEXP data array, with most fields set to some // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP // regexps take the same code path in `RegExpExecInternal`, which reads off // various fields from the data array. `RegExpExecInternal` should probably // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of // all the IRREGEXP only fields. static constexpr int kExperimentalDataSize = kIrregexpDataSize; // In-object fields. static constexpr int kLastIndexFieldIndex = 0; static constexpr int kInObjectFieldCount = 1; // The actual object size including in-object fields. static constexpr int Size() { return kHeaderSize + kInObjectFieldCount * kTaggedSize; } // Descriptor array index to important methods in the prototype. static constexpr int kExecFunctionDescriptorIndex = 1; static constexpr int kSymbolMatchFunctionDescriptorIndex = 14; static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 15; static constexpr int kSymbolReplaceFunctionDescriptorIndex = 16; static constexpr int kSymbolSearchFunctionDescriptorIndex = 17; static constexpr int kSymbolSplitFunctionDescriptorIndex = 18; // The uninitialized value for a regexp code object. static constexpr int kUninitializedValue = -1; // If the backtrack limit is set to this marker value, no limit is applied. static constexpr uint32_t kNoBacktrackLimit = 0; // The heuristic value for the length of the subject string for which we // tier-up to the compiler immediately, instead of using the interpreter. static constexpr int kTierUpForSubjectLengthValue = 1000; // Maximum number of captures allowed. static constexpr int kMaxCaptures = 1 << 16; private: inline Object DataAt(int index) const; inline void SetDataAt(int index, Object value); TQ_OBJECT_CONSTRUCTORS(JSRegExp) }; DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags) // JSRegExpResult is just a JSArray with a specific initial map. // This initial map adds in-object properties for "index" and "input" // properties, as assigned by RegExp.prototype.exec, which allows // faster creation of RegExp exec results. // This class just holds constants used when creating the result. // After creation the result must be treated as a JSArray in all regards. class JSRegExpResult : public TorqueGeneratedJSRegExpResult { public: // TODO(joshualitt): We would like to add printers and verifiers to // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same // instance type as JSArray. // Indices of in-object properties. static constexpr int kIndexIndex = 0; static constexpr int kInputIndex = 1; static constexpr int kGroupsIndex = 2; // Private internal only fields. static constexpr int kNamesIndex = 3; static constexpr int kRegExpInputIndex = 4; static constexpr int kRegExpLastIndex = 5; static constexpr int kInObjectPropertyCount = 6; static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX; TQ_OBJECT_CONSTRUCTORS(JSRegExpResult) }; class JSRegExpResultWithIndices : public TorqueGeneratedJSRegExpResultWithIndices { public: static_assert( JSRegExpResult::kInObjectPropertyCount == 6, "JSRegExpResultWithIndices must be a subclass of JSRegExpResult"); static constexpr int kIndicesIndex = 6; static constexpr int kInObjectPropertyCount = 7; TQ_OBJECT_CONSTRUCTORS(JSRegExpResultWithIndices) }; // JSRegExpResultIndices is just a JSArray with a specific initial map. // This initial map adds in-object properties for "group" // properties, as assigned by RegExp.prototype.exec, which allows // faster creation of RegExp exec results. // This class just holds constants used when creating the result. // After creation the result must be treated as a JSArray in all regards. class JSRegExpResultIndices : public TorqueGeneratedJSRegExpResultIndices { public: static Handle BuildIndices( Isolate* isolate, Handle match_info, Handle maybe_names); // Indices of in-object properties. static constexpr int kGroupsIndex = 0; static constexpr int kInObjectPropertyCount = 1; // Descriptor index of groups. static constexpr int kGroupsDescriptorIndex = 1; TQ_OBJECT_CONSTRUCTORS(JSRegExpResultIndices) }; } // namespace internal } // namespace v8 #include "src/objects/object-macros-undef.h" #endif // V8_OBJECTS_JS_REGEXP_H_