1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_H_ 6 #define V8_REGEXP_REGEXP_H_ 7 8 #include "src/common/assert-scope.h" 9 #include "src/handles/handles.h" 10 #include "src/regexp/regexp-error.h" 11 #include "src/regexp/regexp-flags.h" 12 #include "src/zone/zone-containers.h" 13 14 namespace v8 { 15 namespace internal { 16 17 class JSRegExp; 18 class RegExpCapture; 19 class RegExpMatchInfo; 20 class RegExpNode; 21 class RegExpTree; 22 23 enum class RegExpCompilationTarget : int { kBytecode, kNative }; 24 25 // TODO(jgruber): Do not expose in regexp.h. 26 // TODO(jgruber): Consider splitting between ParseData and CompileData. 27 struct RegExpCompileData { 28 // The parsed AST as produced by the RegExpParser. 29 RegExpTree* tree = nullptr; 30 31 // The compiled Node graph as produced by RegExpTree::ToNode methods. 32 RegExpNode* node = nullptr; 33 34 // Either the generated code as produced by the compiler or a trampoline 35 // to the interpreter. 36 Handle<Object> code; 37 38 // True, iff the pattern is a 'simple' atom with zero captures. In other 39 // words, the pattern consists of a string with no metacharacters and special 40 // regexp features, and can be implemented as a standard string search. 41 bool simple = true; 42 43 // True, iff the pattern is anchored at the start of the string with '^'. 44 bool contains_anchor = false; 45 46 // Only set if the pattern contains named captures. 47 // Note: the lifetime equals that of the parse/compile zone. 48 ZoneVector<RegExpCapture*>* named_captures = nullptr; 49 50 // The error message. Only used if an error occurred during parsing or 51 // compilation. 52 RegExpError error = RegExpError::kNone; 53 54 // The position at which the error was detected. Only used if an 55 // error occurred. 56 int error_pos = 0; 57 58 // The number of capture groups, without the global capture \0. 59 int capture_count = 0; 60 61 // The number of registers used by the generated code. 62 int register_count = 0; 63 64 // The compilation target (bytecode or native code). 65 RegExpCompilationTarget compilation_target; 66 }; 67 68 class RegExp final : public AllStatic { 69 public: 70 // Whether the irregexp engine generates interpreter bytecode. 71 static bool CanGenerateBytecode(); 72 73 // Verify the given pattern, i.e. check that parsing succeeds. If 74 // verification fails, `regexp_error_out` is set. 75 template <class CharT> 76 static bool VerifySyntax(Zone* zone, uintptr_t stack_limit, 77 const CharT* input, int input_length, 78 RegExpFlags flags, RegExpError* regexp_error_out, 79 const DisallowGarbageCollection& no_gc); 80 81 // Parses the RegExp pattern and prepares the JSRegExp object with 82 // generic data and choice of implementation - as well as what 83 // the implementation wants to store in the data field. 84 // Returns false if compilation fails. 85 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile( 86 Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, 87 RegExpFlags flags, uint32_t backtrack_limit); 88 89 // Ensures that a regexp is fully compiled and ready to be executed on a 90 // subject string. Returns true on success. Return false on failure, and 91 // then an exception will be pending. 92 V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate, 93 Handle<JSRegExp> re, 94 Handle<String> subject); 95 96 enum CallOrigin : int { 97 kFromRuntime = 0, 98 kFromJs = 1, 99 }; 100 101 enum class ExecQuirks { 102 kNone, 103 // Used to work around an issue in the RegExpPrototypeSplit fast path, 104 // which diverges from the spec by not creating a sticky copy of the RegExp 105 // instance and calling `exec` in a loop. If called in this context, we 106 // must not update the last_match_info on a successful match at the subject 107 // string end. See crbug.com/1075514 for more information. 108 kTreatMatchAtEndAsFailure, 109 }; 110 111 // See ECMA-262 section 15.10.6.2. 112 // This function calls the garbage collector if necessary. 113 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( 114 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 115 int index, Handle<RegExpMatchInfo> last_match_info, 116 ExecQuirks exec_quirks = ExecQuirks::kNone); 117 118 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> 119 ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp, 120 Handle<String> subject, int index, 121 Handle<RegExpMatchInfo> last_match_info, 122 ExecQuirks exec_quirks = ExecQuirks::kNone); 123 124 // Integral return values used throughout regexp code layers. 125 static constexpr int kInternalRegExpFailure = 0; 126 static constexpr int kInternalRegExpSuccess = 1; 127 static constexpr int kInternalRegExpException = -1; 128 static constexpr int kInternalRegExpRetry = -2; 129 static constexpr int kInternalRegExpFallbackToExperimental = -3; 130 static constexpr int kInternalRegExpSmallestResult = -3; 131 132 enum IrregexpResult : int32_t { 133 RE_FAILURE = kInternalRegExpFailure, 134 RE_SUCCESS = kInternalRegExpSuccess, 135 RE_EXCEPTION = kInternalRegExpException, 136 RE_RETRY = kInternalRegExpRetry, 137 RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental, 138 }; 139 140 // Set last match info. If match is nullptr, then setting captures is 141 // omitted. 142 static Handle<RegExpMatchInfo> SetLastMatchInfo( 143 Isolate* isolate, Handle<RegExpMatchInfo> last_match_info, 144 Handle<String> subject, int capture_count, int32_t* match); 145 146 V8_EXPORT_PRIVATE static bool CompileForTesting( 147 Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags, 148 Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte); 149 150 V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label, 151 RegExpNode* node); 152 153 static const int kRegExpTooLargeToOptimize = 20 * KB; 154 155 V8_WARN_UNUSED_RESULT 156 static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate, 157 Handle<JSRegExp> re, 158 Handle<String> pattern, 159 RegExpError error); 160 static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, 161 RegExpError error_text); 162 163 static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp); 164 165 static Handle<FixedArray> CreateCaptureNameMap( 166 Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures); 167 }; 168 169 // Uses a special global mode of irregexp-generated code to perform a global 170 // search and return multiple results at once. As such, this is essentially an 171 // iterator over multiple results (retrieved batch-wise in advance). 172 class RegExpGlobalCache final { 173 public: 174 RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject, 175 Isolate* isolate); 176 177 ~RegExpGlobalCache(); 178 179 // Fetch the next entry in the cache for global regexp match results. 180 // This does not set the last match info. Upon failure, nullptr is 181 // returned. The cause can be checked with Result(). The previous result is 182 // still in available in memory when a failure happens. 183 int32_t* FetchNext(); 184 185 int32_t* LastSuccessfulMatch(); 186 HasException()187 bool HasException() { return num_matches_ < 0; } 188 189 private: 190 int AdvanceZeroLength(int last_index); 191 192 int num_matches_; 193 int max_matches_; 194 int current_match_index_; 195 int registers_per_match_; 196 // Pointer to the last set of captures. 197 int32_t* register_array_; 198 int register_array_size_; 199 Handle<JSRegExp> regexp_; 200 Handle<String> subject_; 201 Isolate* isolate_; 202 }; 203 204 // Caches results for specific regexp queries on the isolate. At the time of 205 // writing, this is used during global calls to RegExp.prototype.exec and 206 // @@split. 207 class RegExpResultsCache final : public AllStatic { 208 public: 209 enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS }; 210 211 // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi. 212 // On success, the returned result is guaranteed to be a COW-array. 213 static Object Lookup(Heap* heap, String key_string, Object key_pattern, 214 FixedArray* last_match_out, ResultsCacheType type); 215 // Attempt to add value_array to the cache specified by type. On success, 216 // value_array is turned into a COW-array. 217 static void Enter(Isolate* isolate, Handle<String> key_string, 218 Handle<Object> key_pattern, Handle<FixedArray> value_array, 219 Handle<FixedArray> last_match_cache, ResultsCacheType type); 220 static void Clear(FixedArray cache); 221 222 static constexpr int kRegExpResultsCacheSize = 0x100; 223 224 private: 225 static constexpr int kStringOffset = 0; 226 static constexpr int kPatternOffset = 1; 227 static constexpr int kArrayOffset = 2; 228 static constexpr int kLastMatchOffset = 3; 229 static constexpr int kArrayEntriesPerCacheEntry = 4; 230 }; 231 232 } // namespace internal 233 } // namespace v8 234 235 #endif // V8_REGEXP_REGEXP_H_ 236