1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H 17 #define ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H 18 19 #include "ecmascript/base/builtins_base.h" 20 #include "ecmascript/builtins/builtins_string.h" 21 #include "ecmascript/ecma_runtime_call_info.h" 22 #include "ecmascript/js_tagged_value.h" 23 #include "ecmascript/regexp/regexp_executor.h" 24 #include "ecmascript/regexp/regexp_parser.h" 25 26 namespace panda::ecmascript::builtins { 27 class BuiltinsRegExp : public base::BuiltinsBase { 28 public: 29 // 21.2.3.1 RegExp ( pattern, flags ) 30 static JSTaggedValue RegExpConstructor(EcmaRuntimeCallInfo *argv); 31 32 // prototype 33 // 21.2.5.2 RegExp.prototype.exec ( string ) 34 static JSTaggedValue Exec(EcmaRuntimeCallInfo *argv); 35 // 21.2.5.13 RegExp.prototype.test( S ) 36 static JSTaggedValue Test(EcmaRuntimeCallInfo *argv); 37 // 21.2.5.14 RegExp.prototype.toString ( ) 38 static JSTaggedValue ToString(EcmaRuntimeCallInfo *argv); 39 // 21.2.5.3 get RegExp.prototype.flags 40 static JSTaggedValue GetFlags(EcmaRuntimeCallInfo *argv); 41 // 21.2.5.4 get RegExp.prototype.global 42 static JSTaggedValue GetGlobal(EcmaRuntimeCallInfo *argv); 43 // 21.2.5.5 get RegExp.prototype.ignoreCase 44 static JSTaggedValue GetIgnoreCase(EcmaRuntimeCallInfo *argv); 45 // 21.2.5.7 get RegExp.prototype.multiline 46 static JSTaggedValue GetMultiline(EcmaRuntimeCallInfo *argv); 47 static JSTaggedValue GetDotAll(EcmaRuntimeCallInfo *argv); 48 // 21.2.5.10 get RegExp.prototype.source 49 static JSTaggedValue GetSource(EcmaRuntimeCallInfo *argv); 50 // 21.2.5.12 get RegExp.prototype.sticky 51 static JSTaggedValue GetSticky(EcmaRuntimeCallInfo *argv); 52 // 21.2.5.15 get RegExp.prototype.unicode 53 static JSTaggedValue GetUnicode(EcmaRuntimeCallInfo *argv); 54 // 21.2.4.2 get RegExp [ @@species ] 55 static JSTaggedValue GetSpecies(EcmaRuntimeCallInfo *argv); 56 // 21.2.5.6 RegExp.prototype [ @@match ] ( string ) 57 static JSTaggedValue Match(EcmaRuntimeCallInfo *argv); 58 // 22.2.5.8 RegExp.prototype [ @@matchAll ] ( string ) 59 static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv); 60 // 21.2.5.8 RegExp.prototype [ @@replace ] ( string, replaceValue ) 61 static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv); 62 // 21.2.5.9 RegExp.prototype [ @@search ] ( string ) 63 static JSTaggedValue Search(EcmaRuntimeCallInfo *argv); 64 // 21.2.5.11 RegExp.prototype [ @@split ] ( string, limit ) 65 static JSTaggedValue Split(EcmaRuntimeCallInfo *argv); 66 // 21.2.3.2.3 Runtime Semantics: RegExpCreate ( P, F ) 67 static JSTaggedValue RegExpCreate(JSThread *thread, const JSHandle<JSTaggedValue> &pattern, 68 const JSHandle<JSTaggedValue> &flags); 69 static JSTaggedValue FlagsBitsToString(JSThread *thread, uint8_t flags); 70 // 21.2.5.2.1 Runtime Semantics: RegExpExec ( R, S ) 71 static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle<JSTaggedValue> ®exp, 72 const JSHandle<JSTaggedValue> &inputString, bool useCache); 73 // 21.2.5.2.3 AdvanceStringIndex ( S, index, unicode ) 74 static int64_t AdvanceStringIndex(const JSHandle<JSTaggedValue> &inputStr, int64_t index, 75 bool unicode); 76 // 22.2.6.6 get RegExp.prototype.hasIndices 77 static JSTaggedValue GetHasIndices(EcmaRuntimeCallInfo *argv); 78 79 static JSTaggedValue ReplaceInternal(JSThread *thread, 80 JSHandle<JSTaggedValue> thisObj, 81 JSHandle<JSTaggedValue> string, 82 JSHandle<JSTaggedValue> inputReplaceValue); 83 static JSTaggedValue GetAllFlagsInternal(JSThread *thread, JSHandle<JSTaggedValue> &thisObj); 84 static JSTaggedValue IsValidRegularExpression(JSThread *thread, JSHandle<JSTaggedValue> &thisObj); 85 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 86 #define SET_GET_CAPTURE(index) \ 87 static JSTaggedValue GetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj); \ 88 static bool SetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj, \ 89 const JSHandle<JSTaggedValue> &value, bool mayThrow); 90 91 SET_GET_CAPTURE(1) 92 SET_GET_CAPTURE(2) 93 SET_GET_CAPTURE(3) 94 SET_GET_CAPTURE(4) 95 SET_GET_CAPTURE(5) 96 SET_GET_CAPTURE(6) 97 SET_GET_CAPTURE(7) 98 SET_GET_CAPTURE(8) 99 SET_GET_CAPTURE(9) 100 #undef SET_GET_CAPTURE 101 102 private: 103 static constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000; 104 static constexpr uint32_t MAX_SPLIT_LIMIT = 0xFFFFFFFFu; 105 static constexpr uint32_t REGEXP_GLOBAL_ARRAY_SIZE = 9; 106 static constexpr uint32_t LAST_INDEX_OFFSET = 0; 107 static constexpr uint32_t MAX_REGEXP_STRING_COUNT = 1U << 16; 108 109 static bool Matcher(JSThread *thread, const JSHandle<JSTaggedValue> ®exp, 110 const uint8_t *buffer, size_t length, int32_t lastindex, bool isUtf16); 111 112 static JSTaggedValue GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedValue> &obj, 113 const JSHandle<JSTaggedValue> &constructor, const uint8_t mask); 114 // 21.2.5.2.2 Runtime Semantics: RegExpBuiltinExec ( R, S ) 115 static JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> ®exp, 116 const JSHandle<JSTaggedValue> &inputStr, bool useCache); 117 118 // 21.2.3.2.1 Runtime Semantics: RegExpAlloc ( newTarget ) 119 static JSTaggedValue RegExpAlloc(JSThread *thread, const JSHandle<JSTaggedValue> &newTarget); 120 121 static uint32_t UpdateExpressionFlags(JSThread *thread, const CString &checkStr); 122 123 // 21.2.3.2.2 Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) 124 static JSTaggedValue RegExpInitialize(JSThread *thread, const JSHandle<JSTaggedValue> &obj, 125 const JSHandle<JSTaggedValue> &pattern, const JSHandle<JSTaggedValue> &flags); 126 // 21.2.3.2.4 Runtime Semantics: EscapeRegExpPattern ( P, F ) 127 static EcmaString *EscapeRegExpPattern(JSThread *thread, const JSHandle<JSTaggedValue> &src, 128 const JSHandle<JSTaggedValue> &flags); 129 static JSTaggedValue RegExpReplaceFast(JSThread *thread, JSHandle<JSTaggedValue> ®exp, 130 JSHandle<EcmaString> inputString, uint32_t inputLength); 131 static JSTaggedValue RegExpTestFast(JSThread *thread, JSHandle<JSTaggedValue> ®exp, 132 const JSHandle<JSTaggedValue> &inputString, bool useCache); 133 static JSTaggedValue RegExpExecForTestFast(JSThread *thread, JSHandle<JSTaggedValue> ®exp, 134 const JSHandle<JSTaggedValue> &inputStr, bool useCache); 135 static bool IsFastRegExp(JSThread *thread, JSHandle<JSTaggedValue> ®exp); 136 // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups ) 137 static JSHandle<JSTaggedValue> MakeMatchIndicesIndexPairArray(JSThread* thread, 138 const std::vector<std::pair<JSTaggedValue, JSTaggedValue>>& indices, 139 const std::vector<JSHandle<JSTaggedValue>>& groupNames, bool hasGroups); 140 static bool RegExpExecInternal(JSThread *thread, const JSHandle<JSTaggedValue> ®exp, 141 JSHandle<EcmaString> &inputString, int32_t lastIndex); 142 static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> ®exp, 143 JSHandle<EcmaString> string, uint32_t limit, bool useCache); 144 }; 145 146 class RegExpExecResultCache : public TaggedArray { 147 public: 148 enum CacheType { 149 REPLACE_TYPE, 150 SPLIT_TYPE, 151 MATCH_TYPE, 152 EXEC_TYPE, 153 INTERMEDIATE_REPLACE_TYPE, 154 TEST_TYPE 155 }; Cast(TaggedObject * object)156 static RegExpExecResultCache *Cast(TaggedObject *object) 157 { 158 return reinterpret_cast<RegExpExecResultCache *>(object); 159 } 160 static JSTaggedValue CreateCacheTable(JSThread *thread); 161 // extend as an additional parameter to judge cached 162 JSTaggedValue FindCachedResult(JSThread *thread, const JSHandle<JSTaggedValue> &patten, 163 const JSHandle<JSTaggedValue> &flags, const JSHandle<JSTaggedValue> &input, 164 CacheType type, const JSHandle<JSTaggedValue> ®exp, 165 JSTaggedValue lastIndexInput, JSTaggedValue extend = JSTaggedValue::Undefined(), 166 bool isIntermediateResult = false); 167 // extend as an additional parameter to judge cached 168 static void AddResultInCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache, 169 const JSHandle<JSTaggedValue> &patten, const JSHandle<JSTaggedValue> &flags, 170 const JSHandle<JSTaggedValue> &input, const JSHandle<JSTaggedValue> &resultArray, 171 CacheType type, uint32_t lastIndexInput, uint32_t lastIndex, 172 JSTaggedValue extend = JSTaggedValue::Undefined(), 173 bool isIntermediateResult = false); 174 175 static void GrowRegexpCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache); 176 177 void ClearEntry(JSThread *thread, int entry); 178 void SetEntry(JSThread *thread, int entry, JSTaggedValue &patten, JSTaggedValue &flags, JSTaggedValue &input, 179 JSTaggedValue &lastIndexInputValue, JSTaggedValue &lastIndexValue, JSTaggedValue &extendValue); 180 void UpdateResultArray(JSThread *thread, int entry, JSTaggedValue resultArray, CacheType type); 181 bool Match(int entry, JSTaggedValue &pattenStr, JSTaggedValue &flagsStr, JSTaggedValue &inputStr, 182 JSTaggedValue &lastIndexInputValue, JSTaggedValue &extend, CacheType type); SetHitCount(JSThread * thread,int hitCount)183 inline void SetHitCount(JSThread *thread, int hitCount) 184 { 185 Set(thread, CACHE_HIT_COUNT_INDEX, JSTaggedValue(hitCount)); 186 } 187 GetHitCount()188 inline int GetHitCount() 189 { 190 return Get(CACHE_HIT_COUNT_INDEX).GetInt(); 191 } 192 SetCacheCount(JSThread * thread,int hitCount)193 inline void SetCacheCount(JSThread *thread, int hitCount) 194 { 195 Set(thread, CACHE_COUNT_INDEX, JSTaggedValue(hitCount)); 196 } 197 GetCacheCount()198 inline int GetCacheCount() 199 { 200 return Get(CACHE_COUNT_INDEX).GetInt(); 201 } 202 Print()203 void Print() 204 { 205 std::cout << "cache count: " << GetCacheCount() << std::endl; 206 std::cout << "cache hit count: " << GetHitCount() << std::endl; 207 } 208 SetLargeStrCount(JSThread * thread,uint32_t newCount)209 inline void SetLargeStrCount(JSThread *thread, uint32_t newCount) 210 { 211 Set(thread, LARGE_STRING_COUNT_INDEX, JSTaggedValue(newCount)); 212 } 213 SetConflictCount(JSThread * thread,uint32_t newCount)214 inline void SetConflictCount(JSThread *thread, uint32_t newCount) 215 { 216 Set(thread, CONFLICT_COUNT_INDEX, JSTaggedValue(newCount)); 217 } 218 SetStrLenThreshold(JSThread * thread,uint32_t newThreshold)219 inline void SetStrLenThreshold(JSThread *thread, uint32_t newThreshold) 220 { 221 Set(thread, STRING_LENGTH_THRESHOLD_INDEX, JSTaggedValue(newThreshold)); 222 } 223 GetLargeStrCount()224 inline uint32_t GetLargeStrCount() 225 { 226 return Get(LARGE_STRING_COUNT_INDEX).GetInt(); 227 } 228 GetConflictCount()229 inline uint32_t GetConflictCount() 230 { 231 return Get(CONFLICT_COUNT_INDEX).GetInt(); 232 } 233 GetStrLenThreshold()234 inline uint32_t GetStrLenThreshold() 235 { 236 return Get(STRING_LENGTH_THRESHOLD_INDEX).GetInt(); 237 } 238 SetCacheLength(JSThread * thread,int length)239 inline void SetCacheLength(JSThread *thread, int length) 240 { 241 Set(thread, CACHE_LENGTH_INDEX, JSTaggedValue(length)); 242 } 243 GetCacheLength()244 inline int GetCacheLength() 245 { 246 return Get(CACHE_LENGTH_INDEX).GetInt(); 247 } 248 249 private: 250 static constexpr int DEFAULT_LARGE_STRING_COUNT = 10; 251 static constexpr int DEFAULT_CONFLICT_COUNT = 100; 252 static constexpr int INITIAL_CACHE_NUMBER = 0x10; 253 static constexpr int DEFAULT_CACHE_NUMBER = 0x1000; 254 static constexpr int CACHE_COUNT_INDEX = 0; 255 static constexpr int CACHE_HIT_COUNT_INDEX = 1; 256 static constexpr int LARGE_STRING_COUNT_INDEX = 2; 257 static constexpr int CONFLICT_COUNT_INDEX = 3; 258 static constexpr int STRING_LENGTH_THRESHOLD_INDEX = 4; 259 static constexpr int CACHE_LENGTH_INDEX = 5; 260 static constexpr int CACHE_TABLE_HEADER_SIZE = 6; 261 static constexpr int PATTERN_INDEX = 0; 262 static constexpr int FLAG_INDEX = 1; 263 static constexpr int INPUT_STRING_INDEX = 2; 264 static constexpr int LAST_INDEX_INPUT_INDEX = 3; 265 static constexpr int LAST_INDEX_INDEX = 4; 266 static constexpr int RESULT_REPLACE_INDEX = 5; 267 static constexpr int RESULT_SPLIT_INDEX = 6; 268 static constexpr int RESULT_MATCH_INDEX = 7; 269 static constexpr int RESULT_EXEC_INDEX = 8; 270 static constexpr int RESULT_INTERMEDIATE_REPLACE_INDEX = 9; 271 static constexpr int RESULT_TEST_INDEX = 10; 272 // Extend index used for saving an additional parameter to judge cached 273 static constexpr int EXTEND_INDEX = 11; 274 static constexpr int ENTRY_SIZE = 12; 275 }; 276 277 class RegExpGlobalResult : public TaggedArray { 278 public: Cast(TaggedObject * object)279 static RegExpGlobalResult *Cast(TaggedObject *object) 280 { 281 return reinterpret_cast<RegExpGlobalResult *>(object); 282 } 283 static JSTaggedValue CreateGlobalResultTable(JSThread *thread); 284 SetCapture(JSThread * thread,int index,JSTaggedValue value)285 void SetCapture(JSThread *thread, int index, JSTaggedValue value) 286 { 287 ASSERT(CAPTURE_START_INDEX + index - 1 < GLOBAL_TABLE_SIZE); 288 Set(thread, CAPTURE_START_INDEX + index - 1, value); 289 } 290 ResetDollar(JSThread * thread)291 void ResetDollar(JSThread *thread) 292 { 293 for (uint32_t i = 0; i < DOLLAR_NUMBER; i++) { 294 Set(thread, CAPTURE_START_INDEX + i, JSTaggedValue::Hole()); 295 } 296 } 297 298 template <int N> GetCapture(JSThread * thread)299 static JSTaggedValue GetCapture(JSThread *thread) 300 { 301 JSHandle<builtins::RegExpGlobalResult> globalTable(thread->GetCurrentEcmaContext()->GetRegExpGlobalResult()); 302 JSTaggedValue res = globalTable->Get(CAPTURE_START_INDEX + N - 1); 303 int captureNum = globalTable->GetTotalCaptureCounts().GetInt(); 304 if (res.IsHole() && (N < captureNum)) { 305 uint32_t startIndex = static_cast<uint32_t>(globalTable->GetStartOfCaptureIndex(N).GetInt()); 306 uint32_t endIndex = static_cast<uint32_t>(globalTable->GetEndOfCaptureIndex(N).GetInt()); 307 uint32_t len = endIndex - startIndex; 308 if (len < 0) { 309 res = JSTaggedValue::Undefined(); 310 } else { 311 res = JSTaggedValue(EcmaStringAccessor::FastSubString(thread->GetEcmaVM(), 312 JSHandle<EcmaString>(thread, EcmaString::Cast(globalTable->GetInputString())), startIndex, len)); 313 } 314 globalTable->Set(thread, CAPTURE_START_INDEX + N - 1, res); 315 } else if (res.IsHole()) { 316 res = thread->GetEcmaVM()->GetFactory()->GetEmptyString().GetTaggedValue(); 317 globalTable->Set(thread, CAPTURE_START_INDEX + N - 1, res); 318 } 319 return res; 320 } 321 SetTotalCaptureCounts(JSThread * thread,JSTaggedValue counts)322 void SetTotalCaptureCounts(JSThread *thread, JSTaggedValue counts) 323 { 324 Set(thread, TOTAL_CAPTURE_COUNTS_INDEX, counts); 325 } 326 GetTotalCaptureCounts()327 JSTaggedValue GetTotalCaptureCounts() 328 { 329 return Get(TOTAL_CAPTURE_COUNTS_INDEX); 330 } 331 SetEndIndex(JSThread * thread,JSTaggedValue endIndex)332 void SetEndIndex(JSThread *thread, JSTaggedValue endIndex) 333 { 334 Set(thread, END_INDEX, endIndex); 335 } 336 GetEndIndex()337 JSTaggedValue GetEndIndex() 338 { 339 return Get(END_INDEX); 340 } 341 SetInputString(JSThread * thread,JSTaggedValue string)342 void SetInputString(JSThread *thread, JSTaggedValue string) 343 { 344 Set(thread, INPUT_STRING_INDEX, string); 345 } 346 GetInputString()347 JSTaggedValue GetInputString() 348 { 349 return Get(INPUT_STRING_INDEX); 350 } 351 SetStartOfCaptureIndex(JSThread * thread,uint32_t index,JSTaggedValue value)352 void SetStartOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value) 353 { 354 Set(thread, FIRST_CAPTURE_INDEX + index * 2, value); // 2 : double 355 } 356 SetEndOfCaptureIndex(JSThread * thread,uint32_t index,JSTaggedValue value)357 void SetEndOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value) 358 { 359 Set(thread, FIRST_CAPTURE_INDEX + index * 2 + 1, value); // 2 : double 360 } 361 GetStartOfCaptureIndex(uint32_t index)362 JSTaggedValue GetStartOfCaptureIndex(uint32_t index) 363 { 364 return Get(FIRST_CAPTURE_INDEX + index * 2); // 2 : double 365 } 366 GetEndOfCaptureIndex(uint32_t index)367 JSTaggedValue GetEndOfCaptureIndex(uint32_t index) 368 { 369 return Get(FIRST_CAPTURE_INDEX + index * 2 + 1); // 2 : double 370 } 371 372 static JSHandle<RegExpGlobalResult> GrowCapturesCapacity(JSThread *thread, 373 JSHandle<RegExpGlobalResult>result, uint32_t length); 374 375 static constexpr int FIRST_CAPTURE_INDEX = 12; // capture index starts here 376 377 private: 378 static constexpr int GLOBAL_TABLE_SIZE = 12; // initial length 379 static constexpr int DOLLAR_NUMBER = 9; 380 static constexpr int CAPTURE_START_INDEX = 0; 381 382 static constexpr int TOTAL_CAPTURE_COUNTS_INDEX = 9; // save total capture size 383 static constexpr int INPUT_STRING_INDEX = 10; // save input string 384 static constexpr int END_INDEX = 11; // save last index 385 static constexpr int INITIAL_CAPTURE_INDICES = 18; // length: pairs of capture start index and end index 386 }; 387 } // namespace panda::ecmascript::builtins 388 #endif // ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H 389