1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 18 19 #include "ecmascript/builtins/builtins_regexp.h" 20 #include "ecmascript/global_env.h" 21 #include "ecmascript/js_tagged_value-inl.h" 22 #include "ecmascript/js_handle.h" 23 #include "ecmascript/mem/regexp_cached_chunk.h" 24 #include "ecmascript/regexp/regexp_parser.h" 25 26 namespace panda::ecmascript { 27 class RegExpExecutor { 28 public: 29 struct CaptureState { 30 const uint8_t *captureStart; 31 const uint8_t *captureEnd; 32 }; 33 34 enum StateType : uint8_t { 35 STATE_SPLIT = 0, /* Do not re-order. */ 36 STATE_NEGATIVE_MATCH_AHEAD, /* OP_NEGATIVE_MATCH_AHEAD - OP_SPLIT_NEXT */ 37 STATE_MATCH_AHEAD, /* OP_MATCH_AHEAD - OP_SPLIT_NEXT */ 38 STATE_SAVE, 39 STATE_PUSH, 40 STATE_POP, 41 STATE_SET, 42 STATE_INVALID, 43 }; 44 45 struct RegExpState { 46 StateType type_ = STATE_SPLIT; 47 uint32_t currentPc_ = 0; 48 const uint8_t *currentPtr_ = nullptr; 49 }; 50 RegExpExecutor(RegExpCachedChunk * chunk)51 explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk) 52 { 53 ASSERT(chunk_ != nullptr); 54 }; 55 56 ~RegExpExecutor() = default; 57 58 NO_COPY_SEMANTIC(RegExpExecutor); 59 NO_MOVE_SEMANTIC(RegExpExecutor); 60 61 bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); 62 63 bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); HandleFirstSplit()64 inline bool HandleFirstSplit() 65 { 66 if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && 67 (flags_ & RegExpParser::FLAG_STICKY) == 0) { 68 if (IsEOF()) { 69 if (MatchFailed()) { 70 return false; 71 } 72 } else if (prefilter_ && !isWideChar_) { 73 ++currentPtr_; 74 currentPtr_ = (const uint8_t *)memchr(currentPtr_, prefilter_, inputEnd_ - currentPtr_); 75 if (currentPtr_ == nullptr) { 76 currentPtr_ = inputEnd_; 77 } 78 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 79 } else { 80 AdvanceCurrentPtr(); 81 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 82 } 83 } 84 return true; 85 } 86 HandleOpAll(uint8_t opCode)87 inline bool HandleOpAll(uint8_t opCode) 88 { 89 if (IsEOF()) { 90 return !MatchFailed(); 91 } 92 uint32_t currentChar = GetCurrentChar(); 93 if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { 94 return !MatchFailed(); 95 } 96 Advance(opCode); 97 return true; 98 } 99 HandleOpChar(const DynChunk & byteCode,uint8_t opCode)100 inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode) 101 { 102 uint32_t expectedChar = 0; 103 if (opCode == RegExpOpCode::OP_CHAR32) { 104 expectedChar = byteCode.GetU32(GetCurrentPC() + 1); 105 } else { 106 expectedChar = byteCode.GetU16(GetCurrentPC() + 1); 107 } 108 if (IsEOF()) { 109 return !MatchFailed(); 110 } 111 uint32_t currentChar = GetCurrentChar(); 112 if (IsIgnoreCase()) { 113 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 114 } 115 if (currentChar == expectedChar) { 116 Advance(opCode); 117 } else { 118 if (MatchFailed()) { 119 return false; 120 } 121 } 122 return true; 123 } 124 HandleOpWordBoundary(uint8_t opCode)125 inline bool HandleOpWordBoundary(uint8_t opCode) 126 { 127 bool preIsWord = false; 128 if (GetCurrentPtr() != input_) { 129 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 130 preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); 131 } 132 bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_)); 133 if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && 134 ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || 135 ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && 136 ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { 137 Advance(opCode); 138 } else { 139 if (MatchFailed()) { 140 return false; 141 } 142 } 143 return true; 144 } 145 HandleOpLineStart(uint8_t opCode)146 inline bool HandleOpLineStart(uint8_t opCode) 147 { 148 if ((GetCurrentPtr() == input_) || 149 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 150 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { 151 Advance(opCode); 152 } else { 153 if (MatchFailed()) { 154 return false; 155 } 156 } 157 return true; 158 } 159 HandleOpLineEnd(uint8_t opCode)160 inline bool HandleOpLineEnd(uint8_t opCode) 161 { 162 if (IsEOF() || 163 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 164 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) { 165 Advance(opCode); 166 } else { 167 if (MatchFailed()) { 168 return false; 169 } 170 } 171 return true; 172 } 173 HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)174 inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) 175 { 176 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 177 ASSERT(captureIndex < nCapture_); 178 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 179 CaptureState *captureState = &captureResultList_[captureIndex]; 180 // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 181 PushRegExpState(STATE_SAVE, captureIndex * 2, reinterpret_cast<uintptr_t>(captureState->captureStart)); 182 captureState->captureStart = GetCurrentPtr(); 183 Advance(opCode); 184 } 185 HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)186 inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) 187 { 188 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 189 ASSERT(captureIndex < nCapture_); 190 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 191 CaptureState *captureState = &captureResultList_[captureIndex]; 192 // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 193 PushRegExpState(STATE_SAVE, captureIndex * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd)); 194 captureState->captureEnd = GetCurrentPtr(); 195 Advance(opCode); 196 } 197 HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)198 inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) 199 { 200 uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); 201 uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); 202 for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { 203 CaptureState *captureState = 204 &captureResultList_[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 205 // 2: Even indexes store captureStart. 206 // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 207 PushRegExpState(STATE_SAVE, i * 2, reinterpret_cast<uintptr_t>(captureState->captureStart)); 208 // 2: Even indexes store captureStart. 209 // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 210 PushRegExpState(STATE_SAVE, i * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd)); 211 captureState->captureStart = nullptr; 212 captureState->captureEnd = nullptr; 213 } 214 Advance(opCode); 215 } 216 HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)217 inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) 218 { 219 auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT); 220 ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); 221 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 222 Advance(opCode); 223 uint32_t splitPc = GetCurrentPC() + offset; 224 PushRegExpState(type, splitPc); 225 } 226 HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)227 inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) 228 { 229 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 230 Advance(opCode); 231 PushRegExpState(STATE_SPLIT, GetCurrentPC()); 232 AdvanceOffset(offset); 233 } 234 HandleOpPrev(uint8_t opCode)235 inline bool HandleOpPrev(uint8_t opCode) 236 { 237 if (GetCurrentPtr() == input_) { 238 if (MatchFailed()) { 239 return false; 240 } 241 } else { 242 PrevPtr(¤tPtr_, input_); 243 Advance(opCode); 244 } 245 return true; 246 } 247 HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)248 inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) 249 { 250 uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); 251 uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); 252 uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); 253 Advance(opCode); 254 uint32_t loopPcEnd = GetCurrentPC(); 255 uint32_t loopPcStart = GetCurrentPC() + pcOffset; 256 bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; 257 uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; 258 259 uint32_t loopCount = PeekStack(); 260 PushRegExpState(StateType::STATE_SET, 0, loopCount); 261 SetStackValue(++loopCount); 262 if (loopCount < loopMax) { 263 // greedy failed, goto next 264 if (loopCount >= quantifyMin) { 265 PushRegExpState(STATE_SPLIT, loopPcEnd); 266 } 267 // Goto loop start 268 SetCurrentPC(loopPcStart); 269 } else { 270 if (!isGreedy && (loopCount < quantifyMax)) { 271 PushRegExpState(STATE_SPLIT, loopPcStart); 272 } 273 } 274 } 275 HandleOpRange32(const DynChunk & byteCode)276 inline bool HandleOpRange32(const DynChunk &byteCode) 277 { 278 if (IsEOF()) { 279 return !MatchFailed(); 280 } 281 uint32_t currentChar = GetCurrentChar(); 282 if (IsIgnoreCase()) { 283 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 284 } 285 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 286 bool isFound = false; 287 int32_t idxMin = 0; 288 int32_t idxMax = static_cast<int32_t>(rangeCount) - 1; 289 int32_t idx = 0; 290 uint32_t low = 0; 291 uint32_t high = 292 byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + 293 RANGE32_MAX_HALF_OFFSET); 294 if (currentChar <= high) { 295 while (idxMin <= idxMax) { 296 idx = (idxMin + idxMax) / RANGE32_OFFSET; 297 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 298 RANGE32_MAX_OFFSET); 299 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 300 RANGE32_MAX_OFFSET + 301 RANGE32_MAX_HALF_OFFSET); 302 if (currentChar < low) { 303 idxMax = idx - 1; 304 } else if (currentChar > high) { 305 idxMin = idx + 1; 306 } else { 307 isFound = true; 308 break; 309 } 310 } 311 } 312 if (isFound) { 313 AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); 314 } else { 315 if (MatchFailed()) { 316 return false; 317 } 318 } 319 return true; 320 } 321 HandleOpRange(const DynChunk & byteCode)322 inline bool HandleOpRange(const DynChunk &byteCode) 323 { 324 if (IsEOF()) { 325 return !MatchFailed(); 326 } 327 uint32_t currentChar = GetCurrentChar(); 328 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 329 bool flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount); 330 if (IsIgnoreCase() && !flag) { 331 currentChar = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar)); 332 flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount); 333 } 334 if (flag) { 335 AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); 336 } else { 337 if (MatchFailed()) { 338 return false; 339 } 340 } 341 return true; 342 } 343 HandleOpSparse(const DynChunk & byteCode)344 inline bool HandleOpSparse(const DynChunk &byteCode) 345 { 346 if (IsEOF()) { 347 return !MatchFailed(); 348 } 349 uint32_t currentChar = GetCurrentChar(); 350 if (IsIgnoreCase()) { 351 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 352 } 353 uint16_t sparseCount = byteCode.GetU16(GetCurrentPC() + 1); 354 for (uint32_t i = 0; i < sparseCount; i++) { 355 uint32_t sparseChar = byteCode.GetU16(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET); 356 if (currentChar == sparseChar) { 357 uint32_t offset = byteCode.GetU32(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET + 358 SPARSE_OFF_OFFSET); 359 AdvanceOffset(offset + sparseCount * SPARSE_MAX_OFFSET + SPARSE_HEAD_OFFSET); 360 return true; 361 } 362 } 363 return !MatchFailed(); 364 } 365 HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)366 inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) 367 { 368 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 369 if (captureIndex >= nCapture_) { 370 return !MatchFailed(); 371 } 372 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 373 const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; 374 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 375 const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; 376 if (captureStart == nullptr || captureEnd == nullptr) { 377 Advance(opCode); 378 return true; 379 } 380 bool isMatched = true; 381 if (opCode == RegExpOpCode::OP_BACKREFERENCE) { 382 const uint8_t *refCptr = captureStart; 383 while (refCptr < captureEnd) { 384 if (IsEOF()) { 385 isMatched = false; 386 break; 387 } 388 // NOLINTNEXTLINE(readability-identifier-naming) 389 uint32_t c1 = GetChar(&refCptr, captureEnd); 390 // NOLINTNEXTLINE(readability-identifier-naming) 391 uint32_t c2 = GetChar(¤tPtr_, inputEnd_); 392 if (IsIgnoreCase()) { 393 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 394 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 395 } 396 if (c1 != c2) { 397 isMatched = false; 398 break; 399 } 400 } 401 if (!isMatched) { 402 if (MatchFailed()) { 403 return false; 404 } 405 } else { 406 Advance(opCode); 407 } 408 } else { 409 const uint8_t *refCptr = captureEnd; 410 while (refCptr > captureStart) { 411 if (GetCurrentPtr() == input_) { 412 isMatched = false; 413 break; 414 } 415 // NOLINTNEXTLINE(readability-identifier-naming) 416 uint32_t c1 = GetPrevChar(&refCptr, captureStart); 417 // NOLINTNEXTLINE(readability-identifier-naming) 418 uint32_t c2 = GetPrevChar(¤tPtr_, input_); 419 if (IsIgnoreCase()) { 420 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 421 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 422 } 423 if (c1 != c2) { 424 isMatched = false; 425 break; 426 } 427 } 428 if (!isMatched) { 429 if (MatchFailed()) { 430 return false; 431 } 432 } else { 433 Advance(opCode); 434 } 435 } 436 return true; 437 } 438 439 inline void Advance(uint8_t opCode, uint32_t offset = 0) 440 { 441 currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize()); 442 } 443 AdvanceOffset(uint32_t offset)444 inline void AdvanceOffset(uint32_t offset) 445 { 446 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 447 currentPc_ += offset; 448 } 449 GetCurrentChar()450 inline uint32_t GetCurrentChar() 451 { 452 return GetChar(¤tPtr_, inputEnd_); 453 } 454 AdvanceCurrentPtr()455 inline void AdvanceCurrentPtr() 456 { 457 AdvancePtr(¤tPtr_, inputEnd_); 458 } 459 GetChar(const uint8_t ** pp,const uint8_t * end)460 uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const 461 { 462 uint32_t c = 0; 463 const uint8_t *cptr = *pp; 464 if (!isWideChar_) { 465 c = *cptr; 466 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 467 } else { 468 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 469 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 470 c = c1; 471 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 472 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 473 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 474 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 475 if (U16_IS_TRAIL(c1)) { 476 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 477 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 478 } 479 } 480 *pp = cptr; 481 } 482 return c; 483 } 484 PeekChar(const uint8_t * p,const uint8_t * end)485 uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const 486 { 487 uint32_t c = 0; 488 const uint8_t *cptr = p; 489 if (!isWideChar_) { 490 c = *cptr; 491 } else { 492 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 493 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 494 c = c1; 495 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 496 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 497 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 498 if (U16_IS_TRAIL(c1)) { 499 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 500 } 501 } 502 } 503 return c; 504 } 505 AdvancePtr(const uint8_t ** pp,const uint8_t * end)506 void AdvancePtr(const uint8_t **pp, const uint8_t *end) const 507 { 508 const uint8_t *cptr = *pp; 509 if (!isWideChar_) { 510 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 511 } else { 512 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 513 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 514 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 515 if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { 516 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 517 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 518 if (U16_IS_TRAIL(c1)) { 519 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 520 } 521 } 522 *pp = cptr; 523 } 524 } 525 PeekPrevChar(const uint8_t * p,const uint8_t * start)526 uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const 527 { 528 uint32_t c = 0; 529 const uint8_t *cptr = p; 530 if (!isWideChar_) { 531 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 532 } else { 533 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 534 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 535 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 536 c = c1; 537 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 538 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 539 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 540 if (U16_IS_LEAD(c1)) { 541 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 542 } 543 } 544 } 545 return c; 546 } 547 GetPrevChar(const uint8_t ** pp,const uint8_t * start)548 uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const 549 { 550 uint32_t c = 0; 551 const uint8_t *cptr = *pp; 552 if (!isWideChar_) { 553 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 554 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 555 *pp = cptr; 556 } else { 557 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 558 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 559 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 560 c = c1; 561 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 562 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 563 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 564 if (U16_IS_LEAD(c1)) { 565 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 566 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 567 } 568 } 569 *pp = cptr; 570 } 571 return c; 572 } 573 PrevPtr(const uint8_t ** pp,const uint8_t * start)574 void PrevPtr(const uint8_t **pp, const uint8_t *start) const 575 { 576 const uint8_t *cptr = *pp; 577 if (!isWideChar_) { 578 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 579 *pp = cptr; 580 } else { 581 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 582 uint16_t c1 = *(const uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 583 if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { 584 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 585 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 586 if (U16_IS_LEAD(c1)) { 587 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 588 } 589 } 590 *pp = cptr; 591 } 592 } 593 594 bool MatchFailed(bool isMatched = false); 595 SetCurrentPC(uint32_t pc)596 void SetCurrentPC(uint32_t pc) 597 { 598 currentPc_ = pc; 599 } 600 SetCurrentPtr(const uint8_t * ptr)601 void SetCurrentPtr(const uint8_t *ptr) 602 { 603 currentPtr_ = ptr; 604 } 605 IsEOF()606 bool IsEOF() const 607 { 608 return currentPtr_ >= inputEnd_; 609 } 610 IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)611 bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar, 612 const DynChunk &byteCode, const uint16_t rangeCount) 613 { 614 bool isFound = false; 615 int32_t idxMin = 0; 616 int32_t idxMax = static_cast<int32_t>(rangeCount - 1); 617 int32_t idx = 0; 618 uint32_t low = 0; 619 uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + 620 static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 621 if (nowChar <= high) { 622 while (idxMin <= idxMax) { 623 idx = (idxMin + idxMax) / RANGE32_OFFSET; 624 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 625 RANGE32_MAX_HALF_OFFSET); 626 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 627 RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 628 if (nowChar < low) { 629 idxMax = idx - 1; 630 } else if (nowChar > high) { 631 idxMin = idx + 1; 632 } else { 633 isFound = true; 634 break; 635 } 636 } 637 } 638 return isFound; 639 } 640 GetCurrentPC()641 uint32_t GetCurrentPC() const 642 { 643 return currentPc_; 644 } 645 PushStack(uintptr_t val)646 void PushStack(uintptr_t val) 647 { 648 ASSERT(currentStack_ < nStack_); 649 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 650 stack_[currentStack_++] = val; 651 } 652 SetStackValue(uintptr_t val)653 void SetStackValue(uintptr_t val) const 654 { 655 ASSERT(currentStack_ >= 1); 656 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 657 stack_[currentStack_ - 1] = val; 658 } 659 PopStack()660 uintptr_t PopStack() 661 { 662 ASSERT(currentStack_ >= 1); 663 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 664 return stack_[--currentStack_]; 665 } 666 PeekStack()667 uintptr_t PeekStack() const 668 { 669 ASSERT(currentStack_ >= 1); 670 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 671 return stack_[currentStack_ - 1]; 672 } 673 GetCurrentPtr()674 const uint8_t *GetCurrentPtr() const 675 { 676 return currentPtr_; 677 } 678 GetCaptureResultList()679 CaptureState *GetCaptureResultList() const 680 { 681 return captureResultList_; 682 } 683 684 void DumpResult(std::ostream &out) const; 685 686 void GetResult(JSThread *thread); 687 688 void PushRegExpState(StateType type, uint32_t pc); 689 void PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr); 690 691 StateType PopRegExpState(bool copyCapture = true); 692 DropRegExpState()693 void DropRegExpState() 694 { 695 stateStackLen_--; 696 } 697 PeekRegExpState()698 RegExpState *PeekRegExpState() const 699 { 700 ASSERT(stateStackLen_ >= 1); 701 return reinterpret_cast<RegExpState *>( 702 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 703 (stateStackLen_ - 1) * sizeof(RegExpState)); 704 } 705 706 void ReAllocStack(uint32_t stackLen); 707 IsWordChar(uint8_t value)708 inline bool IsWordChar(uint8_t value) const 709 { 710 return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || 711 (value == '_')); 712 } 713 IsTerminator(uint32_t value)714 inline bool IsTerminator(uint32_t value) const 715 { 716 // NOLINTNEXTLINE(readability-magic-numbers) 717 return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); 718 } 719 IsIgnoreCase()720 inline bool IsIgnoreCase() const 721 { 722 return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0; 723 } 724 IsUtf16()725 inline bool IsUtf16() const 726 { 727 return (flags_ & RegExpParser::FLAG_UTF16) != 0; 728 } 729 730 private: 731 static constexpr size_t CHAR_SIZE = 1; 732 static constexpr size_t WIDE_CHAR_SIZE = 2; 733 static constexpr size_t SAVE_RESET_START = 1; 734 static constexpr size_t SAVE_RESET_END = 2; 735 static constexpr size_t LOOP_MIN_OFFSET = 5; 736 static constexpr size_t LOOP_MAX_OFFSET = 9; 737 static constexpr size_t LOOP_PC_OFFSET = 1; 738 static constexpr size_t RANGE32_HEAD_OFFSET = 3; 739 static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; 740 static constexpr size_t RANGE32_MAX_OFFSET = 8; 741 static constexpr size_t RANGE32_OFFSET = 2; 742 static constexpr size_t SPARSE_HEAD_OFFSET = 3; 743 static constexpr size_t SPARSE_OFF_OFFSET = 2; 744 static constexpr size_t SPARSE_MAX_OFFSET = 6; 745 static constexpr uint32_t STACK_MULTIPLIER = 2; 746 static constexpr uint32_t MIN_STACK_SIZE = 8; 747 static constexpr int TMP_BUF_SIZE = 128; 748 uint8_t *input_ = nullptr; 749 uint8_t *inputEnd_ = nullptr; 750 bool isWideChar_ = false; 751 uint16_t prefilter_ = 0; 752 753 uint32_t currentPc_ = 0; 754 const uint8_t *currentPtr_ = nullptr; 755 CaptureState *captureResultList_ = nullptr; 756 uintptr_t *stack_ = nullptr; 757 uint32_t currentStack_ = 0; 758 759 uint32_t nCapture_ = 0; 760 uint32_t nStack_ = 0; 761 762 uint32_t flags_ = 0; 763 uint32_t stateStackLen_ = 0; 764 uint32_t stateStackSize_ = 0; 765 uint8_t *stateStack_ = nullptr; 766 RegExpCachedChunk *chunk_ = nullptr; 767 }; 768 } // namespace panda::ecmascript 769 #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 770