1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 18 19 #include "ecmascript/builtins/builtins_regexp.h" 20 #include "ecmascript/regexp/regexp_parser.h" 21 #include "ecmascript/mem/regexp_cached_chunk.h" 22 #include "ecmascript/js_handle.h" 23 24 namespace panda::ecmascript { 25 class RegExpExecutor { 26 public: 27 struct CaptureState { 28 const uint8_t *captureStart; 29 const uint8_t *captureEnd; 30 }; 31 32 enum StateType : uint8_t { 33 STATE_SPLIT = 0, 34 STATE_MATCH_AHEAD, 35 STATE_NEGATIVE_MATCH_AHEAD, 36 }; 37 38 struct RegExpState { 39 StateType type_ = STATE_SPLIT; 40 uint32_t currentPc_ = 0; 41 uint32_t currentStack_ = 0; 42 const uint8_t *currentPtr_ = nullptr; 43 __extension__ CaptureState *captureResultList_[0]; // NOLINT(modernize-avoid-c-arrays) 44 }; 45 RegExpExecutor(RegExpCachedChunk * chunk)46 explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk) 47 { 48 ASSERT(chunk_ != nullptr); 49 }; 50 51 ~RegExpExecutor() = default; 52 53 NO_COPY_SEMANTIC(RegExpExecutor); 54 NO_MOVE_SEMANTIC(RegExpExecutor); 55 56 bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); 57 58 bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); HandleFirstSplit()59 inline bool HandleFirstSplit() 60 { 61 if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && 62 (flags_ & RegExpParser::FLAG_STICKY) == 0) { 63 if (IsEOF()) { 64 if (MatchFailed()) { 65 return false; 66 } 67 } else { 68 AdvanceCurrentPtr(); 69 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 70 } 71 } 72 return true; 73 } 74 HandleOpAll(uint8_t opCode)75 inline bool HandleOpAll(uint8_t opCode) 76 { 77 if (IsEOF()) { 78 return !MatchFailed(); 79 } 80 uint32_t currentChar = GetCurrentChar(); 81 if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { 82 return !MatchFailed(); 83 } 84 Advance(opCode); 85 return true; 86 } 87 HandleOpChar(const DynChunk & byteCode,uint8_t opCode)88 inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode) 89 { 90 uint32_t expectedChar = 0; 91 if (opCode == RegExpOpCode::OP_CHAR32) { 92 expectedChar = byteCode.GetU32(GetCurrentPC() + 1); 93 } else { 94 expectedChar = byteCode.GetU16(GetCurrentPC() + 1); 95 } 96 if (IsEOF()) { 97 return !MatchFailed(); 98 } 99 uint32_t currentChar = GetCurrentChar(); 100 if (IsIgnoreCase()) { 101 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 102 } 103 if (currentChar == expectedChar) { 104 Advance(opCode); 105 } else { 106 if (MatchFailed()) { 107 return false; 108 } 109 } 110 return true; 111 } 112 HandleOpWordBoundary(uint8_t opCode)113 inline bool HandleOpWordBoundary(uint8_t opCode) 114 { 115 bool preIsWord = false; 116 if (GetCurrentPtr() != input_) { 117 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 118 preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); 119 } 120 bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_)); 121 if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && 122 ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || 123 ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && 124 ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { 125 Advance(opCode); 126 } else { 127 if (MatchFailed()) { 128 return false; 129 } 130 } 131 return true; 132 } 133 HandleOpLineStart(uint8_t opCode)134 inline bool HandleOpLineStart(uint8_t opCode) 135 { 136 if ((GetCurrentPtr() == input_) || 137 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 138 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { 139 Advance(opCode); 140 } else { 141 if (MatchFailed()) { 142 return false; 143 } 144 } 145 return true; 146 } 147 HandleOpLineEnd(uint8_t opCode)148 inline bool HandleOpLineEnd(uint8_t opCode) 149 { 150 if (IsEOF() || 151 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 152 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) { 153 Advance(opCode); 154 } else { 155 if (MatchFailed()) { 156 return false; 157 } 158 } 159 return true; 160 } 161 HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)162 inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) 163 { 164 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 165 ASSERT(captureIndex < nCapture_); 166 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 167 CaptureState *captureState = &captureResultList_[captureIndex]; 168 captureState->captureStart = GetCurrentPtr(); 169 Advance(opCode); 170 } 171 HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)172 inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) 173 { 174 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 175 ASSERT(captureIndex < nCapture_); 176 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 177 CaptureState *captureState = &captureResultList_[captureIndex]; 178 captureState->captureEnd = GetCurrentPtr(); 179 Advance(opCode); 180 } 181 HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)182 inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) 183 { 184 uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); 185 uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); 186 for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { 187 CaptureState *captureState = 188 &captureResultList_[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 189 captureState->captureStart = nullptr; 190 captureState->captureEnd = nullptr; 191 } 192 Advance(opCode); 193 } 194 HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)195 inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) 196 { 197 auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT); 198 ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); 199 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 200 Advance(opCode); 201 uint32_t splitPc = GetCurrentPC() + offset; 202 PushRegExpState(type, splitPc); 203 } 204 HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)205 inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) 206 { 207 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 208 Advance(opCode); 209 PushRegExpState(STATE_SPLIT, GetCurrentPC()); 210 AdvanceOffset(offset); 211 } 212 HandleOpPrev(uint8_t opCode)213 inline bool HandleOpPrev(uint8_t opCode) 214 { 215 if (GetCurrentPtr() == input_) { 216 if (MatchFailed()) { 217 return false; 218 } 219 } else { 220 PrevPtr(¤tPtr_, input_); 221 Advance(opCode); 222 } 223 return true; 224 } 225 HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)226 inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) 227 { 228 uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); 229 uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); 230 uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); 231 Advance(opCode); 232 uint32_t loopPcEnd = GetCurrentPC(); 233 uint32_t loopPcStart = GetCurrentPC() + pcOffset; 234 bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; 235 uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; 236 237 uint32_t loopCount = PeekStack(); 238 SetStackValue(++loopCount); 239 if (loopCount < loopMax) { 240 // greedy failed, goto next 241 if (loopCount >= quantifyMin) { 242 PushRegExpState(STATE_SPLIT, loopPcEnd); 243 } 244 // Goto loop start 245 SetCurrentPC(loopPcStart); 246 } else { 247 if (!isGreedy && (loopCount < quantifyMax)) { 248 PushRegExpState(STATE_SPLIT, loopPcStart); 249 } 250 } 251 } 252 HandleOpRange32(const DynChunk & byteCode)253 inline bool HandleOpRange32(const DynChunk &byteCode) 254 { 255 if (IsEOF()) { 256 return !MatchFailed(); 257 } 258 uint32_t currentChar = GetCurrentChar(); 259 if (IsIgnoreCase()) { 260 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 261 } 262 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 263 bool isFound = false; 264 int32_t idxMin = 0; 265 int32_t idxMax = static_cast<int32_t>(rangeCount) - 1; 266 int32_t idx = 0; 267 uint32_t low = 0; 268 uint32_t high = 269 byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + 270 RANGE32_MAX_HALF_OFFSET); 271 if (currentChar <= high) { 272 while (idxMin <= idxMax) { 273 idx = (idxMin + idxMax) / RANGE32_OFFSET; 274 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 275 RANGE32_MAX_OFFSET); 276 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 277 RANGE32_MAX_OFFSET + 278 RANGE32_MAX_HALF_OFFSET); 279 if (currentChar < low) { 280 idxMax = idx - 1; 281 } else if (currentChar > high) { 282 idxMin = idx + 1; 283 } else { 284 isFound = true; 285 break; 286 } 287 } 288 } 289 if (isFound) { 290 AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); 291 } else { 292 if (MatchFailed()) { 293 return false; 294 } 295 } 296 return true; 297 } 298 HandleOpRange(const DynChunk & byteCode)299 inline bool HandleOpRange(const DynChunk &byteCode) 300 { 301 if (IsEOF()) { 302 return !MatchFailed(); 303 } 304 uint32_t currentChar = GetCurrentChar(); 305 uint32_t currentCharNext = currentChar; 306 if (IsIgnoreCase()) { 307 currentCharNext = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar)); 308 } 309 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 310 if (IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount) || 311 IsFoundOpRange(GetCurrentPC(), currentCharNext, byteCode, rangeCount)) { 312 AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); 313 } else { 314 if (MatchFailed()) { 315 return false; 316 } 317 } 318 return true; 319 } HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)320 inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) 321 { 322 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 323 if (captureIndex >= nCapture_) { 324 return !MatchFailed(); 325 } 326 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 327 const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; 328 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 329 const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; 330 if (captureStart == nullptr || captureEnd == nullptr) { 331 Advance(opCode); 332 return true; 333 } 334 bool isMatched = true; 335 if (opCode == RegExpOpCode::OP_BACKREFERENCE) { 336 const uint8_t *refCptr = captureStart; 337 while (refCptr < captureEnd) { 338 if (IsEOF()) { 339 isMatched = false; 340 break; 341 } 342 // NOLINTNEXTLINE(readability-identifier-naming) 343 uint32_t c1 = GetChar(&refCptr, captureEnd); 344 // NOLINTNEXTLINE(readability-identifier-naming) 345 uint32_t c2 = GetChar(¤tPtr_, inputEnd_); 346 if (IsIgnoreCase()) { 347 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 348 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 349 } 350 if (c1 != c2) { 351 isMatched = false; 352 break; 353 } 354 } 355 if (!isMatched) { 356 if (MatchFailed()) { 357 return false; 358 } 359 } else { 360 Advance(opCode); 361 } 362 } else { 363 const uint8_t *refCptr = captureEnd; 364 while (refCptr > captureStart) { 365 if (GetCurrentPtr() == input_) { 366 isMatched = false; 367 break; 368 } 369 // NOLINTNEXTLINE(readability-identifier-naming) 370 uint32_t c1 = GetPrevChar(&refCptr, captureStart); 371 // NOLINTNEXTLINE(readability-identifier-naming) 372 uint32_t c2 = GetPrevChar(¤tPtr_, input_); 373 if (IsIgnoreCase()) { 374 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 375 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 376 } 377 if (c1 != c2) { 378 isMatched = false; 379 break; 380 } 381 } 382 if (!isMatched) { 383 if (MatchFailed()) { 384 return false; 385 } 386 } else { 387 Advance(opCode); 388 } 389 } 390 return true; 391 } 392 393 inline void Advance(uint8_t opCode, uint32_t offset = 0) 394 { 395 currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize()); 396 } 397 AdvanceOffset(uint32_t offset)398 inline void AdvanceOffset(uint32_t offset) 399 { 400 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 401 currentPc_ += offset; 402 } 403 GetCurrentChar()404 inline uint32_t GetCurrentChar() 405 { 406 return GetChar(¤tPtr_, inputEnd_); 407 } 408 AdvanceCurrentPtr()409 inline void AdvanceCurrentPtr() 410 { 411 AdvancePtr(¤tPtr_, inputEnd_); 412 } 413 GetChar(const uint8_t ** pp,const uint8_t * end)414 uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const 415 { 416 uint32_t c = 0; 417 const uint8_t *cptr = *pp; 418 if (!isWideChar_) { 419 c = *cptr; 420 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 421 } else { 422 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 423 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 424 c = c1; 425 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 426 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 427 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 428 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 429 if (U16_IS_TRAIL(c1)) { 430 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 431 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 432 } 433 } 434 *pp = cptr; 435 } 436 return c; 437 } 438 PeekChar(const uint8_t * p,const uint8_t * end)439 uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const 440 { 441 uint32_t c = 0; 442 const uint8_t *cptr = p; 443 if (!isWideChar_) { 444 c = *cptr; 445 } else { 446 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 447 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 448 c = c1; 449 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 450 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 451 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 452 if (U16_IS_TRAIL(c1)) { 453 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 454 } 455 } 456 } 457 return c; 458 } 459 AdvancePtr(const uint8_t ** pp,const uint8_t * end)460 void AdvancePtr(const uint8_t **pp, const uint8_t *end) const 461 { 462 const uint8_t *cptr = *pp; 463 if (!isWideChar_) { 464 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 465 } else { 466 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 467 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 468 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 469 if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { 470 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 471 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 472 if (U16_IS_TRAIL(c1)) { 473 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 474 } 475 } 476 *pp = cptr; 477 } 478 } 479 PeekPrevChar(const uint8_t * p,const uint8_t * start)480 uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const 481 { 482 uint32_t c = 0; 483 const uint8_t *cptr = p; 484 if (!isWideChar_) { 485 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 486 } else { 487 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 488 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 489 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 490 c = c1; 491 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 492 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 493 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 494 if (U16_IS_LEAD(c1)) { 495 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 496 } 497 } 498 } 499 return c; 500 } 501 GetPrevChar(const uint8_t ** pp,const uint8_t * start)502 uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const 503 { 504 uint32_t c = 0; 505 const uint8_t *cptr = *pp; 506 if (!isWideChar_) { 507 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 508 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 509 *pp = cptr; 510 } else { 511 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 512 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 513 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 514 c = c1; 515 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 516 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 517 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 518 if (U16_IS_LEAD(c1)) { 519 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 520 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 521 } 522 } 523 *pp = cptr; 524 } 525 return c; 526 } 527 PrevPtr(const uint8_t ** pp,const uint8_t * start)528 void PrevPtr(const uint8_t **pp, const uint8_t *start) const 529 { 530 const uint8_t *cptr = *pp; 531 if (!isWideChar_) { 532 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 533 *pp = cptr; 534 } else { 535 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 536 uint16_t c1 = *(const uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 537 if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { 538 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 539 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 540 if (U16_IS_LEAD(c1)) { 541 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 542 } 543 } 544 *pp = cptr; 545 } 546 } 547 548 bool MatchFailed(bool isMatched = false); 549 SetCurrentPC(uint32_t pc)550 void SetCurrentPC(uint32_t pc) 551 { 552 currentPc_ = pc; 553 } 554 SetCurrentPtr(const uint8_t * ptr)555 void SetCurrentPtr(const uint8_t *ptr) 556 { 557 currentPtr_ = ptr; 558 } 559 IsEOF()560 bool IsEOF() const 561 { 562 return currentPtr_ >= inputEnd_; 563 } 564 IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)565 bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar, 566 const DynChunk &byteCode, const uint16_t rangeCount) 567 { 568 bool isFound = false; 569 int32_t idxMin = 0; 570 int32_t idxMax = static_cast<int32_t>(rangeCount - 1); 571 int32_t idx = 0; 572 uint32_t low = 0; 573 uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + 574 static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 575 if (nowChar <= high) { 576 while (idxMin <= idxMax) { 577 idx = (idxMin + idxMax) / RANGE32_OFFSET; 578 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 579 RANGE32_MAX_HALF_OFFSET); 580 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 581 RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 582 if (nowChar < low) { 583 idxMax = idx - 1; 584 } else if (nowChar > high) { 585 idxMin = idx + 1; 586 } else { 587 isFound = true; 588 break; 589 } 590 } 591 } 592 return isFound; 593 } 594 GetCurrentPC()595 uint32_t GetCurrentPC() const 596 { 597 return currentPc_; 598 } 599 PushStack(uintptr_t val)600 void PushStack(uintptr_t val) 601 { 602 ASSERT(currentStack_ < nStack_); 603 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 604 stack_[currentStack_++] = val; 605 } 606 SetStackValue(uintptr_t val)607 void SetStackValue(uintptr_t val) const 608 { 609 ASSERT(currentStack_ >= 1); 610 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 611 stack_[currentStack_ - 1] = val; 612 } 613 PopStack()614 uintptr_t PopStack() 615 { 616 ASSERT(currentStack_ >= 1); 617 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 618 return stack_[--currentStack_]; 619 } 620 PeekStack()621 uintptr_t PeekStack() const 622 { 623 ASSERT(currentStack_ >= 1); 624 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 625 return stack_[currentStack_ - 1]; 626 } 627 GetCurrentPtr()628 const uint8_t *GetCurrentPtr() const 629 { 630 return currentPtr_; 631 } 632 GetCaptureResultList()633 CaptureState *GetCaptureResultList() const 634 { 635 return captureResultList_; 636 } 637 638 void DumpResult(std::ostream &out) const; 639 640 void GetResult(JSThread *thread); 641 642 void PushRegExpState(StateType type, uint32_t pc); 643 644 RegExpState *PopRegExpState(bool copyCaptrue = true); 645 DropRegExpState()646 void DropRegExpState() 647 { 648 stateStackLen_--; 649 } 650 PeekRegExpState()651 RegExpState *PeekRegExpState() const 652 { 653 ASSERT(stateStackLen_ >= 1); 654 return reinterpret_cast<RegExpState *>( 655 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 656 (stateStackLen_ - 1) * stateSize_); 657 } 658 659 void ReAllocStack(uint32_t stackLen); 660 IsWordChar(uint8_t value)661 inline bool IsWordChar(uint8_t value) const 662 { 663 return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || 664 (value == '_')); 665 } 666 IsTerminator(uint32_t value)667 inline bool IsTerminator(uint32_t value) const 668 { 669 // NOLINTNEXTLINE(readability-magic-numbers) 670 return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); 671 } 672 IsIgnoreCase()673 inline bool IsIgnoreCase() const 674 { 675 return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0; 676 } 677 IsUtf16()678 inline bool IsUtf16() const 679 { 680 return (flags_ & RegExpParser::FLAG_UTF16) != 0; 681 } 682 683 private: 684 static constexpr size_t CHAR_SIZE = 1; 685 static constexpr size_t WIDE_CHAR_SIZE = 2; 686 static constexpr size_t SAVE_RESET_START = 1; 687 static constexpr size_t SAVE_RESET_END = 2; 688 static constexpr size_t LOOP_MIN_OFFSET = 5; 689 static constexpr size_t LOOP_MAX_OFFSET = 9; 690 static constexpr size_t LOOP_PC_OFFSET = 1; 691 static constexpr size_t RANGE32_HEAD_OFFSET = 3; 692 static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; 693 static constexpr size_t RANGE32_MAX_OFFSET = 8; 694 static constexpr size_t RANGE32_OFFSET = 2; 695 static constexpr uint32_t STACK_MULTIPLIER = 2; 696 static constexpr uint32_t MIN_STACK_SIZE = 8; 697 static constexpr int TMP_BUF_SIZE = 128; 698 uint8_t *input_ = nullptr; 699 uint8_t *inputEnd_ = nullptr; 700 bool isWideChar_ = false; 701 702 uint32_t currentPc_ = 0; 703 const uint8_t *currentPtr_ = nullptr; 704 CaptureState *captureResultList_ = nullptr; 705 uintptr_t *stack_ = nullptr; 706 uint32_t currentStack_ = 0; 707 708 uint32_t nCapture_ = 0; 709 uint32_t nStack_ = 0; 710 711 uint32_t flags_ = 0; 712 uint32_t stateStackLen_ = 0; 713 uint32_t stateStackSize_ = 0; 714 uint32_t stateSize_ = 0; 715 uint8_t *stateStack_ = nullptr; 716 RegExpCachedChunk *chunk_ = nullptr; 717 }; 718 } // namespace panda::ecmascript 719 #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 720