1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 18 19 #include "ecmascript/regexp/regexp_parser.h" 20 #include "ecmascript/mem/chunk.h" 21 22 namespace panda::ecmascript { 23 class RegExpExecutor { 24 public: 25 struct CaptureState { 26 const uint8_t *captureStart; 27 const uint8_t *captureEnd; 28 }; 29 30 enum StateType : uint8_t { 31 STATE_SPLIT = 0, 32 STATE_MATCH_AHEAD, 33 STATE_NEGATIVE_MATCH_AHEAD, 34 }; 35 36 struct RegExpState { 37 StateType type_ = STATE_SPLIT; 38 uint32_t currentPc_ = 0; 39 uint32_t currentStack_ = 0; 40 const uint8_t *currentPtr_ = nullptr; 41 __extension__ CaptureState *captureResultList_[0]; // NOLINT(modernize-avoid-c-arrays) 42 }; 43 44 struct MatchResult { 45 uint32_t endIndex_ = 0; 46 uint32_t index_ = 0; 47 // first value is true if result is undefined 48 std::vector<std::pair<bool, JSHandle<EcmaString>>> captures_; 49 bool isSuccess_ = false; 50 }; 51 RegExpExecutor(Chunk * chunk)52 explicit RegExpExecutor(Chunk *chunk) : chunk_(chunk) 53 { 54 ASSERT(chunk_ != nullptr); 55 }; 56 57 ~RegExpExecutor() = default; 58 59 NO_COPY_SEMANTIC(RegExpExecutor); 60 NO_MOVE_SEMANTIC(RegExpExecutor); 61 62 bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); 63 64 bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); HandleFirstSplit()65 inline bool HandleFirstSplit() 66 { 67 if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && 68 (flags_ & RegExpParser::FLAG_STICKY) == 0) { 69 if (IsEOF()) { 70 if (MatchFailed()) { 71 return false; 72 } 73 } else { 74 AdvanceCurrentPtr(); 75 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 76 } 77 } 78 return true; 79 } 80 HandleOpAll(uint8_t opCode)81 inline bool HandleOpAll(uint8_t opCode) 82 { 83 if (IsEOF()) { 84 return !MatchFailed(); 85 } 86 uint32_t currentChar = GetCurrentChar(); 87 if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { 88 return !MatchFailed(); 89 } 90 Advance(opCode); 91 return true; 92 } 93 HandleOpChar(const DynChunk & byteCode,uint8_t opCode)94 inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode) 95 { 96 uint32_t expectedChar = 0; 97 if (opCode == RegExpOpCode::OP_CHAR32) { 98 expectedChar = byteCode.GetU32(GetCurrentPC() + 1); 99 } else { 100 expectedChar = byteCode.GetU16(GetCurrentPC() + 1); 101 } 102 if (IsEOF()) { 103 return !MatchFailed(); 104 } 105 uint32_t currentChar = GetCurrentChar(); 106 if (IsIgnoreCase()) { 107 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 108 } 109 if (currentChar == expectedChar) { 110 Advance(opCode); 111 } else { 112 if (MatchFailed()) { 113 return false; 114 } 115 } 116 return true; 117 } 118 HandleOpWordBoundary(uint8_t opCode)119 inline bool HandleOpWordBoundary(uint8_t opCode) 120 { 121 bool preIsWord = false; 122 if (GetCurrentPtr() != input_) { 123 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 124 preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); 125 } 126 bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_)); 127 if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && 128 ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || 129 ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && 130 ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { 131 Advance(opCode); 132 } else { 133 if (MatchFailed()) { 134 return false; 135 } 136 } 137 return true; 138 } 139 HandleOpLineStart(uint8_t opCode)140 inline bool HandleOpLineStart(uint8_t opCode) 141 { 142 if ((GetCurrentPtr() == input_) || 143 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 144 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { 145 Advance(opCode); 146 } else { 147 if (MatchFailed()) { 148 return false; 149 } 150 } 151 return true; 152 } 153 HandleOpLineEnd(uint8_t opCode)154 inline bool HandleOpLineEnd(uint8_t opCode) 155 { 156 if (IsEOF() || 157 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 158 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) { 159 Advance(opCode); 160 } else { 161 if (MatchFailed()) { 162 return false; 163 } 164 } 165 return true; 166 } 167 HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)168 inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) 169 { 170 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 171 ASSERT(captureIndex < nCapture_); 172 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 173 CaptureState *captureState = &captureResultList_[captureIndex]; 174 captureState->captureStart = GetCurrentPtr(); 175 Advance(opCode); 176 } 177 HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)178 inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) 179 { 180 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 181 ASSERT(captureIndex < nCapture_); 182 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 183 CaptureState *captureState = &captureResultList_[captureIndex]; 184 captureState->captureEnd = GetCurrentPtr(); 185 Advance(opCode); 186 } 187 HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)188 inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) 189 { 190 uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); 191 uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); 192 for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { 193 CaptureState *captureState = 194 &captureResultList_[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 195 captureState->captureStart = nullptr; 196 captureState->captureEnd = nullptr; 197 } 198 Advance(opCode); 199 } 200 HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)201 inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) 202 { 203 auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT); 204 ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); 205 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 206 Advance(opCode); 207 uint32_t splitPc = GetCurrentPC() + offset; 208 PushRegExpState(type, splitPc); 209 } 210 HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)211 inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) 212 { 213 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 214 Advance(opCode); 215 PushRegExpState(STATE_SPLIT, GetCurrentPC()); 216 AdvanceOffset(offset); 217 } 218 HandleOpPrev(uint8_t opCode)219 inline bool HandleOpPrev(uint8_t opCode) 220 { 221 if (GetCurrentPtr() == input_) { 222 if (MatchFailed()) { 223 return false; 224 } 225 } else { 226 PrevPtr(¤tPtr_, input_); 227 Advance(opCode); 228 } 229 return true; 230 } 231 HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)232 inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) 233 { 234 uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); 235 uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); 236 uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); 237 Advance(opCode); 238 uint32_t loopPcEnd = GetCurrentPC(); 239 uint32_t loopPcStart = GetCurrentPC() + pcOffset; 240 bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; 241 uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; 242 243 uint32_t loopCount = PeekStack(); 244 SetStackValue(++loopCount); 245 if (loopCount < loopMax) { 246 // greedy failed, goto next 247 if (loopCount >= quantifyMin) { 248 PushRegExpState(STATE_SPLIT, loopPcEnd); 249 } 250 // Goto loop start 251 SetCurrentPC(loopPcStart); 252 } else { 253 if (!isGreedy && (loopCount < quantifyMax)) { 254 PushRegExpState(STATE_SPLIT, loopPcStart); 255 } 256 } 257 } 258 HandleOpRange32(const DynChunk & byteCode)259 inline bool HandleOpRange32(const DynChunk &byteCode) 260 { 261 if (IsEOF()) { 262 return !MatchFailed(); 263 } 264 uint32_t currentChar = GetCurrentChar(); 265 if (IsIgnoreCase()) { 266 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 267 } 268 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 269 bool isFound = false; 270 int32_t idxMin = 0; 271 int32_t idxMax = static_cast<int32_t>(rangeCount) - 1; 272 int32_t idx = 0; 273 uint32_t low = 0; 274 uint32_t high = 275 byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + 276 RANGE32_MAX_HALF_OFFSET); 277 if (currentChar <= high) { 278 while (idxMin <= idxMax) { 279 idx = (idxMin + idxMax) / RANGE32_OFFSET; 280 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 281 RANGE32_MAX_OFFSET); 282 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 283 RANGE32_MAX_OFFSET + 284 RANGE32_MAX_HALF_OFFSET); 285 if (currentChar < low) { 286 idxMax = idx - 1; 287 } else if (currentChar > high) { 288 idxMin = idx + 1; 289 } else { 290 isFound = true; 291 break; 292 } 293 } 294 } 295 if (isFound) { 296 AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); 297 } else { 298 if (MatchFailed()) { 299 return false; 300 } 301 } 302 return true; 303 } 304 HandleOpRange(const DynChunk & byteCode)305 inline bool HandleOpRange(const DynChunk &byteCode) 306 { 307 if (IsEOF()) { 308 return !MatchFailed(); 309 } 310 uint32_t currentChar = GetCurrentChar(); 311 uint32_t currentCharNext = currentChar; 312 if (IsIgnoreCase()) { 313 currentCharNext = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar)); 314 } 315 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 316 if (IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount) || 317 IsFoundOpRange(GetCurrentPC(), currentCharNext, byteCode, rangeCount)) { 318 AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); 319 } else { 320 if (MatchFailed()) { 321 return false; 322 } 323 } 324 return true; 325 } HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)326 inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) 327 { 328 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 329 if (captureIndex >= nCapture_) { 330 return !MatchFailed(); 331 } 332 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 333 const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; 334 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 335 const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; 336 if (captureStart == nullptr || captureEnd == nullptr) { 337 Advance(opCode); 338 return true; 339 } 340 bool isMatched = true; 341 if (opCode == RegExpOpCode::OP_BACKREFERENCE) { 342 const uint8_t *refCptr = captureStart; 343 while (refCptr < captureEnd) { 344 if (IsEOF()) { 345 isMatched = false; 346 break; 347 } 348 // NOLINTNEXTLINE(readability-identifier-naming) 349 uint32_t c1 = GetChar(&refCptr, captureEnd); 350 // NOLINTNEXTLINE(readability-identifier-naming) 351 uint32_t c2 = GetChar(¤tPtr_, inputEnd_); 352 if (IsIgnoreCase()) { 353 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 354 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 355 } 356 if (c1 != c2) { 357 isMatched = false; 358 break; 359 } 360 } 361 if (!isMatched) { 362 if (MatchFailed()) { 363 return false; 364 } 365 } else { 366 Advance(opCode); 367 } 368 } else { 369 const uint8_t *refCptr = captureEnd; 370 while (refCptr > captureStart) { 371 if (GetCurrentPtr() == input_) { 372 isMatched = false; 373 break; 374 } 375 // NOLINTNEXTLINE(readability-identifier-naming) 376 uint32_t c1 = GetPrevChar(&refCptr, captureStart); 377 // NOLINTNEXTLINE(readability-identifier-naming) 378 uint32_t c2 = GetPrevChar(¤tPtr_, input_); 379 if (IsIgnoreCase()) { 380 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 381 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 382 } 383 if (c1 != c2) { 384 isMatched = false; 385 break; 386 } 387 } 388 if (!isMatched) { 389 if (MatchFailed()) { 390 return false; 391 } 392 } else { 393 Advance(opCode); 394 } 395 } 396 return true; 397 } 398 399 inline void Advance(uint8_t opCode, uint32_t offset = 0) 400 { 401 currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize()); 402 } 403 AdvanceOffset(uint32_t offset)404 inline void AdvanceOffset(uint32_t offset) 405 { 406 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 407 currentPc_ += offset; 408 } 409 GetCurrentChar()410 inline uint32_t GetCurrentChar() 411 { 412 return GetChar(¤tPtr_, inputEnd_); 413 } 414 AdvanceCurrentPtr()415 inline void AdvanceCurrentPtr() 416 { 417 AdvancePtr(¤tPtr_, inputEnd_); 418 } 419 GetChar(const uint8_t ** pp,const uint8_t * end)420 uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const 421 { 422 uint32_t c = 0; 423 const uint8_t *cptr = *pp; 424 if (!isWideChar_) { 425 c = *cptr; 426 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 427 } else { 428 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 429 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 430 c = c1; 431 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 432 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 433 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 434 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 435 if (U16_IS_TRAIL(c1)) { 436 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 437 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 438 } 439 } 440 *pp = cptr; 441 } 442 return c; 443 } 444 PeekChar(const uint8_t * p,const uint8_t * end)445 uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const 446 { 447 uint32_t c = 0; 448 const uint8_t *cptr = p; 449 if (!isWideChar_) { 450 c = *cptr; 451 } else { 452 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 453 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 454 c = c1; 455 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 456 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 457 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 458 if (U16_IS_TRAIL(c1)) { 459 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 460 } 461 } 462 } 463 return c; 464 } 465 AdvancePtr(const uint8_t ** pp,const uint8_t * end)466 void AdvancePtr(const uint8_t **pp, const uint8_t *end) const 467 { 468 const uint8_t *cptr = *pp; 469 if (!isWideChar_) { 470 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 471 } else { 472 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 473 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 474 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 475 if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { 476 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 477 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 478 if (U16_IS_TRAIL(c1)) { 479 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 480 } 481 } 482 *pp = cptr; 483 } 484 } 485 PeekPrevChar(const uint8_t * p,const uint8_t * start)486 uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const 487 { 488 uint32_t c = 0; 489 const uint8_t *cptr = p; 490 if (!isWideChar_) { 491 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 492 } else { 493 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 494 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 495 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 496 c = c1; 497 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 498 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 499 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 500 if (U16_IS_LEAD(c1)) { 501 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 502 } 503 } 504 } 505 return c; 506 } 507 GetPrevChar(const uint8_t ** pp,const uint8_t * start)508 uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const 509 { 510 uint32_t c = 0; 511 const uint8_t *cptr = *pp; 512 if (!isWideChar_) { 513 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 514 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 515 *pp = cptr; 516 } else { 517 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 518 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 519 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 520 c = c1; 521 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 522 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 523 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 524 if (U16_IS_LEAD(c1)) { 525 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 526 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 527 } 528 } 529 *pp = cptr; 530 } 531 return c; 532 } 533 PrevPtr(const uint8_t ** pp,const uint8_t * start)534 void PrevPtr(const uint8_t **pp, const uint8_t *start) const 535 { 536 const uint8_t *cptr = *pp; 537 if (!isWideChar_) { 538 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 539 *pp = cptr; 540 } else { 541 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 542 uint16_t c1 = *(const uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 543 if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { 544 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 545 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 546 if (U16_IS_LEAD(c1)) { 547 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 548 } 549 } 550 *pp = cptr; 551 } 552 } 553 554 bool MatchFailed(bool isMatched = false); 555 SetCurrentPC(uint32_t pc)556 void SetCurrentPC(uint32_t pc) 557 { 558 currentPc_ = pc; 559 } 560 SetCurrentPtr(const uint8_t * ptr)561 void SetCurrentPtr(const uint8_t *ptr) 562 { 563 currentPtr_ = ptr; 564 } 565 IsEOF()566 bool IsEOF() const 567 { 568 return currentPtr_ >= inputEnd_; 569 } 570 IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)571 bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar, 572 const DynChunk &byteCode, const uint16_t rangeCount) 573 { 574 bool isFound = false; 575 int32_t idxMin = 0; 576 int32_t idxMax = static_cast<int32_t>(rangeCount - 1); 577 int32_t idx = 0; 578 uint32_t low = 0; 579 uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + 580 static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 581 if (nowChar <= high) { 582 while (idxMin <= idxMax) { 583 idx = (idxMin + idxMax) / RANGE32_OFFSET; 584 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 585 RANGE32_MAX_HALF_OFFSET); 586 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 587 RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 588 if (nowChar < low) { 589 idxMax = idx - 1; 590 } else if (nowChar > high) { 591 idxMin = idx + 1; 592 } else { 593 isFound = true; 594 break; 595 } 596 } 597 } 598 return isFound; 599 } 600 GetCurrentPC()601 uint32_t GetCurrentPC() const 602 { 603 return currentPc_; 604 } 605 PushStack(uintptr_t val)606 void PushStack(uintptr_t val) 607 { 608 ASSERT(currentStack_ < nStack_); 609 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 610 stack_[currentStack_++] = val; 611 } 612 SetStackValue(uintptr_t val)613 void SetStackValue(uintptr_t val) const 614 { 615 ASSERT(currentStack_ >= 1); 616 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 617 stack_[currentStack_ - 1] = val; 618 } 619 PopStack()620 uintptr_t PopStack() 621 { 622 ASSERT(currentStack_ >= 1); 623 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 624 return stack_[--currentStack_]; 625 } 626 PeekStack()627 uintptr_t PeekStack() const 628 { 629 ASSERT(currentStack_ >= 1); 630 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 631 return stack_[currentStack_ - 1]; 632 } 633 GetCurrentPtr()634 const uint8_t *GetCurrentPtr() const 635 { 636 return currentPtr_; 637 } 638 GetCaptureResultList()639 CaptureState *GetCaptureResultList() const 640 { 641 return captureResultList_; 642 } 643 644 void DumpResult(std::ostream &out) const; 645 646 MatchResult GetResult(const JSThread *thread, bool isSuccess) const; 647 648 void PushRegExpState(StateType type, uint32_t pc); 649 650 RegExpState *PopRegExpState(bool copyCaptrue = true); 651 DropRegExpState()652 void DropRegExpState() 653 { 654 stateStackLen_--; 655 } 656 PeekRegExpState()657 RegExpState *PeekRegExpState() const 658 { 659 ASSERT(stateStackLen_ >= 1); 660 return reinterpret_cast<RegExpState *>( 661 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 662 (stateStackLen_ - 1) * stateSize_); 663 } 664 665 void ReAllocStack(uint32_t stackLen); 666 IsWordChar(uint8_t value)667 inline bool IsWordChar(uint8_t value) const 668 { 669 return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || 670 (value == '_')); 671 } 672 IsTerminator(uint32_t value)673 inline bool IsTerminator(uint32_t value) const 674 { 675 // NOLINTNEXTLINE(readability-magic-numbers) 676 return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); 677 } 678 IsIgnoreCase()679 inline bool IsIgnoreCase() const 680 { 681 return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0; 682 } 683 IsUtf16()684 inline bool IsUtf16() const 685 { 686 return (flags_ & RegExpParser::FLAG_UTF16) != 0; 687 } 688 689 private: 690 static constexpr size_t CHAR_SIZE = 1; 691 static constexpr size_t WIDE_CHAR_SIZE = 2; 692 static constexpr size_t SAVE_RESET_START = 1; 693 static constexpr size_t SAVE_RESET_END = 2; 694 static constexpr size_t LOOP_MIN_OFFSET = 5; 695 static constexpr size_t LOOP_MAX_OFFSET = 9; 696 static constexpr size_t LOOP_PC_OFFSET = 1; 697 static constexpr size_t RANGE32_HEAD_OFFSET = 3; 698 static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; 699 static constexpr size_t RANGE32_MAX_OFFSET = 8; 700 static constexpr size_t RANGE32_OFFSET = 2; 701 static constexpr uint32_t STACK_MULTIPLIER = 2; 702 static constexpr uint32_t MIN_STACK_SIZE = 8; 703 static constexpr int TMP_BUF_SIZE = 128; 704 uint8_t *input_ = nullptr; 705 uint8_t *inputEnd_ = nullptr; 706 bool isWideChar_ = false; 707 708 uint32_t currentPc_ = 0; 709 const uint8_t *currentPtr_ = nullptr; 710 CaptureState *captureResultList_ = nullptr; 711 uintptr_t *stack_ = nullptr; 712 uint32_t currentStack_ = 0; 713 714 uint32_t nCapture_ = 0; 715 uint32_t nStack_ = 0; 716 717 uint32_t flags_ = 0; 718 uint32_t stateStackLen_ = 0; 719 uint32_t stateStackSize_ = 0; 720 uint32_t stateSize_ = 0; 721 uint8_t *stateStack_ = nullptr; 722 Chunk *chunk_ = nullptr; 723 }; 724 } // namespace panda::ecmascript 725 #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 726