1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 18 19 #include "ecmascript/regexp/regexp_parser.h" 20 #include "ecmascript/mem/chunk.h" 21 22 namespace panda::ecmascript { 23 class RegExpExecutor { 24 public: 25 struct CaptureState { 26 const uint8_t *captureStart; 27 const uint8_t *captureEnd; 28 }; 29 30 enum StateType : uint8_t { 31 STATE_SPLIT = 0, 32 STATE_MATCH_AHEAD, 33 STATE_NEGATIVE_MATCH_AHEAD, 34 }; 35 36 struct RegExpState { 37 StateType type_ = STATE_SPLIT; 38 uint32_t currentPc_ = 0; 39 uint32_t currentStack_ = 0; 40 const uint8_t *currentPtr_ = nullptr; 41 __extension__ CaptureState *captureResultList_[0]; // NOLINT(modernize-avoid-c-arrays) 42 }; 43 44 struct MatchResult { 45 uint32_t endIndex_ = 0; 46 uint32_t index_ = 0; 47 // first value is true if result is undefined 48 std::vector<std::pair<bool, JSHandle<EcmaString>>> captures_; 49 bool isSuccess_ = false; 50 }; 51 RegExpExecutor(Chunk * chunk)52 explicit RegExpExecutor(Chunk *chunk) : chunk_(chunk) 53 { 54 ASSERT(chunk_ != nullptr); 55 }; 56 57 ~RegExpExecutor() = default; 58 59 NO_COPY_SEMANTIC(RegExpExecutor); 60 NO_MOVE_SEMANTIC(RegExpExecutor); 61 62 bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); 63 64 bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); HandleFirstSplit()65 inline bool HandleFirstSplit() 66 { 67 if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && 68 (flags_ & RegExpParser::FLAG_STICKY) == 0) { 69 if (IsEOF()) { 70 if (MatchFailed()) { 71 return false; 72 } 73 } else { 74 AdvanceCurrentPtr(); 75 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 76 } 77 } 78 return true; 79 } 80 HandleOpAll(uint8_t opCode)81 inline bool HandleOpAll(uint8_t opCode) 82 { 83 if (IsEOF()) { 84 return !MatchFailed(); 85 } 86 uint32_t currentChar = GetCurrentChar(); 87 if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { 88 return !MatchFailed(); 89 } 90 Advance(opCode); 91 return true; 92 } 93 HandleOpChar(const DynChunk & byteCode,uint8_t opCode)94 inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode) 95 { 96 uint32_t expectedChar = 0; 97 if (opCode == RegExpOpCode::OP_CHAR32) { 98 expectedChar = byteCode.GetU32(GetCurrentPC() + 1); 99 } else { 100 expectedChar = byteCode.GetU16(GetCurrentPC() + 1); 101 } 102 if (IsEOF()) { 103 return !MatchFailed(); 104 } 105 uint32_t currentChar = GetCurrentChar(); 106 if (IsIgnoreCase()) { 107 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 108 } 109 if (currentChar == expectedChar) { 110 Advance(opCode); 111 } else { 112 if (MatchFailed()) { 113 return false; 114 } 115 } 116 return true; 117 } 118 HandleOpWordBoundary(uint8_t opCode)119 inline bool HandleOpWordBoundary(uint8_t opCode) 120 { 121 if (IsEOF()) { 122 if (opCode == RegExpOpCode::OP_WORD_BOUNDARY) { 123 Advance(opCode); 124 } else { 125 if (MatchFailed()) { 126 return false; 127 } 128 } 129 return true; 130 } 131 bool preIsWord = false; 132 if (GetCurrentPtr() != input_) { 133 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 134 preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); 135 } 136 bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_)); 137 if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && 138 ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || 139 ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && 140 ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { 141 Advance(opCode); 142 } else { 143 if (MatchFailed()) { 144 return false; 145 } 146 } 147 return true; 148 } 149 HandleOpLineStart(uint8_t opCode)150 inline bool HandleOpLineStart(uint8_t opCode) 151 { 152 if (IsEOF()) { 153 return !MatchFailed(); 154 } 155 if ((GetCurrentPtr() == input_) || 156 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 157 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { 158 Advance(opCode); 159 } else { 160 if (MatchFailed()) { 161 return false; 162 } 163 } 164 return true; 165 } 166 HandleOpLineEnd(uint8_t opCode)167 inline bool HandleOpLineEnd(uint8_t opCode) 168 { 169 if (IsEOF() || 170 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 171 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) { 172 Advance(opCode); 173 } else { 174 if (MatchFailed()) { 175 return false; 176 } 177 } 178 return true; 179 } 180 HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)181 inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) 182 { 183 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 184 ASSERT(captureIndex < nCapture_); 185 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 186 CaptureState *captureState = &captureResultList_[captureIndex]; 187 captureState->captureStart = GetCurrentPtr(); 188 Advance(opCode); 189 } 190 HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)191 inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) 192 { 193 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 194 ASSERT(captureIndex < nCapture_); 195 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 196 CaptureState *captureState = &captureResultList_[captureIndex]; 197 captureState->captureEnd = GetCurrentPtr(); 198 Advance(opCode); 199 } 200 HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)201 inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) 202 { 203 uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); 204 uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); 205 for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { 206 CaptureState *captureState = 207 &captureResultList_[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 208 captureState->captureStart = nullptr; 209 captureState->captureEnd = nullptr; 210 } 211 Advance(opCode); 212 } 213 HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)214 inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) 215 { 216 auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT); 217 ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); 218 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 219 Advance(opCode); 220 uint32_t splitPc = GetCurrentPC() + offset; 221 PushRegExpState(type, splitPc); 222 } 223 HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)224 inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) 225 { 226 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 227 Advance(opCode); 228 PushRegExpState(STATE_SPLIT, GetCurrentPC()); 229 AdvanceOffset(offset); 230 } 231 HandleOpPrev(uint8_t opCode)232 inline bool HandleOpPrev(uint8_t opCode) 233 { 234 if (GetCurrentPtr() == input_) { 235 if (MatchFailed()) { 236 return false; 237 } 238 } else { 239 PrevPtr(¤tPtr_, input_); 240 Advance(opCode); 241 } 242 return true; 243 } 244 HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)245 inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) 246 { 247 uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); 248 uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); 249 uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); 250 Advance(opCode); 251 uint32_t loopPcEnd = GetCurrentPC(); 252 uint32_t loopPcStart = GetCurrentPC() + pcOffset; 253 bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; 254 uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; 255 256 uint32_t loopCount = PeekStack(); 257 SetStackValue(++loopCount); 258 if (loopCount < loopMax) { 259 // greedy failed, goto next 260 if (loopCount >= quantifyMin) { 261 PushRegExpState(STATE_SPLIT, loopPcEnd); 262 } 263 // Goto loop start 264 SetCurrentPC(loopPcStart); 265 } else { 266 if (!isGreedy && (loopCount < quantifyMax)) { 267 PushRegExpState(STATE_SPLIT, loopPcStart); 268 } 269 } 270 } 271 HandleOpRange32(const DynChunk & byteCode)272 inline bool HandleOpRange32(const DynChunk &byteCode) 273 { 274 if (IsEOF()) { 275 return !MatchFailed(); 276 } 277 uint32_t currentChar = GetCurrentChar(); 278 if (IsIgnoreCase()) { 279 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 280 } 281 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 282 bool isFound = false; 283 int32_t idxMin = 0; 284 int32_t idxMax = static_cast<int32_t>(rangeCount) - 1; 285 int32_t idx = 0; 286 uint32_t low = 0; 287 uint32_t high = 288 byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + 289 RANGE32_MAX_HALF_OFFSET); 290 if (currentChar <= high) { 291 while (idxMin <= idxMax) { 292 idx = (idxMin + idxMax) / RANGE32_OFFSET; 293 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 294 RANGE32_MAX_OFFSET); 295 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 296 RANGE32_MAX_OFFSET + 297 RANGE32_MAX_HALF_OFFSET); 298 if (currentChar < low) { 299 idxMax = idx - 1; 300 } else if (currentChar > high) { 301 idxMin = idx + 1; 302 } else { 303 isFound = true; 304 break; 305 } 306 } 307 } 308 if (isFound) { 309 AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); 310 } else { 311 if (MatchFailed()) { 312 return false; 313 } 314 } 315 return true; 316 } 317 HandleOpRange(const DynChunk & byteCode)318 inline bool HandleOpRange(const DynChunk &byteCode) 319 { 320 if (IsEOF()) { 321 return !MatchFailed(); 322 } 323 uint32_t currentChar = GetCurrentChar(); 324 if (IsIgnoreCase()) { 325 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 326 } 327 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 328 bool isFound = false; 329 int32_t idxMin = 0; 330 int32_t idxMax = static_cast<int32_t>(rangeCount - 1); 331 int32_t idx = 0; 332 uint32_t low = 0; 333 uint32_t high = 334 byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 335 if (currentChar <= high) { 336 while (idxMin <= idxMax) { 337 idx = (idxMin + idxMax) / RANGE32_OFFSET; 338 low = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 339 RANGE32_MAX_HALF_OFFSET); 340 high = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 341 RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 342 if (currentChar < low) { 343 idxMax = idx - 1; 344 } else if (currentChar > high) { 345 idxMin = idx + 1; 346 } else { 347 isFound = true; 348 break; 349 } 350 } 351 } 352 if (isFound) { 353 AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); 354 } else { 355 if (MatchFailed()) { 356 return false; 357 } 358 } 359 return true; 360 } 361 HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)362 inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) 363 { 364 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 365 if (captureIndex >= nCapture_) { 366 return !MatchFailed(); 367 } 368 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 369 const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; 370 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 371 const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; 372 if (captureStart == nullptr || captureEnd == nullptr) { 373 Advance(opCode); 374 return true; 375 } 376 bool isMatched = true; 377 if (opCode == RegExpOpCode::OP_BACKREFERENCE) { 378 const uint8_t *refCptr = captureStart; 379 while (refCptr < captureEnd) { 380 if (IsEOF()) { 381 isMatched = false; 382 break; 383 } 384 // NOLINTNEXTLINE(readability-identifier-naming) 385 uint32_t c1 = GetChar(&refCptr, captureEnd); 386 // NOLINTNEXTLINE(readability-identifier-naming) 387 uint32_t c2 = GetChar(¤tPtr_, inputEnd_); 388 if (IsIgnoreCase()) { 389 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 390 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 391 } 392 if (c1 != c2) { 393 isMatched = false; 394 break; 395 } 396 } 397 if (!isMatched) { 398 if (MatchFailed()) { 399 return false; 400 } 401 } else { 402 Advance(opCode); 403 } 404 } else { 405 const uint8_t *refCptr = captureEnd; 406 while (refCptr > captureStart) { 407 if (GetCurrentPtr() == input_) { 408 isMatched = false; 409 break; 410 } 411 // NOLINTNEXTLINE(readability-identifier-naming) 412 uint32_t c1 = GetPrevChar(&refCptr, captureStart); 413 // NOLINTNEXTLINE(readability-identifier-naming) 414 uint32_t c2 = GetPrevChar(¤tPtr_, input_); 415 if (IsIgnoreCase()) { 416 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 417 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 418 } 419 if (c1 != c2) { 420 isMatched = false; 421 break; 422 } 423 } 424 if (!isMatched) { 425 if (MatchFailed()) { 426 return false; 427 } 428 } else { 429 Advance(opCode); 430 } 431 } 432 return true; 433 } 434 435 inline void Advance(uint8_t opCode, uint32_t offset = 0) 436 { 437 currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize()); 438 } 439 AdvanceOffset(uint32_t offset)440 inline void AdvanceOffset(uint32_t offset) 441 { 442 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 443 currentPc_ += offset; 444 } 445 GetCurrentChar()446 inline uint32_t GetCurrentChar() 447 { 448 return GetChar(¤tPtr_, inputEnd_); 449 } 450 AdvanceCurrentPtr()451 inline void AdvanceCurrentPtr() 452 { 453 AdvancePtr(¤tPtr_, inputEnd_); 454 } 455 GetChar(const uint8_t ** pp,const uint8_t * end)456 uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const 457 { 458 uint32_t c = 0; 459 const uint8_t *cptr = *pp; 460 if (!isWideChar_) { 461 c = *cptr; 462 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 463 } else { 464 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 465 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 466 c = c1; 467 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 468 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 469 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 470 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 471 if (U16_IS_TRAIL(c1)) { 472 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 473 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 474 } 475 } 476 *pp = cptr; 477 } 478 return c; 479 } 480 PeekChar(const uint8_t * p,const uint8_t * end)481 uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const 482 { 483 uint32_t c = 0; 484 const uint8_t *cptr = p; 485 if (!isWideChar_) { 486 c = *cptr; 487 } else { 488 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 489 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 490 c = c1; 491 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 492 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 493 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 494 if (U16_IS_TRAIL(c1)) { 495 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 496 } 497 } 498 } 499 return c; 500 } 501 AdvancePtr(const uint8_t ** pp,const uint8_t * end)502 void AdvancePtr(const uint8_t **pp, const uint8_t *end) const 503 { 504 const uint8_t *cptr = *pp; 505 if (!isWideChar_) { 506 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 507 } else { 508 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 509 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 510 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 511 if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { 512 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 513 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 514 if (U16_IS_TRAIL(c1)) { 515 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 516 } 517 } 518 *pp = cptr; 519 } 520 } 521 PeekPrevChar(const uint8_t * p,const uint8_t * start)522 uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const 523 { 524 uint32_t c = 0; 525 const uint8_t *cptr = p; 526 if (!isWideChar_) { 527 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 528 } else { 529 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 530 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 531 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 532 c = c1; 533 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 534 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 535 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 536 if (U16_IS_LEAD(c1)) { 537 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 538 } 539 } 540 } 541 return c; 542 } 543 GetPrevChar(const uint8_t ** pp,const uint8_t * start)544 uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const 545 { 546 uint32_t c = 0; 547 const uint8_t *cptr = *pp; 548 if (!isWideChar_) { 549 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 550 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 551 *pp = cptr; 552 } else { 553 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 554 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 555 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 556 c = c1; 557 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 558 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 559 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 560 if (U16_IS_LEAD(c1)) { 561 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 562 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 563 } 564 } 565 *pp = cptr; 566 } 567 return c; 568 } 569 PrevPtr(const uint8_t ** pp,const uint8_t * start)570 void PrevPtr(const uint8_t **pp, const uint8_t *start) const 571 { 572 const uint8_t *cptr = *pp; 573 if (!isWideChar_) { 574 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 575 *pp = cptr; 576 } else { 577 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 578 uint16_t c1 = *(const uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 579 if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { 580 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 581 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 582 if (U16_IS_LEAD(c1)) { 583 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 584 } 585 } 586 *pp = cptr; 587 } 588 } 589 590 bool MatchFailed(bool isMatched = false); 591 SetCurrentPC(uint32_t pc)592 void SetCurrentPC(uint32_t pc) 593 { 594 currentPc_ = pc; 595 } 596 SetCurrentPtr(const uint8_t * ptr)597 void SetCurrentPtr(const uint8_t *ptr) 598 { 599 currentPtr_ = ptr; 600 } 601 IsEOF()602 bool IsEOF() const 603 { 604 return currentPtr_ >= inputEnd_; 605 } 606 GetCurrentPC()607 uint32_t GetCurrentPC() const 608 { 609 return currentPc_; 610 } 611 PushStack(uintptr_t val)612 void PushStack(uintptr_t val) 613 { 614 ASSERT(currentStack_ < nStack_); 615 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 616 stack_[currentStack_++] = val; 617 } 618 SetStackValue(uintptr_t val)619 void SetStackValue(uintptr_t val) const 620 { 621 ASSERT(currentStack_ >= 1); 622 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 623 stack_[currentStack_ - 1] = val; 624 } 625 PopStack()626 uintptr_t PopStack() 627 { 628 ASSERT(currentStack_ >= 1); 629 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 630 return stack_[--currentStack_]; 631 } 632 PeekStack()633 uintptr_t PeekStack() const 634 { 635 ASSERT(currentStack_ >= 1); 636 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 637 return stack_[currentStack_ - 1]; 638 } 639 GetCurrentPtr()640 const uint8_t *GetCurrentPtr() const 641 { 642 return currentPtr_; 643 } 644 GetCaptureResultList()645 CaptureState *GetCaptureResultList() const 646 { 647 return captureResultList_; 648 } 649 650 void DumpResult(std::ostream &out) const; 651 652 MatchResult GetResult(const JSThread *thread, bool isSuccess) const; 653 654 void PushRegExpState(StateType type, uint32_t pc); 655 656 RegExpState *PopRegExpState(bool copyCaptrue = true); 657 DropRegExpState()658 void DropRegExpState() 659 { 660 stateStackLen_--; 661 } 662 PeekRegExpState()663 RegExpState *PeekRegExpState() const 664 { 665 ASSERT(stateStackLen_ >= 1); 666 return reinterpret_cast<RegExpState *>( 667 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 668 (stateStackLen_ - 1) * stateSize_); 669 } 670 671 void ReAllocStack(uint32_t stackLen); 672 IsWordChar(uint8_t value)673 inline bool IsWordChar(uint8_t value) const 674 { 675 return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || 676 (value == '_')); 677 } 678 IsTerminator(uint32_t value)679 inline bool IsTerminator(uint32_t value) const 680 { 681 // NOLINTNEXTLINE(readability-magic-numbers) 682 return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); 683 } 684 IsIgnoreCase()685 inline bool IsIgnoreCase() const 686 { 687 return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0; 688 } 689 IsUtf16()690 inline bool IsUtf16() const 691 { 692 return (flags_ & RegExpParser::FLAG_UTF16) != 0; 693 } 694 695 private: 696 static constexpr size_t CHAR_SIZE = 1; 697 static constexpr size_t WIDE_CHAR_SIZE = 2; 698 static constexpr size_t SAVE_RESET_START = 1; 699 static constexpr size_t SAVE_RESET_END = 2; 700 static constexpr size_t LOOP_MIN_OFFSET = 5; 701 static constexpr size_t LOOP_MAX_OFFSET = 9; 702 static constexpr size_t LOOP_PC_OFFSET = 1; 703 static constexpr size_t RANGE32_HEAD_OFFSET = 3; 704 static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; 705 static constexpr size_t RANGE32_MAX_OFFSET = 8; 706 static constexpr size_t RANGE32_OFFSET = 2; 707 static constexpr uint32_t STACK_MULTIPLIER = 2; 708 static constexpr uint32_t MIN_STACK_SIZE = 8; 709 uint8_t *input_ = nullptr; 710 uint8_t *inputEnd_ = nullptr; 711 bool isWideChar_ = false; 712 713 uint32_t currentPc_ = 0; 714 const uint8_t *currentPtr_ = nullptr; 715 CaptureState *captureResultList_ = nullptr; 716 uintptr_t *stack_ = nullptr; 717 uint32_t currentStack_ = 0; 718 719 uint32_t nCapture_ = 0; 720 uint32_t nStack_ = 0; 721 722 uint32_t flags_ = 0; 723 uint32_t stateStackLen_ = 0; 724 uint32_t stateStackSize_ = 0; 725 uint32_t stateSize_ = 0; 726 uint8_t *stateStack_ = nullptr; 727 Chunk *chunk_ = nullptr; 728 }; 729 } // namespace panda::ecmascript 730 #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 731