1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 18 19 #include "ecmascript/builtins/builtins_regexp.h" 20 #include "ecmascript/regexp/regexp_parser.h" 21 #include "ecmascript/mem/regexp_cached_chunk.h" 22 #include "ecmascript/js_handle.h" 23 24 namespace panda::ecmascript { 25 class RegExpExecutor { 26 public: 27 struct CaptureState { 28 const uint8_t *captureStart; 29 const uint8_t *captureEnd; 30 }; 31 32 enum StateType : uint8_t { 33 STATE_SPLIT = 0, /* Do not re-order. */ 34 STATE_NEGATIVE_MATCH_AHEAD, /* OP_NEGATIVE_MATCH_AHEAD - OP_SPLIT_NEXT */ 35 STATE_MATCH_AHEAD, /* OP_MATCH_AHEAD - OP_SPLIT_NEXT */ 36 STATE_SAVE, 37 STATE_PUSH, 38 STATE_POP, 39 STATE_SET, 40 STATE_INVALID, 41 }; 42 43 struct RegExpState { 44 StateType type_ = STATE_SPLIT; 45 uint32_t currentPc_ = 0; 46 const uint8_t *currentPtr_ = nullptr; 47 }; 48 RegExpExecutor(RegExpCachedChunk * chunk)49 explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk) 50 { 51 ASSERT(chunk_ != nullptr); 52 }; 53 54 ~RegExpExecutor() = default; 55 56 NO_COPY_SEMANTIC(RegExpExecutor); 57 NO_MOVE_SEMANTIC(RegExpExecutor); 58 59 bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); 60 61 bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); HandleFirstSplit()62 inline bool HandleFirstSplit() 63 { 64 if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 && 65 (flags_ & RegExpParser::FLAG_STICKY) == 0) { 66 if (IsEOF()) { 67 if (MatchFailed()) { 68 return false; 69 } 70 } else if (prefilter_ && !isWideChar_) { 71 ++currentPtr_; 72 currentPtr_ = (const uint8_t *)memchr(currentPtr_, prefilter_, inputEnd_ - currentPtr_); 73 if (currentPtr_ == nullptr) { 74 currentPtr_ = inputEnd_; 75 } 76 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 77 } else { 78 AdvanceCurrentPtr(); 79 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 80 } 81 } 82 return true; 83 } 84 HandleOpAll(uint8_t opCode)85 inline bool HandleOpAll(uint8_t opCode) 86 { 87 if (IsEOF()) { 88 return !MatchFailed(); 89 } 90 uint32_t currentChar = GetCurrentChar(); 91 if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) { 92 return !MatchFailed(); 93 } 94 Advance(opCode); 95 return true; 96 } 97 HandleOpChar(const DynChunk & byteCode,uint8_t opCode)98 inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode) 99 { 100 uint32_t expectedChar = 0; 101 if (opCode == RegExpOpCode::OP_CHAR32) { 102 expectedChar = byteCode.GetU32(GetCurrentPC() + 1); 103 } else { 104 expectedChar = byteCode.GetU16(GetCurrentPC() + 1); 105 } 106 if (IsEOF()) { 107 return !MatchFailed(); 108 } 109 uint32_t currentChar = GetCurrentChar(); 110 if (IsIgnoreCase()) { 111 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 112 } 113 if (currentChar == expectedChar) { 114 Advance(opCode); 115 } else { 116 if (MatchFailed()) { 117 return false; 118 } 119 } 120 return true; 121 } 122 HandleOpWordBoundary(uint8_t opCode)123 inline bool HandleOpWordBoundary(uint8_t opCode) 124 { 125 bool preIsWord = false; 126 if (GetCurrentPtr() != input_) { 127 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 128 preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_)); 129 } 130 bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_)); 131 if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) && 132 ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) || 133 ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) && 134 ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) { 135 Advance(opCode); 136 } else { 137 if (MatchFailed()) { 138 return false; 139 } 140 } 141 return true; 142 } 143 HandleOpLineStart(uint8_t opCode)144 inline bool HandleOpLineStart(uint8_t opCode) 145 { 146 if ((GetCurrentPtr() == input_) || 147 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 148 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) { 149 Advance(opCode); 150 } else { 151 if (MatchFailed()) { 152 return false; 153 } 154 } 155 return true; 156 } 157 HandleOpLineEnd(uint8_t opCode)158 inline bool HandleOpLineEnd(uint8_t opCode) 159 { 160 if (IsEOF() || 161 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 162 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 163 && (PeekChar(currentPtr_, inputEnd_) == '\n' || PeekChar(currentPtr_, inputEnd_) == '\r'))) { 164 Advance(opCode); 165 } else { 166 if (MatchFailed()) { 167 return false; 168 } 169 } 170 return true; 171 } 172 HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)173 inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode) 174 { 175 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 176 ASSERT(captureIndex < nCapture_); 177 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 178 CaptureState *captureState = &captureResultList_[captureIndex]; 179 // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 180 PushRegExpState(STATE_SAVE, captureIndex * 2, reinterpret_cast<uintptr_t>(captureState->captureStart)); 181 captureState->captureStart = GetCurrentPtr(); 182 Advance(opCode); 183 } 184 HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)185 inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode) 186 { 187 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 188 ASSERT(captureIndex < nCapture_); 189 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 190 CaptureState *captureState = &captureResultList_[captureIndex]; 191 // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 192 PushRegExpState(STATE_SAVE, captureIndex * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd)); 193 captureState->captureEnd = GetCurrentPtr(); 194 Advance(opCode); 195 } 196 HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)197 inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode) 198 { 199 uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START); 200 uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END); 201 for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) { 202 CaptureState *captureState = 203 &captureResultList_[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 204 // 2: Even indexes store captureStart. 205 // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 206 PushRegExpState(STATE_SAVE, i * 2, reinterpret_cast<uintptr_t>(captureState->captureStart)); 207 // 2: Even indexes store captureStart. 208 // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ... 209 PushRegExpState(STATE_SAVE, i * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd)); 210 captureState->captureStart = nullptr; 211 captureState->captureEnd = nullptr; 212 } 213 Advance(opCode); 214 } 215 HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)216 inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode) 217 { 218 auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT); 219 ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD); 220 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 221 Advance(opCode); 222 uint32_t splitPc = GetCurrentPC() + offset; 223 PushRegExpState(type, splitPc); 224 } 225 HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)226 inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode) 227 { 228 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 229 Advance(opCode); 230 PushRegExpState(STATE_SPLIT, GetCurrentPC()); 231 AdvanceOffset(offset); 232 } 233 HandleOpPrev(uint8_t opCode)234 inline bool HandleOpPrev(uint8_t opCode) 235 { 236 if (GetCurrentPtr() == input_) { 237 if (MatchFailed()) { 238 return false; 239 } 240 } else { 241 PrevPtr(¤tPtr_, input_); 242 Advance(opCode); 243 } 244 return true; 245 } 246 HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)247 inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode) 248 { 249 uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET); 250 uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET); 251 uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET); 252 Advance(opCode); 253 uint32_t loopPcEnd = GetCurrentPC(); 254 uint32_t loopPcStart = GetCurrentPC() + pcOffset; 255 bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY; 256 uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin; 257 258 uint32_t loopCount = PeekStack(); 259 PushRegExpState(StateType::STATE_SET, 0, loopCount); 260 SetStackValue(++loopCount); 261 if (loopCount < loopMax) { 262 // greedy failed, goto next 263 if (loopCount >= quantifyMin) { 264 PushRegExpState(STATE_SPLIT, loopPcEnd); 265 } 266 // Goto loop start 267 SetCurrentPC(loopPcStart); 268 } else { 269 if (!isGreedy && (loopCount < quantifyMax)) { 270 PushRegExpState(STATE_SPLIT, loopPcStart); 271 } 272 } 273 } 274 HandleOpRange32(const DynChunk & byteCode)275 inline bool HandleOpRange32(const DynChunk &byteCode) 276 { 277 if (IsEOF()) { 278 return !MatchFailed(); 279 } 280 uint32_t currentChar = GetCurrentChar(); 281 if (IsIgnoreCase()) { 282 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 283 } 284 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 285 bool isFound = false; 286 int32_t idxMin = 0; 287 int32_t idxMax = static_cast<int32_t>(rangeCount) - 1; 288 int32_t idx = 0; 289 uint32_t low = 0; 290 uint32_t high = 291 byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + 292 RANGE32_MAX_HALF_OFFSET); 293 if (currentChar <= high) { 294 while (idxMin <= idxMax) { 295 idx = (idxMin + idxMax) / RANGE32_OFFSET; 296 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 297 RANGE32_MAX_OFFSET); 298 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 299 RANGE32_MAX_OFFSET + 300 RANGE32_MAX_HALF_OFFSET); 301 if (currentChar < low) { 302 idxMax = idx - 1; 303 } else if (currentChar > high) { 304 idxMin = idx + 1; 305 } else { 306 isFound = true; 307 break; 308 } 309 } 310 } 311 if (isFound) { 312 AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET); 313 } else { 314 if (MatchFailed()) { 315 return false; 316 } 317 } 318 return true; 319 } 320 HandleOpRange(const DynChunk & byteCode)321 inline bool HandleOpRange(const DynChunk &byteCode) 322 { 323 if (IsEOF()) { 324 return !MatchFailed(); 325 } 326 uint32_t currentChar = GetCurrentChar(); 327 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1); 328 bool flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount); 329 if (IsIgnoreCase() && !flag) { 330 currentChar = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar)); 331 flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount); 332 } 333 if (flag) { 334 AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET); 335 } else { 336 if (MatchFailed()) { 337 return false; 338 } 339 } 340 return true; 341 } 342 HandleOpSparse(const DynChunk & byteCode)343 inline bool HandleOpSparse(const DynChunk &byteCode) 344 { 345 if (IsEOF()) { 346 return !MatchFailed(); 347 } 348 uint32_t currentChar = GetCurrentChar(); 349 if (IsIgnoreCase()) { 350 currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16())); 351 } 352 uint16_t sparseCount = byteCode.GetU16(GetCurrentPC() + 1); 353 for (uint32_t i = 0; i < sparseCount; i++) { 354 uint32_t sparseChar = byteCode.GetU16(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET); 355 if (currentChar == sparseChar) { 356 uint32_t offset = byteCode.GetU32(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET + 357 SPARSE_OFF_OFFSET); 358 AdvanceOffset(offset + sparseCount * SPARSE_MAX_OFFSET + SPARSE_HEAD_OFFSET); 359 return true; 360 } 361 } 362 return !MatchFailed(); 363 } 364 HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)365 inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode) 366 { 367 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1); 368 if (captureIndex >= nCapture_) { 369 return !MatchFailed(); 370 } 371 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 372 const uint8_t *captureStart = captureResultList_[captureIndex].captureStart; 373 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 374 const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd; 375 if (captureStart == nullptr || captureEnd == nullptr) { 376 Advance(opCode); 377 return true; 378 } 379 bool isMatched = true; 380 if (opCode == RegExpOpCode::OP_BACKREFERENCE) { 381 const uint8_t *refCptr = captureStart; 382 while (refCptr < captureEnd) { 383 if (IsEOF()) { 384 isMatched = false; 385 break; 386 } 387 // NOLINTNEXTLINE(readability-identifier-naming) 388 uint32_t c1 = GetChar(&refCptr, captureEnd); 389 // NOLINTNEXTLINE(readability-identifier-naming) 390 uint32_t c2 = GetChar(¤tPtr_, inputEnd_); 391 if (IsIgnoreCase()) { 392 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 393 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 394 } 395 if (c1 != c2) { 396 isMatched = false; 397 break; 398 } 399 } 400 if (!isMatched) { 401 if (MatchFailed()) { 402 return false; 403 } 404 } else { 405 Advance(opCode); 406 } 407 } else { 408 const uint8_t *refCptr = captureEnd; 409 while (refCptr > captureStart) { 410 if (GetCurrentPtr() == input_) { 411 isMatched = false; 412 break; 413 } 414 // NOLINTNEXTLINE(readability-identifier-naming) 415 uint32_t c1 = GetPrevChar(&refCptr, captureStart); 416 // NOLINTNEXTLINE(readability-identifier-naming) 417 uint32_t c2 = GetPrevChar(¤tPtr_, input_); 418 if (IsIgnoreCase()) { 419 c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16())); 420 c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16())); 421 } 422 if (c1 != c2) { 423 isMatched = false; 424 break; 425 } 426 } 427 if (!isMatched) { 428 if (MatchFailed()) { 429 return false; 430 } 431 } else { 432 Advance(opCode); 433 } 434 } 435 return true; 436 } 437 438 inline void Advance(uint8_t opCode, uint32_t offset = 0) 439 { 440 currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize()); 441 } 442 AdvanceOffset(uint32_t offset)443 inline void AdvanceOffset(uint32_t offset) 444 { 445 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 446 currentPc_ += offset; 447 } 448 GetCurrentChar()449 inline uint32_t GetCurrentChar() 450 { 451 return GetChar(¤tPtr_, inputEnd_); 452 } 453 AdvanceCurrentPtr()454 inline void AdvanceCurrentPtr() 455 { 456 AdvancePtr(¤tPtr_, inputEnd_); 457 } 458 GetChar(const uint8_t ** pp,const uint8_t * end)459 uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const 460 { 461 uint32_t c = 0; 462 const uint8_t *cptr = *pp; 463 if (!isWideChar_) { 464 c = *cptr; 465 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 466 } else { 467 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 468 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 469 c = c1; 470 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 471 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 472 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 473 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 474 if (U16_IS_TRAIL(c1)) { 475 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 476 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 477 } 478 } 479 *pp = cptr; 480 } 481 return c; 482 } 483 PeekChar(const uint8_t * p,const uint8_t * end)484 uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const 485 { 486 uint32_t c = 0; 487 const uint8_t *cptr = p; 488 if (!isWideChar_) { 489 c = *cptr; 490 } else { 491 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 492 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 493 c = c1; 494 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 495 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 496 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 497 if (U16_IS_TRAIL(c1)) { 498 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 499 } 500 } 501 } 502 return c; 503 } 504 AdvancePtr(const uint8_t ** pp,const uint8_t * end)505 void AdvancePtr(const uint8_t **pp, const uint8_t *end) const 506 { 507 const uint8_t *cptr = *pp; 508 if (!isWideChar_) { 509 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 510 } else { 511 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 512 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 513 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 514 if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { 515 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 516 c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 517 if (U16_IS_TRAIL(c1)) { 518 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 519 } 520 } 521 *pp = cptr; 522 } 523 } 524 PeekPrevChar(const uint8_t * p,const uint8_t * start)525 uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const 526 { 527 uint32_t c = 0; 528 const uint8_t *cptr = p; 529 if (!isWideChar_) { 530 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 531 } else { 532 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 533 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 534 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 535 c = c1; 536 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 537 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 538 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 539 if (U16_IS_LEAD(c1)) { 540 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 541 } 542 } 543 } 544 return c; 545 } 546 GetPrevChar(const uint8_t ** pp,const uint8_t * start)547 uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const 548 { 549 uint32_t c = 0; 550 const uint8_t *cptr = *pp; 551 if (!isWideChar_) { 552 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 553 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 554 *pp = cptr; 555 } else { 556 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 557 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 558 uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr)); 559 c = c1; 560 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 561 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 562 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 563 if (U16_IS_LEAD(c1)) { 564 c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c)); // NOLINTNEXTLINE(hicpp-signed-bitwise) 565 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 566 } 567 } 568 *pp = cptr; 569 } 570 return c; 571 } 572 PrevPtr(const uint8_t ** pp,const uint8_t * start)573 void PrevPtr(const uint8_t **pp, const uint8_t *start) const 574 { 575 const uint8_t *cptr = *pp; 576 if (!isWideChar_) { 577 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 578 *pp = cptr; 579 } else { 580 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 581 uint16_t c1 = *(const uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 582 if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { 583 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 584 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1]; 585 if (U16_IS_LEAD(c1)) { 586 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 587 } 588 } 589 *pp = cptr; 590 } 591 } 592 593 bool MatchFailed(bool isMatched = false); 594 SetCurrentPC(uint32_t pc)595 void SetCurrentPC(uint32_t pc) 596 { 597 currentPc_ = pc; 598 } 599 SetCurrentPtr(const uint8_t * ptr)600 void SetCurrentPtr(const uint8_t *ptr) 601 { 602 currentPtr_ = ptr; 603 } 604 IsEOF()605 bool IsEOF() const 606 { 607 return currentPtr_ >= inputEnd_; 608 } 609 IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)610 bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar, 611 const DynChunk &byteCode, const uint16_t rangeCount) 612 { 613 bool isFound = false; 614 int32_t idxMin = 0; 615 int32_t idxMax = static_cast<int32_t>(rangeCount - 1); 616 int32_t idx = 0; 617 uint32_t low = 0; 618 uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + 619 static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 620 if (nowChar <= high) { 621 while (idxMin <= idxMax) { 622 idx = (idxMin + idxMax) / RANGE32_OFFSET; 623 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 624 RANGE32_MAX_HALF_OFFSET); 625 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) * 626 RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET); 627 if (nowChar < low) { 628 idxMax = idx - 1; 629 } else if (nowChar > high) { 630 idxMin = idx + 1; 631 } else { 632 isFound = true; 633 break; 634 } 635 } 636 } 637 return isFound; 638 } 639 GetCurrentPC()640 uint32_t GetCurrentPC() const 641 { 642 return currentPc_; 643 } 644 PushStack(uintptr_t val)645 void PushStack(uintptr_t val) 646 { 647 ASSERT(currentStack_ < nStack_); 648 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 649 stack_[currentStack_++] = val; 650 } 651 SetStackValue(uintptr_t val)652 void SetStackValue(uintptr_t val) const 653 { 654 ASSERT(currentStack_ >= 1); 655 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 656 stack_[currentStack_ - 1] = val; 657 } 658 PopStack()659 uintptr_t PopStack() 660 { 661 ASSERT(currentStack_ >= 1); 662 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 663 return stack_[--currentStack_]; 664 } 665 PeekStack()666 uintptr_t PeekStack() const 667 { 668 ASSERT(currentStack_ >= 1); 669 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 670 return stack_[currentStack_ - 1]; 671 } 672 GetCurrentPtr()673 const uint8_t *GetCurrentPtr() const 674 { 675 return currentPtr_; 676 } 677 GetCaptureResultList()678 CaptureState *GetCaptureResultList() const 679 { 680 return captureResultList_; 681 } 682 683 void DumpResult(std::ostream &out) const; 684 685 void GetResult(JSThread *thread); 686 687 void PushRegExpState(StateType type, uint32_t pc); 688 void PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr); 689 690 StateType PopRegExpState(bool copyCapture = true); 691 DropRegExpState()692 void DropRegExpState() 693 { 694 stateStackLen_--; 695 } 696 PeekRegExpState()697 RegExpState *PeekRegExpState() const 698 { 699 ASSERT(stateStackLen_ >= 1); 700 return reinterpret_cast<RegExpState *>( 701 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 702 (stateStackLen_ - 1) * sizeof(RegExpState)); 703 } 704 705 void ReAllocStack(uint32_t stackLen); 706 IsWordChar(uint8_t value)707 inline bool IsWordChar(uint8_t value) const 708 { 709 return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || 710 (value == '_')); 711 } 712 IsTerminator(uint32_t value)713 inline bool IsTerminator(uint32_t value) const 714 { 715 // NOLINTNEXTLINE(readability-magic-numbers) 716 return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); 717 } 718 IsIgnoreCase()719 inline bool IsIgnoreCase() const 720 { 721 return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0; 722 } 723 IsUtf16()724 inline bool IsUtf16() const 725 { 726 return (flags_ & RegExpParser::FLAG_UTF16) != 0; 727 } 728 729 private: 730 static constexpr size_t CHAR_SIZE = 1; 731 static constexpr size_t WIDE_CHAR_SIZE = 2; 732 static constexpr size_t SAVE_RESET_START = 1; 733 static constexpr size_t SAVE_RESET_END = 2; 734 static constexpr size_t LOOP_MIN_OFFSET = 5; 735 static constexpr size_t LOOP_MAX_OFFSET = 9; 736 static constexpr size_t LOOP_PC_OFFSET = 1; 737 static constexpr size_t RANGE32_HEAD_OFFSET = 3; 738 static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; 739 static constexpr size_t RANGE32_MAX_OFFSET = 8; 740 static constexpr size_t RANGE32_OFFSET = 2; 741 static constexpr size_t SPARSE_HEAD_OFFSET = 3; 742 static constexpr size_t SPARSE_OFF_OFFSET = 2; 743 static constexpr size_t SPARSE_MAX_OFFSET = 6; 744 static constexpr uint32_t STACK_MULTIPLIER = 2; 745 static constexpr uint32_t MIN_STACK_SIZE = 8; 746 static constexpr int TMP_BUF_SIZE = 128; 747 uint8_t *input_ = nullptr; 748 uint8_t *inputEnd_ = nullptr; 749 bool isWideChar_ = false; 750 uint16_t prefilter_ = 0; 751 752 uint32_t currentPc_ = 0; 753 const uint8_t *currentPtr_ = nullptr; 754 CaptureState *captureResultList_ = nullptr; 755 uintptr_t *stack_ = nullptr; 756 uint32_t currentStack_ = 0; 757 758 uint32_t nCapture_ = 0; 759 uint32_t nStack_ = 0; 760 761 uint32_t flags_ = 0; 762 uint32_t stateStackLen_ = 0; 763 uint32_t stateStackSize_ = 0; 764 uint8_t *stateStack_ = nullptr; 765 RegExpCachedChunk *chunk_ = nullptr; 766 }; 767 } // namespace panda::ecmascript 768 #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 769