1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 18 19 #include "ecmascript/regexp/regexp_parser.h" 20 #include "ecmascript/mem/chunk.h" 21 22 namespace panda::ecmascript { 23 class RegExpExecutor { 24 public: 25 struct CaptureState { 26 const uint8_t *captureStart; 27 const uint8_t *captureEnd; 28 }; 29 30 enum StateType : uint8_t { 31 STATE_SPLIT = 0, 32 STATE_MATCH_AHEAD, 33 STATE_NEGATIVE_MATCH_AHEAD, 34 }; 35 36 struct RegExpState { 37 StateType type_ = STATE_SPLIT; 38 uint32_t currentPc_ = 0; 39 uint32_t currentStack_ = 0; 40 const uint8_t *currentPtr_ = nullptr; 41 __extension__ CaptureState *captureResultList_[0]; // NOLINT(modernize-avoid-c-arrays) 42 }; 43 44 struct MatchResult { 45 uint32_t endIndex_ = 0; 46 uint32_t index_ = 0; 47 // first value is true if result is undefined 48 std::vector<std::pair<bool, JSHandle<EcmaString>>> captures_; 49 bool isSuccess_ = false; 50 }; 51 RegExpExecutor(Chunk * chunk)52 explicit RegExpExecutor(Chunk *chunk) : chunk_(chunk) 53 { 54 ASSERT(chunk_ != nullptr); 55 }; 56 57 ~RegExpExecutor() = default; 58 59 NO_COPY_SEMANTIC(RegExpExecutor); 60 NO_MOVE_SEMANTIC(RegExpExecutor); 61 62 bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false); 63 64 bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd); 65 bool HandleFirstSplit(); 66 bool HandleOpAll(uint8_t opCode); 67 bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode); 68 bool HandleOpWordBoundary(uint8_t opCode); 69 bool HandleOpLineStart(uint8_t opCode); 70 bool HandleOpLineEnd(uint8_t opCode); 71 void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode); 72 void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode); 73 void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode); 74 void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode); 75 void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode); 76 bool HandleOpPrev(uint8_t opCode); 77 void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode); 78 bool HandleOpRange32(const DynChunk &byteCode); 79 bool HandleOpRange(const DynChunk &byteCode); 80 bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode); 81 82 inline void Advance(uint8_t opCode, uint32_t offset = 0) 83 { 84 currentPc_ += offset + RegExpOpCode::GetRegExpOpCode(opCode)->GetSize(); 85 } 86 AdvanceOffset(uint32_t offset)87 inline void AdvanceOffset(uint32_t offset) 88 { 89 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 90 currentPc_ += offset; 91 } 92 GetCurrentChar()93 inline uint32_t GetCurrentChar() 94 { 95 return GetChar(¤tPtr_, inputEnd_); 96 } 97 AdvanceCurrentPtr()98 inline void AdvanceCurrentPtr() 99 { 100 AdvancePtr(¤tPtr_, inputEnd_); 101 } 102 GetChar(const uint8_t ** pp,const uint8_t * end)103 uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const 104 { 105 uint32_t c; 106 const uint8_t *cptr = *pp; 107 if (!isWideChar_) { 108 c = *cptr; 109 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 110 } else { 111 uint16_t c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 112 c = c1; 113 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 114 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 115 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 116 if (U16_IS_TRAIL(c1)) { 117 c = U16_GET_SUPPLEMENTARY(c, c1); // NOLINTNEXTLINE(hicpp-signed-bitwise) 118 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 119 } 120 } 121 *pp = cptr; 122 } 123 return c; 124 } 125 PeekChar(const uint8_t * p,const uint8_t * end)126 uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const 127 { 128 uint32_t c; 129 const uint8_t *cptr = p; 130 if (!isWideChar_) { 131 c = *cptr; 132 } else { 133 uint16_t c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 134 c = c1; 135 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 136 if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) { 137 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 138 if (U16_IS_TRAIL(c1)) { 139 c = U16_GET_SUPPLEMENTARY(c, c1); // NOLINTNEXTLINE(hicpp-signed-bitwise) 140 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 141 } 142 } 143 } 144 return c; 145 } 146 AdvancePtr(const uint8_t ** pp,const uint8_t * end)147 void AdvancePtr(const uint8_t **pp, const uint8_t *end) const 148 { 149 const uint8_t *cptr = *pp; 150 if (!isWideChar_) { 151 *pp += 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 152 } else { 153 uint16_t c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 154 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 155 if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) { 156 c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 157 if (U16_IS_TRAIL(c1)) { 158 cptr += WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 159 } 160 } 161 *pp = cptr; 162 } 163 } 164 PeekPrevChar(const uint8_t * p,const uint8_t * start)165 uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const 166 { 167 uint32_t c; 168 const uint8_t *cptr = p; 169 if (!isWideChar_) { 170 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 171 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 172 } else { 173 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 174 uint16_t c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 175 c = c1; 176 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 177 c1 = ((uint16_t *)cptr)[-1]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 178 if (U16_IS_LEAD(c1)) { 179 c = U16_GET_SUPPLEMENTARY(c1, c); // NOLINTNEXTLINE(hicpp-signed-bitwise) 180 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 181 } 182 } 183 } 184 return c; 185 } 186 GetPrevChar(const uint8_t ** pp,const uint8_t * start)187 uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const 188 { 189 uint32_t c; 190 const uint8_t *cptr = *pp; 191 if (!isWideChar_) { 192 c = *(cptr - 1); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 193 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 194 *pp = cptr; 195 } else { 196 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 197 uint16_t c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 198 c = c1; 199 if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) { 200 c1 = ((uint16_t *)cptr)[-1]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 201 if (U16_IS_LEAD(c1)) { 202 c = U16_GET_SUPPLEMENTARY(c1, c); // NOLINTNEXTLINE(hicpp-signed-bitwise) 203 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 204 } 205 } 206 *pp = cptr; 207 } 208 return c; 209 } 210 PrevPtr(const uint8_t ** pp,const uint8_t * start)211 void PrevPtr(const uint8_t **pp, const uint8_t *start) const 212 { 213 const uint8_t *cptr = *pp; 214 if (!isWideChar_) { 215 cptr -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 216 *pp = cptr; 217 } else { 218 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 219 uint16_t c1 = *(uint16_t *)cptr; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) 220 if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) { 221 c1 = ((uint16_t *)cptr)[-1]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 222 if (U16_IS_LEAD(c1)) { 223 cptr -= WIDE_CHAR_SIZE; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 224 } 225 } 226 *pp = cptr; 227 } 228 } 229 230 bool MatchFailed(bool isMatched = false); 231 SetCurrentPC(uint32_t pc)232 void SetCurrentPC(uint32_t pc) 233 { 234 currentPc_ = pc; 235 } 236 SetCurrentPtr(const uint8_t * ptr)237 void SetCurrentPtr(const uint8_t *ptr) 238 { 239 currentPtr_ = ptr; 240 } 241 IsEOF()242 bool IsEOF() const 243 { 244 return currentPtr_ >= inputEnd_; 245 } 246 GetCurrentPC()247 uint32_t GetCurrentPC() const 248 { 249 return currentPc_; 250 } 251 PushStack(uintptr_t val)252 void PushStack(uintptr_t val) 253 { 254 ASSERT(currentStack_ < nStack_); 255 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 256 stack_[currentStack_++] = val; 257 } 258 SetStackValue(uintptr_t val)259 void SetStackValue(uintptr_t val) const 260 { 261 ASSERT(currentStack_ >= 1); 262 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 263 stack_[currentStack_ - 1] = val; 264 } 265 PopStack()266 uintptr_t PopStack() 267 { 268 ASSERT(currentStack_ >= 1); 269 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 270 return stack_[--currentStack_]; 271 } 272 PeekStack()273 uintptr_t PeekStack() const 274 { 275 ASSERT(currentStack_ >= 1); 276 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 277 return stack_[currentStack_ - 1]; 278 } 279 GetCurrentPtr()280 const uint8_t *GetCurrentPtr() const 281 { 282 return currentPtr_; 283 } 284 GetCaptureResultList()285 CaptureState *GetCaptureResultList() const 286 { 287 return captureResultList_; 288 } 289 290 void DumpResult(std::ostream &out) const; 291 292 MatchResult GetResult(const JSThread *thread, bool isSuccess) const; 293 294 void PushRegExpState(StateType type, uint32_t pc); 295 296 RegExpState *PopRegExpState(bool copyCaptrue = true); 297 DropRegExpState()298 void DropRegExpState() 299 { 300 stateStackLen_--; 301 } 302 PeekRegExpState()303 RegExpState *PeekRegExpState() const 304 { 305 ASSERT(stateStackLen_ >= 1); 306 return reinterpret_cast<RegExpState *>( 307 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 308 (stateStackLen_ - 1) * stateSize_); 309 } 310 311 void ReAllocStack(uint32_t stackLen); 312 IsWordChar(uint8_t value)313 inline bool IsWordChar(uint8_t value) const 314 { 315 return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') || 316 (value == '_')); 317 } 318 IsTerminator(uint32_t value)319 inline bool IsTerminator(uint32_t value) const 320 { 321 // NOLINTNEXTLINE(readability-magic-numbers) 322 return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029); 323 } 324 IsIgnoreCase()325 inline bool IsIgnoreCase() const 326 { 327 return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0; 328 } 329 IsUtf16()330 inline bool IsUtf16() const 331 { 332 return (flags_ & RegExpParser::FLAG_UTF16) != 0; 333 } 334 335 private: 336 static constexpr size_t CHAR_SIZE = 1; 337 static constexpr size_t WIDE_CHAR_SIZE = 2; 338 static constexpr size_t SAVE_RESET_START = 1; 339 static constexpr size_t SAVE_RESET_END = 2; 340 static constexpr size_t LOOP_MIN_OFFSET = 5; 341 static constexpr size_t LOOP_MAX_OFFSET = 9; 342 static constexpr size_t LOOP_PC_OFFSET = 1; 343 static constexpr size_t RANGE32_HEAD_OFFSET = 3; 344 static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4; 345 static constexpr size_t RANGE32_MAX_OFFSET = 8; 346 static constexpr size_t RANGE32_OFFSET = 2; 347 static constexpr uint32_t STACK_MULTIPLIER = 2; 348 static constexpr uint32_t MIN_STACK_SIZE = 8; 349 uint8_t *input_ = nullptr; 350 uint8_t *inputEnd_ = nullptr; 351 bool isWideChar_ = false; 352 353 uint32_t currentPc_ = 0; 354 const uint8_t *currentPtr_ = nullptr; 355 CaptureState *captureResultList_ = nullptr; 356 uintptr_t *stack_ = nullptr; 357 uint32_t currentStack_ = 0; 358 359 uint32_t nCapture_ = 0; 360 uint32_t nStack_ = 0; 361 362 uint32_t flags_ = 0; 363 uint32_t stateStackLen_ = 0; 364 uint32_t stateStackSize_ = 0; 365 uint32_t stateSize_ = 0; 366 uint8_t *stateStack_ = nullptr; 367 Chunk *chunk_ = nullptr; 368 }; 369 } // namespace panda::ecmascript 370 #endif // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H 371