• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18 
19 #include "ecmascript/builtins/builtins_regexp.h"
20 #include "ecmascript/global_env.h"
21 #include "ecmascript/js_tagged_value-inl.h"
22 #include "ecmascript/js_handle.h"
23 #include "ecmascript/mem/regexp_cached_chunk.h"
24 #include "ecmascript/regexp/regexp_parser.h"
25 
26 namespace panda::ecmascript {
27 class RegExpExecutor {
28 public:
29     struct CaptureState {
30         const uint8_t *captureStart;
31         const uint8_t *captureEnd;
32     };
33 
34     enum StateType : uint8_t {
35         STATE_SPLIT = 0, /* Do not re-order. */
36         STATE_NEGATIVE_MATCH_AHEAD, /* OP_NEGATIVE_MATCH_AHEAD  - OP_SPLIT_NEXT */
37         STATE_MATCH_AHEAD, /* OP_MATCH_AHEAD  - OP_SPLIT_NEXT */
38         STATE_SAVE,
39         STATE_PUSH,
40         STATE_POP,
41         STATE_SET,
42         STATE_INVALID,
43     };
44 
45     struct RegExpState {
46         StateType type_ = STATE_SPLIT;
47         uint32_t currentPc_ = 0;
48         const uint8_t *currentPtr_ = nullptr;
49     };
50 
RegExpExecutor(RegExpCachedChunk * chunk)51     explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk)
52     {
53         ASSERT(chunk_ != nullptr);
54     };
55 
56     ~RegExpExecutor() = default;
57 
58     NO_COPY_SEMANTIC(RegExpExecutor);
59     NO_MOVE_SEMANTIC(RegExpExecutor);
60 
61     bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
62 
63     bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
HandleFirstSplit()64     inline bool HandleFirstSplit()
65     {
66         if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
67             (flags_ & RegExpParser::FLAG_STICKY) == 0) {
68             if (IsEOF()) {
69                 if (MatchFailed()) {
70                     return false;
71                 }
72             } else if (prefilter_ && !isWideChar_) {
73                 ++currentPtr_;
74                 currentPtr_ = (const uint8_t *)memchr(currentPtr_, prefilter_, inputEnd_ - currentPtr_);
75                 if (currentPtr_ == nullptr) {
76                     currentPtr_ = inputEnd_;
77                 }
78                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
79             } else {
80                 AdvanceCurrentPtr();
81                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
82             }
83         }
84         return true;
85     }
86 
HandleOpAll(uint8_t opCode)87     inline bool HandleOpAll(uint8_t opCode)
88     {
89         if (IsEOF()) {
90             return !MatchFailed();
91         }
92         uint32_t currentChar = GetCurrentChar();
93         if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
94             return !MatchFailed();
95         }
96         Advance(opCode);
97         return true;
98     }
99 
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)100     inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
101     {
102         uint32_t expectedChar = 0;
103         if (opCode == RegExpOpCode::OP_CHAR32) {
104             expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
105         } else {
106             expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
107         }
108         if (IsEOF()) {
109             return !MatchFailed();
110         }
111         uint32_t currentChar = GetCurrentChar();
112         if (IsIgnoreCase()) {
113             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
114         }
115         if (currentChar == expectedChar) {
116             Advance(opCode);
117         } else {
118             if (MatchFailed()) {
119                 return false;
120             }
121         }
122         return true;
123     }
124 
HandleOpWordBoundary(uint8_t opCode)125     inline bool HandleOpWordBoundary(uint8_t opCode)
126     {
127         bool preIsWord = false;
128         if (GetCurrentPtr() != input_) {
129             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
130             preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
131         }
132         bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_));
133         if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
134             ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
135             ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
136             ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
137             Advance(opCode);
138         } else {
139             if (MatchFailed()) {
140                 return false;
141             }
142         }
143         return true;
144     }
145 
HandleOpLineStart(uint8_t opCode)146     inline bool HandleOpLineStart(uint8_t opCode)
147     {
148         if ((GetCurrentPtr() == input_) ||
149             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
150             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
151             Advance(opCode);
152         } else {
153             if (MatchFailed()) {
154                 return false;
155             }
156         }
157         return true;
158     }
159 
HandleOpLineEnd(uint8_t opCode)160     inline bool HandleOpLineEnd(uint8_t opCode)
161     {
162         if (IsEOF() ||
163             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
164             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) {
165             Advance(opCode);
166         } else {
167             if (MatchFailed()) {
168                 return false;
169             }
170         }
171         return true;
172     }
173 
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)174     inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
175     {
176         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
177         ASSERT(captureIndex < nCapture_);
178         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
179         CaptureState *captureState = &captureResultList_[captureIndex];
180         // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
181         PushRegExpState(STATE_SAVE, captureIndex * 2, reinterpret_cast<uintptr_t>(captureState->captureStart));
182         captureState->captureStart = GetCurrentPtr();
183         Advance(opCode);
184     }
185 
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)186     inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
187     {
188         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
189         ASSERT(captureIndex < nCapture_);
190         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
191         CaptureState *captureState = &captureResultList_[captureIndex];
192         // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
193         PushRegExpState(STATE_SAVE, captureIndex * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd));
194         captureState->captureEnd = GetCurrentPtr();
195         Advance(opCode);
196     }
197 
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)198     inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
199     {
200         uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
201         uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
202         for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
203             CaptureState *captureState =
204                 &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
205             // 2: Even indexes store captureStart.
206             // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
207             PushRegExpState(STATE_SAVE, i * 2, reinterpret_cast<uintptr_t>(captureState->captureStart));
208             // 2: Even indexes store captureStart.
209             // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
210             PushRegExpState(STATE_SAVE, i * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd));
211             captureState->captureStart = nullptr;
212             captureState->captureEnd = nullptr;
213         }
214         Advance(opCode);
215     }
216 
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)217     inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
218     {
219         auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
220         ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
221         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
222         Advance(opCode);
223         uint32_t splitPc = GetCurrentPC() + offset;
224         PushRegExpState(type, splitPc);
225     }
226 
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)227     inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
228     {
229         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
230         Advance(opCode);
231         PushRegExpState(STATE_SPLIT, GetCurrentPC());
232         AdvanceOffset(offset);
233     }
234 
HandleOpPrev(uint8_t opCode)235     inline bool HandleOpPrev(uint8_t opCode)
236     {
237         if (GetCurrentPtr() == input_) {
238             if (MatchFailed()) {
239                 return false;
240             }
241         } else {
242             PrevPtr(&currentPtr_, input_);
243             Advance(opCode);
244         }
245         return true;
246     }
247 
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)248     inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
249     {
250         uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
251         uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
252         uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
253         Advance(opCode);
254         uint32_t loopPcEnd = GetCurrentPC();
255         uint32_t loopPcStart = GetCurrentPC() + pcOffset;
256         bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
257         uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
258 
259         uint32_t loopCount = PeekStack();
260         PushRegExpState(StateType::STATE_SET, 0, loopCount);
261         SetStackValue(++loopCount);
262         if (loopCount < loopMax) {
263             // greedy failed, goto next
264             if (loopCount >= quantifyMin) {
265                 PushRegExpState(STATE_SPLIT, loopPcEnd);
266             }
267             // Goto loop start
268             SetCurrentPC(loopPcStart);
269         } else {
270             if (!isGreedy && (loopCount < quantifyMax)) {
271                 PushRegExpState(STATE_SPLIT, loopPcStart);
272             }
273         }
274     }
275 
HandleOpRange32(const DynChunk & byteCode)276     inline bool HandleOpRange32(const DynChunk &byteCode)
277     {
278         if (IsEOF()) {
279             return !MatchFailed();
280         }
281         uint32_t currentChar = GetCurrentChar();
282         if (IsIgnoreCase()) {
283             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
284         }
285         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
286         bool isFound = false;
287         int32_t idxMin = 0;
288         int32_t idxMax = static_cast<int32_t>(rangeCount) - 1;
289         int32_t idx = 0;
290         uint32_t low = 0;
291         uint32_t high =
292             byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET +
293                             RANGE32_MAX_HALF_OFFSET);
294         if (currentChar <= high) {
295             while (idxMin <= idxMax) {
296                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
297                 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
298                     RANGE32_MAX_OFFSET);
299                 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
300                     RANGE32_MAX_OFFSET +
301                     RANGE32_MAX_HALF_OFFSET);
302                 if (currentChar < low) {
303                     idxMax = idx - 1;
304                 } else if (currentChar > high) {
305                     idxMin = idx + 1;
306                 } else {
307                     isFound = true;
308                     break;
309                 }
310             }
311         }
312         if (isFound) {
313             AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
314         } else {
315             if (MatchFailed()) {
316                 return false;
317             }
318         }
319         return true;
320     }
321 
HandleOpRange(const DynChunk & byteCode)322     inline bool HandleOpRange(const DynChunk &byteCode)
323     {
324         if (IsEOF()) {
325             return !MatchFailed();
326         }
327         uint32_t currentChar = GetCurrentChar();
328         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
329         bool flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount);
330         if (IsIgnoreCase() && !flag) {
331             currentChar = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar));
332             flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount);
333         }
334         if (flag) {
335             AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
336         } else {
337             if (MatchFailed()) {
338                 return false;
339             }
340         }
341         return true;
342     }
343 
HandleOpSparse(const DynChunk & byteCode)344     inline bool HandleOpSparse(const DynChunk &byteCode)
345     {
346         if (IsEOF()) {
347             return !MatchFailed();
348         }
349         uint32_t currentChar = GetCurrentChar();
350         if (IsIgnoreCase()) {
351             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
352         }
353         uint16_t sparseCount = byteCode.GetU16(GetCurrentPC() + 1);
354         for (uint32_t i = 0; i < sparseCount; i++) {
355             uint32_t sparseChar = byteCode.GetU16(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET);
356             if (currentChar == sparseChar) {
357                 uint32_t offset = byteCode.GetU32(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET +
358                     SPARSE_OFF_OFFSET);
359                 AdvanceOffset(offset + sparseCount * SPARSE_MAX_OFFSET + SPARSE_HEAD_OFFSET);
360                 return true;
361             }
362         }
363         return !MatchFailed();
364     }
365 
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)366     inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
367     {
368         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
369         if (captureIndex >= nCapture_) {
370             return !MatchFailed();
371         }
372         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
373         const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
374         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
375         const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
376         if (captureStart == nullptr || captureEnd == nullptr) {
377             Advance(opCode);
378             return true;
379         }
380         bool isMatched = true;
381         if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
382             const uint8_t *refCptr = captureStart;
383             while (refCptr < captureEnd) {
384                 if (IsEOF()) {
385                     isMatched = false;
386                     break;
387                 }
388                 // NOLINTNEXTLINE(readability-identifier-naming)
389                 uint32_t c1 = GetChar(&refCptr, captureEnd);
390                 // NOLINTNEXTLINE(readability-identifier-naming)
391                 uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
392                 if (IsIgnoreCase()) {
393                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
394                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
395                 }
396                 if (c1 != c2) {
397                     isMatched = false;
398                     break;
399                 }
400             }
401             if (!isMatched) {
402                 if (MatchFailed()) {
403                     return false;
404                 }
405             } else {
406                 Advance(opCode);
407             }
408         } else {
409             const uint8_t *refCptr = captureEnd;
410             while (refCptr > captureStart) {
411                 if (GetCurrentPtr() == input_) {
412                     isMatched = false;
413                     break;
414                 }
415                 // NOLINTNEXTLINE(readability-identifier-naming)
416                 uint32_t c1 = GetPrevChar(&refCptr, captureStart);
417                 // NOLINTNEXTLINE(readability-identifier-naming)
418                 uint32_t c2 = GetPrevChar(&currentPtr_, input_);
419                 if (IsIgnoreCase()) {
420                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
421                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
422                 }
423                 if (c1 != c2) {
424                     isMatched = false;
425                     break;
426                 }
427             }
428             if (!isMatched) {
429                 if (MatchFailed()) {
430                     return false;
431                 }
432             } else {
433                 Advance(opCode);
434             }
435         }
436         return true;
437     }
438 
439     inline void Advance(uint8_t opCode, uint32_t offset = 0)
440     {
441         currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize());
442     }
443 
AdvanceOffset(uint32_t offset)444     inline void AdvanceOffset(uint32_t offset)
445     {
446         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
447         currentPc_ += offset;
448     }
449 
GetCurrentChar()450     inline uint32_t GetCurrentChar()
451     {
452         return GetChar(&currentPtr_, inputEnd_);
453     }
454 
AdvanceCurrentPtr()455     inline void AdvanceCurrentPtr()
456     {
457         AdvancePtr(&currentPtr_, inputEnd_);
458     }
459 
GetChar(const uint8_t ** pp,const uint8_t * end)460     uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
461     {
462         uint32_t c = 0;
463         const uint8_t *cptr = *pp;
464         if (!isWideChar_) {
465             c = *cptr;
466             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
467         } else {
468             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
469             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
470             c = c1;
471             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
472             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
473                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
474                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
475                 if (U16_IS_TRAIL(c1)) {
476                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
477                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
478                 }
479             }
480             *pp = cptr;
481         }
482         return c;
483     }
484 
PeekChar(const uint8_t * p,const uint8_t * end)485     uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
486     {
487         uint32_t c = 0;
488         const uint8_t *cptr = p;
489         if (!isWideChar_) {
490             c = *cptr;
491         } else {
492             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
493             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
494             c = c1;
495             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
496             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
497                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
498                 if (U16_IS_TRAIL(c1)) {
499                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
500                 }
501             }
502         }
503         return c;
504     }
505 
AdvancePtr(const uint8_t ** pp,const uint8_t * end)506     void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
507     {
508         const uint8_t *cptr = *pp;
509         if (!isWideChar_) {
510             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
511         } else {
512             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
513             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
514             cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
515             if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
516                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
517                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
518                 if (U16_IS_TRAIL(c1)) {
519                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
520                 }
521             }
522             *pp = cptr;
523         }
524     }
525 
PeekPrevChar(const uint8_t * p,const uint8_t * start)526     uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
527     {
528         uint32_t c = 0;
529         const uint8_t *cptr = p;
530         if (!isWideChar_) {
531             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
532         } else {
533             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
534             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
535             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
536             c = c1;
537             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
538                  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
539                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
540                 if (U16_IS_LEAD(c1)) {
541                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
542                 }
543             }
544         }
545         return c;
546     }
547 
GetPrevChar(const uint8_t ** pp,const uint8_t * start)548     uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
549     {
550         uint32_t c = 0;
551         const uint8_t *cptr = *pp;
552         if (!isWideChar_) {
553             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
554             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
555             *pp = cptr;
556         } else {
557             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
558             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
559             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
560             c = c1;
561             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
562                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
563                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
564                 if (U16_IS_LEAD(c1)) {
565                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
566                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
567                 }
568             }
569             *pp = cptr;
570         }
571         return c;
572     }
573 
PrevPtr(const uint8_t ** pp,const uint8_t * start)574     void PrevPtr(const uint8_t **pp, const uint8_t *start) const
575     {
576         const uint8_t *cptr = *pp;
577         if (!isWideChar_) {
578             cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
579             *pp = cptr;
580         } else {
581             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
582             uint16_t c1 = *(const uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
583             if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
584                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
585                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
586                 if (U16_IS_LEAD(c1)) {
587                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
588                 }
589             }
590             *pp = cptr;
591         }
592     }
593 
594     bool MatchFailed(bool isMatched = false);
595 
SetCurrentPC(uint32_t pc)596     void SetCurrentPC(uint32_t pc)
597     {
598         currentPc_ = pc;
599     }
600 
SetCurrentPtr(const uint8_t * ptr)601     void SetCurrentPtr(const uint8_t *ptr)
602     {
603         currentPtr_ = ptr;
604     }
605 
IsEOF()606     bool IsEOF() const
607     {
608         return currentPtr_ >= inputEnd_;
609     }
610 
IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)611     bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar,
612                         const DynChunk &byteCode, const uint16_t rangeCount)
613     {
614         bool isFound = false;
615         int32_t idxMin = 0;
616         int32_t idxMax = static_cast<int32_t>(rangeCount - 1);
617         int32_t idx = 0;
618         uint32_t low = 0;
619         uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET +
620             static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
621         if (nowChar <= high) {
622             while (idxMin <= idxMax) {
623                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
624                 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
625                     RANGE32_MAX_HALF_OFFSET);
626                 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
627                     RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
628                 if (nowChar < low) {
629                     idxMax = idx - 1;
630                 } else if (nowChar > high) {
631                     idxMin = idx + 1;
632                 } else {
633                     isFound = true;
634                     break;
635                 }
636             }
637         }
638         return isFound;
639     }
640 
GetCurrentPC()641     uint32_t GetCurrentPC() const
642     {
643         return currentPc_;
644     }
645 
PushStack(uintptr_t val)646     void PushStack(uintptr_t val)
647     {
648         ASSERT(currentStack_ < nStack_);
649         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
650         stack_[currentStack_++] = val;
651     }
652 
SetStackValue(uintptr_t val)653     void SetStackValue(uintptr_t val) const
654     {
655         ASSERT(currentStack_ >= 1);
656         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
657         stack_[currentStack_ - 1] = val;
658     }
659 
PopStack()660     uintptr_t PopStack()
661     {
662         ASSERT(currentStack_ >= 1);
663         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
664         return stack_[--currentStack_];
665     }
666 
PeekStack()667     uintptr_t PeekStack() const
668     {
669         ASSERT(currentStack_ >= 1);
670         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
671         return stack_[currentStack_ - 1];
672     }
673 
GetCurrentPtr()674     const uint8_t *GetCurrentPtr() const
675     {
676         return currentPtr_;
677     }
678 
GetCaptureResultList()679     CaptureState *GetCaptureResultList() const
680     {
681         return captureResultList_;
682     }
683 
684     void DumpResult(std::ostream &out) const;
685 
686     void GetResult(JSThread *thread);
687 
688     void PushRegExpState(StateType type, uint32_t pc);
689     void PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr);
690 
691     StateType PopRegExpState(bool copyCapture = true);
692 
DropRegExpState()693     void DropRegExpState()
694     {
695         stateStackLen_--;
696     }
697 
PeekRegExpState()698     RegExpState *PeekRegExpState() const
699     {
700         ASSERT(stateStackLen_ >= 1);
701         return reinterpret_cast<RegExpState *>(
702             stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
703             (stateStackLen_ - 1) * sizeof(RegExpState));
704     }
705 
706     void ReAllocStack(uint32_t stackLen);
707 
IsWordChar(uint8_t value)708     inline bool IsWordChar(uint8_t value) const
709     {
710         return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
711                 (value == '_'));
712     }
713 
IsTerminator(uint32_t value)714     inline bool IsTerminator(uint32_t value) const
715     {
716         // NOLINTNEXTLINE(readability-magic-numbers)
717         return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
718     }
719 
IsIgnoreCase()720     inline bool IsIgnoreCase() const
721     {
722         return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
723     }
724 
IsUtf16()725     inline bool IsUtf16() const
726     {
727         return (flags_ & RegExpParser::FLAG_UTF16) != 0;
728     }
729 
730 private:
731     static constexpr size_t CHAR_SIZE = 1;
732     static constexpr size_t WIDE_CHAR_SIZE = 2;
733     static constexpr size_t SAVE_RESET_START = 1;
734     static constexpr size_t SAVE_RESET_END = 2;
735     static constexpr size_t LOOP_MIN_OFFSET = 5;
736     static constexpr size_t LOOP_MAX_OFFSET = 9;
737     static constexpr size_t LOOP_PC_OFFSET = 1;
738     static constexpr size_t RANGE32_HEAD_OFFSET = 3;
739     static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
740     static constexpr size_t RANGE32_MAX_OFFSET = 8;
741     static constexpr size_t RANGE32_OFFSET = 2;
742     static constexpr size_t SPARSE_HEAD_OFFSET = 3;
743     static constexpr size_t SPARSE_OFF_OFFSET = 2;
744     static constexpr size_t SPARSE_MAX_OFFSET = 6;
745     static constexpr uint32_t STACK_MULTIPLIER = 2;
746     static constexpr uint32_t MIN_STACK_SIZE = 8;
747     static constexpr int TMP_BUF_SIZE = 128;
748     uint8_t *input_ = nullptr;
749     uint8_t *inputEnd_ = nullptr;
750     bool isWideChar_ = false;
751     uint16_t prefilter_ = 0;
752 
753     uint32_t currentPc_ = 0;
754     const uint8_t *currentPtr_ = nullptr;
755     CaptureState *captureResultList_ = nullptr;
756     uintptr_t *stack_ = nullptr;
757     uint32_t currentStack_ = 0;
758 
759     uint32_t nCapture_ = 0;
760     uint32_t nStack_ = 0;
761 
762     uint32_t flags_ = 0;
763     uint32_t stateStackLen_ = 0;
764     uint32_t stateStackSize_ = 0;
765     uint8_t *stateStack_ = nullptr;
766     RegExpCachedChunk *chunk_ = nullptr;
767 };
768 }  // namespace panda::ecmascript
769 #endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
770