• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18 
19 #include "ecmascript/regexp/regexp_parser.h"
20 #include "ecmascript/mem/chunk.h"
21 
22 namespace panda::ecmascript {
23 class RegExpExecutor {
24 public:
25     struct CaptureState {
26         const uint8_t *captureStart;
27         const uint8_t *captureEnd;
28     };
29 
30     enum StateType : uint8_t {
31         STATE_SPLIT = 0,
32         STATE_MATCH_AHEAD,
33         STATE_NEGATIVE_MATCH_AHEAD,
34     };
35 
36     struct RegExpState {
37         StateType type_ = STATE_SPLIT;
38         uint32_t currentPc_ = 0;
39         uint32_t currentStack_ = 0;
40         const uint8_t *currentPtr_ = nullptr;
41         __extension__ CaptureState *captureResultList_[0];  // NOLINT(modernize-avoid-c-arrays)
42     };
43 
44     struct MatchResult {
45         uint32_t endIndex_ = 0;
46         uint32_t index_ = 0;
47         // first value is true if result is undefined
48         std::vector<std::pair<bool, JSHandle<EcmaString>>> captures_;
49         bool isSuccess_ = false;
50     };
51 
RegExpExecutor(Chunk * chunk)52     explicit RegExpExecutor(Chunk *chunk) : chunk_(chunk)
53     {
54         ASSERT(chunk_ != nullptr);
55     };
56 
57     ~RegExpExecutor() = default;
58 
59     NO_COPY_SEMANTIC(RegExpExecutor);
60     NO_MOVE_SEMANTIC(RegExpExecutor);
61 
62     bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
63 
64     bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
HandleFirstSplit()65     inline bool HandleFirstSplit()
66     {
67         if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
68             (flags_ & RegExpParser::FLAG_STICKY) == 0) {
69             if (IsEOF()) {
70                 if (MatchFailed()) {
71                     return false;
72                 }
73             } else {
74                 AdvanceCurrentPtr();
75                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
76             }
77         }
78         return true;
79     }
80 
HandleOpAll(uint8_t opCode)81     inline bool HandleOpAll(uint8_t opCode)
82     {
83         if (IsEOF()) {
84             return !MatchFailed();
85         }
86         uint32_t currentChar = GetCurrentChar();
87         if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
88             return !MatchFailed();
89         }
90         Advance(opCode);
91         return true;
92     }
93 
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)94     inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
95     {
96         uint32_t expectedChar = 0;
97         if (opCode == RegExpOpCode::OP_CHAR32) {
98             expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
99         } else {
100             expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
101         }
102         if (IsEOF()) {
103             return !MatchFailed();
104         }
105         uint32_t currentChar = GetCurrentChar();
106         if (IsIgnoreCase()) {
107             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
108         }
109         if (currentChar == expectedChar) {
110             Advance(opCode);
111         } else {
112             if (MatchFailed()) {
113                 return false;
114             }
115         }
116         return true;
117     }
118 
HandleOpWordBoundary(uint8_t opCode)119     inline bool HandleOpWordBoundary(uint8_t opCode)
120     {
121         if (IsEOF()) {
122             if (opCode == RegExpOpCode::OP_WORD_BOUNDARY) {
123                 Advance(opCode);
124             } else {
125                 if (MatchFailed()) {
126                     return false;
127                 }
128             }
129             return true;
130         }
131         bool preIsWord = false;
132         if (GetCurrentPtr() != input_) {
133             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
134             preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
135         }
136         bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_));
137         if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
138             ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
139             ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
140             ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
141             Advance(opCode);
142         } else {
143             if (MatchFailed()) {
144                 return false;
145             }
146         }
147         return true;
148     }
149 
HandleOpLineStart(uint8_t opCode)150     inline bool HandleOpLineStart(uint8_t opCode)
151     {
152         if (IsEOF()) {
153             return !MatchFailed();
154         }
155         if ((GetCurrentPtr() == input_) ||
156             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
157             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
158             Advance(opCode);
159         } else {
160             if (MatchFailed()) {
161                 return false;
162             }
163         }
164         return true;
165     }
166 
HandleOpLineEnd(uint8_t opCode)167     inline bool HandleOpLineEnd(uint8_t opCode)
168     {
169         if (IsEOF() ||
170             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
171             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) {
172             Advance(opCode);
173         } else {
174             if (MatchFailed()) {
175                 return false;
176             }
177         }
178         return true;
179     }
180 
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)181     inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
182     {
183         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
184         ASSERT(captureIndex < nCapture_);
185         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
186         CaptureState *captureState = &captureResultList_[captureIndex];
187         captureState->captureStart = GetCurrentPtr();
188         Advance(opCode);
189     }
190 
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)191     inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
192     {
193         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
194         ASSERT(captureIndex < nCapture_);
195         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
196         CaptureState *captureState = &captureResultList_[captureIndex];
197         captureState->captureEnd = GetCurrentPtr();
198         Advance(opCode);
199     }
200 
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)201     inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
202     {
203         uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
204         uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
205         for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
206             CaptureState *captureState =
207                 &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
208             captureState->captureStart = nullptr;
209             captureState->captureEnd = nullptr;
210         }
211         Advance(opCode);
212     }
213 
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)214     inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
215     {
216         auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
217         ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
218         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
219         Advance(opCode);
220         uint32_t splitPc = GetCurrentPC() + offset;
221         PushRegExpState(type, splitPc);
222     }
223 
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)224     inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
225     {
226         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
227         Advance(opCode);
228         PushRegExpState(STATE_SPLIT, GetCurrentPC());
229         AdvanceOffset(offset);
230     }
231 
HandleOpPrev(uint8_t opCode)232     inline bool HandleOpPrev(uint8_t opCode)
233     {
234         if (GetCurrentPtr() == input_) {
235             if (MatchFailed()) {
236                 return false;
237             }
238         } else {
239             PrevPtr(&currentPtr_, input_);
240             Advance(opCode);
241         }
242         return true;
243     }
244 
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)245     inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
246     {
247         uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
248         uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
249         uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
250         Advance(opCode);
251         uint32_t loopPcEnd = GetCurrentPC();
252         uint32_t loopPcStart = GetCurrentPC() + pcOffset;
253         bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
254         uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
255 
256         uint32_t loopCount = PeekStack();
257         SetStackValue(++loopCount);
258         if (loopCount < loopMax) {
259             // greedy failed, goto next
260             if (loopCount >= quantifyMin) {
261                 PushRegExpState(STATE_SPLIT, loopPcEnd);
262             }
263             // Goto loop start
264             SetCurrentPC(loopPcStart);
265         } else {
266             if (!isGreedy && (loopCount < quantifyMax)) {
267                 PushRegExpState(STATE_SPLIT, loopPcStart);
268             }
269         }
270     }
271 
HandleOpRange32(const DynChunk & byteCode)272     inline bool HandleOpRange32(const DynChunk &byteCode)
273     {
274         if (IsEOF()) {
275             return !MatchFailed();
276         }
277         uint32_t currentChar = GetCurrentChar();
278         if (IsIgnoreCase()) {
279             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
280         }
281         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
282         bool isFound = false;
283         int32_t idxMin = 0;
284         int32_t idxMax = static_cast<int32_t>(rangeCount) - 1;
285         int32_t idx = 0;
286         uint32_t low = 0;
287         uint32_t high =
288             byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET +
289                             RANGE32_MAX_HALF_OFFSET);
290         if (currentChar <= high) {
291             while (idxMin <= idxMax) {
292                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
293                 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
294                     RANGE32_MAX_OFFSET);
295                 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
296                     RANGE32_MAX_OFFSET +
297                     RANGE32_MAX_HALF_OFFSET);
298                 if (currentChar < low) {
299                     idxMax = idx - 1;
300                 } else if (currentChar > high) {
301                     idxMin = idx + 1;
302                 } else {
303                     isFound = true;
304                     break;
305                 }
306             }
307         }
308         if (isFound) {
309             AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
310         } else {
311             if (MatchFailed()) {
312                 return false;
313             }
314         }
315         return true;
316     }
317 
HandleOpRange(const DynChunk & byteCode)318     inline bool HandleOpRange(const DynChunk &byteCode)
319     {
320         if (IsEOF()) {
321             return !MatchFailed();
322         }
323         uint32_t currentChar = GetCurrentChar();
324         if (IsIgnoreCase()) {
325             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
326         }
327         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
328         bool isFound = false;
329         int32_t idxMin = 0;
330         int32_t idxMax = static_cast<int32_t>(rangeCount - 1);
331         int32_t idx = 0;
332         uint32_t low = 0;
333         uint32_t high =
334             byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
335         if (currentChar <= high) {
336             while (idxMin <= idxMax) {
337                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
338                 low = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
339                     RANGE32_MAX_HALF_OFFSET);
340                 high = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
341                     RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
342                 if (currentChar < low) {
343                     idxMax = idx - 1;
344                 } else if (currentChar > high) {
345                     idxMin = idx + 1;
346                 } else {
347                     isFound = true;
348                     break;
349                 }
350             }
351         }
352         if (isFound) {
353             AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
354         } else {
355             if (MatchFailed()) {
356                 return false;
357             }
358         }
359         return true;
360     }
361 
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)362     inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
363     {
364         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
365         if (captureIndex >= nCapture_) {
366             return !MatchFailed();
367         }
368         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
369         const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
370         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
371         const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
372         if (captureStart == nullptr || captureEnd == nullptr) {
373             Advance(opCode);
374             return true;
375         }
376         bool isMatched = true;
377         if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
378             const uint8_t *refCptr = captureStart;
379             while (refCptr < captureEnd) {
380                 if (IsEOF()) {
381                     isMatched = false;
382                     break;
383                 }
384                 // NOLINTNEXTLINE(readability-identifier-naming)
385                 uint32_t c1 = GetChar(&refCptr, captureEnd);
386                 // NOLINTNEXTLINE(readability-identifier-naming)
387                 uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
388                 if (IsIgnoreCase()) {
389                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
390                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
391                 }
392                 if (c1 != c2) {
393                     isMatched = false;
394                     break;
395                 }
396             }
397             if (!isMatched) {
398                 if (MatchFailed()) {
399                     return false;
400                 }
401             } else {
402                 Advance(opCode);
403             }
404         } else {
405             const uint8_t *refCptr = captureEnd;
406             while (refCptr > captureStart) {
407                 if (GetCurrentPtr() == input_) {
408                     isMatched = false;
409                     break;
410                 }
411                 // NOLINTNEXTLINE(readability-identifier-naming)
412                 uint32_t c1 = GetPrevChar(&refCptr, captureStart);
413                 // NOLINTNEXTLINE(readability-identifier-naming)
414                 uint32_t c2 = GetPrevChar(&currentPtr_, input_);
415                 if (IsIgnoreCase()) {
416                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
417                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
418                 }
419                 if (c1 != c2) {
420                     isMatched = false;
421                     break;
422                 }
423             }
424             if (!isMatched) {
425                 if (MatchFailed()) {
426                     return false;
427                 }
428             } else {
429                 Advance(opCode);
430             }
431         }
432         return true;
433     }
434 
435     inline void Advance(uint8_t opCode, uint32_t offset = 0)
436     {
437         currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize());
438     }
439 
AdvanceOffset(uint32_t offset)440     inline void AdvanceOffset(uint32_t offset)
441     {
442         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
443         currentPc_ += offset;
444     }
445 
GetCurrentChar()446     inline uint32_t GetCurrentChar()
447     {
448         return GetChar(&currentPtr_, inputEnd_);
449     }
450 
AdvanceCurrentPtr()451     inline void AdvanceCurrentPtr()
452     {
453         AdvancePtr(&currentPtr_, inputEnd_);
454     }
455 
GetChar(const uint8_t ** pp,const uint8_t * end)456     uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
457     {
458         uint32_t c = 0;
459         const uint8_t *cptr = *pp;
460         if (!isWideChar_) {
461             c = *cptr;
462             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
463         } else {
464             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
465             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
466             c = c1;
467             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
468             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
469                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
470                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
471                 if (U16_IS_TRAIL(c1)) {
472                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
473                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
474                 }
475             }
476             *pp = cptr;
477         }
478         return c;
479     }
480 
PeekChar(const uint8_t * p,const uint8_t * end)481     uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
482     {
483         uint32_t c = 0;
484         const uint8_t *cptr = p;
485         if (!isWideChar_) {
486             c = *cptr;
487         } else {
488             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
489             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
490             c = c1;
491             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
492             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
493                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
494                 if (U16_IS_TRAIL(c1)) {
495                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
496                 }
497             }
498         }
499         return c;
500     }
501 
AdvancePtr(const uint8_t ** pp,const uint8_t * end)502     void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
503     {
504         const uint8_t *cptr = *pp;
505         if (!isWideChar_) {
506             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
507         } else {
508             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
509             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
510             cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
511             if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
512                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
513                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
514                 if (U16_IS_TRAIL(c1)) {
515                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
516                 }
517             }
518             *pp = cptr;
519         }
520     }
521 
PeekPrevChar(const uint8_t * p,const uint8_t * start)522     uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
523     {
524         uint32_t c = 0;
525         const uint8_t *cptr = p;
526         if (!isWideChar_) {
527             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
528         } else {
529             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
530             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
531             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
532             c = c1;
533             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
534                  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
535                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
536                 if (U16_IS_LEAD(c1)) {
537                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
538                 }
539             }
540         }
541         return c;
542     }
543 
GetPrevChar(const uint8_t ** pp,const uint8_t * start)544     uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
545     {
546         uint32_t c = 0;
547         const uint8_t *cptr = *pp;
548         if (!isWideChar_) {
549             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
550             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
551             *pp = cptr;
552         } else {
553             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
554             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
555             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
556             c = c1;
557             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
558                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
559                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
560                 if (U16_IS_LEAD(c1)) {
561                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
562                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
563                 }
564             }
565             *pp = cptr;
566         }
567         return c;
568     }
569 
PrevPtr(const uint8_t ** pp,const uint8_t * start)570     void PrevPtr(const uint8_t **pp, const uint8_t *start) const
571     {
572         const uint8_t *cptr = *pp;
573         if (!isWideChar_) {
574             cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
575             *pp = cptr;
576         } else {
577             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
578             uint16_t c1 = *(const uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
579             if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
580                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
581                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
582                 if (U16_IS_LEAD(c1)) {
583                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
584                 }
585             }
586             *pp = cptr;
587         }
588     }
589 
590     bool MatchFailed(bool isMatched = false);
591 
SetCurrentPC(uint32_t pc)592     void SetCurrentPC(uint32_t pc)
593     {
594         currentPc_ = pc;
595     }
596 
SetCurrentPtr(const uint8_t * ptr)597     void SetCurrentPtr(const uint8_t *ptr)
598     {
599         currentPtr_ = ptr;
600     }
601 
IsEOF()602     bool IsEOF() const
603     {
604         return currentPtr_ >= inputEnd_;
605     }
606 
GetCurrentPC()607     uint32_t GetCurrentPC() const
608     {
609         return currentPc_;
610     }
611 
PushStack(uintptr_t val)612     void PushStack(uintptr_t val)
613     {
614         ASSERT(currentStack_ < nStack_);
615         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
616         stack_[currentStack_++] = val;
617     }
618 
SetStackValue(uintptr_t val)619     void SetStackValue(uintptr_t val) const
620     {
621         ASSERT(currentStack_ >= 1);
622         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
623         stack_[currentStack_ - 1] = val;
624     }
625 
PopStack()626     uintptr_t PopStack()
627     {
628         ASSERT(currentStack_ >= 1);
629         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
630         return stack_[--currentStack_];
631     }
632 
PeekStack()633     uintptr_t PeekStack() const
634     {
635         ASSERT(currentStack_ >= 1);
636         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
637         return stack_[currentStack_ - 1];
638     }
639 
GetCurrentPtr()640     const uint8_t *GetCurrentPtr() const
641     {
642         return currentPtr_;
643     }
644 
GetCaptureResultList()645     CaptureState *GetCaptureResultList() const
646     {
647         return captureResultList_;
648     }
649 
650     void DumpResult(std::ostream &out) const;
651 
652     MatchResult GetResult(const JSThread *thread, bool isSuccess) const;
653 
654     void PushRegExpState(StateType type, uint32_t pc);
655 
656     RegExpState *PopRegExpState(bool copyCaptrue = true);
657 
DropRegExpState()658     void DropRegExpState()
659     {
660         stateStackLen_--;
661     }
662 
PeekRegExpState()663     RegExpState *PeekRegExpState() const
664     {
665         ASSERT(stateStackLen_ >= 1);
666         return reinterpret_cast<RegExpState *>(
667             stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
668             (stateStackLen_ - 1) * stateSize_);
669     }
670 
671     void ReAllocStack(uint32_t stackLen);
672 
IsWordChar(uint8_t value)673     inline bool IsWordChar(uint8_t value) const
674     {
675         return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
676                 (value == '_'));
677     }
678 
IsTerminator(uint32_t value)679     inline bool IsTerminator(uint32_t value) const
680     {
681         // NOLINTNEXTLINE(readability-magic-numbers)
682         return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
683     }
684 
IsIgnoreCase()685     inline bool IsIgnoreCase() const
686     {
687         return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
688     }
689 
IsUtf16()690     inline bool IsUtf16() const
691     {
692         return (flags_ & RegExpParser::FLAG_UTF16) != 0;
693     }
694 
695 private:
696     static constexpr size_t CHAR_SIZE = 1;
697     static constexpr size_t WIDE_CHAR_SIZE = 2;
698     static constexpr size_t SAVE_RESET_START = 1;
699     static constexpr size_t SAVE_RESET_END = 2;
700     static constexpr size_t LOOP_MIN_OFFSET = 5;
701     static constexpr size_t LOOP_MAX_OFFSET = 9;
702     static constexpr size_t LOOP_PC_OFFSET = 1;
703     static constexpr size_t RANGE32_HEAD_OFFSET = 3;
704     static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
705     static constexpr size_t RANGE32_MAX_OFFSET = 8;
706     static constexpr size_t RANGE32_OFFSET = 2;
707     static constexpr uint32_t STACK_MULTIPLIER = 2;
708     static constexpr uint32_t MIN_STACK_SIZE = 8;
709     uint8_t *input_ = nullptr;
710     uint8_t *inputEnd_ = nullptr;
711     bool isWideChar_ = false;
712 
713     uint32_t currentPc_ = 0;
714     const uint8_t *currentPtr_ = nullptr;
715     CaptureState *captureResultList_ = nullptr;
716     uintptr_t *stack_ = nullptr;
717     uint32_t currentStack_ = 0;
718 
719     uint32_t nCapture_ = 0;
720     uint32_t nStack_ = 0;
721 
722     uint32_t flags_ = 0;
723     uint32_t stateStackLen_ = 0;
724     uint32_t stateStackSize_ = 0;
725     uint32_t stateSize_ = 0;
726     uint8_t *stateStack_ = nullptr;
727     Chunk *chunk_ = nullptr;
728 };
729 }  // namespace panda::ecmascript
730 #endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
731