• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18 
19 #include "ecmascript/regexp/regexp_parser.h"
20 #include "ecmascript/mem/chunk.h"
21 
22 namespace panda::ecmascript {
23 class RegExpExecutor {
24 public:
25     struct CaptureState {
26         const uint8_t *captureStart;
27         const uint8_t *captureEnd;
28     };
29 
30     enum StateType : uint8_t {
31         STATE_SPLIT = 0,
32         STATE_MATCH_AHEAD,
33         STATE_NEGATIVE_MATCH_AHEAD,
34     };
35 
36     struct RegExpState {
37         StateType type_ = STATE_SPLIT;
38         uint32_t currentPc_ = 0;
39         uint32_t currentStack_ = 0;
40         const uint8_t *currentPtr_ = nullptr;
41         __extension__ CaptureState *captureResultList_[0];  // NOLINT(modernize-avoid-c-arrays)
42     };
43 
44     struct MatchResult {
45         uint32_t endIndex_ = 0;
46         uint32_t index_ = 0;
47         // first value is true if result is undefined
48         std::vector<std::pair<bool, JSHandle<EcmaString>>> captures_;
49         bool isSuccess_ = false;
50     };
51 
RegExpExecutor(Chunk * chunk)52     explicit RegExpExecutor(Chunk *chunk) : chunk_(chunk)
53     {
54         ASSERT(chunk_ != nullptr);
55     };
56 
57     ~RegExpExecutor() = default;
58 
59     NO_COPY_SEMANTIC(RegExpExecutor);
60     NO_MOVE_SEMANTIC(RegExpExecutor);
61 
62     bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
63 
64     bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
HandleFirstSplit()65     inline bool HandleFirstSplit()
66     {
67         if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
68             (flags_ & RegExpParser::FLAG_STICKY) == 0) {
69             if (IsEOF()) {
70                 if (MatchFailed()) {
71                     return false;
72                 }
73             } else {
74                 AdvanceCurrentPtr();
75                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
76             }
77         }
78         return true;
79     }
80 
HandleOpAll(uint8_t opCode)81     inline bool HandleOpAll(uint8_t opCode)
82     {
83         if (IsEOF()) {
84             return !MatchFailed();
85         }
86         uint32_t currentChar = GetCurrentChar();
87         if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
88             return !MatchFailed();
89         }
90         Advance(opCode);
91         return true;
92     }
93 
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)94     inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
95     {
96         uint32_t expectedChar = 0;
97         if (opCode == RegExpOpCode::OP_CHAR32) {
98             expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
99         } else {
100             expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
101         }
102         if (IsEOF()) {
103             return !MatchFailed();
104         }
105         uint32_t currentChar = GetCurrentChar();
106         if (IsIgnoreCase()) {
107             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
108         }
109         if (currentChar == expectedChar) {
110             Advance(opCode);
111         } else {
112             if (MatchFailed()) {
113                 return false;
114             }
115         }
116         return true;
117     }
118 
HandleOpWordBoundary(uint8_t opCode)119     inline bool HandleOpWordBoundary(uint8_t opCode)
120     {
121         bool preIsWord = false;
122         if (GetCurrentPtr() != input_) {
123             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
124             preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
125         }
126         bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_));
127         if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
128             ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
129             ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
130             ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
131             Advance(opCode);
132         } else {
133             if (MatchFailed()) {
134                 return false;
135             }
136         }
137         return true;
138     }
139 
HandleOpLineStart(uint8_t opCode)140     inline bool HandleOpLineStart(uint8_t opCode)
141     {
142         if ((GetCurrentPtr() == input_) ||
143             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
144             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
145             Advance(opCode);
146         } else {
147             if (MatchFailed()) {
148                 return false;
149             }
150         }
151         return true;
152     }
153 
HandleOpLineEnd(uint8_t opCode)154     inline bool HandleOpLineEnd(uint8_t opCode)
155     {
156         if (IsEOF() ||
157             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
158             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) {
159             Advance(opCode);
160         } else {
161             if (MatchFailed()) {
162                 return false;
163             }
164         }
165         return true;
166     }
167 
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)168     inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
169     {
170         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
171         ASSERT(captureIndex < nCapture_);
172         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
173         CaptureState *captureState = &captureResultList_[captureIndex];
174         captureState->captureStart = GetCurrentPtr();
175         Advance(opCode);
176     }
177 
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)178     inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
179     {
180         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
181         ASSERT(captureIndex < nCapture_);
182         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
183         CaptureState *captureState = &captureResultList_[captureIndex];
184         captureState->captureEnd = GetCurrentPtr();
185         Advance(opCode);
186     }
187 
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)188     inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
189     {
190         uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
191         uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
192         for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
193             CaptureState *captureState =
194                 &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
195             captureState->captureStart = nullptr;
196             captureState->captureEnd = nullptr;
197         }
198         Advance(opCode);
199     }
200 
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)201     inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
202     {
203         auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
204         ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
205         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
206         Advance(opCode);
207         uint32_t splitPc = GetCurrentPC() + offset;
208         PushRegExpState(type, splitPc);
209     }
210 
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)211     inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
212     {
213         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
214         Advance(opCode);
215         PushRegExpState(STATE_SPLIT, GetCurrentPC());
216         AdvanceOffset(offset);
217     }
218 
HandleOpPrev(uint8_t opCode)219     inline bool HandleOpPrev(uint8_t opCode)
220     {
221         if (GetCurrentPtr() == input_) {
222             if (MatchFailed()) {
223                 return false;
224             }
225         } else {
226             PrevPtr(&currentPtr_, input_);
227             Advance(opCode);
228         }
229         return true;
230     }
231 
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)232     inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
233     {
234         uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
235         uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
236         uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
237         Advance(opCode);
238         uint32_t loopPcEnd = GetCurrentPC();
239         uint32_t loopPcStart = GetCurrentPC() + pcOffset;
240         bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
241         uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
242 
243         uint32_t loopCount = PeekStack();
244         SetStackValue(++loopCount);
245         if (loopCount < loopMax) {
246             // greedy failed, goto next
247             if (loopCount >= quantifyMin) {
248                 PushRegExpState(STATE_SPLIT, loopPcEnd);
249             }
250             // Goto loop start
251             SetCurrentPC(loopPcStart);
252         } else {
253             if (!isGreedy && (loopCount < quantifyMax)) {
254                 PushRegExpState(STATE_SPLIT, loopPcStart);
255             }
256         }
257     }
258 
HandleOpRange32(const DynChunk & byteCode)259     inline bool HandleOpRange32(const DynChunk &byteCode)
260     {
261         if (IsEOF()) {
262             return !MatchFailed();
263         }
264         uint32_t currentChar = GetCurrentChar();
265         if (IsIgnoreCase()) {
266             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
267         }
268         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
269         bool isFound = false;
270         int32_t idxMin = 0;
271         int32_t idxMax = static_cast<int32_t>(rangeCount) - 1;
272         int32_t idx = 0;
273         uint32_t low = 0;
274         uint32_t high =
275             byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET +
276                             RANGE32_MAX_HALF_OFFSET);
277         if (currentChar <= high) {
278             while (idxMin <= idxMax) {
279                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
280                 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
281                     RANGE32_MAX_OFFSET);
282                 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
283                     RANGE32_MAX_OFFSET +
284                     RANGE32_MAX_HALF_OFFSET);
285                 if (currentChar < low) {
286                     idxMax = idx - 1;
287                 } else if (currentChar > high) {
288                     idxMin = idx + 1;
289                 } else {
290                     isFound = true;
291                     break;
292                 }
293             }
294         }
295         if (isFound) {
296             AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
297         } else {
298             if (MatchFailed()) {
299                 return false;
300             }
301         }
302         return true;
303     }
304 
HandleOpRange(const DynChunk & byteCode)305     inline bool HandleOpRange(const DynChunk &byteCode)
306     {
307         if (IsEOF()) {
308             return !MatchFailed();
309         }
310         uint32_t currentChar = GetCurrentChar();
311         uint32_t currentCharNext = currentChar;
312         if (IsIgnoreCase()) {
313             currentCharNext = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar));
314         }
315         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
316         if (IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount) ||
317             IsFoundOpRange(GetCurrentPC(), currentCharNext, byteCode, rangeCount)) {
318             AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
319         } else {
320             if (MatchFailed()) {
321                 return false;
322             }
323         }
324         return true;
325     }
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)326     inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
327     {
328         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
329         if (captureIndex >= nCapture_) {
330             return !MatchFailed();
331         }
332         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
333         const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
334         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
335         const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
336         if (captureStart == nullptr || captureEnd == nullptr) {
337             Advance(opCode);
338             return true;
339         }
340         bool isMatched = true;
341         if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
342             const uint8_t *refCptr = captureStart;
343             while (refCptr < captureEnd) {
344                 if (IsEOF()) {
345                     isMatched = false;
346                     break;
347                 }
348                 // NOLINTNEXTLINE(readability-identifier-naming)
349                 uint32_t c1 = GetChar(&refCptr, captureEnd);
350                 // NOLINTNEXTLINE(readability-identifier-naming)
351                 uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
352                 if (IsIgnoreCase()) {
353                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
354                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
355                 }
356                 if (c1 != c2) {
357                     isMatched = false;
358                     break;
359                 }
360             }
361             if (!isMatched) {
362                 if (MatchFailed()) {
363                     return false;
364                 }
365             } else {
366                 Advance(opCode);
367             }
368         } else {
369             const uint8_t *refCptr = captureEnd;
370             while (refCptr > captureStart) {
371                 if (GetCurrentPtr() == input_) {
372                     isMatched = false;
373                     break;
374                 }
375                 // NOLINTNEXTLINE(readability-identifier-naming)
376                 uint32_t c1 = GetPrevChar(&refCptr, captureStart);
377                 // NOLINTNEXTLINE(readability-identifier-naming)
378                 uint32_t c2 = GetPrevChar(&currentPtr_, input_);
379                 if (IsIgnoreCase()) {
380                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
381                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
382                 }
383                 if (c1 != c2) {
384                     isMatched = false;
385                     break;
386                 }
387             }
388             if (!isMatched) {
389                 if (MatchFailed()) {
390                     return false;
391                 }
392             } else {
393                 Advance(opCode);
394             }
395         }
396         return true;
397     }
398 
399     inline void Advance(uint8_t opCode, uint32_t offset = 0)
400     {
401         currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize());
402     }
403 
AdvanceOffset(uint32_t offset)404     inline void AdvanceOffset(uint32_t offset)
405     {
406         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
407         currentPc_ += offset;
408     }
409 
GetCurrentChar()410     inline uint32_t GetCurrentChar()
411     {
412         return GetChar(&currentPtr_, inputEnd_);
413     }
414 
AdvanceCurrentPtr()415     inline void AdvanceCurrentPtr()
416     {
417         AdvancePtr(&currentPtr_, inputEnd_);
418     }
419 
GetChar(const uint8_t ** pp,const uint8_t * end)420     uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
421     {
422         uint32_t c = 0;
423         const uint8_t *cptr = *pp;
424         if (!isWideChar_) {
425             c = *cptr;
426             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
427         } else {
428             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
429             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
430             c = c1;
431             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
432             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
433                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
434                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
435                 if (U16_IS_TRAIL(c1)) {
436                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
437                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
438                 }
439             }
440             *pp = cptr;
441         }
442         return c;
443     }
444 
PeekChar(const uint8_t * p,const uint8_t * end)445     uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
446     {
447         uint32_t c = 0;
448         const uint8_t *cptr = p;
449         if (!isWideChar_) {
450             c = *cptr;
451         } else {
452             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
453             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
454             c = c1;
455             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
456             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
457                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
458                 if (U16_IS_TRAIL(c1)) {
459                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
460                 }
461             }
462         }
463         return c;
464     }
465 
AdvancePtr(const uint8_t ** pp,const uint8_t * end)466     void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
467     {
468         const uint8_t *cptr = *pp;
469         if (!isWideChar_) {
470             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
471         } else {
472             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
473             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
474             cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
475             if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
476                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
477                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
478                 if (U16_IS_TRAIL(c1)) {
479                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
480                 }
481             }
482             *pp = cptr;
483         }
484     }
485 
PeekPrevChar(const uint8_t * p,const uint8_t * start)486     uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
487     {
488         uint32_t c = 0;
489         const uint8_t *cptr = p;
490         if (!isWideChar_) {
491             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
492         } else {
493             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
494             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
495             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
496             c = c1;
497             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
498                  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
499                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
500                 if (U16_IS_LEAD(c1)) {
501                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
502                 }
503             }
504         }
505         return c;
506     }
507 
GetPrevChar(const uint8_t ** pp,const uint8_t * start)508     uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
509     {
510         uint32_t c = 0;
511         const uint8_t *cptr = *pp;
512         if (!isWideChar_) {
513             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
514             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
515             *pp = cptr;
516         } else {
517             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
518             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
519             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
520             c = c1;
521             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
522                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
523                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
524                 if (U16_IS_LEAD(c1)) {
525                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
526                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
527                 }
528             }
529             *pp = cptr;
530         }
531         return c;
532     }
533 
PrevPtr(const uint8_t ** pp,const uint8_t * start)534     void PrevPtr(const uint8_t **pp, const uint8_t *start) const
535     {
536         const uint8_t *cptr = *pp;
537         if (!isWideChar_) {
538             cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
539             *pp = cptr;
540         } else {
541             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
542             uint16_t c1 = *(const uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
543             if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
544                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
545                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
546                 if (U16_IS_LEAD(c1)) {
547                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
548                 }
549             }
550             *pp = cptr;
551         }
552     }
553 
554     bool MatchFailed(bool isMatched = false);
555 
SetCurrentPC(uint32_t pc)556     void SetCurrentPC(uint32_t pc)
557     {
558         currentPc_ = pc;
559     }
560 
SetCurrentPtr(const uint8_t * ptr)561     void SetCurrentPtr(const uint8_t *ptr)
562     {
563         currentPtr_ = ptr;
564     }
565 
IsEOF()566     bool IsEOF() const
567     {
568         return currentPtr_ >= inputEnd_;
569     }
570 
IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)571     bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar,
572                         const DynChunk &byteCode, const uint16_t rangeCount)
573     {
574         bool isFound = false;
575         int32_t idxMin = 0;
576         int32_t idxMax = static_cast<int32_t>(rangeCount - 1);
577         int32_t idx = 0;
578         uint32_t low = 0;
579         uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET +
580             static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
581         if (nowChar <= high) {
582             while (idxMin <= idxMax) {
583                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
584                 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
585                     RANGE32_MAX_HALF_OFFSET);
586                 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
587                     RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
588                 if (nowChar < low) {
589                     idxMax = idx - 1;
590                 } else if (nowChar > high) {
591                     idxMin = idx + 1;
592                 } else {
593                     isFound = true;
594                     break;
595                 }
596             }
597         }
598         return isFound;
599     }
600 
GetCurrentPC()601     uint32_t GetCurrentPC() const
602     {
603         return currentPc_;
604     }
605 
PushStack(uintptr_t val)606     void PushStack(uintptr_t val)
607     {
608         ASSERT(currentStack_ < nStack_);
609         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
610         stack_[currentStack_++] = val;
611     }
612 
SetStackValue(uintptr_t val)613     void SetStackValue(uintptr_t val) const
614     {
615         ASSERT(currentStack_ >= 1);
616         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
617         stack_[currentStack_ - 1] = val;
618     }
619 
PopStack()620     uintptr_t PopStack()
621     {
622         ASSERT(currentStack_ >= 1);
623         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
624         return stack_[--currentStack_];
625     }
626 
PeekStack()627     uintptr_t PeekStack() const
628     {
629         ASSERT(currentStack_ >= 1);
630         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
631         return stack_[currentStack_ - 1];
632     }
633 
GetCurrentPtr()634     const uint8_t *GetCurrentPtr() const
635     {
636         return currentPtr_;
637     }
638 
GetCaptureResultList()639     CaptureState *GetCaptureResultList() const
640     {
641         return captureResultList_;
642     }
643 
644     void DumpResult(std::ostream &out) const;
645 
646     MatchResult GetResult(const JSThread *thread, bool isSuccess) const;
647 
648     void PushRegExpState(StateType type, uint32_t pc);
649 
650     RegExpState *PopRegExpState(bool copyCaptrue = true);
651 
DropRegExpState()652     void DropRegExpState()
653     {
654         stateStackLen_--;
655     }
656 
PeekRegExpState()657     RegExpState *PeekRegExpState() const
658     {
659         ASSERT(stateStackLen_ >= 1);
660         return reinterpret_cast<RegExpState *>(
661             stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
662             (stateStackLen_ - 1) * stateSize_);
663     }
664 
665     void ReAllocStack(uint32_t stackLen);
666 
IsWordChar(uint8_t value)667     inline bool IsWordChar(uint8_t value) const
668     {
669         return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
670                 (value == '_'));
671     }
672 
IsTerminator(uint32_t value)673     inline bool IsTerminator(uint32_t value) const
674     {
675         // NOLINTNEXTLINE(readability-magic-numbers)
676         return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
677     }
678 
IsIgnoreCase()679     inline bool IsIgnoreCase() const
680     {
681         return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
682     }
683 
IsUtf16()684     inline bool IsUtf16() const
685     {
686         return (flags_ & RegExpParser::FLAG_UTF16) != 0;
687     }
688 
689 private:
690     static constexpr size_t CHAR_SIZE = 1;
691     static constexpr size_t WIDE_CHAR_SIZE = 2;
692     static constexpr size_t SAVE_RESET_START = 1;
693     static constexpr size_t SAVE_RESET_END = 2;
694     static constexpr size_t LOOP_MIN_OFFSET = 5;
695     static constexpr size_t LOOP_MAX_OFFSET = 9;
696     static constexpr size_t LOOP_PC_OFFSET = 1;
697     static constexpr size_t RANGE32_HEAD_OFFSET = 3;
698     static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
699     static constexpr size_t RANGE32_MAX_OFFSET = 8;
700     static constexpr size_t RANGE32_OFFSET = 2;
701     static constexpr uint32_t STACK_MULTIPLIER = 2;
702     static constexpr uint32_t MIN_STACK_SIZE = 8;
703     static constexpr int TMP_BUF_SIZE = 128;
704     uint8_t *input_ = nullptr;
705     uint8_t *inputEnd_ = nullptr;
706     bool isWideChar_ = false;
707 
708     uint32_t currentPc_ = 0;
709     const uint8_t *currentPtr_ = nullptr;
710     CaptureState *captureResultList_ = nullptr;
711     uintptr_t *stack_ = nullptr;
712     uint32_t currentStack_ = 0;
713 
714     uint32_t nCapture_ = 0;
715     uint32_t nStack_ = 0;
716 
717     uint32_t flags_ = 0;
718     uint32_t stateStackLen_ = 0;
719     uint32_t stateStackSize_ = 0;
720     uint32_t stateSize_ = 0;
721     uint8_t *stateStack_ = nullptr;
722     Chunk *chunk_ = nullptr;
723 };
724 }  // namespace panda::ecmascript
725 #endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
726