• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18 
19 #include "ecmascript/builtins/builtins_regexp.h"
20 #include "ecmascript/regexp/regexp_parser.h"
21 #include "ecmascript/mem/regexp_cached_chunk.h"
22 #include "ecmascript/js_handle.h"
23 
24 namespace panda::ecmascript {
25 class RegExpExecutor {
26 public:
27     struct CaptureState {
28         const uint8_t *captureStart;
29         const uint8_t *captureEnd;
30     };
31 
32     enum StateType : uint8_t {
33         STATE_SPLIT = 0,
34         STATE_MATCH_AHEAD,
35         STATE_NEGATIVE_MATCH_AHEAD,
36     };
37 
38     struct RegExpState {
39         StateType type_ = STATE_SPLIT;
40         uint32_t currentPc_ = 0;
41         uint32_t currentStack_ = 0;
42         const uint8_t *currentPtr_ = nullptr;
43         __extension__ CaptureState *captureResultList_[0];  // NOLINT(modernize-avoid-c-arrays)
44     };
45 
RegExpExecutor(RegExpCachedChunk * chunk)46     explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk)
47     {
48         ASSERT(chunk_ != nullptr);
49     };
50 
51     ~RegExpExecutor() = default;
52 
53     NO_COPY_SEMANTIC(RegExpExecutor);
54     NO_MOVE_SEMANTIC(RegExpExecutor);
55 
56     bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
57 
58     bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
HandleFirstSplit()59     inline bool HandleFirstSplit()
60     {
61         if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
62             (flags_ & RegExpParser::FLAG_STICKY) == 0) {
63             if (IsEOF()) {
64                 if (MatchFailed()) {
65                     return false;
66                 }
67             } else {
68                 AdvanceCurrentPtr();
69                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
70             }
71         }
72         return true;
73     }
74 
HandleOpAll(uint8_t opCode)75     inline bool HandleOpAll(uint8_t opCode)
76     {
77         if (IsEOF()) {
78             return !MatchFailed();
79         }
80         uint32_t currentChar = GetCurrentChar();
81         if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
82             return !MatchFailed();
83         }
84         Advance(opCode);
85         return true;
86     }
87 
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)88     inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
89     {
90         uint32_t expectedChar = 0;
91         if (opCode == RegExpOpCode::OP_CHAR32) {
92             expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
93         } else {
94             expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
95         }
96         if (IsEOF()) {
97             return !MatchFailed();
98         }
99         uint32_t currentChar = GetCurrentChar();
100         if (IsIgnoreCase()) {
101             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
102         }
103         if (currentChar == expectedChar) {
104             Advance(opCode);
105         } else {
106             if (MatchFailed()) {
107                 return false;
108             }
109         }
110         return true;
111     }
112 
HandleOpWordBoundary(uint8_t opCode)113     inline bool HandleOpWordBoundary(uint8_t opCode)
114     {
115         bool preIsWord = false;
116         if (GetCurrentPtr() != input_) {
117             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
118             preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
119         }
120         bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_));
121         if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
122             ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
123             ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
124             ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
125             Advance(opCode);
126         } else {
127             if (MatchFailed()) {
128                 return false;
129             }
130         }
131         return true;
132     }
133 
HandleOpLineStart(uint8_t opCode)134     inline bool HandleOpLineStart(uint8_t opCode)
135     {
136         if ((GetCurrentPtr() == input_) ||
137             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
139             Advance(opCode);
140         } else {
141             if (MatchFailed()) {
142                 return false;
143             }
144         }
145         return true;
146     }
147 
HandleOpLineEnd(uint8_t opCode)148     inline bool HandleOpLineEnd(uint8_t opCode)
149     {
150         if (IsEOF() ||
151             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
152             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) {
153             Advance(opCode);
154         } else {
155             if (MatchFailed()) {
156                 return false;
157             }
158         }
159         return true;
160     }
161 
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)162     inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
163     {
164         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
165         ASSERT(captureIndex < nCapture_);
166         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
167         CaptureState *captureState = &captureResultList_[captureIndex];
168         captureState->captureStart = GetCurrentPtr();
169         Advance(opCode);
170     }
171 
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)172     inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
173     {
174         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
175         ASSERT(captureIndex < nCapture_);
176         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
177         CaptureState *captureState = &captureResultList_[captureIndex];
178         captureState->captureEnd = GetCurrentPtr();
179         Advance(opCode);
180     }
181 
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)182     inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
183     {
184         uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
185         uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
186         for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
187             CaptureState *captureState =
188                 &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
189             captureState->captureStart = nullptr;
190             captureState->captureEnd = nullptr;
191         }
192         Advance(opCode);
193     }
194 
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)195     inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
196     {
197         auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
198         ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
199         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
200         Advance(opCode);
201         uint32_t splitPc = GetCurrentPC() + offset;
202         PushRegExpState(type, splitPc);
203     }
204 
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)205     inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
206     {
207         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
208         Advance(opCode);
209         PushRegExpState(STATE_SPLIT, GetCurrentPC());
210         AdvanceOffset(offset);
211     }
212 
HandleOpPrev(uint8_t opCode)213     inline bool HandleOpPrev(uint8_t opCode)
214     {
215         if (GetCurrentPtr() == input_) {
216             if (MatchFailed()) {
217                 return false;
218             }
219         } else {
220             PrevPtr(&currentPtr_, input_);
221             Advance(opCode);
222         }
223         return true;
224     }
225 
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)226     inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
227     {
228         uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
229         uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
230         uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
231         Advance(opCode);
232         uint32_t loopPcEnd = GetCurrentPC();
233         uint32_t loopPcStart = GetCurrentPC() + pcOffset;
234         bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
235         uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
236 
237         uint32_t loopCount = PeekStack();
238         SetStackValue(++loopCount);
239         if (loopCount < loopMax) {
240             // greedy failed, goto next
241             if (loopCount >= quantifyMin) {
242                 PushRegExpState(STATE_SPLIT, loopPcEnd);
243             }
244             // Goto loop start
245             SetCurrentPC(loopPcStart);
246         } else {
247             if (!isGreedy && (loopCount < quantifyMax)) {
248                 PushRegExpState(STATE_SPLIT, loopPcStart);
249             }
250         }
251     }
252 
HandleOpRange32(const DynChunk & byteCode)253     inline bool HandleOpRange32(const DynChunk &byteCode)
254     {
255         if (IsEOF()) {
256             return !MatchFailed();
257         }
258         uint32_t currentChar = GetCurrentChar();
259         if (IsIgnoreCase()) {
260             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
261         }
262         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
263         bool isFound = false;
264         int32_t idxMin = 0;
265         int32_t idxMax = static_cast<int32_t>(rangeCount) - 1;
266         int32_t idx = 0;
267         uint32_t low = 0;
268         uint32_t high =
269             byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET +
270                             RANGE32_MAX_HALF_OFFSET);
271         if (currentChar <= high) {
272             while (idxMin <= idxMax) {
273                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
274                 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
275                     RANGE32_MAX_OFFSET);
276                 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
277                     RANGE32_MAX_OFFSET +
278                     RANGE32_MAX_HALF_OFFSET);
279                 if (currentChar < low) {
280                     idxMax = idx - 1;
281                 } else if (currentChar > high) {
282                     idxMin = idx + 1;
283                 } else {
284                     isFound = true;
285                     break;
286                 }
287             }
288         }
289         if (isFound) {
290             AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
291         } else {
292             if (MatchFailed()) {
293                 return false;
294             }
295         }
296         return true;
297     }
298 
HandleOpRange(const DynChunk & byteCode)299     inline bool HandleOpRange(const DynChunk &byteCode)
300     {
301         if (IsEOF()) {
302             return !MatchFailed();
303         }
304         uint32_t currentChar = GetCurrentChar();
305         uint32_t currentCharNext = currentChar;
306         if (IsIgnoreCase()) {
307             currentCharNext = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar));
308         }
309         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
310         if (IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount) ||
311             IsFoundOpRange(GetCurrentPC(), currentCharNext, byteCode, rangeCount)) {
312             AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
313         } else {
314             if (MatchFailed()) {
315                 return false;
316             }
317         }
318         return true;
319     }
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)320     inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
321     {
322         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
323         if (captureIndex >= nCapture_) {
324             return !MatchFailed();
325         }
326         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
327         const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
328         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
329         const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
330         if (captureStart == nullptr || captureEnd == nullptr) {
331             Advance(opCode);
332             return true;
333         }
334         bool isMatched = true;
335         if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
336             const uint8_t *refCptr = captureStart;
337             while (refCptr < captureEnd) {
338                 if (IsEOF()) {
339                     isMatched = false;
340                     break;
341                 }
342                 // NOLINTNEXTLINE(readability-identifier-naming)
343                 uint32_t c1 = GetChar(&refCptr, captureEnd);
344                 // NOLINTNEXTLINE(readability-identifier-naming)
345                 uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
346                 if (IsIgnoreCase()) {
347                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
348                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
349                 }
350                 if (c1 != c2) {
351                     isMatched = false;
352                     break;
353                 }
354             }
355             if (!isMatched) {
356                 if (MatchFailed()) {
357                     return false;
358                 }
359             } else {
360                 Advance(opCode);
361             }
362         } else {
363             const uint8_t *refCptr = captureEnd;
364             while (refCptr > captureStart) {
365                 if (GetCurrentPtr() == input_) {
366                     isMatched = false;
367                     break;
368                 }
369                 // NOLINTNEXTLINE(readability-identifier-naming)
370                 uint32_t c1 = GetPrevChar(&refCptr, captureStart);
371                 // NOLINTNEXTLINE(readability-identifier-naming)
372                 uint32_t c2 = GetPrevChar(&currentPtr_, input_);
373                 if (IsIgnoreCase()) {
374                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
375                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
376                 }
377                 if (c1 != c2) {
378                     isMatched = false;
379                     break;
380                 }
381             }
382             if (!isMatched) {
383                 if (MatchFailed()) {
384                     return false;
385                 }
386             } else {
387                 Advance(opCode);
388             }
389         }
390         return true;
391     }
392 
393     inline void Advance(uint8_t opCode, uint32_t offset = 0)
394     {
395         currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize());
396     }
397 
AdvanceOffset(uint32_t offset)398     inline void AdvanceOffset(uint32_t offset)
399     {
400         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
401         currentPc_ += offset;
402     }
403 
GetCurrentChar()404     inline uint32_t GetCurrentChar()
405     {
406         return GetChar(&currentPtr_, inputEnd_);
407     }
408 
AdvanceCurrentPtr()409     inline void AdvanceCurrentPtr()
410     {
411         AdvancePtr(&currentPtr_, inputEnd_);
412     }
413 
GetChar(const uint8_t ** pp,const uint8_t * end)414     uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
415     {
416         uint32_t c = 0;
417         const uint8_t *cptr = *pp;
418         if (!isWideChar_) {
419             c = *cptr;
420             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
421         } else {
422             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
423             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
424             c = c1;
425             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
426             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
427                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
428                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
429                 if (U16_IS_TRAIL(c1)) {
430                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
431                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
432                 }
433             }
434             *pp = cptr;
435         }
436         return c;
437     }
438 
PeekChar(const uint8_t * p,const uint8_t * end)439     uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
440     {
441         uint32_t c = 0;
442         const uint8_t *cptr = p;
443         if (!isWideChar_) {
444             c = *cptr;
445         } else {
446             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
447             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
448             c = c1;
449             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
450             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
451                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
452                 if (U16_IS_TRAIL(c1)) {
453                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
454                 }
455             }
456         }
457         return c;
458     }
459 
AdvancePtr(const uint8_t ** pp,const uint8_t * end)460     void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
461     {
462         const uint8_t *cptr = *pp;
463         if (!isWideChar_) {
464             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
465         } else {
466             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
467             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
468             cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
469             if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
470                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
471                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
472                 if (U16_IS_TRAIL(c1)) {
473                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
474                 }
475             }
476             *pp = cptr;
477         }
478     }
479 
PeekPrevChar(const uint8_t * p,const uint8_t * start)480     uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
481     {
482         uint32_t c = 0;
483         const uint8_t *cptr = p;
484         if (!isWideChar_) {
485             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
486         } else {
487             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
488             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
489             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
490             c = c1;
491             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
492                  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
493                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
494                 if (U16_IS_LEAD(c1)) {
495                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
496                 }
497             }
498         }
499         return c;
500     }
501 
GetPrevChar(const uint8_t ** pp,const uint8_t * start)502     uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
503     {
504         uint32_t c = 0;
505         const uint8_t *cptr = *pp;
506         if (!isWideChar_) {
507             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
508             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
509             *pp = cptr;
510         } else {
511             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
512             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
513             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
514             c = c1;
515             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
516                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
517                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
518                 if (U16_IS_LEAD(c1)) {
519                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
520                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
521                 }
522             }
523             *pp = cptr;
524         }
525         return c;
526     }
527 
PrevPtr(const uint8_t ** pp,const uint8_t * start)528     void PrevPtr(const uint8_t **pp, const uint8_t *start) const
529     {
530         const uint8_t *cptr = *pp;
531         if (!isWideChar_) {
532             cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
533             *pp = cptr;
534         } else {
535             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
536             uint16_t c1 = *(const uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
537             if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
538                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
539                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
540                 if (U16_IS_LEAD(c1)) {
541                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
542                 }
543             }
544             *pp = cptr;
545         }
546     }
547 
548     bool MatchFailed(bool isMatched = false);
549 
SetCurrentPC(uint32_t pc)550     void SetCurrentPC(uint32_t pc)
551     {
552         currentPc_ = pc;
553     }
554 
SetCurrentPtr(const uint8_t * ptr)555     void SetCurrentPtr(const uint8_t *ptr)
556     {
557         currentPtr_ = ptr;
558     }
559 
IsEOF()560     bool IsEOF() const
561     {
562         return currentPtr_ >= inputEnd_;
563     }
564 
IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)565     bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar,
566                         const DynChunk &byteCode, const uint16_t rangeCount)
567     {
568         bool isFound = false;
569         int32_t idxMin = 0;
570         int32_t idxMax = static_cast<int32_t>(rangeCount - 1);
571         int32_t idx = 0;
572         uint32_t low = 0;
573         uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET +
574             static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
575         if (nowChar <= high) {
576             while (idxMin <= idxMax) {
577                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
578                 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
579                     RANGE32_MAX_HALF_OFFSET);
580                 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
581                     RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
582                 if (nowChar < low) {
583                     idxMax = idx - 1;
584                 } else if (nowChar > high) {
585                     idxMin = idx + 1;
586                 } else {
587                     isFound = true;
588                     break;
589                 }
590             }
591         }
592         return isFound;
593     }
594 
GetCurrentPC()595     uint32_t GetCurrentPC() const
596     {
597         return currentPc_;
598     }
599 
PushStack(uintptr_t val)600     void PushStack(uintptr_t val)
601     {
602         ASSERT(currentStack_ < nStack_);
603         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
604         stack_[currentStack_++] = val;
605     }
606 
SetStackValue(uintptr_t val)607     void SetStackValue(uintptr_t val) const
608     {
609         ASSERT(currentStack_ >= 1);
610         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
611         stack_[currentStack_ - 1] = val;
612     }
613 
PopStack()614     uintptr_t PopStack()
615     {
616         ASSERT(currentStack_ >= 1);
617         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
618         return stack_[--currentStack_];
619     }
620 
PeekStack()621     uintptr_t PeekStack() const
622     {
623         ASSERT(currentStack_ >= 1);
624         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
625         return stack_[currentStack_ - 1];
626     }
627 
GetCurrentPtr()628     const uint8_t *GetCurrentPtr() const
629     {
630         return currentPtr_;
631     }
632 
GetCaptureResultList()633     CaptureState *GetCaptureResultList() const
634     {
635         return captureResultList_;
636     }
637 
638     void DumpResult(std::ostream &out) const;
639 
640     void GetResult(JSThread *thread);
641 
642     void PushRegExpState(StateType type, uint32_t pc);
643 
644     RegExpState *PopRegExpState(bool copyCaptrue = true);
645 
DropRegExpState()646     void DropRegExpState()
647     {
648         stateStackLen_--;
649     }
650 
PeekRegExpState()651     RegExpState *PeekRegExpState() const
652     {
653         ASSERT(stateStackLen_ >= 1);
654         return reinterpret_cast<RegExpState *>(
655             stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
656             (stateStackLen_ - 1) * stateSize_);
657     }
658 
659     void ReAllocStack(uint32_t stackLen);
660 
IsWordChar(uint8_t value)661     inline bool IsWordChar(uint8_t value) const
662     {
663         return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
664                 (value == '_'));
665     }
666 
IsTerminator(uint32_t value)667     inline bool IsTerminator(uint32_t value) const
668     {
669         // NOLINTNEXTLINE(readability-magic-numbers)
670         return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
671     }
672 
IsIgnoreCase()673     inline bool IsIgnoreCase() const
674     {
675         return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
676     }
677 
IsUtf16()678     inline bool IsUtf16() const
679     {
680         return (flags_ & RegExpParser::FLAG_UTF16) != 0;
681     }
682 
683 private:
684     static constexpr size_t CHAR_SIZE = 1;
685     static constexpr size_t WIDE_CHAR_SIZE = 2;
686     static constexpr size_t SAVE_RESET_START = 1;
687     static constexpr size_t SAVE_RESET_END = 2;
688     static constexpr size_t LOOP_MIN_OFFSET = 5;
689     static constexpr size_t LOOP_MAX_OFFSET = 9;
690     static constexpr size_t LOOP_PC_OFFSET = 1;
691     static constexpr size_t RANGE32_HEAD_OFFSET = 3;
692     static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
693     static constexpr size_t RANGE32_MAX_OFFSET = 8;
694     static constexpr size_t RANGE32_OFFSET = 2;
695     static constexpr uint32_t STACK_MULTIPLIER = 2;
696     static constexpr uint32_t MIN_STACK_SIZE = 8;
697     static constexpr int TMP_BUF_SIZE = 128;
698     uint8_t *input_ = nullptr;
699     uint8_t *inputEnd_ = nullptr;
700     bool isWideChar_ = false;
701 
702     uint32_t currentPc_ = 0;
703     const uint8_t *currentPtr_ = nullptr;
704     CaptureState *captureResultList_ = nullptr;
705     uintptr_t *stack_ = nullptr;
706     uint32_t currentStack_ = 0;
707 
708     uint32_t nCapture_ = 0;
709     uint32_t nStack_ = 0;
710 
711     uint32_t flags_ = 0;
712     uint32_t stateStackLen_ = 0;
713     uint32_t stateStackSize_ = 0;
714     uint32_t stateSize_ = 0;
715     uint8_t *stateStack_ = nullptr;
716     RegExpCachedChunk *chunk_ = nullptr;
717 };
718 }  // namespace panda::ecmascript
719 #endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
720