• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18 
19 #include "ecmascript/builtins/builtins_regexp.h"
20 #include "ecmascript/regexp/regexp_parser.h"
21 #include "ecmascript/mem/regexp_cached_chunk.h"
22 #include "ecmascript/js_handle.h"
23 
24 namespace panda::ecmascript {
25 class RegExpExecutor {
26 public:
// Byte span of one capture group inside the input buffer.
// HandleOpBackReference walks it as the half-open range [captureStart, captureEnd).
struct CaptureState {
    const uint8_t *captureStart;  // first captured byte; set to nullptr by HandleOpSaveReset
    const uint8_t *captureEnd;    // one past the last captured byte; set to nullptr by HandleOpSaveReset
};
31 
// Kind tag of an entry on the backtracking state stack.
// The first three values must keep their order: HandleOpMatch derives them as
// (opCode - OP_SPLIT_NEXT).
enum StateType : uint8_t {
    STATE_SPLIT = 0,            /* Do not re-order. */
    STATE_NEGATIVE_MATCH_AHEAD, /* OP_NEGATIVE_MATCH_AHEAD - OP_SPLIT_NEXT */
    STATE_MATCH_AHEAD,          /* OP_MATCH_AHEAD - OP_SPLIT_NEXT */
    STATE_SAVE,                 /* pushed by the HandleOpSave* handlers before a capture slot is overwritten */
    STATE_PUSH,
    STATE_POP,
    STATE_SET,                  /* pushed by HandleOpLoop together with the previous loop counter */
    STATE_INVALID,
};
42 
43     struct RegExpState {
44         StateType type_ = STATE_SPLIT;
45         uint32_t currentPc_ = 0;
46         const uint8_t *currentPtr_ = nullptr;
47     };
48 
RegExpExecutor(RegExpCachedChunk * chunk)49     explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk)
50     {
51         ASSERT(chunk_ != nullptr);
52     };
53 
54     ~RegExpExecutor() = default;
55 
56     NO_COPY_SEMANTIC(RegExpExecutor);
57     NO_MOVE_SEMANTIC(RegExpExecutor);
58 
59     bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
60 
61     bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
HandleFirstSplit()62     inline bool HandleFirstSplit()
63     {
64         if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
65             (flags_ & RegExpParser::FLAG_STICKY) == 0) {
66             if (IsEOF()) {
67                 if (MatchFailed()) {
68                     return false;
69                 }
70             } else if (prefilter_ && !isWideChar_) {
71                 ++currentPtr_;
72                 currentPtr_ = (const uint8_t *)memchr(currentPtr_, prefilter_, inputEnd_ - currentPtr_);
73                 if (currentPtr_ == nullptr) {
74                     currentPtr_ = inputEnd_;
75                 }
76                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
77             } else {
78                 AdvanceCurrentPtr();
79                 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
80             }
81         }
82         return true;
83     }
84 
HandleOpAll(uint8_t opCode)85     inline bool HandleOpAll(uint8_t opCode)
86     {
87         if (IsEOF()) {
88             return !MatchFailed();
89         }
90         uint32_t currentChar = GetCurrentChar();
91         if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
92             return !MatchFailed();
93         }
94         Advance(opCode);
95         return true;
96     }
97 
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)98     inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
99     {
100         uint32_t expectedChar = 0;
101         if (opCode == RegExpOpCode::OP_CHAR32) {
102             expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
103         } else {
104             expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
105         }
106         if (IsEOF()) {
107             return !MatchFailed();
108         }
109         uint32_t currentChar = GetCurrentChar();
110         if (IsIgnoreCase()) {
111             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
112         }
113         if (currentChar == expectedChar) {
114             Advance(opCode);
115         } else {
116             if (MatchFailed()) {
117                 return false;
118             }
119         }
120         return true;
121     }
122 
HandleOpWordBoundary(uint8_t opCode)123     inline bool HandleOpWordBoundary(uint8_t opCode)
124     {
125         bool preIsWord = false;
126         if (GetCurrentPtr() != input_) {
127             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
128             preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
129         }
130         bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_));
131         if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
132             ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
133             ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
134             ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
135             Advance(opCode);
136         } else {
137             if (MatchFailed()) {
138                 return false;
139             }
140         }
141         return true;
142     }
143 
HandleOpLineStart(uint8_t opCode)144     inline bool HandleOpLineStart(uint8_t opCode)
145     {
146         if ((GetCurrentPtr() == input_) ||
147             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
148             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
149             Advance(opCode);
150         } else {
151             if (MatchFailed()) {
152                 return false;
153             }
154         }
155         return true;
156     }
157 
HandleOpLineEnd(uint8_t opCode)158     inline bool HandleOpLineEnd(uint8_t opCode)
159     {
160         if (IsEOF() ||
161             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
162             ((flags_ & RegExpParser::FLAG_MULTILINE) != 0
163              && (PeekChar(currentPtr_, inputEnd_) == '\n' || PeekChar(currentPtr_, inputEnd_) == '\r'))) {
164             Advance(opCode);
165         } else {
166             if (MatchFailed()) {
167                 return false;
168             }
169         }
170         return true;
171     }
172 
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)173     inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
174     {
175         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
176         ASSERT(captureIndex < nCapture_);
177         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178         CaptureState *captureState = &captureResultList_[captureIndex];
179         // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
180         PushRegExpState(STATE_SAVE, captureIndex * 2, reinterpret_cast<uintptr_t>(captureState->captureStart));
181         captureState->captureStart = GetCurrentPtr();
182         Advance(opCode);
183     }
184 
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)185     inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
186     {
187         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
188         ASSERT(captureIndex < nCapture_);
189         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
190         CaptureState *captureState = &captureResultList_[captureIndex];
191         // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
192         PushRegExpState(STATE_SAVE, captureIndex * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd));
193         captureState->captureEnd = GetCurrentPtr();
194         Advance(opCode);
195     }
196 
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)197     inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
198     {
199         uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
200         uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
201         for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
202             CaptureState *captureState =
203                 &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
204             // 2: Even indexes store captureStart.
205             // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
206             PushRegExpState(STATE_SAVE, i * 2, reinterpret_cast<uintptr_t>(captureState->captureStart));
207             // 2: Even indexes store captureStart.
208             // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
209             PushRegExpState(STATE_SAVE, i * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd));
210             captureState->captureStart = nullptr;
211             captureState->captureEnd = nullptr;
212         }
213         Advance(opCode);
214     }
215 
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)216     inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
217     {
218         auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
219         ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
220         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
221         Advance(opCode);
222         uint32_t splitPc = GetCurrentPC() + offset;
223         PushRegExpState(type, splitPc);
224     }
225 
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)226     inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
227     {
228         uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
229         Advance(opCode);
230         PushRegExpState(STATE_SPLIT, GetCurrentPC());
231         AdvanceOffset(offset);
232     }
233 
HandleOpPrev(uint8_t opCode)234     inline bool HandleOpPrev(uint8_t opCode)
235     {
236         if (GetCurrentPtr() == input_) {
237             if (MatchFailed()) {
238                 return false;
239             }
240         } else {
241             PrevPtr(&currentPtr_, input_);
242             Advance(opCode);
243         }
244         return true;
245     }
246 
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)247     inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
248     {
249         uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
250         uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
251         uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
252         Advance(opCode);
253         uint32_t loopPcEnd = GetCurrentPC();
254         uint32_t loopPcStart = GetCurrentPC() + pcOffset;
255         bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
256         uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
257 
258         uint32_t loopCount = PeekStack();
259         PushRegExpState(StateType::STATE_SET, 0, loopCount);
260         SetStackValue(++loopCount);
261         if (loopCount < loopMax) {
262             // greedy failed, goto next
263             if (loopCount >= quantifyMin) {
264                 PushRegExpState(STATE_SPLIT, loopPcEnd);
265             }
266             // Goto loop start
267             SetCurrentPC(loopPcStart);
268         } else {
269             if (!isGreedy && (loopCount < quantifyMax)) {
270                 PushRegExpState(STATE_SPLIT, loopPcStart);
271             }
272         }
273     }
274 
HandleOpRange32(const DynChunk & byteCode)275     inline bool HandleOpRange32(const DynChunk &byteCode)
276     {
277         if (IsEOF()) {
278             return !MatchFailed();
279         }
280         uint32_t currentChar = GetCurrentChar();
281         if (IsIgnoreCase()) {
282             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
283         }
284         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
285         bool isFound = false;
286         int32_t idxMin = 0;
287         int32_t idxMax = static_cast<int32_t>(rangeCount) - 1;
288         int32_t idx = 0;
289         uint32_t low = 0;
290         uint32_t high =
291             byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET +
292                             RANGE32_MAX_HALF_OFFSET);
293         if (currentChar <= high) {
294             while (idxMin <= idxMax) {
295                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
296                 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
297                     RANGE32_MAX_OFFSET);
298                 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
299                     RANGE32_MAX_OFFSET +
300                     RANGE32_MAX_HALF_OFFSET);
301                 if (currentChar < low) {
302                     idxMax = idx - 1;
303                 } else if (currentChar > high) {
304                     idxMin = idx + 1;
305                 } else {
306                     isFound = true;
307                     break;
308                 }
309             }
310         }
311         if (isFound) {
312             AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
313         } else {
314             if (MatchFailed()) {
315                 return false;
316             }
317         }
318         return true;
319     }
320 
HandleOpRange(const DynChunk & byteCode)321     inline bool HandleOpRange(const DynChunk &byteCode)
322     {
323         if (IsEOF()) {
324             return !MatchFailed();
325         }
326         uint32_t currentChar = GetCurrentChar();
327         uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
328         bool flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount);
329         if (IsIgnoreCase() && !flag) {
330             currentChar = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar));
331             flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount);
332         }
333         if (flag) {
334             AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
335         } else {
336             if (MatchFailed()) {
337                 return false;
338             }
339         }
340         return true;
341     }
342 
HandleOpSparse(const DynChunk & byteCode)343     inline bool HandleOpSparse(const DynChunk &byteCode)
344     {
345         if (IsEOF()) {
346             return !MatchFailed();
347         }
348         uint32_t currentChar = GetCurrentChar();
349         if (IsIgnoreCase()) {
350             currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
351         }
352         uint16_t sparseCount = byteCode.GetU16(GetCurrentPC() + 1);
353         for (uint32_t i = 0; i < sparseCount; i++) {
354             uint32_t sparseChar = byteCode.GetU16(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET);
355             if (currentChar == sparseChar) {
356                 uint32_t offset = byteCode.GetU32(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET +
357                     SPARSE_OFF_OFFSET);
358                 AdvanceOffset(offset + sparseCount * SPARSE_MAX_OFFSET + SPARSE_HEAD_OFFSET);
359                 return true;
360             }
361         }
362         return !MatchFailed();
363     }
364 
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)365     inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
366     {
367         uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
368         if (captureIndex >= nCapture_) {
369             return !MatchFailed();
370         }
371         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
372         const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
373         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
374         const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
375         if (captureStart == nullptr || captureEnd == nullptr) {
376             Advance(opCode);
377             return true;
378         }
379         bool isMatched = true;
380         if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
381             const uint8_t *refCptr = captureStart;
382             while (refCptr < captureEnd) {
383                 if (IsEOF()) {
384                     isMatched = false;
385                     break;
386                 }
387                 // NOLINTNEXTLINE(readability-identifier-naming)
388                 uint32_t c1 = GetChar(&refCptr, captureEnd);
389                 // NOLINTNEXTLINE(readability-identifier-naming)
390                 uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
391                 if (IsIgnoreCase()) {
392                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
393                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
394                 }
395                 if (c1 != c2) {
396                     isMatched = false;
397                     break;
398                 }
399             }
400             if (!isMatched) {
401                 if (MatchFailed()) {
402                     return false;
403                 }
404             } else {
405                 Advance(opCode);
406             }
407         } else {
408             const uint8_t *refCptr = captureEnd;
409             while (refCptr > captureStart) {
410                 if (GetCurrentPtr() == input_) {
411                     isMatched = false;
412                     break;
413                 }
414                 // NOLINTNEXTLINE(readability-identifier-naming)
415                 uint32_t c1 = GetPrevChar(&refCptr, captureStart);
416                 // NOLINTNEXTLINE(readability-identifier-naming)
417                 uint32_t c2 = GetPrevChar(&currentPtr_, input_);
418                 if (IsIgnoreCase()) {
419                     c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
420                     c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
421                 }
422                 if (c1 != c2) {
423                     isMatched = false;
424                     break;
425                 }
426             }
427             if (!isMatched) {
428                 if (MatchFailed()) {
429                     return false;
430                 }
431             } else {
432                 Advance(opCode);
433             }
434         }
435         return true;
436     }
437 
438     inline void Advance(uint8_t opCode, uint32_t offset = 0)
439     {
440         currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize());
441     }
442 
AdvanceOffset(uint32_t offset)443     inline void AdvanceOffset(uint32_t offset)
444     {
445         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
446         currentPc_ += offset;
447     }
448 
GetCurrentChar()449     inline uint32_t GetCurrentChar()
450     {
451         return GetChar(&currentPtr_, inputEnd_);
452     }
453 
AdvanceCurrentPtr()454     inline void AdvanceCurrentPtr()
455     {
456         AdvancePtr(&currentPtr_, inputEnd_);
457     }
458 
GetChar(const uint8_t ** pp,const uint8_t * end)459     uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
460     {
461         uint32_t c = 0;
462         const uint8_t *cptr = *pp;
463         if (!isWideChar_) {
464             c = *cptr;
465             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
466         } else {
467             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
468             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
469             c = c1;
470             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
471             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
472                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
473                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
474                 if (U16_IS_TRAIL(c1)) {
475                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
476                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
477                 }
478             }
479             *pp = cptr;
480         }
481         return c;
482     }
483 
PeekChar(const uint8_t * p,const uint8_t * end)484     uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
485     {
486         uint32_t c = 0;
487         const uint8_t *cptr = p;
488         if (!isWideChar_) {
489             c = *cptr;
490         } else {
491             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
492             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
493             c = c1;
494             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
495             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
496                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
497                 if (U16_IS_TRAIL(c1)) {
498                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
499                 }
500             }
501         }
502         return c;
503     }
504 
AdvancePtr(const uint8_t ** pp,const uint8_t * end)505     void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
506     {
507         const uint8_t *cptr = *pp;
508         if (!isWideChar_) {
509             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
510         } else {
511             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
512             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
513             cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
514             if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
515                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
516                 c1 = *(reinterpret_cast<const uint16_t *>(cptr));
517                 if (U16_IS_TRAIL(c1)) {
518                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
519                 }
520             }
521             *pp = cptr;
522         }
523     }
524 
PeekPrevChar(const uint8_t * p,const uint8_t * start)525     uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
526     {
527         uint32_t c = 0;
528         const uint8_t *cptr = p;
529         if (!isWideChar_) {
530             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
531         } else {
532             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
533             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
534             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
535             c = c1;
536             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
537                  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
538                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
539                 if (U16_IS_LEAD(c1)) {
540                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
541                 }
542             }
543         }
544         return c;
545     }
546 
GetPrevChar(const uint8_t ** pp,const uint8_t * start)547     uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
548     {
549         uint32_t c = 0;
550         const uint8_t *cptr = *pp;
551         if (!isWideChar_) {
552             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
553             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
554             *pp = cptr;
555         } else {
556             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
557             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
558             uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
559             c = c1;
560             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
561                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
562                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
563                 if (U16_IS_LEAD(c1)) {
564                     c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
565                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
566                 }
567             }
568             *pp = cptr;
569         }
570         return c;
571     }
572 
PrevPtr(const uint8_t ** pp,const uint8_t * start)573     void PrevPtr(const uint8_t **pp, const uint8_t *start) const
574     {
575         const uint8_t *cptr = *pp;
576         if (!isWideChar_) {
577             cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
578             *pp = cptr;
579         } else {
580             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
581             uint16_t c1 = *(const uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
582             if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
583                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
584                 c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
585                 if (U16_IS_LEAD(c1)) {
586                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
587                 }
588             }
589             *pp = cptr;
590         }
591     }
592 
593     bool MatchFailed(bool isMatched = false);
594 
SetCurrentPC(uint32_t pc)595     void SetCurrentPC(uint32_t pc)
596     {
597         currentPc_ = pc;
598     }
599 
SetCurrentPtr(const uint8_t * ptr)600     void SetCurrentPtr(const uint8_t *ptr)
601     {
602         currentPtr_ = ptr;
603     }
604 
IsEOF()605     bool IsEOF() const
606     {
607         return currentPtr_ >= inputEnd_;
608     }
609 
IsFoundOpRange(const uint32_t currentPc,const uint32_t nowChar,const DynChunk & byteCode,const uint16_t rangeCount)610     bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar,
611                         const DynChunk &byteCode, const uint16_t rangeCount)
612     {
613         bool isFound = false;
614         int32_t idxMin = 0;
615         int32_t idxMax = static_cast<int32_t>(rangeCount - 1);
616         int32_t idx = 0;
617         uint32_t low = 0;
618         uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET +
619             static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
620         if (nowChar <= high) {
621             while (idxMin <= idxMax) {
622                 idx = (idxMin + idxMax) / RANGE32_OFFSET;
623                 low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
624                     RANGE32_MAX_HALF_OFFSET);
625                 high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
626                     RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
627                 if (nowChar < low) {
628                     idxMax = idx - 1;
629                 } else if (nowChar > high) {
630                     idxMin = idx + 1;
631                 } else {
632                     isFound = true;
633                     break;
634                 }
635             }
636         }
637         return isFound;
638     }
639 
GetCurrentPC()640     uint32_t GetCurrentPC() const
641     {
642         return currentPc_;
643     }
644 
PushStack(uintptr_t val)645     void PushStack(uintptr_t val)
646     {
647         ASSERT(currentStack_ < nStack_);
648         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
649         stack_[currentStack_++] = val;
650     }
651 
SetStackValue(uintptr_t val)652     void SetStackValue(uintptr_t val) const
653     {
654         ASSERT(currentStack_ >= 1);
655         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
656         stack_[currentStack_ - 1] = val;
657     }
658 
PopStack()659     uintptr_t PopStack()
660     {
661         ASSERT(currentStack_ >= 1);
662         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
663         return stack_[--currentStack_];
664     }
665 
PeekStack()666     uintptr_t PeekStack() const
667     {
668         ASSERT(currentStack_ >= 1);
669         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
670         return stack_[currentStack_ - 1];
671     }
672 
GetCurrentPtr()673     const uint8_t *GetCurrentPtr() const
674     {
675         return currentPtr_;
676     }
677 
GetCaptureResultList()678     CaptureState *GetCaptureResultList() const
679     {
680         return captureResultList_;
681     }
682 
683     void DumpResult(std::ostream &out) const;
684 
685     void GetResult(JSThread *thread);
686 
687     void PushRegExpState(StateType type, uint32_t pc);
688     void PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr);
689 
690     StateType PopRegExpState(bool copyCapture = true);
691 
DropRegExpState()692     void DropRegExpState()
693     {
694         stateStackLen_--;
695     }
696 
PeekRegExpState()697     RegExpState *PeekRegExpState() const
698     {
699         ASSERT(stateStackLen_ >= 1);
700         return reinterpret_cast<RegExpState *>(
701             stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
702             (stateStackLen_ - 1) * sizeof(RegExpState));
703     }
704 
705     void ReAllocStack(uint32_t stackLen);
706 
// True when `value` is an ASCII word character: [0-9A-Za-z_].
// The parameter is a full code point: callers pass the uint32_t results of
// PeekChar / PeekPrevChar, and a narrower parameter would truncate e.g.
// U+0141 to 'A'. The function reads no object state, hence static.
static inline bool IsWordChar(uint32_t value)
{
    return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
            (value == '_'));
}
712 
IsTerminator(uint32_t value)713     inline bool IsTerminator(uint32_t value) const
714     {
715         // NOLINTNEXTLINE(readability-magic-numbers)
716         return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
717     }
718 
IsIgnoreCase()719     inline bool IsIgnoreCase() const
720     {
721         return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
722     }
723 
IsUtf16()724     inline bool IsUtf16() const
725     {
726         return (flags_ & RegExpParser::FLAG_UTF16) != 0;
727     }
728 
729 private:
730     static constexpr size_t CHAR_SIZE = 1;
731     static constexpr size_t WIDE_CHAR_SIZE = 2;
732     static constexpr size_t SAVE_RESET_START = 1;
733     static constexpr size_t SAVE_RESET_END = 2;
734     static constexpr size_t LOOP_MIN_OFFSET = 5;
735     static constexpr size_t LOOP_MAX_OFFSET = 9;
736     static constexpr size_t LOOP_PC_OFFSET = 1;
737     static constexpr size_t RANGE32_HEAD_OFFSET = 3;
738     static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
739     static constexpr size_t RANGE32_MAX_OFFSET = 8;
740     static constexpr size_t RANGE32_OFFSET = 2;
741     static constexpr size_t SPARSE_HEAD_OFFSET = 3;
742     static constexpr size_t SPARSE_OFF_OFFSET = 2;
743     static constexpr size_t SPARSE_MAX_OFFSET = 6;
744     static constexpr uint32_t STACK_MULTIPLIER = 2;
745     static constexpr uint32_t MIN_STACK_SIZE = 8;
746     static constexpr int TMP_BUF_SIZE = 128;
747     uint8_t *input_ = nullptr;
748     uint8_t *inputEnd_ = nullptr;
749     bool isWideChar_ = false;
750     uint16_t prefilter_ = 0;
751 
752     uint32_t currentPc_ = 0;
753     const uint8_t *currentPtr_ = nullptr;
754     CaptureState *captureResultList_ = nullptr;
755     uintptr_t *stack_ = nullptr;
756     uint32_t currentStack_ = 0;
757 
758     uint32_t nCapture_ = 0;
759     uint32_t nStack_ = 0;
760 
761     uint32_t flags_ = 0;
762     uint32_t stateStackLen_ = 0;
763     uint32_t stateStackSize_ = 0;
764     uint8_t *stateStack_ = nullptr;
765     RegExpCachedChunk *chunk_ = nullptr;
766 };
767 }  // namespace panda::ecmascript
768 #endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
769