• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17 #define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18 
19 #include "ecmascript/regexp/regexp_parser.h"
20 #include "ecmascript/mem/chunk.h"
21 
22 namespace panda::ecmascript {
23 class RegExpExecutor {
24 public:
25     struct CaptureState {
26         const uint8_t *captureStart;
27         const uint8_t *captureEnd;
28     };
29 
30     enum StateType : uint8_t {
31         STATE_SPLIT = 0,
32         STATE_MATCH_AHEAD,
33         STATE_NEGATIVE_MATCH_AHEAD,
34     };
35 
36     struct RegExpState {
37         StateType type_ = STATE_SPLIT;
38         uint32_t currentPc_ = 0;
39         uint32_t currentStack_ = 0;
40         const uint8_t *currentPtr_ = nullptr;
41         __extension__ CaptureState *captureResultList_[0];  // NOLINT(modernize-avoid-c-arrays)
42     };
43 
44     struct MatchResult {
45         uint32_t endIndex_ = 0;
46         uint32_t index_ = 0;
47         // first value is true if result is undefined
48         std::vector<std::pair<bool, JSHandle<EcmaString>>> captures_;
49         bool isSuccess_ = false;
50     };
51 
RegExpExecutor(Chunk * chunk)52     explicit RegExpExecutor(Chunk *chunk) : chunk_(chunk)
53     {
54         ASSERT(chunk_ != nullptr);
55     };
56 
57     ~RegExpExecutor() = default;
58 
59     NO_COPY_SEMANTIC(RegExpExecutor);
60     NO_MOVE_SEMANTIC(RegExpExecutor);
61 
62     bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
63 
64     bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
65     bool HandleFirstSplit();
66     bool HandleOpAll(uint8_t opCode);
67     bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode);
68     bool HandleOpWordBoundary(uint8_t opCode);
69     bool HandleOpLineStart(uint8_t opCode);
70     bool HandleOpLineEnd(uint8_t opCode);
71     void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode);
72     void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode);
73     void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode);
74     void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode);
75     void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode);
76     bool HandleOpPrev(uint8_t opCode);
77     void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode);
78     bool HandleOpRange32(const DynChunk &byteCode);
79     bool HandleOpRange(const DynChunk &byteCode);
80     bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode);
81 
82     inline void Advance(uint8_t opCode, uint32_t offset = 0)
83     {
84         currentPc_ += offset + RegExpOpCode::GetRegExpOpCode(opCode)->GetSize();
85     }
86 
AdvanceOffset(uint32_t offset)87     inline void AdvanceOffset(uint32_t offset)
88     {
89         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
90         currentPc_ += offset;
91     }
92 
GetCurrentChar()93     inline uint32_t GetCurrentChar()
94     {
95         return GetChar(&currentPtr_, inputEnd_);
96     }
97 
AdvanceCurrentPtr()98     inline void AdvanceCurrentPtr()
99     {
100         AdvancePtr(&currentPtr_, inputEnd_);
101     }
102 
GetChar(const uint8_t ** pp,const uint8_t * end)103     uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
104     {
105         uint32_t c;
106         const uint8_t *cptr = *pp;
107         if (!isWideChar_) {
108             c = *cptr;
109             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
110         } else {
111             uint16_t c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
112             c = c1;
113             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
114             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
115                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
116                 if (U16_IS_TRAIL(c1)) {
117                     c = U16_GET_SUPPLEMENTARY(c, c1);  // NOLINTNEXTLINE(hicpp-signed-bitwise)
118                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
119                 }
120             }
121             *pp = cptr;
122         }
123         return c;
124     }
125 
PeekChar(const uint8_t * p,const uint8_t * end)126     uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
127     {
128         uint32_t c;
129         const uint8_t *cptr = p;
130         if (!isWideChar_) {
131             c = *cptr;
132         } else {
133             uint16_t c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
134             c = c1;
135             cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
136             if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
137                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
138                 if (U16_IS_TRAIL(c1)) {
139                     c = U16_GET_SUPPLEMENTARY(c, c1);  // NOLINTNEXTLINE(hicpp-signed-bitwise)
140                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
141                 }
142             }
143         }
144         return c;
145     }
146 
AdvancePtr(const uint8_t ** pp,const uint8_t * end)147     void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
148     {
149         const uint8_t *cptr = *pp;
150         if (!isWideChar_) {
151             *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
152         } else {
153             uint16_t c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
154             cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
155             if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
156                 c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
157                 if (U16_IS_TRAIL(c1)) {
158                     cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
159                 }
160             }
161             *pp = cptr;
162         }
163     }
164 
PeekPrevChar(const uint8_t * p,const uint8_t * start)165     uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
166     {
167         uint32_t c;
168         const uint8_t *cptr = p;
169         if (!isWideChar_) {
170             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
171             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
172         } else {
173             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
174             uint16_t c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
175             c = c1;
176             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
177                 c1 = ((uint16_t *)cptr)[-1];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178                 if (U16_IS_LEAD(c1)) {
179                     c = U16_GET_SUPPLEMENTARY(c1, c);  // NOLINTNEXTLINE(hicpp-signed-bitwise)
180                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
181                 }
182             }
183         }
184         return c;
185     }
186 
GetPrevChar(const uint8_t ** pp,const uint8_t * start)187     uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
188     {
189         uint32_t c;
190         const uint8_t *cptr = *pp;
191         if (!isWideChar_) {
192             c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
193             cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
194             *pp = cptr;
195         } else {
196             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
197             uint16_t c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
198             c = c1;
199             if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
200                 c1 = ((uint16_t *)cptr)[-1];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
201                 if (U16_IS_LEAD(c1)) {
202                     c = U16_GET_SUPPLEMENTARY(c1, c);  // NOLINTNEXTLINE(hicpp-signed-bitwise)
203                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
204                 }
205             }
206             *pp = cptr;
207         }
208         return c;
209     }
210 
PrevPtr(const uint8_t ** pp,const uint8_t * start)211     void PrevPtr(const uint8_t **pp, const uint8_t *start) const
212     {
213         const uint8_t *cptr = *pp;
214         if (!isWideChar_) {
215             cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
216             *pp = cptr;
217         } else {
218             cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
219             uint16_t c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
220             if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
221                 c1 = ((uint16_t *)cptr)[-1];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
222                 if (U16_IS_LEAD(c1)) {
223                     cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
224                 }
225             }
226             *pp = cptr;
227         }
228     }
229 
230     bool MatchFailed(bool isMatched = false);
231 
SetCurrentPC(uint32_t pc)232     void SetCurrentPC(uint32_t pc)
233     {
234         currentPc_ = pc;
235     }
236 
SetCurrentPtr(const uint8_t * ptr)237     void SetCurrentPtr(const uint8_t *ptr)
238     {
239         currentPtr_ = ptr;
240     }
241 
IsEOF()242     bool IsEOF() const
243     {
244         return currentPtr_ >= inputEnd_;
245     }
246 
GetCurrentPC()247     uint32_t GetCurrentPC() const
248     {
249         return currentPc_;
250     }
251 
PushStack(uintptr_t val)252     void PushStack(uintptr_t val)
253     {
254         ASSERT(currentStack_ < nStack_);
255         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
256         stack_[currentStack_++] = val;
257     }
258 
SetStackValue(uintptr_t val)259     void SetStackValue(uintptr_t val) const
260     {
261         ASSERT(currentStack_ >= 1);
262         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
263         stack_[currentStack_ - 1] = val;
264     }
265 
PopStack()266     uintptr_t PopStack()
267     {
268         ASSERT(currentStack_ >= 1);
269         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
270         return stack_[--currentStack_];
271     }
272 
PeekStack()273     uintptr_t PeekStack() const
274     {
275         ASSERT(currentStack_ >= 1);
276         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
277         return stack_[currentStack_ - 1];
278     }
279 
GetCurrentPtr()280     const uint8_t *GetCurrentPtr() const
281     {
282         return currentPtr_;
283     }
284 
GetCaptureResultList()285     CaptureState *GetCaptureResultList() const
286     {
287         return captureResultList_;
288     }
289 
290     void DumpResult(std::ostream &out) const;
291 
292     MatchResult GetResult(const JSThread *thread, bool isSuccess) const;
293 
294     void PushRegExpState(StateType type, uint32_t pc);
295 
296     RegExpState *PopRegExpState(bool copyCaptrue = true);
297 
DropRegExpState()298     void DropRegExpState()
299     {
300         stateStackLen_--;
301     }
302 
PeekRegExpState()303     RegExpState *PeekRegExpState() const
304     {
305         ASSERT(stateStackLen_ >= 1);
306         return reinterpret_cast<RegExpState *>(
307             stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
308             (stateStackLen_ - 1) * stateSize_);
309     }
310 
311     void ReAllocStack(uint32_t stackLen);
312 
IsWordChar(uint8_t value)313     inline bool IsWordChar(uint8_t value) const
314     {
315         return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
316                 (value == '_'));
317     }
318 
IsTerminator(uint32_t value)319     inline bool IsTerminator(uint32_t value) const
320     {
321         // NOLINTNEXTLINE(readability-magic-numbers)
322         return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
323     }
324 
IsIgnoreCase()325     inline bool IsIgnoreCase() const
326     {
327         return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
328     }
329 
IsUtf16()330     inline bool IsUtf16() const
331     {
332         return (flags_ & RegExpParser::FLAG_UTF16) != 0;
333     }
334 
335 private:
336     static constexpr size_t CHAR_SIZE = 1;
337     static constexpr size_t WIDE_CHAR_SIZE = 2;
338     static constexpr size_t SAVE_RESET_START = 1;
339     static constexpr size_t SAVE_RESET_END = 2;
340     static constexpr size_t LOOP_MIN_OFFSET = 5;
341     static constexpr size_t LOOP_MAX_OFFSET = 9;
342     static constexpr size_t LOOP_PC_OFFSET = 1;
343     static constexpr size_t RANGE32_HEAD_OFFSET = 3;
344     static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
345     static constexpr size_t RANGE32_MAX_OFFSET = 8;
346     static constexpr size_t RANGE32_OFFSET = 2;
347     static constexpr uint32_t STACK_MULTIPLIER = 2;
348     static constexpr uint32_t MIN_STACK_SIZE = 8;
349     uint8_t *input_ = nullptr;
350     uint8_t *inputEnd_ = nullptr;
351     bool isWideChar_ = false;
352 
353     uint32_t currentPc_ = 0;
354     const uint8_t *currentPtr_ = nullptr;
355     CaptureState *captureResultList_ = nullptr;
356     uintptr_t *stack_ = nullptr;
357     uint32_t currentStack_ = 0;
358 
359     uint32_t nCapture_ = 0;
360     uint32_t nStack_ = 0;
361 
362     uint32_t flags_ = 0;
363     uint32_t stateStackLen_ = 0;
364     uint32_t stateStackSize_ = 0;
365     uint32_t stateSize_ = 0;
366     uint8_t *stateStack_ = nullptr;
367     Chunk *chunk_ = nullptr;
368 };
369 }  // namespace panda::ecmascript
370 #endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
371