• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_executor.h"
17 
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/mem/c_string.h"
20 #include "ecmascript/mem/dyn_chunk.h"
21 #include "ecmascript/regexp/regexp_opcode.h"
22 #include "securec.h"
23 
24 namespace panda::ecmascript {
25 using RegExpState = RegExpExecutor::RegExpState;
26 using MatchResult = RegExpExecutor::MatchResult;
Execute(const uint8_t * input,uint32_t lastIndex,uint32_t length,uint8_t * buf,bool isWideChar)27 bool RegExpExecutor::Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar)
28 {
29     DynChunk buffer(buf, chunk_);
30     input_ = const_cast<uint8_t *>(input);
31     inputEnd_ = const_cast<uint8_t *>(input + length * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
32     uint32_t size = buffer.GetU32(0);
33     nCapture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET);
34     nStack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET);
35     flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET);
36     isWideChar_ = isWideChar;
37 
38     uint32_t captureResultSize = sizeof(CaptureState) * nCapture_;
39     uint32_t stackSize = sizeof(uintptr_t) * nStack_;
40     stateSize_ = sizeof(RegExpState) + captureResultSize + stackSize;
41     stateStackLen_ = 0;
42 
43     if (captureResultSize != 0) {
44         captureResultList_ = chunk_->NewArray<CaptureState>(nCapture_);
45         if (memset_s(captureResultList_, captureResultSize, 0, captureResultSize) != EOK) {
46             LOG_FULL(FATAL) << "memset_s failed";
47             UNREACHABLE();
48         }
49     }
50     if (stackSize != 0) {
51         stack_ = chunk_->NewArray<uintptr_t>(nStack_);
52         if (memset_s(stack_, stackSize, 0, stackSize) != EOK) {
53             LOG_FULL(FATAL) << "memset_s failed";
54             UNREACHABLE();
55         }
56     }
57     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
58     SetCurrentPtr(input + lastIndex * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
59     SetCurrentPC(RegExpParser::OP_START_OFFSET);
60 
61     // first split
62     if ((flags_ & RegExpParser::FLAG_STICKY) == 0) {
63         PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
64     }
65     return ExecuteInternal(buffer, size);
66 }
67 
MatchFailed(bool isMatched)68 bool RegExpExecutor::MatchFailed(bool isMatched)
69 {
70     while (true) {
71         if (stateStackLen_ == 0) {
72             return true;
73         }
74         RegExpState *state = PeekRegExpState();
75         if (state->type_ == StateType::STATE_SPLIT) {
76             if (!isMatched) {
77                 PopRegExpState();
78                 return false;
79             }
80         } else {
81             isMatched = (state->type_ == StateType::STATE_MATCH_AHEAD && isMatched) ||
82                         (state->type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD && !isMatched);
83             if (isMatched) {
84                 if (state->type_ == StateType::STATE_MATCH_AHEAD) {
85                     PopRegExpState(false);
86                     return false;
87                 }
88                 if (state->type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD) {
89                     PopRegExpState();
90                     return false;
91                 }
92             }
93         }
94         DropRegExpState();
95     }
96 
97     return true;
98 }
99 
100 // NOLINTNEXTLINE(readability-function-size)
ExecuteInternal(const DynChunk & byteCode,uint32_t pcEnd)101 bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd)
102 {
103     while (GetCurrentPC() < pcEnd) {
104         // first split
105         if (!HandleFirstSplit()) {
106             return false;
107         }
108         uint8_t opCode = byteCode.GetU8(GetCurrentPC());
109         switch (opCode) {
110             case RegExpOpCode::OP_DOTS:
111             case RegExpOpCode::OP_ALL: {
112                 if (!HandleOpAll(opCode)) {
113                     return false;
114                 }
115                 break;
116             }
117             case RegExpOpCode::OP_CHAR32:
118             case RegExpOpCode::OP_CHAR: {
119                 if (!HandleOpChar(byteCode, opCode)) {
120                     return false;
121                 }
122                 break;
123             }
124             case RegExpOpCode::OP_NOT_WORD_BOUNDARY:
125             case RegExpOpCode::OP_WORD_BOUNDARY: {
126                 if (!HandleOpWordBoundary(opCode)) {
127                     return false;
128                 }
129                 break;
130             }
131             case RegExpOpCode::OP_LINE_START: {
132                 if (!HandleOpLineStart(opCode)) {
133                     return false;
134                 }
135                 break;
136             }
137             case RegExpOpCode::OP_LINE_END: {
138                 if (!HandleOpLineEnd(opCode)) {
139                     return false;
140                 }
141                 break;
142             }
143             case RegExpOpCode::OP_SAVE_START:
144                 HandleOpSaveStart(byteCode, opCode);
145                 break;
146             case RegExpOpCode::OP_SAVE_END:
147                 HandleOpSaveEnd(byteCode, opCode);
148                 break;
149             case RegExpOpCode::OP_GOTO: {
150                 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
151                 Advance(opCode, offset);
152                 break;
153             }
154             case RegExpOpCode::OP_MATCH: {
155                 // jump to match ahead
156                 if (MatchFailed(true)) {
157                     return false;
158                 }
159                 break;
160             }
161             case RegExpOpCode::OP_MATCH_END:
162                 return true;
163             case RegExpOpCode::OP_SAVE_RESET:
164                 HandleOpSaveReset(byteCode, opCode);
165                 break;
166             case RegExpOpCode::OP_SPLIT_NEXT:
167             case RegExpOpCode::OP_MATCH_AHEAD:
168             case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD:
169                 HandleOpMatch(byteCode, opCode);
170                 break;
171             case RegExpOpCode::OP_SPLIT_FIRST:
172                 HandleOpSplitFirst(byteCode, opCode);
173                 break;
174             case RegExpOpCode::OP_PREV: {
175                 if (!HandleOpPrev(opCode)) {
176                     return false;
177                 }
178                 break;
179             }
180             case RegExpOpCode::OP_LOOP_GREEDY:
181             case RegExpOpCode::OP_LOOP:
182                 HandleOpLoop(byteCode, opCode);
183                 break;
184             case RegExpOpCode::OP_PUSH_CHAR: {
185                 PushStack(reinterpret_cast<uintptr_t>(GetCurrentPtr()));
186                 Advance(opCode);
187                 break;
188             }
189             case RegExpOpCode::OP_CHECK_CHAR: {
190                 if (PopStack() != reinterpret_cast<uintptr_t>(GetCurrentPtr())) {
191                     Advance(opCode);
192                 } else {
193                     uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
194                     Advance(opCode, offset);
195                 }
196                 break;
197             }
198             case RegExpOpCode::OP_PUSH: {
199                 PushStack(0);
200                 Advance(opCode);
201                 break;
202             }
203             case RegExpOpCode::OP_POP: {
204                 PopStack();
205                 Advance(opCode);
206                 break;
207             }
208             case RegExpOpCode::OP_RANGE32: {
209                 if (!HandleOpRange32(byteCode)) {
210                     return false;
211                 }
212                 break;
213             }
214             case RegExpOpCode::OP_RANGE: {
215                 if (!HandleOpRange(byteCode)) {
216                     return false;
217                 }
218                 break;
219             }
220             case RegExpOpCode::OP_BACKREFERENCE:
221             case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: {
222                 if (!HandleOpBackReference(byteCode, opCode)) {
223                     return false;
224                 }
225                 break;
226             }
227             default:
228                 UNREACHABLE();
229         }
230     }
231     // for loop match
232     return true;
233 }
234 
DumpResult(std::ostream & out) const235 void RegExpExecutor::DumpResult(std::ostream &out) const
236 {
237     out << "captures:" << std::endl;
238     for (uint32_t i = 0; i < nCapture_; i++) {
239         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
240         CaptureState *captureState = &captureResultList_[i];
241         int32_t len = captureState->captureEnd - captureState->captureStart;
242         if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
243             out << i << ":\t" << CString(reinterpret_cast<const char *>(captureState->captureStart), len) << std::endl;
244         } else {
245             out << i << ":\t"
246                 << "undefined" << std::endl;
247         }
248     }
249 }
250 
GetResult(const JSThread * thread,bool isSuccess) const251 MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool isSuccess) const
252 {
253     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
254     MatchResult result;
255     std::vector<std::pair<bool, JSHandle<EcmaString>>> captures;
256     result.isSuccess_ = isSuccess;
257     if (isSuccess) {
258         for (uint32_t i = 0; i < nCapture_; i++) {
259             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
260             CaptureState *captureState = &captureResultList_[i];
261             if (i == 0) {
262                 result.index_ = captureState->captureStart - input_;
263                 if (isWideChar_) {
264                     result.index_ /= WIDE_CHAR_SIZE;
265                 }
266             }
267             int32_t len = captureState->captureEnd - captureState->captureStart;
268             std::pair<bool, JSHandle<EcmaString>> pair;
269             if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
270                 pair.first = false;
271                 if (isWideChar_) {
272                     // create utf-16 string
273                     pair.second = factory->NewFromUtf16(
274                         reinterpret_cast<const uint16_t *>(captureState->captureStart), len / 2);
275                 } else {
276                     // create utf-8 string
277                     CVector<uint8_t> buffer(len + 1);
278                     uint8_t *dest = buffer.data();
279                     if (memcpy_s(dest, len + 1, reinterpret_cast<const uint8_t *>(captureState->captureStart), len) !=
280                         EOK) {
281                         LOG_FULL(FATAL) << "memcpy_s failed";
282                         UNREACHABLE();
283                     }
284                     dest[len] = '\0';  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
285                     pair.second =
286                         factory->NewFromUtf8(reinterpret_cast<const uint8_t *>(buffer.data()), len);
287                 }
288             } else {
289                 // undefined
290                 pair.first = true;
291             }
292             captures.emplace_back(pair);
293         }
294         result.captures_ = captures;
295         result.endIndex_ = currentPtr_ - input_;
296         if (isWideChar_) {
297             result.endIndex_ /= WIDE_CHAR_SIZE;
298         }
299     }
300     return result;
301 }
302 
PushRegExpState(StateType type,uint32_t pc)303 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc)
304 {
305     ReAllocStack(stateStackLen_ + 1);
306     auto state = reinterpret_cast<RegExpState *>(
307         stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
308         stateStackLen_ * stateSize_);
309     state->type_ = type;
310     state->currentPc_ = pc;
311     state->currentStack_ = currentStack_;
312     state->currentPtr_ = GetCurrentPtr();
313     size_t listSize = sizeof(CaptureState) * nCapture_;
314     if (memcpy_s(state->captureResultList_, listSize, GetCaptureResultList(), listSize) != EOK) {
315         LOG_FULL(FATAL) << "memcpy_s failed";
316         UNREACHABLE();
317     }
318     uint8_t *stackStart =
319         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
320         reinterpret_cast<uint8_t *>(state->captureResultList_) + sizeof(CaptureState) * nCapture_;
321     if (stack_ != nullptr) {
322         size_t stackSize = sizeof(uintptr_t) * nStack_;
323         if (memcpy_s(stackStart, stackSize, stack_, stackSize) != EOK) {
324             LOG_FULL(FATAL) << "memcpy_s failed";
325             UNREACHABLE();
326         }
327     }
328     stateStackLen_++;
329 }
330 
PopRegExpState(bool copyCaptrue)331 RegExpState *RegExpExecutor::PopRegExpState(bool copyCaptrue)
332 {
333     if (stateStackLen_ != 0) {
334         auto state = PeekRegExpState();
335         size_t listSize = sizeof(CaptureState) * nCapture_;
336         if (copyCaptrue) {
337             if (memcpy_s(GetCaptureResultList(), listSize, state->captureResultList_, listSize) != EOK) {
338                 LOG_FULL(FATAL) << "memcpy_s failed";
339                 UNREACHABLE();
340             }
341         }
342         SetCurrentPtr(state->currentPtr_);
343         SetCurrentPC(state->currentPc_);
344         currentStack_ = state->currentStack_;
345         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
346         uint8_t *stackStart = reinterpret_cast<uint8_t *>(state->captureResultList_) + listSize;
347         if (stack_ != nullptr) {
348             size_t stackSize = sizeof(uintptr_t) * nStack_;
349             if (memcpy_s(stack_, stackSize, stackStart, stackSize) != EOK) {
350                 LOG_FULL(FATAL) << "memcpy_s failed";
351                 UNREACHABLE();
352             }
353         }
354         stateStackLen_--;
355         return state;
356     }
357     return nullptr;
358 }
359 
ReAllocStack(uint32_t stackLen)360 void RegExpExecutor::ReAllocStack(uint32_t stackLen)
361 {
362     if (stackLen > stateStackSize_) {
363         ASSERT((static_cast<size_t>(stateStackSize_) * 2) <= static_cast<size_t>(UINT32_MAX)); // 2: double the size
364         uint32_t newStackSize = std::max(stateStackSize_ * 2, MIN_STACK_SIZE);  // 2: double the size
365         ASSERT((static_cast<size_t>(newStackSize) * static_cast<size_t>(stateSize_)) <=
366             static_cast<size_t>(UINT32_MAX));
367         uint32_t stackByteSize = newStackSize * stateSize_;
368         auto newStack = chunk_->NewArray<uint8_t>(stackByteSize);
369         if (memset_s(newStack, stackByteSize, 0, stackByteSize) != EOK) {
370             LOG_FULL(FATAL) << "memset_s failed";
371             UNREACHABLE();
372         }
373         if (stateStack_ != nullptr) {
374             auto stackSize = stateStackSize_ * stateSize_;
375             if (memcpy_s(newStack, stackSize, stateStack_, stackSize) != EOK) {
376                 return;
377             }
378         }
379         stateStack_ = newStack;
380         stateStackSize_ = newStackSize;
381     }
382 }
383 }  // namespace panda::ecmascript
384