• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_executor.h"
17 
18 namespace panda::ecmascript {
19 using RegExpState = RegExpExecutor::RegExpState;
20 using RegExpGlobalResult = builtins::RegExpGlobalResult;
Execute(const uint8_t * input,uint32_t lastIndex,uint32_t length,uint8_t * buf,bool isWideChar)21 bool RegExpExecutor::Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar)
22 {
23     DynChunk buffer(buf, chunk_);
24     input_ = const_cast<uint8_t *>(input);
25     inputEnd_ = const_cast<uint8_t *>(input + length * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
26     uint32_t size = buffer.GetU32(0);
27     nCapture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET);
28     nStack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET);
29     flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET);
30     prefilter_ = buffer.GetU32(RegExpParser::PREFILTER_OFFSET);
31     isWideChar_ = isWideChar;
32 
33     uint32_t captureResultSize = sizeof(CaptureState) * nCapture_;
34     uint32_t stackSize = sizeof(uintptr_t) * nStack_;
35     stateStackLen_ = 0;
36     currentStack_ = 0;
37 
38     if (captureResultSize != 0) {
39         if (captureResultList_ == nullptr) {
40             captureResultList_ = chunk_->NewArray<CaptureState>(nCapture_);
41         }
42         if (memset_s(captureResultList_, captureResultSize, 0, captureResultSize) != EOK) {
43             LOG_FULL(FATAL) << "memset_s failed";
44             UNREACHABLE();
45         }
46     }
47     if (stackSize != 0 && stack_ == nullptr) {
48         stack_ = chunk_->NewArray<uintptr_t>(nStack_);
49         if (memset_s(stack_, stackSize, 0, stackSize) != EOK) {
50             LOG_FULL(FATAL) << "memset_s failed";
51             UNREACHABLE();
52         }
53     }
54     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
55     SetCurrentPtr(input + lastIndex * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
56     SetCurrentPC(RegExpParser::OP_START_OFFSET);
57 
58     // first split
59     if ((flags_ & RegExpParser::FLAG_STICKY) == 0) {
60         PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
61     }
62     return ExecuteInternal(buffer, size);
63 }
64 
MatchFailed(bool isMatched)65 bool RegExpExecutor::MatchFailed(bool isMatched)
66 {
67     if (isMatched) {
68         stateStackLen_ = 0;
69         return true;
70     }
71     while (stateStackLen_ > 0) {
72         // StateType::STATE_SPLIT or STATE_NEGATIVE_MATCH_AHEAD
73         if (PopRegExpState() <= StateType::STATE_NEGATIVE_MATCH_AHEAD) {
74             return false;
75         }
76     }
77     return true;
78 }
79 
80 // NOLINTNEXTLINE(readability-function-size)
ExecuteInternal(const DynChunk & byteCode,uint32_t pcEnd)81 bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd)
82 {
83     while (GetCurrentPC() < pcEnd) {
84         // first split
85         if (!HandleFirstSplit()) {
86             return false;
87         }
88         uint8_t opCode = byteCode.GetU8(GetCurrentPC());
89         switch (opCode) {
90             case RegExpOpCode::OP_DOTS:
91             case RegExpOpCode::OP_ALL: {
92                 if (!HandleOpAll(opCode)) {
93                     return false;
94                 }
95                 break;
96             }
97             case RegExpOpCode::OP_CHAR32:
98             case RegExpOpCode::OP_CHAR: {
99                 if (!HandleOpChar(byteCode, opCode)) {
100                     return false;
101                 }
102                 break;
103             }
104             case RegExpOpCode::OP_NOT_WORD_BOUNDARY:
105             case RegExpOpCode::OP_WORD_BOUNDARY: {
106                 if (!HandleOpWordBoundary(opCode)) {
107                     return false;
108                 }
109                 break;
110             }
111             case RegExpOpCode::OP_LINE_START: {
112                 if (!HandleOpLineStart(opCode)) {
113                     return false;
114                 }
115                 break;
116             }
117             case RegExpOpCode::OP_LINE_END: {
118                 if (!HandleOpLineEnd(opCode)) {
119                     return false;
120                 }
121                 break;
122             }
123             case RegExpOpCode::OP_SAVE_START:
124                 HandleOpSaveStart(byteCode, opCode);
125                 break;
126             case RegExpOpCode::OP_SAVE_END:
127                 HandleOpSaveEnd(byteCode, opCode);
128                 break;
129             case RegExpOpCode::OP_GOTO: {
130                 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
131                 Advance(opCode, offset);
132                 break;
133             }
134             case RegExpOpCode::OP_MATCH: {
135                 ASSERT(stateStackLen_ > 0);
136                 // jump to match ahead
137                 uint32_t ahead = stateStackLen_ - 1;
138                 auto stateStack = reinterpret_cast<RegExpState *>(stateStack_);
139                 while (ahead != 0 && stateStack[ahead].type_ != StateType::STATE_MATCH_AHEAD &&
140                     stateStack[ahead].type_ != StateType::STATE_NEGATIVE_MATCH_AHEAD) {
141                     --ahead;
142                 }
143                 bool isNegative = stateStack[ahead].type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD;
144                 while (stateStackLen_ > ahead) {
145                     PopRegExpState(isNegative);
146                 }
147                 if (isNegative && MatchFailed(false)) {
148                     return false;
149                 }
150                 break;
151             }
152             case RegExpOpCode::OP_MATCH_END:
153                 return true;
154             case RegExpOpCode::OP_SAVE_RESET:
155                 HandleOpSaveReset(byteCode, opCode);
156                 break;
157             case RegExpOpCode::OP_SPLIT_NEXT:
158             case RegExpOpCode::OP_MATCH_AHEAD:
159             case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD:
160                 HandleOpMatch(byteCode, opCode);
161                 break;
162             case RegExpOpCode::OP_SPLIT_FIRST:
163                 HandleOpSplitFirst(byteCode, opCode);
164                 break;
165             case RegExpOpCode::OP_PREV: {
166                 if (!HandleOpPrev(opCode)) {
167                     return false;
168                 }
169                 break;
170             }
171             case RegExpOpCode::OP_LOOP_GREEDY:
172             case RegExpOpCode::OP_LOOP:
173                 HandleOpLoop(byteCode, opCode);
174                 break;
175             case RegExpOpCode::OP_PUSH_CHAR: {
176                 PushRegExpState(StateType::STATE_PUSH, 0, 0);
177                 PushStack(reinterpret_cast<uintptr_t>(GetCurrentPtr()));
178                 Advance(opCode);
179                 break;
180             }
181             case RegExpOpCode::OP_CHECK_CHAR: {
182                 if (stateStackLen_ > 0 && PeekRegExpState()->type_ == StateType::STATE_PUSH) {
183                     DropRegExpState();
184                 } else {
185                     ASSERT(currentStack_ > 0);
186                     PushRegExpState(StateType::STATE_POP, 0, stack_[currentStack_ - 1]);
187                 }
188                 if (PopStack() != reinterpret_cast<uintptr_t>(GetCurrentPtr())) {
189                     Advance(opCode);
190                 } else {
191                     uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
192                     Advance(opCode, offset);
193                 }
194                 break;
195             }
196             case RegExpOpCode::OP_PUSH: {
197                 PushRegExpState(StateType::STATE_PUSH, 0, 0);
198                 PushStack(0);
199                 Advance(opCode);
200                 break;
201             }
202             case RegExpOpCode::OP_POP: {
203                 ASSERT(currentStack_ > 0);
204                 PushRegExpState(StateType::STATE_POP, 0, stack_[currentStack_ - 1]);
205                 PopStack();
206                 Advance(opCode);
207                 break;
208             }
209             case RegExpOpCode::OP_RANGE32: {
210                 if (!HandleOpRange32(byteCode)) {
211                     return false;
212                 }
213                 break;
214             }
215             case RegExpOpCode::OP_RANGE: {
216                 if (!HandleOpRange(byteCode)) {
217                     return false;
218                 }
219                 break;
220             }
221             case RegExpOpCode::OP_SPARSE: {
222                 if (!HandleOpSparse(byteCode)) {
223                     return false;
224                 }
225                 break;
226             }
227             case RegExpOpCode::OP_BACKREFERENCE:
228             case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: {
229                 if (!HandleOpBackReference(byteCode, opCode)) {
230                     return false;
231                 }
232                 break;
233             }
234             default: {
235                 LOG_ECMA(FATAL) << "opCode overflow, OpCode: " << static_cast<int32_t>(opCode)
236                                 << " CurrentPc: " << static_cast<int32_t>(GetCurrentPC()) << " pcEnd: " << pcEnd;
237                 UNREACHABLE();
238             }
239         }
240     }
241     // for loop match
242     return true;
243 }
244 
DumpResult(std::ostream & out) const245 void RegExpExecutor::DumpResult(std::ostream &out) const
246 {
247     out << "captures:" << std::endl;
248     for (uint32_t i = 0; i < nCapture_; i++) {
249         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
250         CaptureState *captureState = &captureResultList_[i];
251         int32_t len = captureState->captureEnd - captureState->captureStart;
252         if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
253             out << i << ":\t" << CString(reinterpret_cast<const char *>(captureState->captureStart), len) << std::endl;
254         } else {
255             out << i << ":\t"
256                 << "undefined" << std::endl;
257         }
258     }
259 }
260 
GetResult(JSThread * thread)261 void RegExpExecutor::GetResult(JSThread *thread)
262 {
263     JSHandle<RegExpGlobalResult> matchResult(thread->GetCurrentEcmaContext()->GetRegExpGlobalResult());
264     matchResult->SetTotalCaptureCounts(thread, JSTaggedValue(nCapture_));
265     uint32_t firstIndex = RegExpGlobalResult::FIRST_CAPTURE_INDEX;
266     uint32_t availableCaptureSlot = matchResult->GetLength() - firstIndex;
267     uint32_t requiredLength =  nCapture_ * 2;
268     if (requiredLength > availableCaptureSlot) {
269         matchResult = RegExpGlobalResult::GrowCapturesCapacity(thread, matchResult, requiredLength + firstIndex);
270     }
271     for (uint32_t i = 0; i < nCapture_; i++) {
272         CaptureState *captureState = &captureResultList_[i];
273         int32_t len = captureState->captureEnd - captureState->captureStart;
274         if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
275             if (isWideChar_) {
276                 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(
277                     static_cast<int32_t>((captureState->captureStart - input_) / WIDE_CHAR_SIZE)));
278                 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(
279                     static_cast<int32_t>((captureState->captureEnd - input_) / WIDE_CHAR_SIZE)));
280             } else {
281                 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(
282                     static_cast<int32_t>(captureState->captureStart - input_)));
283                 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(
284                     static_cast<int32_t>(captureState->captureEnd - input_)));
285             }
286         } else {
287             // undefined
288             matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(0));
289             matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(-1));
290         }
291     }
292     uint32_t endIndex = currentPtr_ - input_;
293     if (isWideChar_) {
294         endIndex /= WIDE_CHAR_SIZE;
295     }
296     matchResult->SetEndIndex(thread, JSTaggedValue(endIndex));
297 }
298 
PushRegExpState(StateType type,uint32_t pc)299 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc)
300 {
301     ReAllocStack(stateStackLen_ + 1);
302     auto state = reinterpret_cast<RegExpState *>(
303         stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
304         stateStackLen_ * sizeof(RegExpState));
305     state->type_ = type;
306     state->currentPc_ = pc;
307     state->currentPtr_ = GetCurrentPtr();
308     stateStackLen_++;
309 }
310 
PushRegExpState(StateType type,uint32_t pc,uintptr_t ptr)311 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr)
312 {
313     ReAllocStack(stateStackLen_ + 1);
314     auto state = reinterpret_cast<RegExpState *>(
315         stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
316         stateStackLen_ * sizeof(RegExpState));
317     state->type_ = type;
318     state->currentPc_ = pc;
319     state->currentPtr_ = reinterpret_cast<const uint8_t *>(ptr);
320     stateStackLen_++;
321 }
322 
PopRegExpState(bool copyCapture)323 RegExpExecutor::StateType RegExpExecutor::PopRegExpState(bool copyCapture)
324 {
325     if (stateStackLen_ != 0) {
326         auto state = PeekRegExpState();
327         stateStackLen_--;
328         switch (state->type_) {
329             case StateType::STATE_SPLIT:
330             case StateType::STATE_NEGATIVE_MATCH_AHEAD:
331             case StateType::STATE_MATCH_AHEAD:
332                 SetCurrentPC(state->currentPc_);
333                 SetCurrentPtr(state->currentPtr_);
334                 break;
335             case StateType::STATE_SAVE:
336                 if (copyCapture) {
337                     *(reinterpret_cast<const uint8_t **>(GetCaptureResultList()) + state->currentPc_) =
338                         state->currentPtr_;
339                 }
340                 break;
341             case StateType::STATE_PUSH:
342                 PopStack();
343                 break;
344             case StateType::STATE_POP:
345                 PushStack((uintptr_t)state->currentPtr_);
346                 break;
347             case StateType::STATE_SET:
348                 SetStackValue((uintptr_t)state->currentPtr_);
349                 break;
350             default:
351                 UNREACHABLE();
352                 break;
353         }
354         return state->type_;
355     }
356     return StateType::STATE_INVALID;
357 }
358 
ReAllocStack(uint32_t stackLen)359 void RegExpExecutor::ReAllocStack(uint32_t stackLen)
360 {
361     if (stackLen > stateStackSize_) {
362         ASSERT((static_cast<size_t>(stateStackSize_) * 2) <= static_cast<size_t>(UINT32_MAX)); // 2: double the size
363         uint32_t newStackSize = std::max(stateStackSize_ * 2, MIN_STACK_SIZE);  // 2: double the size
364         ASSERT((static_cast<size_t>(newStackSize) * static_cast<size_t>(sizeof(RegExpState))) <=
365             static_cast<size_t>(UINT32_MAX));
366         uint32_t stackByteSize = newStackSize * sizeof(RegExpState);
367         auto newStack = chunk_->NewArray<uint8_t>(stackByteSize);
368         if (memset_s(newStack, stackByteSize, 0, stackByteSize) != EOK) {
369             LOG_FULL(FATAL) << "memset_s failed";
370             UNREACHABLE();
371         }
372         if (stateStack_ != nullptr) {
373             auto stackSize = stateStackSize_ * sizeof(RegExpState);
374             if (memcpy_s(newStack, stackSize, stateStack_, stackSize) != EOK) {
375                 return;
376             }
377         }
378         stateStack_ = newStack;
379         stateStackSize_ = newStackSize;
380     }
381 }
382 }  // namespace panda::ecmascript
383