1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/regexp/regexp_executor.h"
17
18 namespace panda::ecmascript {
19 using RegExpState = RegExpExecutor::RegExpState;
20 using RegExpGlobalResult = builtins::RegExpGlobalResult;
Execute(const uint8_t * input,uint32_t lastIndex,uint32_t length,uint8_t * buf,bool isWideChar)21 bool RegExpExecutor::Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar)
22 {
23 DynChunk buffer(buf, chunk_);
24 input_ = const_cast<uint8_t *>(input);
25 inputEnd_ = const_cast<uint8_t *>(input + length * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
26 uint32_t size = buffer.GetU32(0);
27 nCapture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET);
28 nStack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET);
29 flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET);
30 prefilter_ = buffer.GetU32(RegExpParser::PREFILTER_OFFSET);
31 isWideChar_ = isWideChar;
32
33 uint32_t captureResultSize = sizeof(CaptureState) * nCapture_;
34 uint32_t stackSize = sizeof(uintptr_t) * nStack_;
35 stateStackLen_ = 0;
36 currentStack_ = 0;
37
38 if (captureResultSize != 0) {
39 if (captureResultList_ == nullptr) {
40 captureResultList_ = chunk_->NewArray<CaptureState>(nCapture_);
41 }
42 if (memset_s(captureResultList_, captureResultSize, 0, captureResultSize) != EOK) {
43 LOG_FULL(FATAL) << "memset_s failed";
44 UNREACHABLE();
45 }
46 }
47 if (stackSize != 0 && stack_ == nullptr) {
48 stack_ = chunk_->NewArray<uintptr_t>(nStack_);
49 if (memset_s(stack_, stackSize, 0, stackSize) != EOK) {
50 LOG_FULL(FATAL) << "memset_s failed";
51 UNREACHABLE();
52 }
53 }
54 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
55 SetCurrentPtr(input + lastIndex * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
56 SetCurrentPC(RegExpParser::OP_START_OFFSET);
57
58 // first split
59 if ((flags_ & RegExpParser::FLAG_STICKY) == 0) {
60 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
61 }
62 return ExecuteInternal(buffer, size);
63 }
64
MatchFailed(bool isMatched)65 bool RegExpExecutor::MatchFailed(bool isMatched)
66 {
67 if (isMatched) {
68 stateStackLen_ = 0;
69 return true;
70 }
71 while (stateStackLen_ > 0) {
72 // StateType::STATE_SPLIT or STATE_NEGATIVE_MATCH_AHEAD
73 if (PopRegExpState() <= StateType::STATE_NEGATIVE_MATCH_AHEAD) {
74 return false;
75 }
76 }
77 return true;
78 }
79
80 // NOLINTNEXTLINE(readability-function-size)
ExecuteInternal(const DynChunk & byteCode,uint32_t pcEnd)81 bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd)
82 {
83 while (GetCurrentPC() < pcEnd) {
84 // first split
85 if (!HandleFirstSplit()) {
86 return false;
87 }
88 uint8_t opCode = byteCode.GetU8(GetCurrentPC());
89 switch (opCode) {
90 case RegExpOpCode::OP_DOTS:
91 case RegExpOpCode::OP_ALL: {
92 if (!HandleOpAll(opCode)) {
93 return false;
94 }
95 break;
96 }
97 case RegExpOpCode::OP_CHAR32:
98 case RegExpOpCode::OP_CHAR: {
99 if (!HandleOpChar(byteCode, opCode)) {
100 return false;
101 }
102 break;
103 }
104 case RegExpOpCode::OP_NOT_WORD_BOUNDARY:
105 case RegExpOpCode::OP_WORD_BOUNDARY: {
106 if (!HandleOpWordBoundary(opCode)) {
107 return false;
108 }
109 break;
110 }
111 case RegExpOpCode::OP_LINE_START: {
112 if (!HandleOpLineStart(opCode)) {
113 return false;
114 }
115 break;
116 }
117 case RegExpOpCode::OP_LINE_END: {
118 if (!HandleOpLineEnd(opCode)) {
119 return false;
120 }
121 break;
122 }
123 case RegExpOpCode::OP_SAVE_START:
124 HandleOpSaveStart(byteCode, opCode);
125 break;
126 case RegExpOpCode::OP_SAVE_END:
127 HandleOpSaveEnd(byteCode, opCode);
128 break;
129 case RegExpOpCode::OP_GOTO: {
130 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
131 Advance(opCode, offset);
132 break;
133 }
134 case RegExpOpCode::OP_MATCH: {
135 ASSERT(stateStackLen_ > 0);
136 // jump to match ahead
137 uint32_t ahead = stateStackLen_ - 1;
138 auto stateStack = reinterpret_cast<RegExpState *>(stateStack_);
139 while (ahead != 0 && stateStack[ahead].type_ != StateType::STATE_MATCH_AHEAD &&
140 stateStack[ahead].type_ != StateType::STATE_NEGATIVE_MATCH_AHEAD) {
141 --ahead;
142 }
143 bool isNegative = stateStack[ahead].type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD;
144 while (stateStackLen_ > ahead) {
145 PopRegExpState(isNegative);
146 }
147 if (isNegative && MatchFailed(false)) {
148 return false;
149 }
150 break;
151 }
152 case RegExpOpCode::OP_MATCH_END:
153 return true;
154 case RegExpOpCode::OP_SAVE_RESET:
155 HandleOpSaveReset(byteCode, opCode);
156 break;
157 case RegExpOpCode::OP_SPLIT_NEXT:
158 case RegExpOpCode::OP_MATCH_AHEAD:
159 case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD:
160 HandleOpMatch(byteCode, opCode);
161 break;
162 case RegExpOpCode::OP_SPLIT_FIRST:
163 HandleOpSplitFirst(byteCode, opCode);
164 break;
165 case RegExpOpCode::OP_PREV: {
166 if (!HandleOpPrev(opCode)) {
167 return false;
168 }
169 break;
170 }
171 case RegExpOpCode::OP_LOOP_GREEDY:
172 case RegExpOpCode::OP_LOOP:
173 HandleOpLoop(byteCode, opCode);
174 break;
175 case RegExpOpCode::OP_PUSH_CHAR: {
176 PushRegExpState(StateType::STATE_PUSH, 0, 0);
177 PushStack(reinterpret_cast<uintptr_t>(GetCurrentPtr()));
178 Advance(opCode);
179 break;
180 }
181 case RegExpOpCode::OP_CHECK_CHAR: {
182 if (stateStackLen_ > 0 && PeekRegExpState()->type_ == StateType::STATE_PUSH) {
183 DropRegExpState();
184 } else {
185 ASSERT(currentStack_ > 0);
186 PushRegExpState(StateType::STATE_POP, 0, stack_[currentStack_ - 1]);
187 }
188 if (PopStack() != reinterpret_cast<uintptr_t>(GetCurrentPtr())) {
189 Advance(opCode);
190 } else {
191 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
192 Advance(opCode, offset);
193 }
194 break;
195 }
196 case RegExpOpCode::OP_PUSH: {
197 PushRegExpState(StateType::STATE_PUSH, 0, 0);
198 PushStack(0);
199 Advance(opCode);
200 break;
201 }
202 case RegExpOpCode::OP_POP: {
203 ASSERT(currentStack_ > 0);
204 PushRegExpState(StateType::STATE_POP, 0, stack_[currentStack_ - 1]);
205 PopStack();
206 Advance(opCode);
207 break;
208 }
209 case RegExpOpCode::OP_RANGE32: {
210 if (!HandleOpRange32(byteCode)) {
211 return false;
212 }
213 break;
214 }
215 case RegExpOpCode::OP_RANGE: {
216 if (!HandleOpRange(byteCode)) {
217 return false;
218 }
219 break;
220 }
221 case RegExpOpCode::OP_SPARSE: {
222 if (!HandleOpSparse(byteCode)) {
223 return false;
224 }
225 break;
226 }
227 case RegExpOpCode::OP_BACKREFERENCE:
228 case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: {
229 if (!HandleOpBackReference(byteCode, opCode)) {
230 return false;
231 }
232 break;
233 }
234 default:
235 UNREACHABLE();
236 }
237 }
238 // for loop match
239 return true;
240 }
241
DumpResult(std::ostream & out) const242 void RegExpExecutor::DumpResult(std::ostream &out) const
243 {
244 out << "captures:" << std::endl;
245 for (uint32_t i = 0; i < nCapture_; i++) {
246 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
247 CaptureState *captureState = &captureResultList_[i];
248 int32_t len = captureState->captureEnd - captureState->captureStart;
249 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
250 out << i << ":\t" << CString(reinterpret_cast<const char *>(captureState->captureStart), len) << std::endl;
251 } else {
252 out << i << ":\t"
253 << "undefined" << std::endl;
254 }
255 }
256 }
257
GetResult(JSThread * thread)258 void RegExpExecutor::GetResult(JSThread *thread)
259 {
260 JSHandle<RegExpGlobalResult> matchResult(thread->GetCurrentEcmaContext()->GetRegExpGlobalResult());
261 matchResult->SetTotalCaptureCounts(thread, JSTaggedValue(nCapture_));
262 uint32_t firstIndex = RegExpGlobalResult::FIRST_CAPTURE_INDEX;
263 uint32_t availableCaptureSlot = matchResult->GetLength() - firstIndex;
264 uint32_t requiredLength = nCapture_ * 2;
265 if (requiredLength > availableCaptureSlot) {
266 matchResult = RegExpGlobalResult::GrowCapturesCapacity(thread, matchResult, requiredLength + firstIndex);
267 }
268 for (uint32_t i = 0; i < nCapture_; i++) {
269 CaptureState *captureState = &captureResultList_[i];
270 int32_t len = captureState->captureEnd - captureState->captureStart;
271 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
272 if (isWideChar_) {
273 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(
274 static_cast<int32_t>((captureState->captureStart - input_) / WIDE_CHAR_SIZE)));
275 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(
276 static_cast<int32_t>((captureState->captureEnd - input_) / WIDE_CHAR_SIZE)));
277 } else {
278 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(
279 static_cast<int32_t>(captureState->captureStart - input_)));
280 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(
281 static_cast<int32_t>(captureState->captureEnd - input_)));
282 }
283 } else {
284 // undefined
285 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(0));
286 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(-1));
287 }
288 }
289 uint32_t endIndex = currentPtr_ - input_;
290 if (isWideChar_) {
291 endIndex /= WIDE_CHAR_SIZE;
292 }
293 matchResult->SetEndIndex(thread, JSTaggedValue(endIndex));
294 }
295
PushRegExpState(StateType type,uint32_t pc)296 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc)
297 {
298 ReAllocStack(stateStackLen_ + 1);
299 auto state = reinterpret_cast<RegExpState *>(
300 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
301 stateStackLen_ * sizeof(RegExpState));
302 state->type_ = type;
303 state->currentPc_ = pc;
304 state->currentPtr_ = GetCurrentPtr();
305 stateStackLen_++;
306 }
307
PushRegExpState(StateType type,uint32_t pc,uintptr_t ptr)308 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr)
309 {
310 ReAllocStack(stateStackLen_ + 1);
311 auto state = reinterpret_cast<RegExpState *>(
312 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
313 stateStackLen_ * sizeof(RegExpState));
314 state->type_ = type;
315 state->currentPc_ = pc;
316 state->currentPtr_ = reinterpret_cast<const uint8_t *>(ptr);
317 stateStackLen_++;
318 }
319
PopRegExpState(bool copyCapture)320 RegExpExecutor::StateType RegExpExecutor::PopRegExpState(bool copyCapture)
321 {
322 if (stateStackLen_ != 0) {
323 auto state = PeekRegExpState();
324 stateStackLen_--;
325 switch (state->type_) {
326 case StateType::STATE_SPLIT:
327 case StateType::STATE_NEGATIVE_MATCH_AHEAD:
328 case StateType::STATE_MATCH_AHEAD:
329 SetCurrentPC(state->currentPc_);
330 SetCurrentPtr(state->currentPtr_);
331 break;
332 case StateType::STATE_SAVE:
333 if (copyCapture) {
334 *(reinterpret_cast<const uint8_t **>(GetCaptureResultList()) + state->currentPc_) =
335 state->currentPtr_;
336 }
337 break;
338 case StateType::STATE_PUSH:
339 PopStack();
340 break;
341 case StateType::STATE_POP:
342 PushStack((uintptr_t)state->currentPtr_);
343 break;
344 case StateType::STATE_SET:
345 SetStackValue((uintptr_t)state->currentPtr_);
346 break;
347 default:
348 UNREACHABLE();
349 break;
350 }
351 return state->type_;
352 }
353 return StateType::STATE_INVALID;
354 }
355
ReAllocStack(uint32_t stackLen)356 void RegExpExecutor::ReAllocStack(uint32_t stackLen)
357 {
358 if (stackLen > stateStackSize_) {
359 ASSERT((static_cast<size_t>(stateStackSize_) * 2) <= static_cast<size_t>(UINT32_MAX)); // 2: double the size
360 uint32_t newStackSize = std::max(stateStackSize_ * 2, MIN_STACK_SIZE); // 2: double the size
361 ASSERT((static_cast<size_t>(newStackSize) * static_cast<size_t>(sizeof(RegExpState))) <=
362 static_cast<size_t>(UINT32_MAX));
363 uint32_t stackByteSize = newStackSize * sizeof(RegExpState);
364 auto newStack = chunk_->NewArray<uint8_t>(stackByteSize);
365 if (memset_s(newStack, stackByteSize, 0, stackByteSize) != EOK) {
366 LOG_FULL(FATAL) << "memset_s failed";
367 UNREACHABLE();
368 }
369 if (stateStack_ != nullptr) {
370 auto stackSize = stateStackSize_ * sizeof(RegExpState);
371 if (memcpy_s(newStack, stackSize, stateStack_, stackSize) != EOK) {
372 return;
373 }
374 }
375 stateStack_ = newStack;
376 stateStackSize_ = newStackSize;
377 }
378 }
379 } // namespace panda::ecmascript
380