1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/regexp/regexp_executor.h"
17
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/regexp/dyn_chunk.h"
20 #include "ecmascript/regexp/regexp_opcode.h"
21 #include "securec.h"
22
23 namespace panda::ecmascript {
// Short aliases for the executor's nested types, used in the member definitions below.
using RegExpState = RegExpExecutor::RegExpState;
using MatchResult = RegExpExecutor::MatchResult;
Execute(const uint8_t * input,uint32_t lastIndex,uint32_t length,uint8_t * buf,bool isWideChar)26 bool RegExpExecutor::Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar)
27 {
28 DynChunk buffer(buf, chunk_);
29 input_ = const_cast<uint8_t *>(input);
30 inputEnd_ = const_cast<uint8_t *>(input + length * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
31 uint32_t size = buffer.GetU32(0);
32 nCapture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET);
33 nStack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET);
34 flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET);
35 isWideChar_ = isWideChar;
36
37 uint32_t captureResultSize = sizeof(CaptureState) * nCapture_;
38 uint32_t stackSize = sizeof(uintptr_t) * nStack_;
39 stateSize_ = sizeof(RegExpState) + captureResultSize + stackSize;
40 stateStackLen_ = 0;
41
42 if (captureResultSize != 0) {
43 captureResultList_ = chunk_->NewArray<CaptureState>(nCapture_);
44 if (memset_s(captureResultList_, captureResultSize, 0, captureResultSize) != EOK) {
45 LOG_ECMA(FATAL) << "memset_s failed";
46 UNREACHABLE();
47 }
48 }
49 if (stackSize != 0) {
50 stack_ = chunk_->NewArray<uintptr_t>(nStack_);
51 if (memset_s(stack_, stackSize, 0, stackSize) != EOK) {
52 LOG_ECMA(FATAL) << "memset_s failed";
53 UNREACHABLE();
54 }
55 }
56 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
57 SetCurrentPtr(input + lastIndex * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
58 SetCurrentPC(RegExpParser::OP_START_OFFSET);
59
60 // first split
61 if ((flags_ & RegExpParser::FLAG_STICKY) == 0) {
62 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
63 }
64 return ExecuteInternal(buffer, size);
65 }
66
MatchFailed(bool isMatched)67 bool RegExpExecutor::MatchFailed(bool isMatched)
68 {
69 while (true) {
70 if (stateStackLen_ == 0) {
71 return true;
72 }
73 RegExpState *state = PeekRegExpState();
74 if (state->type_ == StateType::STATE_SPLIT) {
75 if (!isMatched) {
76 PopRegExpState();
77 return false;
78 }
79 } else {
80 isMatched = (state->type_ == StateType::STATE_MATCH_AHEAD && isMatched) ||
81 (state->type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD && !isMatched);
82 if (isMatched) {
83 if (state->type_ == StateType::STATE_MATCH_AHEAD) {
84 PopRegExpState(false);
85 return false;
86 }
87 if (state->type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD) {
88 PopRegExpState();
89 return false;
90 }
91 }
92 }
93 DropRegExpState();
94 }
95
96 return true;
97 }
98
HandleFirstSplit()99 bool RegExpExecutor::HandleFirstSplit()
100 {
101 if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
102 (flags_ & RegExpParser::FLAG_STICKY) == 0) {
103 if (IsEOF()) {
104 if (MatchFailed()) {
105 return false;
106 }
107 } else {
108 AdvanceCurrentPtr();
109 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
110 }
111 }
112 return true;
113 }
114
HandleOpAll(uint8_t opCode)115 bool RegExpExecutor::HandleOpAll(uint8_t opCode)
116 {
117 if (IsEOF()) {
118 return !MatchFailed();
119 }
120 uint32_t currentChar = GetCurrentChar();
121 if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
122 return !MatchFailed();
123 }
124 Advance(opCode);
125 return true;
126 }
127
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)128 bool RegExpExecutor::HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
129 {
130 uint32_t expectedChar;
131 if (opCode == RegExpOpCode::OP_CHAR32) {
132 expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
133 } else {
134 expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
135 }
136 if (IsEOF()) {
137 return !MatchFailed();
138 }
139 uint32_t currentChar = GetCurrentChar();
140 if (IsIgnoreCase()) {
141 currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16());
142 }
143 if (currentChar == expectedChar) {
144 Advance(opCode);
145 } else {
146 if (MatchFailed()) {
147 return false;
148 }
149 }
150 return true;
151 }
152
HandleOpWordBoundary(uint8_t opCode)153 bool RegExpExecutor::HandleOpWordBoundary(uint8_t opCode)
154 {
155 if (IsEOF()) {
156 if (opCode == RegExpOpCode::OP_WORD_BOUNDARY) {
157 Advance(opCode);
158 } else {
159 if (MatchFailed()) {
160 return false;
161 }
162 }
163 return true;
164 }
165 bool preIsWord = false;
166 if (GetCurrentPtr() != input_) {
167 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
168 preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
169 }
170 bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_));
171 if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
172 ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
173 ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
174 ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
175 Advance(opCode);
176 } else {
177 if (MatchFailed()) {
178 return false;
179 }
180 }
181 return true;
182 }
183
HandleOpLineStart(uint8_t opCode)184 bool RegExpExecutor::HandleOpLineStart(uint8_t opCode)
185 {
186 if (IsEOF()) {
187 return !MatchFailed();
188 }
189 if ((GetCurrentPtr() == input_) ||
190 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
191 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
192 Advance(opCode);
193 } else {
194 if (MatchFailed()) {
195 return false;
196 }
197 }
198 return true;
199 }
200
HandleOpLineEnd(uint8_t opCode)201 bool RegExpExecutor::HandleOpLineEnd(uint8_t opCode)
202 {
203 if (IsEOF() ||
204 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
205 ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) {
206 Advance(opCode);
207 } else {
208 if (MatchFailed()) {
209 return false;
210 }
211 }
212 return true;
213 }
214
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)215 void RegExpExecutor::HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
216 {
217 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
218 ASSERT(captureIndex < nCapture_);
219 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
220 CaptureState *captureState = &captureResultList_[captureIndex];
221 captureState->captureStart = GetCurrentPtr();
222 Advance(opCode);
223 }
224
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)225 void RegExpExecutor::HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
226 {
227 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
228 ASSERT(captureIndex < nCapture_);
229 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
230 CaptureState *captureState = &captureResultList_[captureIndex];
231 captureState->captureEnd = GetCurrentPtr();
232 Advance(opCode);
233 }
234
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)235 void RegExpExecutor::HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
236 {
237 uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
238 uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
239 for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
240 CaptureState *captureState =
241 &captureResultList_[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
242 captureState->captureStart = nullptr;
243 captureState->captureEnd = nullptr;
244 }
245 Advance(opCode);
246 }
247
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)248 void RegExpExecutor::HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
249 {
250 auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
251 ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
252 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
253 Advance(opCode);
254 uint32_t splitPc = GetCurrentPC() + offset;
255 PushRegExpState(type, splitPc);
256 }
257
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)258 void RegExpExecutor::HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
259 {
260 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
261 Advance(opCode);
262 PushRegExpState(STATE_SPLIT, GetCurrentPC());
263 AdvanceOffset(offset);
264 }
265
HandleOpPrev(uint8_t opCode)266 bool RegExpExecutor::HandleOpPrev(uint8_t opCode)
267 {
268 if (GetCurrentPtr() == input_) {
269 if (MatchFailed()) {
270 return false;
271 }
272 } else {
273 PrevPtr(¤tPtr_, input_);
274 Advance(opCode);
275 }
276 return true;
277 }
278
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)279 void RegExpExecutor::HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
280 {
281 uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
282 uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
283 uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
284 Advance(opCode);
285 uint32_t loopPcEnd = GetCurrentPC();
286 uint32_t loopPcStart = GetCurrentPC() + pcOffset;
287 bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
288 uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
289
290 uint32_t loopCount = PeekStack();
291 SetStackValue(++loopCount);
292 if (loopCount < loopMax) {
293 // greedy failed, goto next
294 if (loopCount >= quantifyMin) {
295 PushRegExpState(STATE_SPLIT, loopPcEnd);
296 }
297 // Goto loop start
298 SetCurrentPC(loopPcStart);
299 } else {
300 if (!isGreedy && (loopCount < quantifyMax)) {
301 PushRegExpState(STATE_SPLIT, loopPcStart);
302 }
303 }
304 }
305
HandleOpRange32(const DynChunk & byteCode)306 bool RegExpExecutor::HandleOpRange32(const DynChunk &byteCode)
307 {
308 if (IsEOF()) {
309 return !MatchFailed();
310 }
311 uint32_t currentChar = GetCurrentChar();
312 if (IsIgnoreCase()) {
313 currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16());
314 }
315 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
316 bool isFound = false;
317 int32_t idxMin = 0;
318 int32_t idxMax = rangeCount - 1;
319 int32_t idx = 0;
320 uint32_t low = 0;
321 uint32_t high =
322 byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + RANGE32_MAX_HALF_OFFSET);
323 if (currentChar <= high) {
324 while (idxMin <= idxMax) {
325 idx = (idxMin + idxMax) / RANGE32_OFFSET;
326 low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_OFFSET);
327 high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_OFFSET +
328 RANGE32_MAX_HALF_OFFSET);
329 if (currentChar < low) {
330 idxMax = idx - 1;
331 } else if (currentChar > high) {
332 idxMin = idx + 1;
333 } else {
334 isFound = true;
335 break;
336 }
337 }
338 }
339 if (isFound) {
340 AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
341 } else {
342 if (MatchFailed()) {
343 return false;
344 }
345 }
346 return true;
347 }
348
HandleOpRange(const DynChunk & byteCode)349 bool RegExpExecutor::HandleOpRange(const DynChunk &byteCode)
350 {
351 if (IsEOF()) {
352 return !MatchFailed();
353 }
354 uint32_t currentChar = GetCurrentChar();
355 if (IsIgnoreCase()) {
356 currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16());
357 }
358 uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
359 bool isFound = false;
360 int32_t idxMin = 0;
361 int32_t idxMax = rangeCount - 1;
362 int32_t idx = 0;
363 uint32_t low = 0;
364 uint32_t high =
365 byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
366 if (currentChar <= high) {
367 while (idxMin <= idxMax) {
368 idx = (idxMin + idxMax) / RANGE32_OFFSET;
369 low = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_HALF_OFFSET);
370 high =
371 byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
372 if (currentChar < low) {
373 idxMax = idx - 1;
374 } else if (currentChar > high) {
375 idxMin = idx + 1;
376 } else {
377 isFound = true;
378 break;
379 }
380 }
381 }
382 if (isFound) {
383 AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
384 } else {
385 if (MatchFailed()) {
386 return false;
387 }
388 }
389 return true;
390 }
391
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)392 bool RegExpExecutor::HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
393 {
394 uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
395 if (captureIndex >= nCapture_) {
396 return !MatchFailed();
397 }
398 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
399 const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
400 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
401 const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
402 if (captureStart == nullptr || captureEnd == nullptr) {
403 Advance(opCode);
404 return true;
405 }
406 bool isMatched = true;
407 if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
408 const uint8_t *refCptr = captureStart;
409 while (refCptr < captureEnd) {
410 if (IsEOF()) {
411 isMatched = false;
412 break;
413 }
414 // NOLINTNEXTLINE(readability-identifier-naming)
415 uint32_t c1 = GetChar(&refCptr, captureEnd);
416 // NOLINTNEXTLINE(readability-identifier-naming)
417 uint32_t c2 = GetChar(¤tPtr_, inputEnd_);
418 if (IsIgnoreCase()) {
419 c1 = RegExpParser::Canonicalize(c1, IsUtf16());
420 c2 = RegExpParser::Canonicalize(c2, IsUtf16());
421 }
422 if (c1 != c2) {
423 isMatched = false;
424 break;
425 }
426 }
427 if (!isMatched) {
428 if (MatchFailed()) {
429 return false;
430 }
431 } else {
432 Advance(opCode);
433 }
434 } else {
435 const uint8_t *refCptr = captureEnd;
436 while (refCptr > captureStart) {
437 if (GetCurrentPtr() == input_) {
438 isMatched = false;
439 break;
440 }
441 // NOLINTNEXTLINE(readability-identifier-naming)
442 uint32_t c1 = GetPrevChar(&refCptr, captureStart);
443 // NOLINTNEXTLINE(readability-identifier-naming)
444 uint32_t c2 = GetPrevChar(¤tPtr_, input_);
445 if (IsIgnoreCase()) {
446 c1 = RegExpParser::Canonicalize(c1, IsUtf16());
447 c2 = RegExpParser::Canonicalize(c2, IsUtf16());
448 }
449 if (c1 != c2) {
450 isMatched = false;
451 break;
452 }
453 }
454 if (!isMatched) {
455 if (MatchFailed()) {
456 return false;
457 }
458 } else {
459 Advance(opCode);
460 }
461 }
462 return true;
463 }
464
465 // NOLINTNEXTLINE(readability-function-size)
ExecuteInternal(const DynChunk & byteCode,uint32_t pcEnd)466 bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd)
467 {
468 while (GetCurrentPC() < pcEnd) {
469 // first split
470 if (!HandleFirstSplit()) {
471 return false;
472 }
473 uint8_t opCode = byteCode.GetU8(GetCurrentPC());
474 switch (opCode) {
475 case RegExpOpCode::OP_DOTS:
476 case RegExpOpCode::OP_ALL: {
477 if (!HandleOpAll(opCode)) {
478 return false;
479 }
480 break;
481 }
482 case RegExpOpCode::OP_CHAR32:
483 case RegExpOpCode::OP_CHAR: {
484 if (!HandleOpChar(byteCode, opCode)) {
485 return false;
486 }
487 break;
488 }
489 case RegExpOpCode::OP_NOT_WORD_BOUNDARY:
490 case RegExpOpCode::OP_WORD_BOUNDARY: {
491 if (!HandleOpWordBoundary(opCode)) {
492 return false;
493 }
494 break;
495 }
496 case RegExpOpCode::OP_LINE_START: {
497 if (!HandleOpLineStart(opCode)) {
498 return false;
499 }
500 break;
501 }
502 case RegExpOpCode::OP_LINE_END: {
503 if (!HandleOpLineEnd(opCode)) {
504 return false;
505 }
506 break;
507 }
508 case RegExpOpCode::OP_SAVE_START: {
509 HandleOpSaveStart(byteCode, opCode);
510 break;
511 }
512 case RegExpOpCode::OP_SAVE_END: {
513 HandleOpSaveEnd(byteCode, opCode);
514 break;
515 }
516 case RegExpOpCode::OP_GOTO: {
517 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
518 Advance(opCode, offset);
519 break;
520 }
521 case RegExpOpCode::OP_MATCH: {
522 // jump to match ahead
523 if (MatchFailed(true)) {
524 return false;
525 }
526 break;
527 }
528 case RegExpOpCode::OP_MATCH_END:
529 return true;
530 case RegExpOpCode::OP_SAVE_RESET:
531 HandleOpSaveReset(byteCode, opCode);
532 break;
533 case RegExpOpCode::OP_SPLIT_NEXT:
534 case RegExpOpCode::OP_MATCH_AHEAD:
535 case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD:
536 HandleOpMatch(byteCode, opCode);
537 break;
538 case RegExpOpCode::OP_SPLIT_FIRST:
539 HandleOpSplitFirst(byteCode, opCode);
540 break;
541 case RegExpOpCode::OP_PREV: {
542 if (!HandleOpPrev(opCode)) {
543 return false;
544 }
545 break;
546 }
547 case RegExpOpCode::OP_LOOP_GREEDY:
548 case RegExpOpCode::OP_LOOP:
549 HandleOpLoop(byteCode, opCode);
550 break;
551 case RegExpOpCode::OP_PUSH_CHAR:
552 PushStack(reinterpret_cast<uintptr_t>(GetCurrentPtr()));
553 Advance(opCode);
554 break;
555 case RegExpOpCode::OP_CHECK_CHAR: {
556 if (PopStack() != reinterpret_cast<uintptr_t>(GetCurrentPtr())) {
557 Advance(opCode);
558 } else {
559 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
560 Advance(opCode, offset);
561 }
562 break;
563 }
564 case RegExpOpCode::OP_PUSH:
565 PushStack(0);
566 Advance(opCode);
567 break;
568 case RegExpOpCode::OP_POP:
569 PopStack();
570 Advance(opCode);
571 break;
572 case RegExpOpCode::OP_RANGE32: {
573 if (!HandleOpRange32(byteCode)) {
574 return false;
575 }
576 break;
577 }
578 case RegExpOpCode::OP_RANGE: {
579 if (!HandleOpRange(byteCode)) {
580 return false;
581 }
582 break;
583 }
584 case RegExpOpCode::OP_BACKREFERENCE:
585 case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: {
586 if (!HandleOpBackReference(byteCode, opCode)) {
587 return false;
588 }
589 break;
590 }
591 default:
592 UNREACHABLE();
593 }
594 }
595 // for loop match
596 return true;
597 }
598
DumpResult(std::ostream & out) const599 void RegExpExecutor::DumpResult(std::ostream &out) const
600 {
601 out << "captures:" << std::endl;
602 for (uint32_t i = 0; i < nCapture_; i++) {
603 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
604 CaptureState *captureState = &captureResultList_[i];
605 int32_t len = captureState->captureEnd - captureState->captureStart;
606 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
607 out << i << ":\t" << CString(reinterpret_cast<const char *>(captureState->captureStart), len) << std::endl;
608 } else {
609 out << i << ":\t"
610 << "undefined" << std::endl;
611 }
612 }
613 }
614
GetResult(const JSThread * thread,bool isSuccess) const615 MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool isSuccess) const
616 {
617 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
618 MatchResult result;
619 std::vector<std::pair<bool, JSHandle<EcmaString>>> captures;
620 result.isSuccess_ = isSuccess;
621 if (isSuccess) {
622 for (uint32_t i = 0; i < nCapture_; i++) {
623 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
624 CaptureState *captureState = &captureResultList_[i];
625 if (i == 0) {
626 result.index_ = captureState->captureStart - input_;
627 if (isWideChar_) {
628 result.index_ /= WIDE_CHAR_SIZE;
629 }
630 }
631 int32_t len = captureState->captureEnd - captureState->captureStart;
632 std::pair<bool, JSHandle<EcmaString>> pair;
633 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
634 pair.first = false;
635 if (isWideChar_) {
636 // create utf-16 string
637 pair.second = factory->NewFromUtf16(
638 reinterpret_cast<const uint16_t *>(captureState->captureStart), len / 2);
639 } else {
640 // create utf-8 string
641 CVector<uint8_t> buffer(len + 1);
642 uint8_t *dest = buffer.data();
643 if (memcpy_s(dest, len + 1, reinterpret_cast<const uint8_t *>(captureState->captureStart), len) !=
644 EOK) {
645 LOG_ECMA(FATAL) << "memcpy_s failed";
646 UNREACHABLE();
647 }
648 dest[len] = '\0'; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
649 pair.second =
650 factory->NewFromUtf8(reinterpret_cast<const uint8_t *>(buffer.data()), len);
651 }
652 } else {
653 // undefined
654 pair.first = true;
655 }
656 captures.emplace_back(pair);
657 }
658 result.captures_ = captures;
659 result.endIndex_ = currentPtr_ - input_;
660 if (isWideChar_) {
661 result.endIndex_ /= WIDE_CHAR_SIZE;
662 }
663 }
664 return result;
665 }
666
PushRegExpState(StateType type,uint32_t pc)667 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc)
668 {
669 ReAllocStack(stateStackLen_ + 1);
670 auto state = reinterpret_cast<RegExpState *>(
671 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
672 stateStackLen_ * stateSize_);
673 state->type_ = type;
674 state->currentPc_ = pc;
675 state->currentStack_ = currentStack_;
676 state->currentPtr_ = GetCurrentPtr();
677 size_t listSize = sizeof(CaptureState) * nCapture_;
678 if (memcpy_s(state->captureResultList_, listSize, GetCaptureResultList(), listSize) != EOK) {
679 LOG_ECMA(FATAL) << "memcpy_s failed";
680 UNREACHABLE();
681 }
682 uint8_t *stackStart =
683 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
684 reinterpret_cast<uint8_t *>(state->captureResultList_) + sizeof(CaptureState) * nCapture_;
685 if (stack_ != nullptr) {
686 size_t stackSize = sizeof(uintptr_t) * nStack_;
687 if (memcpy_s(stackStart, stackSize, stack_, stackSize) != EOK) {
688 LOG_ECMA(FATAL) << "memcpy_s failed";
689 UNREACHABLE();
690 }
691 }
692 stateStackLen_++;
693 }
694
PopRegExpState(bool copyCaptrue)695 RegExpState *RegExpExecutor::PopRegExpState(bool copyCaptrue)
696 {
697 if (stateStackLen_ != 0) {
698 auto state = PeekRegExpState();
699 size_t listSize = sizeof(CaptureState) * nCapture_;
700 if (copyCaptrue) {
701 if (memcpy_s(GetCaptureResultList(), listSize, state->captureResultList_, listSize) != EOK) {
702 LOG_ECMA(FATAL) << "memcpy_s failed";
703 UNREACHABLE();
704 }
705 }
706 SetCurrentPtr(state->currentPtr_);
707 SetCurrentPC(state->currentPc_);
708 currentStack_ = state->currentStack_;
709 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
710 uint8_t *stackStart = reinterpret_cast<uint8_t *>(state->captureResultList_) + listSize;
711 if (stack_ != nullptr) {
712 size_t stackSize = sizeof(uintptr_t) * nStack_;
713 if (memcpy_s(stack_, stackSize, stackStart, stackSize) != EOK) {
714 LOG_ECMA(FATAL) << "memcpy_s failed";
715 UNREACHABLE();
716 }
717 }
718 stateStackLen_--;
719 return state;
720 }
721 return nullptr;
722 }
723
ReAllocStack(uint32_t stackLen)724 void RegExpExecutor::ReAllocStack(uint32_t stackLen)
725 {
726 if (stackLen > stateStackSize_) {
727 uint32_t newStackSize = std::max(stateStackSize_ * 2, MIN_STACK_SIZE); // 2: double the size
728 uint32_t stackByteSize = newStackSize * stateSize_;
729 auto newStack = chunk_->NewArray<uint8_t>(stackByteSize);
730 if (memset_s(newStack, stackByteSize, 0, stackByteSize) != EOK) {
731 LOG_ECMA(FATAL) << "memset_s failed";
732 UNREACHABLE();
733 }
734 if (stateStack_ != nullptr) {
735 size_t stackSize = stateStackSize_ * stateSize_;
736 if (memcpy_s(newStack, stackSize, stateStack_, stackSize) != EOK) {
737 return;
738 }
739 }
740 stateStack_ = newStack;
741 stateStackSize_ = newStackSize;
742 }
743 }
744 } // namespace panda::ecmascript
745