• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_executor.h"
17 
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/regexp/dyn_chunk.h"
20 #include "ecmascript/regexp/regexp_opcode.h"
21 #include "securec.h"
22 
23 namespace panda::ecmascript {
24 using RegExpState = RegExpExecutor::RegExpState;
25 using MatchResult = RegExpExecutor::MatchResult;
Execute(const uint8_t * input,uint32_t lastIndex,uint32_t length,uint8_t * buf,bool isWideChar)26 bool RegExpExecutor::Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar)
27 {
28     DynChunk buffer(buf, chunk_);
29     input_ = const_cast<uint8_t *>(input);
30     inputEnd_ = const_cast<uint8_t *>(input + length * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
31     uint32_t size = buffer.GetU32(0);
32     nCapture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET);
33     nStack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET);
34     flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET);
35     isWideChar_ = isWideChar;
36 
37     uint32_t captureResultSize = sizeof(CaptureState) * nCapture_;
38     uint32_t stackSize = sizeof(uintptr_t) * nStack_;
39     stateSize_ = sizeof(RegExpState) + captureResultSize + stackSize;
40     stateStackLen_ = 0;
41 
42     if (captureResultSize != 0) {
43         captureResultList_ = chunk_->NewArray<CaptureState>(nCapture_);
44         if (memset_s(captureResultList_, captureResultSize, 0, captureResultSize) != EOK) {
45             LOG_ECMA(FATAL) << "memset_s failed";
46             UNREACHABLE();
47         }
48     }
49     if (stackSize != 0) {
50         stack_ = chunk_->NewArray<uintptr_t>(nStack_);
51         if (memset_s(stack_, stackSize, 0, stackSize) != EOK) {
52             LOG_ECMA(FATAL) << "memset_s failed";
53             UNREACHABLE();
54         }
55     }
56     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
57     SetCurrentPtr(input + lastIndex * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE));
58     SetCurrentPC(RegExpParser::OP_START_OFFSET);
59 
60     // first split
61     if ((flags_ & RegExpParser::FLAG_STICKY) == 0) {
62         PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
63     }
64     return ExecuteInternal(buffer, size);
65 }
66 
MatchFailed(bool isMatched)67 bool RegExpExecutor::MatchFailed(bool isMatched)
68 {
69     while (true) {
70         if (stateStackLen_ == 0) {
71             return true;
72         }
73         RegExpState *state = PeekRegExpState();
74         if (state->type_ == StateType::STATE_SPLIT) {
75             if (!isMatched) {
76                 PopRegExpState();
77                 return false;
78             }
79         } else {
80             isMatched = (state->type_ == StateType::STATE_MATCH_AHEAD && isMatched) ||
81                         (state->type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD && !isMatched);
82             if (isMatched) {
83                 if (state->type_ == StateType::STATE_MATCH_AHEAD) {
84                     PopRegExpState(false);
85                     return false;
86                 }
87                 if (state->type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD) {
88                     PopRegExpState();
89                     return false;
90                 }
91             }
92         }
93         DropRegExpState();
94     }
95 
96     return true;
97 }
98 
HandleFirstSplit()99 bool RegExpExecutor::HandleFirstSplit()
100 {
101     if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
102         (flags_ & RegExpParser::FLAG_STICKY) == 0) {
103         if (IsEOF()) {
104             if (MatchFailed()) {
105                 return false;
106             }
107         } else {
108             AdvanceCurrentPtr();
109             PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
110         }
111     }
112     return true;
113 }
114 
HandleOpAll(uint8_t opCode)115 bool RegExpExecutor::HandleOpAll(uint8_t opCode)
116 {
117     if (IsEOF()) {
118         return !MatchFailed();
119     }
120     uint32_t currentChar = GetCurrentChar();
121     if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
122         return !MatchFailed();
123     }
124     Advance(opCode);
125     return true;
126 }
127 
HandleOpChar(const DynChunk & byteCode,uint8_t opCode)128 bool RegExpExecutor::HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
129 {
130     uint32_t expectedChar;
131     if (opCode == RegExpOpCode::OP_CHAR32) {
132         expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
133     } else {
134         expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
135     }
136     if (IsEOF()) {
137         return !MatchFailed();
138     }
139     uint32_t currentChar = GetCurrentChar();
140     if (IsIgnoreCase()) {
141         currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16());
142     }
143     if (currentChar == expectedChar) {
144         Advance(opCode);
145     } else {
146         if (MatchFailed()) {
147             return false;
148         }
149     }
150     return true;
151 }
152 
HandleOpWordBoundary(uint8_t opCode)153 bool RegExpExecutor::HandleOpWordBoundary(uint8_t opCode)
154 {
155     if (IsEOF()) {
156         if (opCode == RegExpOpCode::OP_WORD_BOUNDARY) {
157             Advance(opCode);
158         } else {
159             if (MatchFailed()) {
160                 return false;
161             }
162         }
163         return true;
164     }
165     bool preIsWord = false;
166     if (GetCurrentPtr() != input_) {
167         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
168         preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
169     }
170     bool currentIsWord = IsWordChar(PeekChar(currentPtr_, inputEnd_));
171     if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
172         ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
173         ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
174         ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
175         Advance(opCode);
176     } else {
177         if (MatchFailed()) {
178             return false;
179         }
180     }
181     return true;
182 }
183 
HandleOpLineStart(uint8_t opCode)184 bool RegExpExecutor::HandleOpLineStart(uint8_t opCode)
185 {
186     if (IsEOF()) {
187         return !MatchFailed();
188     }
189     if ((GetCurrentPtr() == input_) ||
190         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
191         ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
192         Advance(opCode);
193     } else {
194         if (MatchFailed()) {
195             return false;
196         }
197     }
198     return true;
199 }
200 
HandleOpLineEnd(uint8_t opCode)201 bool RegExpExecutor::HandleOpLineEnd(uint8_t opCode)
202 {
203     if (IsEOF() ||
204         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
205         ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekChar(currentPtr_, inputEnd_) == '\n')) {
206         Advance(opCode);
207     } else {
208         if (MatchFailed()) {
209             return false;
210         }
211     }
212     return true;
213 }
214 
HandleOpSaveStart(const DynChunk & byteCode,uint8_t opCode)215 void RegExpExecutor::HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
216 {
217     uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
218     ASSERT(captureIndex < nCapture_);
219     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
220     CaptureState *captureState = &captureResultList_[captureIndex];
221     captureState->captureStart = GetCurrentPtr();
222     Advance(opCode);
223 }
224 
HandleOpSaveEnd(const DynChunk & byteCode,uint8_t opCode)225 void RegExpExecutor::HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
226 {
227     uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
228     ASSERT(captureIndex < nCapture_);
229     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
230     CaptureState *captureState = &captureResultList_[captureIndex];
231     captureState->captureEnd = GetCurrentPtr();
232     Advance(opCode);
233 }
234 
HandleOpSaveReset(const DynChunk & byteCode,uint8_t opCode)235 void RegExpExecutor::HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
236 {
237     uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
238     uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
239     for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
240         CaptureState *captureState =
241             &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
242         captureState->captureStart = nullptr;
243         captureState->captureEnd = nullptr;
244     }
245     Advance(opCode);
246 }
247 
HandleOpMatch(const DynChunk & byteCode,uint8_t opCode)248 void RegExpExecutor::HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
249 {
250     auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
251     ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
252     uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
253     Advance(opCode);
254     uint32_t splitPc = GetCurrentPC() + offset;
255     PushRegExpState(type, splitPc);
256 }
257 
HandleOpSplitFirst(const DynChunk & byteCode,uint8_t opCode)258 void RegExpExecutor::HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
259 {
260     uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
261     Advance(opCode);
262     PushRegExpState(STATE_SPLIT, GetCurrentPC());
263     AdvanceOffset(offset);
264 }
265 
HandleOpPrev(uint8_t opCode)266 bool RegExpExecutor::HandleOpPrev(uint8_t opCode)
267 {
268     if (GetCurrentPtr() == input_) {
269         if (MatchFailed()) {
270             return false;
271         }
272     } else {
273         PrevPtr(&currentPtr_, input_);
274         Advance(opCode);
275     }
276     return true;
277 }
278 
HandleOpLoop(const DynChunk & byteCode,uint8_t opCode)279 void RegExpExecutor::HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
280 {
281     uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
282     uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
283     uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
284     Advance(opCode);
285     uint32_t loopPcEnd = GetCurrentPC();
286     uint32_t loopPcStart = GetCurrentPC() + pcOffset;
287     bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
288     uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
289 
290     uint32_t loopCount = PeekStack();
291     SetStackValue(++loopCount);
292     if (loopCount < loopMax) {
293         // greedy failed, goto next
294         if (loopCount >= quantifyMin) {
295             PushRegExpState(STATE_SPLIT, loopPcEnd);
296         }
297         // Goto loop start
298         SetCurrentPC(loopPcStart);
299     } else {
300         if (!isGreedy && (loopCount < quantifyMax)) {
301             PushRegExpState(STATE_SPLIT, loopPcStart);
302         }
303     }
304 }
305 
HandleOpRange32(const DynChunk & byteCode)306 bool RegExpExecutor::HandleOpRange32(const DynChunk &byteCode)
307 {
308     if (IsEOF()) {
309         return !MatchFailed();
310     }
311     uint32_t currentChar = GetCurrentChar();
312     if (IsIgnoreCase()) {
313         currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16());
314     }
315     uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
316     bool isFound = false;
317     int32_t idxMin = 0;
318     int32_t idxMax = rangeCount - 1;
319     int32_t idx = 0;
320     uint32_t low = 0;
321     uint32_t high =
322         byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET + RANGE32_MAX_HALF_OFFSET);
323     if (currentChar <= high) {
324         while (idxMin <= idxMax) {
325             idx = (idxMin + idxMax) / RANGE32_OFFSET;
326             low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_OFFSET);
327             high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_OFFSET +
328                 RANGE32_MAX_HALF_OFFSET);
329             if (currentChar < low) {
330                 idxMax = idx - 1;
331             } else if (currentChar > high) {
332                 idxMin = idx + 1;
333             } else {
334                 isFound = true;
335                 break;
336             }
337         }
338     }
339     if (isFound) {
340         AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
341     } else {
342         if (MatchFailed()) {
343             return false;
344         }
345     }
346     return true;
347 }
348 
HandleOpRange(const DynChunk & byteCode)349 bool RegExpExecutor::HandleOpRange(const DynChunk &byteCode)
350 {
351     if (IsEOF()) {
352         return !MatchFailed();
353     }
354     uint32_t currentChar = GetCurrentChar();
355     if (IsIgnoreCase()) {
356         currentChar = RegExpParser::Canonicalize(currentChar, IsUtf16());
357     }
358     uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
359     bool isFound = false;
360     int32_t idxMin = 0;
361     int32_t idxMax = rangeCount - 1;
362     int32_t idx = 0;
363     uint32_t low = 0;
364     uint32_t high =
365         byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
366     if (currentChar <= high) {
367         while (idxMin <= idxMax) {
368             idx = (idxMin + idxMax) / RANGE32_OFFSET;
369             low = byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_HALF_OFFSET);
370             high =
371                 byteCode.GetU16(GetCurrentPC() + RANGE32_HEAD_OFFSET + idx * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
372             if (currentChar < low) {
373                 idxMax = idx - 1;
374             } else if (currentChar > high) {
375                 idxMin = idx + 1;
376             } else {
377                 isFound = true;
378                 break;
379             }
380         }
381     }
382     if (isFound) {
383         AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
384     } else {
385         if (MatchFailed()) {
386             return false;
387         }
388     }
389     return true;
390 }
391 
HandleOpBackReference(const DynChunk & byteCode,uint8_t opCode)392 bool RegExpExecutor::HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
393 {
394     uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
395     if (captureIndex >= nCapture_) {
396         return !MatchFailed();
397     }
398     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
399     const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
400     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
401     const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
402     if (captureStart == nullptr || captureEnd == nullptr) {
403         Advance(opCode);
404         return true;
405     }
406     bool isMatched = true;
407     if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
408         const uint8_t *refCptr = captureStart;
409         while (refCptr < captureEnd) {
410             if (IsEOF()) {
411                 isMatched = false;
412                 break;
413             }
414             // NOLINTNEXTLINE(readability-identifier-naming)
415             uint32_t c1 = GetChar(&refCptr, captureEnd);
416             // NOLINTNEXTLINE(readability-identifier-naming)
417             uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
418             if (IsIgnoreCase()) {
419                 c1 = RegExpParser::Canonicalize(c1, IsUtf16());
420                 c2 = RegExpParser::Canonicalize(c2, IsUtf16());
421             }
422             if (c1 != c2) {
423                 isMatched = false;
424                 break;
425             }
426         }
427         if (!isMatched) {
428             if (MatchFailed()) {
429                 return false;
430             }
431         } else {
432             Advance(opCode);
433         }
434     } else {
435         const uint8_t *refCptr = captureEnd;
436         while (refCptr > captureStart) {
437             if (GetCurrentPtr() == input_) {
438                 isMatched = false;
439                 break;
440             }
441             // NOLINTNEXTLINE(readability-identifier-naming)
442             uint32_t c1 = GetPrevChar(&refCptr, captureStart);
443             // NOLINTNEXTLINE(readability-identifier-naming)
444             uint32_t c2 = GetPrevChar(&currentPtr_, input_);
445             if (IsIgnoreCase()) {
446                 c1 = RegExpParser::Canonicalize(c1, IsUtf16());
447                 c2 = RegExpParser::Canonicalize(c2, IsUtf16());
448             }
449             if (c1 != c2) {
450                 isMatched = false;
451                 break;
452             }
453         }
454         if (!isMatched) {
455             if (MatchFailed()) {
456                 return false;
457             }
458         } else {
459             Advance(opCode);
460         }
461     }
462     return true;
463 }
464 
465 // NOLINTNEXTLINE(readability-function-size)
ExecuteInternal(const DynChunk & byteCode,uint32_t pcEnd)466 bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd)
467 {
468     while (GetCurrentPC() < pcEnd) {
469         // first split
470         if (!HandleFirstSplit()) {
471             return false;
472         }
473         uint8_t opCode = byteCode.GetU8(GetCurrentPC());
474         switch (opCode) {
475             case RegExpOpCode::OP_DOTS:
476             case RegExpOpCode::OP_ALL: {
477                 if (!HandleOpAll(opCode)) {
478                     return false;
479                 }
480                 break;
481             }
482             case RegExpOpCode::OP_CHAR32:
483             case RegExpOpCode::OP_CHAR: {
484                 if (!HandleOpChar(byteCode, opCode)) {
485                     return false;
486                 }
487                 break;
488             }
489             case RegExpOpCode::OP_NOT_WORD_BOUNDARY:
490             case RegExpOpCode::OP_WORD_BOUNDARY: {
491                 if (!HandleOpWordBoundary(opCode)) {
492                     return false;
493                 }
494                 break;
495             }
496             case RegExpOpCode::OP_LINE_START: {
497                 if (!HandleOpLineStart(opCode)) {
498                     return false;
499                 }
500                 break;
501             }
502             case RegExpOpCode::OP_LINE_END: {
503                 if (!HandleOpLineEnd(opCode)) {
504                     return false;
505                 }
506                 break;
507             }
508             case RegExpOpCode::OP_SAVE_START: {
509                 HandleOpSaveStart(byteCode, opCode);
510                 break;
511             }
512             case RegExpOpCode::OP_SAVE_END: {
513                 HandleOpSaveEnd(byteCode, opCode);
514                 break;
515             }
516             case RegExpOpCode::OP_GOTO: {
517                 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
518                 Advance(opCode, offset);
519                 break;
520             }
521             case RegExpOpCode::OP_MATCH: {
522                 // jump to match ahead
523                 if (MatchFailed(true)) {
524                     return false;
525                 }
526                 break;
527             }
528             case RegExpOpCode::OP_MATCH_END:
529                 return true;
530             case RegExpOpCode::OP_SAVE_RESET:
531                 HandleOpSaveReset(byteCode, opCode);
532                 break;
533             case RegExpOpCode::OP_SPLIT_NEXT:
534             case RegExpOpCode::OP_MATCH_AHEAD:
535             case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD:
536                 HandleOpMatch(byteCode, opCode);
537                 break;
538             case RegExpOpCode::OP_SPLIT_FIRST:
539                 HandleOpSplitFirst(byteCode, opCode);
540                 break;
541             case RegExpOpCode::OP_PREV: {
542                 if (!HandleOpPrev(opCode)) {
543                     return false;
544                 }
545                 break;
546             }
547             case RegExpOpCode::OP_LOOP_GREEDY:
548             case RegExpOpCode::OP_LOOP:
549                 HandleOpLoop(byteCode, opCode);
550                 break;
551             case RegExpOpCode::OP_PUSH_CHAR:
552                 PushStack(reinterpret_cast<uintptr_t>(GetCurrentPtr()));
553                 Advance(opCode);
554                 break;
555             case RegExpOpCode::OP_CHECK_CHAR: {
556                 if (PopStack() != reinterpret_cast<uintptr_t>(GetCurrentPtr())) {
557                     Advance(opCode);
558                 } else {
559                     uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
560                     Advance(opCode, offset);
561                 }
562                 break;
563             }
564             case RegExpOpCode::OP_PUSH:
565                 PushStack(0);
566                 Advance(opCode);
567                 break;
568             case RegExpOpCode::OP_POP:
569                 PopStack();
570                 Advance(opCode);
571                 break;
572             case RegExpOpCode::OP_RANGE32: {
573                 if (!HandleOpRange32(byteCode)) {
574                     return false;
575                 }
576                 break;
577             }
578             case RegExpOpCode::OP_RANGE: {
579                 if (!HandleOpRange(byteCode)) {
580                     return false;
581                 }
582                 break;
583             }
584             case RegExpOpCode::OP_BACKREFERENCE:
585             case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: {
586                 if (!HandleOpBackReference(byteCode, opCode)) {
587                     return false;
588                 }
589                 break;
590             }
591             default:
592                 UNREACHABLE();
593         }
594     }
595     // for loop match
596     return true;
597 }
598 
DumpResult(std::ostream & out) const599 void RegExpExecutor::DumpResult(std::ostream &out) const
600 {
601     out << "captures:" << std::endl;
602     for (uint32_t i = 0; i < nCapture_; i++) {
603         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
604         CaptureState *captureState = &captureResultList_[i];
605         int32_t len = captureState->captureEnd - captureState->captureStart;
606         if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
607             out << i << ":\t" << CString(reinterpret_cast<const char *>(captureState->captureStart), len) << std::endl;
608         } else {
609             out << i << ":\t"
610                 << "undefined" << std::endl;
611         }
612     }
613 }
614 
GetResult(const JSThread * thread,bool isSuccess) const615 MatchResult RegExpExecutor::GetResult(const JSThread *thread, bool isSuccess) const
616 {
617     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
618     MatchResult result;
619     std::vector<std::pair<bool, JSHandle<EcmaString>>> captures;
620     result.isSuccess_ = isSuccess;
621     if (isSuccess) {
622         for (uint32_t i = 0; i < nCapture_; i++) {
623             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
624             CaptureState *captureState = &captureResultList_[i];
625             if (i == 0) {
626                 result.index_ = captureState->captureStart - input_;
627                 if (isWideChar_) {
628                     result.index_ /= WIDE_CHAR_SIZE;
629                 }
630             }
631             int32_t len = captureState->captureEnd - captureState->captureStart;
632             std::pair<bool, JSHandle<EcmaString>> pair;
633             if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
634                 pair.first = false;
635                 if (isWideChar_) {
636                     // create utf-16 string
637                     pair.second = factory->NewFromUtf16(
638                         reinterpret_cast<const uint16_t *>(captureState->captureStart), len / 2);
639                 } else {
640                     // create utf-8 string
641                     CVector<uint8_t> buffer(len + 1);
642                     uint8_t *dest = buffer.data();
643                     if (memcpy_s(dest, len + 1, reinterpret_cast<const uint8_t *>(captureState->captureStart), len) !=
644                         EOK) {
645                         LOG_ECMA(FATAL) << "memcpy_s failed";
646                         UNREACHABLE();
647                     }
648                     dest[len] = '\0';  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
649                     pair.second =
650                         factory->NewFromUtf8(reinterpret_cast<const uint8_t *>(buffer.data()), len);
651                 }
652             } else {
653                 // undefined
654                 pair.first = true;
655             }
656             captures.emplace_back(pair);
657         }
658         result.captures_ = captures;
659         result.endIndex_ = currentPtr_ - input_;
660         if (isWideChar_) {
661             result.endIndex_ /= WIDE_CHAR_SIZE;
662         }
663     }
664     return result;
665 }
666 
PushRegExpState(StateType type,uint32_t pc)667 void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc)
668 {
669     ReAllocStack(stateStackLen_ + 1);
670     auto state = reinterpret_cast<RegExpState *>(
671         stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
672         stateStackLen_ * stateSize_);
673     state->type_ = type;
674     state->currentPc_ = pc;
675     state->currentStack_ = currentStack_;
676     state->currentPtr_ = GetCurrentPtr();
677     size_t listSize = sizeof(CaptureState) * nCapture_;
678     if (memcpy_s(state->captureResultList_, listSize, GetCaptureResultList(), listSize) != EOK) {
679         LOG_ECMA(FATAL) << "memcpy_s failed";
680         UNREACHABLE();
681     }
682     uint8_t *stackStart =
683         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
684         reinterpret_cast<uint8_t *>(state->captureResultList_) + sizeof(CaptureState) * nCapture_;
685     if (stack_ != nullptr) {
686         size_t stackSize = sizeof(uintptr_t) * nStack_;
687         if (memcpy_s(stackStart, stackSize, stack_, stackSize) != EOK) {
688             LOG_ECMA(FATAL) << "memcpy_s failed";
689             UNREACHABLE();
690         }
691     }
692     stateStackLen_++;
693 }
694 
PopRegExpState(bool copyCaptrue)695 RegExpState *RegExpExecutor::PopRegExpState(bool copyCaptrue)
696 {
697     if (stateStackLen_ != 0) {
698         auto state = PeekRegExpState();
699         size_t listSize = sizeof(CaptureState) * nCapture_;
700         if (copyCaptrue) {
701             if (memcpy_s(GetCaptureResultList(), listSize, state->captureResultList_, listSize) != EOK) {
702                 LOG_ECMA(FATAL) << "memcpy_s failed";
703                 UNREACHABLE();
704             }
705         }
706         SetCurrentPtr(state->currentPtr_);
707         SetCurrentPC(state->currentPc_);
708         currentStack_ = state->currentStack_;
709         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
710         uint8_t *stackStart = reinterpret_cast<uint8_t *>(state->captureResultList_) + listSize;
711         if (stack_ != nullptr) {
712             size_t stackSize = sizeof(uintptr_t) * nStack_;
713             if (memcpy_s(stack_, stackSize, stackStart, stackSize) != EOK) {
714                 LOG_ECMA(FATAL) << "memcpy_s failed";
715                 UNREACHABLE();
716             }
717         }
718         stateStackLen_--;
719         return state;
720     }
721     return nullptr;
722 }
723 
ReAllocStack(uint32_t stackLen)724 void RegExpExecutor::ReAllocStack(uint32_t stackLen)
725 {
726     if (stackLen > stateStackSize_) {
727         uint32_t newStackSize = std::max(stateStackSize_ * 2, MIN_STACK_SIZE);  // 2: double the size
728         uint32_t stackByteSize = newStackSize * stateSize_;
729         auto newStack = chunk_->NewArray<uint8_t>(stackByteSize);
730         if (memset_s(newStack, stackByteSize, 0, stackByteSize) != EOK) {
731             LOG_ECMA(FATAL) << "memset_s failed";
732             UNREACHABLE();
733         }
734         if (stateStack_ != nullptr) {
735             size_t stackSize = stateStackSize_ * stateSize_;
736             if (memcpy_s(newStack, stackSize, stateStack_, stackSize) != EOK) {
737                 return;
738             }
739         }
740         stateStack_ = newStack;
741         stateStackSize_ = newStackSize;
742     }
743 }
744 }  // namespace panda::ecmascript
745