1 /**
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "plugins/ets/runtime/regexp/regexp_executor.h"
17 #include "include/coretypes/string.h"
18 #include "include/mem/panda_string.h"
19 #include "runtime/handle_scope-inl.h"
20 #include "types/ets_array.h"
21
22 namespace ark::ets {
23
GetIndices(CaptureState * captureState) const24 std::pair<uint32_t, uint32_t> RegExpExecutor::GetIndices(CaptureState *captureState) const
25 {
26 uint8_t *begin = GetInputPtr();
27 uint32_t start = captureState->captureStart - begin;
28 uint32_t end = captureState->captureEnd - begin;
29 if (IsWideChar()) {
30 return {start / WIDE_CHAR_SIZE, end / WIDE_CHAR_SIZE};
31 }
32 return {start, end};
33 }
34
HandleCaptures(PandaVector<std::pair<bool,PandaString>> & captures,PandaVector<std::pair<uint32_t,uint32_t>> & indices,CaptureState * captureState) const35 void RegExpExecutor::HandleCaptures(PandaVector<std::pair<bool, PandaString>> &captures,
36 PandaVector<std::pair<uint32_t, uint32_t>> &indices,
37 CaptureState *captureState) const
38 {
39 int32_t len = captureState->captureEnd - captureState->captureStart;
40 PandaString res;
41 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) {
42 if (IsWideChar()) {
43 // create utf-16
44 res = PandaString(reinterpret_cast<const char *>(captureState->captureStart), len);
45 } else {
46 // create utf-8 string
47 PandaVector<uint8_t> buffer(len + 1);
48 uint8_t *dest = buffer.data();
49 if (memcpy_s(dest, len + 1, reinterpret_cast<const uint8_t *>(captureState->captureStart), len) != EOK) {
50 LOG(FATAL, COMMON) << "memcpy_s failed";
51 UNREACHABLE();
52 }
53 dest[len] = '\0'; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
54 res = PandaString(reinterpret_cast<const char *>(buffer.data()), len);
55 }
56 captures.push_back({true, res});
57 indices.emplace_back(GetIndices(captureState));
58 }
59 }
60
GetResult(bool isSuccess,bool hasIndices) const61 RegExpExecResult RegExpExecutor::GetResult(bool isSuccess, bool hasIndices) const
62 {
63 RegExpExecResult result;
64 PandaVector<std::pair<bool, PandaString>> captures;
65 PandaVector<std::pair<uint32_t, uint32_t>> indices;
66 result.isSuccess = isSuccess;
67 result.isWide = IsWideChar();
68 if (!isSuccess) {
69 return result;
70 }
71 for (uint32_t i = 0; i < GetCaptureCount(); i++) {
72 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
73 CaptureState *captureState = &GetCaptureResultList()[i];
74 if (i == 0) {
75 result.index = captureState->captureStart - GetInputPtr();
76 if (IsWideChar()) {
77 result.index /= WIDE_CHAR_SIZE;
78 }
79 }
80 HandleCaptures(captures, indices, captureState);
81 }
82 result.captures = std::move(captures);
83 if (hasIndices) {
84 result.indices = std::move(indices);
85 } else {
86 result.indices = {};
87 }
88 result.endIndex = GetCurrentPtr() - GetInputPtr();
89 if (IsWideChar()) {
90 result.endIndex /= WIDE_CHAR_SIZE;
91 }
92 return result;
93 }
94 } // namespace ark::ets
95