1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "regexp_16.h"
17
18 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
19 #define PCRE2_CODE_UNIT_WIDTH 16
20 #include "pcre2.h"
21
22 #include "plugins/ets/runtime/ets_exceptions.h"
23
24 #include <utility>
25
26 namespace ark::ets {
27
// Number of bytes occupied by one 16-bit code unit; used to turn code-unit counts into byte counts.
constexpr int PCRE2_CHARACTER_WIDTH = 2;
// Number of ovector slots per capture group: one start offset followed by one end offset.
constexpr int PCRE2_MATCH_DATA_UNIT_WIDTH = 2;
// Number of bytes of a name-table entry that are subtracted before the group name is extracted.
constexpr int PCRE2_GROUPS_NAME_ENTRY_SHIFT = 4;
31
CreatePcre2Object(const uint16_t * patternStr,uint32_t flags,uint32_t extraFlags,const int len)32 Pcre2Obj RegExp16::CreatePcre2Object(const uint16_t *patternStr, uint32_t flags, uint32_t extraFlags, const int len)
33 {
34 int errorNumber;
35 PCRE2_SPTR pattern = static_cast<PCRE2_SPTR>(patternStr);
36 PCRE2_SIZE errorOffset;
37 auto *compileContext = pcre2_compile_context_create(nullptr);
38 pcre2_set_compile_extra_options(compileContext, extraFlags);
39 auto re = pcre2_compile(pattern, len, flags, &errorNumber, &errorOffset, compileContext);
40 pcre2_compile_context_free(compileContext);
41 return reinterpret_cast<Pcre2Obj>(re);
42 }
43
Execute(Pcre2Obj re,const uint16_t * str,int len,const int startOffset)44 RegExpExecResult RegExp16::Execute(Pcre2Obj re, const uint16_t *str, int len, const int startOffset)
45 {
46 auto *expr = reinterpret_cast<pcre2_code *>(re);
47 auto *matchData = pcre2_match_data_create_from_pattern(expr, nullptr);
48 PandaVector<std::pair<bool, PandaString>> captures;
49 PandaVector<std::pair<uint32_t, uint32_t>> indices;
50 auto resultCount = pcre2_match(expr, str, len, startOffset, 0, matchData, nullptr);
51 auto *ovector = pcre2_get_ovector_pointer(matchData);
52
53 RegExpExecResult result;
54 result.isWide = true;
55 if (resultCount < 0) {
56 result.isSuccess = false;
57 pcre2_match_data_free(matchData);
58 return result;
59 }
60 const auto lastIndex = resultCount * PCRE2_MATCH_DATA_UNIT_WIDTH;
61 for (decltype(resultCount) i = 0; i < lastIndex; i += PCRE2_MATCH_DATA_UNIT_WIDTH) {
62 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
63 const auto substringStart = ovector[i];
64 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
65 const auto substringEnd = ovector[i + 1];
66 indices.emplace_back(
67 std::make_pair(static_cast<uint32_t>(substringStart), static_cast<uint32_t>(substringEnd)));
68 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
69 auto res = PandaString(reinterpret_cast<const char *>(str + substringStart),
70 (substringEnd - substringStart) * PCRE2_CHARACTER_WIDTH);
71 captures.push_back({true, res});
72 }
73
74 int nameCount;
75 pcre2_pattern_info(expr, PCRE2_INFO_NAMECOUNT, &nameCount);
76
77 if (nameCount > 0) {
78 RegExp16::ExtractGroups(re, nameCount, result, reinterpret_cast<void *>(ovector));
79 }
80
81 result.isSuccess = true;
82 result.captures = std::move(captures);
83 result.indices = std::move(indices);
84 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
85 result.index = ovector[0];
86 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
87 result.endIndex = ovector[1];
88 int groupCount = pcre2_get_ovector_count(matchData);
89 while (static_cast<int>(result.captures.size()) < groupCount) {
90 result.captures.push_back({false, PandaString()});
91 result.indices.push_back({0, 0});
92 }
93 pcre2_match_data_free(matchData);
94 return result;
95 }
96
ExtractGroups(Pcre2Obj expression,int count,RegExpExecResult & result,void * data)97 void RegExp16::ExtractGroups(Pcre2Obj expression, int count, RegExpExecResult &result, void *data)
98 {
99 PCRE2_SPTR nameTable;
100 PCRE2_SPTR tabPtr;
101 int nameEntrySize;
102
103 auto *expr = reinterpret_cast<pcre2_code *>(expression);
104 auto *ovector = reinterpret_cast<PCRE2_SIZE *>(data);
105
106 pcre2_pattern_info(expr, PCRE2_INFO_NAMETABLE, &nameTable);
107 pcre2_pattern_info(expr, PCRE2_INFO_NAMEENTRYSIZE, &nameEntrySize);
108
109 tabPtr = nameTable;
110 for (int currentNameId = 0; currentNameId < count; currentNameId++) {
111 auto n = static_cast<int32_t>(tabPtr[0]);
112 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
113 auto index = static_cast<int32_t>(ovector[PCRE2_CHARACTER_WIDTH * n]);
114 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
115 auto endIndex = static_cast<int32_t>(ovector[PCRE2_CHARACTER_WIDTH * n + 1]);
116 auto tabConstCharPtr = reinterpret_cast<const char *>(tabPtr + 1);
117 size_t size = nameEntrySize * PCRE2_CHARACTER_WIDTH - PCRE2_GROUPS_NAME_ENTRY_SHIFT;
118 while (size > 0) {
119 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
120 if (static_cast<uint8_t>(*(tabConstCharPtr + size - PCRE2_CHARACTER_WIDTH)) != 0) {
121 break;
122 }
123 size -= PCRE2_CHARACTER_WIDTH;
124 }
125 auto key16 = PandaString(tabConstCharPtr, size);
126 PandaString key;
127 key.reserve(key16.size() / PCRE2_CHARACTER_WIDTH);
128 for (size_t i = 0; i < key16.size(); i += PCRE2_CHARACTER_WIDTH) {
129 key += key16[i];
130 }
131 result.namedGroups[key] = {index, endIndex};
132 tabPtr += nameEntrySize;
133 }
134 }
135
FreePcre2Object(Pcre2Obj re)136 void RegExp16::FreePcre2Object(Pcre2Obj re)
137 {
138 pcre2_code_free(reinterpret_cast<pcre2_code *>(re));
139 }
140
141 } // namespace ark::ets
142