1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "regexp_8.h"
17
18 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
19 #define PCRE2_CODE_UNIT_WIDTH 8
20 #include "pcre2.h"
21
22 #include "plugins/ets/runtime/ets_exceptions.h"
23
24 #include <utility>
25
26 namespace ark::ets {
27
// Number of ovector slots per capture group: the start offset followed by the end offset.
constexpr int PCRE2_MATCH_DATA_UNIT_WIDTH {2};
// Step, in code units, used when trimming trailing zero padding from a group name.
constexpr int PCRE2_CHARACTER_WIDTH {1};
// Portion of a name-table entry that is not name text; subtracted from the entry size to bound the name length
// (per the PCRE2 name-table layout: two group-number bytes plus the terminating zero).
constexpr int PCRE2_GROUPS_NAME_ENTRY_SHIFT {3};
31
CreatePcre2Object(const uint8_t * patternStr,uint32_t flags,uint32_t extraFlags,const int len)32 Pcre2Obj RegExp8::CreatePcre2Object(const uint8_t *patternStr, uint32_t flags, uint32_t extraFlags, const int len)
33 {
34 int errorNumber;
35 PCRE2_SPTR pattern = static_cast<PCRE2_SPTR>(patternStr);
36 PCRE2_SIZE errorOffset;
37 auto *compileContext = pcre2_compile_context_create(nullptr);
38 pcre2_set_compile_extra_options(compileContext, extraFlags);
39 auto re = pcre2_compile(pattern, len, flags, &errorNumber, &errorOffset, compileContext);
40 pcre2_compile_context_free(compileContext);
41 return reinterpret_cast<Pcre2Obj>(re);
42 }
43
Execute(Pcre2Obj re,const uint8_t * str,const int len,const int startOffset)44 RegExpExecResult RegExp8::Execute(Pcre2Obj re, const uint8_t *str, const int len, const int startOffset)
45 {
46 auto *expr = reinterpret_cast<pcre2_code *>(re);
47 auto *matchData = pcre2_match_data_create_from_pattern(expr, nullptr);
48 PandaVector<std::pair<bool, PandaString>> captures;
49 PandaVector<std::pair<uint32_t, uint32_t>> indices;
50 auto resultCount = pcre2_match(expr, str, len, startOffset, 0, matchData, nullptr);
51 auto *ovector = pcre2_get_ovector_pointer(matchData);
52 RegExpExecResult result;
53 result.isWide = false;
54 if (resultCount < 0) {
55 result.isSuccess = false;
56 pcre2_match_data_free(matchData);
57 return result;
58 }
59
60 const auto lastIndex = resultCount * PCRE2_MATCH_DATA_UNIT_WIDTH;
61 for (decltype(resultCount) i = 0; i < lastIndex; i += PCRE2_MATCH_DATA_UNIT_WIDTH) {
62 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
63 const auto substringStart = ovector[i];
64 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
65 const auto substringEnd = ovector[i + 1];
66 indices.emplace_back(
67 std::make_pair(static_cast<uint32_t>(substringStart), static_cast<uint32_t>(substringEnd)));
68 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
69 auto res = PandaString(reinterpret_cast<const char *>(str + substringStart), substringEnd - substringStart);
70 captures.push_back({true, res});
71 }
72
73 int nameCount;
74 pcre2_pattern_info(expr, PCRE2_INFO_NAMECOUNT, &nameCount);
75
76 if (nameCount > 0) {
77 RegExp8::ExtractGroups(re, nameCount, result, reinterpret_cast<void *>(ovector));
78 }
79
80 result.isSuccess = true;
81 result.captures = std::move(captures);
82 result.indices = std::move(indices);
83 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
84 result.index = ovector[0];
85 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
86 result.endIndex = ovector[1];
87 int groupCount = pcre2_get_ovector_count(matchData);
88 while (static_cast<int>(result.captures.size()) < groupCount) {
89 result.captures.push_back({false, PandaString()});
90 result.indices.push_back({0, 0});
91 }
92 pcre2_match_data_free(matchData);
93 return result;
94 }
95
ExtractGroups(Pcre2Obj expression,int count,RegExpExecResult & result,void * data)96 void RegExp8::ExtractGroups(Pcre2Obj expression, int count, RegExpExecResult &result, void *data)
97 {
98 PCRE2_SPTR nameTable;
99 PCRE2_SPTR tabPtr;
100 int nameEntrySize;
101
102 auto *expr = reinterpret_cast<pcre2_code *>(expression);
103 auto *ovector = reinterpret_cast<PCRE2_SIZE *>(data);
104
105 pcre2_pattern_info(expr, PCRE2_INFO_NAMETABLE, &nameTable);
106 pcre2_pattern_info(expr, PCRE2_INFO_NAMEENTRYSIZE, &nameEntrySize);
107
108 tabPtr = nameTable;
109 for (int i = 0; i < count; i++) {
110 auto n = static_cast<int32_t>(static_cast<PCRE2_UCHAR8>(tabPtr[0] << 8U) | tabPtr[1]);
111 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
112 auto index = static_cast<int32_t>(ovector[2 * n]);
113 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
114 auto endIndex = static_cast<int32_t>(ovector[2 * n + 1]);
115 auto tabConstCharPtr = reinterpret_cast<const char *>(tabPtr + 2);
116 size_t size = nameEntrySize - PCRE2_GROUPS_NAME_ENTRY_SHIFT;
117 while (size > 0) {
118 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
119 if (static_cast<uint8_t>(*(tabConstCharPtr + size - PCRE2_CHARACTER_WIDTH)) != 0) {
120 break;
121 }
122 size -= PCRE2_CHARACTER_WIDTH;
123 }
124 auto key = PandaString(reinterpret_cast<const char *>(tabPtr + 2), size);
125 result.namedGroups[key] = {index, endIndex};
126 tabPtr += nameEntrySize;
127 }
128 }
129
FreePcre2Object(Pcre2Obj re)130 void RegExp8::FreePcre2Object(Pcre2Obj re)
131 {
132 pcre2_code_free(reinterpret_cast<pcre2_code *>(re));
133 }
134
135 } // namespace ark::ets
136