• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "regexp_16.h"
17 
18 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
19 #define PCRE2_CODE_UNIT_WIDTH 16
20 #include "pcre2.h"
21 
22 #include "plugins/ets/runtime/ets_exceptions.h"
23 
24 #include <utility>
25 
26 namespace ark::ets {
27 
// Number of ovector slots per capture group: slot 2*k holds the start offset of group k, slot 2*k+1 its end offset.
constexpr int PCRE2_MATCH_DATA_UNIT_WIDTH {2};
// Size in bytes of one 16-bit code unit; used to turn code-unit lengths into byte lengths.
constexpr int PCRE2_CHARACTER_WIDTH {static_cast<int>(sizeof(uint16_t))};
// Bytes of a name-table entry that are not part of the group name itself. In the 16-bit library that is
// one code unit for the group number plus one for the terminating zero (see the PCRE2 name-table layout).
constexpr int PCRE2_GROUPS_NAME_ENTRY_SHIFT {2 * PCRE2_CHARACTER_WIDTH};
31 
CreatePcre2Object(const uint16_t * patternStr,uint32_t flags,uint32_t extraFlags,const int len)32 Pcre2Obj RegExp16::CreatePcre2Object(const uint16_t *patternStr, uint32_t flags, uint32_t extraFlags, const int len)
33 {
34     int errorNumber;
35     PCRE2_SPTR pattern = static_cast<PCRE2_SPTR>(patternStr);
36     PCRE2_SIZE errorOffset;
37     auto *compileContext = pcre2_compile_context_create(nullptr);
38     pcre2_set_compile_extra_options(compileContext, extraFlags);
39     auto re = pcre2_compile(pattern, len, flags, &errorNumber, &errorOffset, compileContext);
40     pcre2_compile_context_free(compileContext);
41     return reinterpret_cast<Pcre2Obj>(re);
42 }
43 
Execute(Pcre2Obj re,const uint16_t * str,int len,const int startOffset)44 RegExpExecResult RegExp16::Execute(Pcre2Obj re, const uint16_t *str, int len, const int startOffset)
45 {
46     auto *expr = reinterpret_cast<pcre2_code *>(re);
47     auto *matchData = pcre2_match_data_create_from_pattern(expr, nullptr);
48     PandaVector<std::pair<bool, PandaString>> captures;
49     PandaVector<std::pair<uint32_t, uint32_t>> indices;
50     auto resultCount = pcre2_match(expr, str, len, startOffset, 0, matchData, nullptr);
51     auto *ovector = pcre2_get_ovector_pointer(matchData);
52 
53     RegExpExecResult result;
54     result.isWide = true;
55     if (resultCount < 0) {
56         result.isSuccess = false;
57         pcre2_match_data_free(matchData);
58         return result;
59     }
60     const auto lastIndex = resultCount * PCRE2_MATCH_DATA_UNIT_WIDTH;
61     for (decltype(resultCount) i = 0; i < lastIndex; i += PCRE2_MATCH_DATA_UNIT_WIDTH) {
62         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
63         const auto substringStart = ovector[i];
64         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
65         const auto substringEnd = ovector[i + 1];
66         indices.emplace_back(
67             std::make_pair(static_cast<uint32_t>(substringStart), static_cast<uint32_t>(substringEnd)));
68         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
69         auto res = PandaString(reinterpret_cast<const char *>(str + substringStart),
70                                (substringEnd - substringStart) * PCRE2_CHARACTER_WIDTH);
71         captures.push_back({true, res});
72     }
73 
74     int nameCount;
75     pcre2_pattern_info(expr, PCRE2_INFO_NAMECOUNT, &nameCount);
76 
77     if (nameCount > 0) {
78         RegExp16::ExtractGroups(re, nameCount, result, reinterpret_cast<void *>(ovector));
79     }
80 
81     result.isSuccess = true;
82     result.captures = std::move(captures);
83     result.indices = std::move(indices);
84     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
85     result.index = ovector[0];
86     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
87     result.endIndex = ovector[1];
88     int groupCount = pcre2_get_ovector_count(matchData);
89     while (static_cast<int>(result.captures.size()) < groupCount) {
90         result.captures.push_back({false, PandaString()});
91         result.indices.push_back({0, 0});
92     }
93     pcre2_match_data_free(matchData);
94     return result;
95 }
96 
ExtractGroups(Pcre2Obj expression,int count,RegExpExecResult & result,void * data)97 void RegExp16::ExtractGroups(Pcre2Obj expression, int count, RegExpExecResult &result, void *data)
98 {
99     PCRE2_SPTR nameTable;
100     PCRE2_SPTR tabPtr;
101     int nameEntrySize;
102 
103     auto *expr = reinterpret_cast<pcre2_code *>(expression);
104     auto *ovector = reinterpret_cast<PCRE2_SIZE *>(data);
105 
106     pcre2_pattern_info(expr, PCRE2_INFO_NAMETABLE, &nameTable);
107     pcre2_pattern_info(expr, PCRE2_INFO_NAMEENTRYSIZE, &nameEntrySize);
108 
109     tabPtr = nameTable;
110     for (int currentNameId = 0; currentNameId < count; currentNameId++) {
111         auto n = static_cast<int32_t>(tabPtr[0]);
112         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
113         auto index = static_cast<int32_t>(ovector[PCRE2_CHARACTER_WIDTH * n]);
114         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
115         auto endIndex = static_cast<int32_t>(ovector[PCRE2_CHARACTER_WIDTH * n + 1]);
116         auto tabConstCharPtr = reinterpret_cast<const char *>(tabPtr + 1);
117         size_t size = nameEntrySize * PCRE2_CHARACTER_WIDTH - PCRE2_GROUPS_NAME_ENTRY_SHIFT;
118         while (size > 0) {
119             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
120             if (static_cast<uint8_t>(*(tabConstCharPtr + size - PCRE2_CHARACTER_WIDTH)) != 0) {
121                 break;
122             }
123             size -= PCRE2_CHARACTER_WIDTH;
124         }
125         auto key16 = PandaString(tabConstCharPtr, size);
126         PandaString key;
127         key.reserve(key16.size() / PCRE2_CHARACTER_WIDTH);
128         for (size_t i = 0; i < key16.size(); i += PCRE2_CHARACTER_WIDTH) {
129             key += key16[i];
130         }
131         result.namedGroups[key] = {index, endIndex};
132         tabPtr += nameEntrySize;
133     }
134 }
135 
FreePcre2Object(Pcre2Obj re)136 void RegExp16::FreePcre2Object(Pcre2Obj re)
137 {
138     pcre2_code_free(reinterpret_cast<pcre2_code *>(re));
139 }
140 
141 }  // namespace ark::ets
142