• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "regexp_8.h"
17 
18 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
19 #define PCRE2_CODE_UNIT_WIDTH 8
20 #include "pcre2.h"
21 
22 #include "plugins/ets/runtime/ets_exceptions.h"
23 
24 #include <utility>
25 
26 namespace ark::ets {
27 
/** Stride used when walking the PCRE2 ovector: each group occupies a (start, end) pair of entries. */
constexpr int PCRE2_MATCH_DATA_UNIT_WIDTH {2};
/** Step, in code units, used when trimming trailing zero code units from a name-table entry. */
constexpr int PCRE2_CHARACTER_WIDTH {1};
/** Amount subtracted from the name-table entry size to get the maximum length of a group name. */
constexpr int PCRE2_GROUPS_NAME_ENTRY_SHIFT {3};
31 
CreatePcre2Object(const uint8_t * patternStr,uint32_t flags,uint32_t extraFlags,const int len)32 Pcre2Obj RegExp8::CreatePcre2Object(const uint8_t *patternStr, uint32_t flags, uint32_t extraFlags, const int len)
33 {
34     int errorNumber;
35     PCRE2_SPTR pattern = static_cast<PCRE2_SPTR>(patternStr);
36     PCRE2_SIZE errorOffset;
37     auto *compileContext = pcre2_compile_context_create(nullptr);
38     pcre2_set_compile_extra_options(compileContext, extraFlags);
39     auto re = pcre2_compile(pattern, len, flags, &errorNumber, &errorOffset, compileContext);
40     pcre2_compile_context_free(compileContext);
41     return reinterpret_cast<Pcre2Obj>(re);
42 }
43 
Execute(Pcre2Obj re,const uint8_t * str,const int len,const int startOffset)44 RegExpExecResult RegExp8::Execute(Pcre2Obj re, const uint8_t *str, const int len, const int startOffset)
45 {
46     auto *expr = reinterpret_cast<pcre2_code *>(re);
47     auto *matchData = pcre2_match_data_create_from_pattern(expr, nullptr);
48     PandaVector<std::pair<bool, PandaString>> captures;
49     PandaVector<std::pair<uint32_t, uint32_t>> indices;
50     auto resultCount = pcre2_match(expr, str, len, startOffset, 0, matchData, nullptr);
51     auto *ovector = pcre2_get_ovector_pointer(matchData);
52     RegExpExecResult result;
53     result.isWide = false;
54     if (resultCount < 0) {
55         result.isSuccess = false;
56         pcre2_match_data_free(matchData);
57         return result;
58     }
59 
60     const auto lastIndex = resultCount * PCRE2_MATCH_DATA_UNIT_WIDTH;
61     for (decltype(resultCount) i = 0; i < lastIndex; i += PCRE2_MATCH_DATA_UNIT_WIDTH) {
62         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
63         const auto substringStart = ovector[i];
64         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
65         const auto substringEnd = ovector[i + 1];
66         indices.emplace_back(
67             std::make_pair(static_cast<uint32_t>(substringStart), static_cast<uint32_t>(substringEnd)));
68         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
69         auto res = PandaString(reinterpret_cast<const char *>(str + substringStart), substringEnd - substringStart);
70         captures.push_back({true, res});
71     }
72 
73     int nameCount;
74     pcre2_pattern_info(expr, PCRE2_INFO_NAMECOUNT, &nameCount);
75 
76     if (nameCount > 0) {
77         RegExp8::ExtractGroups(re, nameCount, result, reinterpret_cast<void *>(ovector));
78     }
79 
80     result.isSuccess = true;
81     result.captures = std::move(captures);
82     result.indices = std::move(indices);
83     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
84     result.index = ovector[0];
85     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
86     result.endIndex = ovector[1];
87     int groupCount = pcre2_get_ovector_count(matchData);
88     while (static_cast<int>(result.captures.size()) < groupCount) {
89         result.captures.push_back({false, PandaString()});
90         result.indices.push_back({0, 0});
91     }
92     pcre2_match_data_free(matchData);
93     return result;
94 }
95 
ExtractGroups(Pcre2Obj expression,int count,RegExpExecResult & result,void * data)96 void RegExp8::ExtractGroups(Pcre2Obj expression, int count, RegExpExecResult &result, void *data)
97 {
98     PCRE2_SPTR nameTable;
99     PCRE2_SPTR tabPtr;
100     int nameEntrySize;
101 
102     auto *expr = reinterpret_cast<pcre2_code *>(expression);
103     auto *ovector = reinterpret_cast<PCRE2_SIZE *>(data);
104 
105     pcre2_pattern_info(expr, PCRE2_INFO_NAMETABLE, &nameTable);
106     pcre2_pattern_info(expr, PCRE2_INFO_NAMEENTRYSIZE, &nameEntrySize);
107 
108     tabPtr = nameTable;
109     for (int i = 0; i < count; i++) {
110         auto n = static_cast<int32_t>(static_cast<PCRE2_UCHAR8>(tabPtr[0] << 8U) | tabPtr[1]);
111         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
112         auto index = static_cast<int32_t>(ovector[2 * n]);
113         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
114         auto endIndex = static_cast<int32_t>(ovector[2 * n + 1]);
115         auto tabConstCharPtr = reinterpret_cast<const char *>(tabPtr + 2);
116         size_t size = nameEntrySize - PCRE2_GROUPS_NAME_ENTRY_SHIFT;
117         while (size > 0) {
118             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
119             if (static_cast<uint8_t>(*(tabConstCharPtr + size - PCRE2_CHARACTER_WIDTH)) != 0) {
120                 break;
121             }
122             size -= PCRE2_CHARACTER_WIDTH;
123         }
124         auto key = PandaString(reinterpret_cast<const char *>(tabPtr + 2), size);
125         result.namedGroups[key] = {index, endIndex};
126         tabPtr += nameEntrySize;
127     }
128 }
129 
FreePcre2Object(Pcre2Obj re)130 void RegExp8::FreePcre2Object(Pcre2Obj re)
131 {
132     pcre2_code_free(reinterpret_cast<pcre2_code *>(re));
133 }
134 
135 }  // namespace ark::ets
136