1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "regexp.h"
17
18 #include "regexp_8.h"
19 #include "regexp_16.h"
20 #include "plugins/ets/runtime/ets_exceptions.h"
21
22 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
23 #define PCRE2_CODE_UNIT_WIDTH 8
24 #include "pcre2.h"
25
26 namespace ark::ets {
27
Compile(const PandaVector<uint8_t> & pattern,const bool isUtf16,const int len)28 bool EtsRegExp::Compile(const PandaVector<uint8_t> &pattern, const bool isUtf16, const int len)
29 {
30 utf16_ |= isUtf16;
31 uint32_t flags = 0U;
32 if (flagMultiline_) {
33 flags |= PCRE2_MULTILINE;
34 }
35 if (flagCaseInsentitive_) {
36 flags |= PCRE2_CASELESS;
37 }
38 if (flagSticky_) {
39 flags |= PCRE2_ANCHORED;
40 }
41 if (flagDotAll_) {
42 flags |= PCRE2_DOTALL;
43 }
44 if (flagUnicode_) {
45 flags |= PCRE2_UTF;
46 }
47 if (flagVnicode_) {
48 flags |= PCRE2_UCP;
49 }
50 flags |= PCRE2_MATCH_UNSET_BACKREF;
51 flags |= PCRE2_ALLOW_EMPTY_CLASS;
52 uint32_t extraFlags = 0U;
53 extraFlags |= PCRE2_EXTRA_ALT_BSUX;
54 utf16_ |= flagUnicode_;
55 if (utf16_) {
56 re_ = RegExp16::CreatePcre2Object(reinterpret_cast<const uint16_t *>(pattern.data()), flags, extraFlags, len);
57 } else {
58 re_ = RegExp8::CreatePcre2Object(pattern.data(), flags, extraFlags, len);
59 }
60 return re_ != nullptr;
61 }
62
Execute(const PandaVector<uint8_t> & str,const int len,const int startOffset)63 RegExpExecResult EtsRegExp::Execute(const PandaVector<uint8_t> &str, const int len, const int startOffset)
64 {
65 RegExpExecResult result;
66 if (utf16_) {
67 result = RegExp16::Execute(re_, reinterpret_cast<const uint16_t *>(str.data()), len, startOffset);
68 } else {
69 result = RegExp8::Execute(re_, str.data(), len, startOffset);
70 }
71
72 return result;
73 }
74
Destroy()75 void EtsRegExp::Destroy()
76 {
77 if (utf16_) {
78 RegExp16::FreePcre2Object(re_);
79 } else {
80 RegExp8::FreePcre2Object(re_);
81 }
82 }
83
SetUnicodeFlag(const char & chr)84 void EtsRegExp::SetUnicodeFlag(const char &chr)
85 {
86 if (chr == 'u') {
87 if (flagUnicode_ || flagVnicode_) {
88 ThrowBadFlagsException();
89 }
90 flagUnicode_ = true;
91 } else if (chr == 'v') {
92 if (flagVnicode_ || flagUnicode_) {
93 ThrowBadFlagsException();
94 }
95 flagVnicode_ = true;
96 }
97 }
98
ThrowBadFlagsException()99 void EtsRegExp::ThrowBadFlagsException()
100 {
101 auto *coroutine = EtsCoroutine::GetCurrent();
102 ThrowEtsException(coroutine, panda_file_items::class_descriptors::UNSUPPORTED_OPERATION_EXCEPTION,
103 "invalid regular expression flags");
104 }
105
SetIfNotSet(bool & flag)106 void EtsRegExp::SetIfNotSet(bool &flag)
107 {
108 if (flag) {
109 ThrowBadFlagsException();
110 }
111 flag = true;
112 }
113
SetFlag(const char & chr)114 void EtsRegExp::SetFlag(const char &chr)
115 {
116 switch (chr) {
117 case 'g':
118 SetIfNotSet(flagGlobal_);
119 break;
120 case 'm':
121 SetIfNotSet(flagMultiline_);
122 break;
123 case 'i':
124 SetIfNotSet(flagCaseInsentitive_);
125 break;
126 case 'y':
127 SetIfNotSet(flagSticky_);
128 break;
129 case 's':
130 SetIfNotSet(flagDotAll_);
131 break;
132 case 'd':
133 SetIfNotSet(flagIndices_);
134 break;
135 default: {
136 SetUnicodeFlag(chr);
137 }
138 }
139 }
140
SetFlags(EtsString * flagsStr)141 void EtsRegExp::SetFlags(EtsString *flagsStr)
142 {
143 auto *coroutine = EtsCoroutine::GetCurrent();
144 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
145 VMHandle<EtsString> flagsHandle(coroutine, flagsStr->GetCoreType());
146 ASSERT(flagsHandle.GetPtr() != nullptr);
147 for (int i = 0; i < flagsHandle.GetPtr()->GetLength(); i++) {
148 SetFlag(flagsHandle.GetPtr()->At(i));
149 }
150 utf16_ |= flagUnicode_;
151 }
152
153 } // namespace ark::ets