• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <algorithm>
17 #include <cstdint>
18 #include <regex>
19 #include "include/mem/panda_string.h"
20 #include "intrinsics.h"
21 #include "libpandabase/utils/logger.h"
22 #include "macros.h"
23 #include "napi/ets_napi.h"
24 #include "runtime/handle_scope-inl.h"
25 #include "runtime/entrypoints/string_index_of.h"
26 #include "plugins/ets/runtime/types/ets_string.h"
27 #include "plugins/ets/runtime/ets_exceptions.h"
28 #include "plugins/ets/runtime/ets_language_context.h"
29 #include "plugins/ets/runtime/ets_panda_file_items.h"
30 
31 #include "unicode/locid.h"
32 #include "unicode/coll.h"
33 #include "unicode/unistr.h"
34 #include "unicode/normalizer2.h"
35 #include "utils/span.h"
36 
37 using icu::Normalizer2;
38 
39 namespace panda::ets::intrinsics {
40 
// Mask and tag values used to classify UTF-16 code units as surrogates:
// a code unit masked with CHAR0X1FFC00 equals CHAR0XD800 for a leading surrogate
// and CHAR0XDC00 for a trailing surrogate.
constexpr uint32_t CHAR0X1FFC00 = 0x1ffc00;
constexpr uint16_t CHAR0XD800 = 0xd800;
constexpr uint16_t CHAR0XDC00 = 0xdc00;
44 
StdCoreStringGetChars(EtsString * s,ets_int begin,ets_int end)45 EtsCharArray *StdCoreStringGetChars(EtsString *s, ets_int begin, ets_int end)
46 {
47     ASSERT(s != nullptr);
48     ets_int length = s->GetLength();
49     if (UNLIKELY(begin > end)) {
50         panda::ThrowStringIndexOutOfBoundsException(begin, length);
51         return nullptr;
52     }
53     if (UNLIKELY(begin > length || begin < 0)) {
54         panda::ThrowStringIndexOutOfBoundsException(begin, length);
55         return nullptr;
56     }
57     if (UNLIKELY(end > length)) {
58         panda::ThrowStringIndexOutOfBoundsException(end, length);
59         return nullptr;
60     }
61 
62     auto thread = ManagedThread::GetCurrent();
63     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
64     VMHandle<coretypes::String> sHandle(thread, s->GetCoreType());
65     ets_int n = end - begin;
66     EtsCharArray *charArray = EtsCharArray::Create(n);
67     if (charArray == nullptr || n == 0) {
68         return charArray;
69     }
70     Span<ets_char> out(charArray->GetData<ets_char>(), charArray->GetLength());
71     sHandle.GetPtr()->CopyDataRegionUtf16(&out[0], begin, charArray->GetLength(), sHandle.GetPtr()->GetLength());
72     return charArray;
73 }
74 
StdCoreStringSubstring(EtsString * str,ets_int begin,ets_int end)75 EtsString *StdCoreStringSubstring(EtsString *str, ets_int begin, ets_int end)
76 {
77     ASSERT(str != nullptr);
78     auto indexes = coretypes::String::NormalizeSubStringIndexes(begin, end, str->GetCoreType());
79     if (UNLIKELY(indexes.first == 0 && indexes.second == str->GetLength())) {
80         return str;
81     }
82     ets_int substrLength = indexes.second - indexes.first;
83     return EtsString::FastSubString(str, static_cast<uint32_t>(indexes.first), static_cast<uint32_t>(substrLength));
84 }
85 
StdCoreStringCharAt(EtsString * s,int32_t index)86 uint16_t StdCoreStringCharAt(EtsString *s, int32_t index)
87 {
88     ASSERT(s != nullptr);
89 
90     int32_t length = s->GetLength();
91     if (UNLIKELY(index >= length || index < 0)) {
92         panda::ThrowStringIndexOutOfBoundsException(index, length);
93         return 0;
94     }
95 
96     if (s->IsUtf16()) {
97         Span<uint16_t> sp(s->GetDataUtf16(), length);
98         return sp[index];
99     }
100 
101     Span<uint8_t> sp(s->GetDataMUtf8(), length);
102     return sp[index];
103 }
104 
StdCoreStringGetLength(EtsString * s)105 int32_t StdCoreStringGetLength(EtsString *s)
106 {
107     ASSERT(s != nullptr);
108     return s->GetLength();
109 }
110 
StdCoreStringIsEmpty(EtsString * s)111 EtsBoolean StdCoreStringIsEmpty(EtsString *s)
112 {
113     ASSERT(s != nullptr);
114     return ToEtsBoolean(s->IsEmpty());
115 }
116 
StdCoreStringEquals(EtsString * owner,EtsObject * s)117 uint8_t StdCoreStringEquals(EtsString *owner, EtsObject *s)
118 {
119     if ((owner->AsObject()) == s) {
120         return UINT8_C(1);
121     }
122     if (s == nullptr || !(s->GetClass()->IsStringClass())) {
123         return UINT8_C(0);
124     }
125     return static_cast<uint8_t>(owner->StringsAreEqual(s));
126 }
127 
StdCoreStringMatch(EtsString * thisStr,EtsString * reg)128 EtsString *StdCoreStringMatch(EtsString *thisStr, EtsString *reg)
129 {
130     PandaVector<uint8_t> buf;
131     auto thisS = std::string(thisStr->ConvertToStringView(&buf));
132     auto regex = std::string(reg->ConvertToStringView(&buf));
133 
134     std::regex e(regex);
135     return EtsString::CreateFromMUtf8(std::sregex_iterator(thisS.begin(), thisS.end(), e)->str().c_str());
136 }
137 
StringNormalize(EtsString * str,const Normalizer2 * normalizer)138 EtsString *StringNormalize(EtsString *str, const Normalizer2 *normalizer)
139 {
140     auto coroutine = EtsCoroutine::GetCurrent();
141     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
142 
143     icu::UnicodeString utf16Str;
144     if (str->IsUtf16()) {
145         utf16Str = icu::UnicodeString {str->GetDataUtf16(), static_cast<int32_t>(str->GetUtf16Length())};
146     } else {
147         utf16Str =
148             icu::UnicodeString {utf::Mutf8AsCString(str->GetDataMUtf8()), static_cast<int32_t>(str->GetLength())};
149     }
150 
151     UErrorCode errorCode = U_ZERO_ERROR;
152     utf16Str = normalizer->normalize(utf16Str, errorCode);
153 
154     if (UNLIKELY(U_FAILURE(errorCode))) {
155         std::string message = "Got error in process of normalization: '" + std::string(u_errorName(errorCode)) + "'";
156         ThrowEtsException(coroutine, panda_file_items::class_descriptors::RANGE_ERROR, message);
157         return nullptr;
158     }
159 
160     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(utf16Str.getTerminatedBuffer()),
161                                       utf16Str.length());
162 }
163 
StdCoreStringNormalizeNFC(EtsString * thisStr)164 EtsString *StdCoreStringNormalizeNFC(EtsString *thisStr)
165 {
166     UErrorCode errorCode = U_ZERO_ERROR;
167     auto normalizer = Normalizer2::getNFCInstance(errorCode);
168     if (UNLIKELY(U_FAILURE(errorCode))) {
169         std::string message = "Cannot get NFC normalizer: '" + std::string(u_errorName(errorCode)) + "'";
170         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
171         return nullptr;
172     }
173     return StringNormalize(thisStr, normalizer);
174 }
175 
StdCoreStringNormalizeNFD(EtsString * thisStr)176 EtsString *StdCoreStringNormalizeNFD(EtsString *thisStr)
177 {
178     UErrorCode errorCode = U_ZERO_ERROR;
179     auto normalizer = Normalizer2::getNFDInstance(errorCode);
180     if (UNLIKELY(U_FAILURE(errorCode))) {
181         std::string message = "Cannot get NFD normalizer: '" + std::string(u_errorName(errorCode)) + "'";
182         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
183         return nullptr;
184     }
185     return StringNormalize(thisStr, normalizer);
186 }
187 
StdCoreStringNormalizeNFKC(EtsString * thisStr)188 EtsString *StdCoreStringNormalizeNFKC(EtsString *thisStr)
189 {
190     UErrorCode errorCode = U_ZERO_ERROR;
191     auto normalizer = Normalizer2::getNFKCInstance(errorCode);
192     if (UNLIKELY(U_FAILURE(errorCode))) {
193         std::string message = "Cannot get NFKC normalizer: '" + std::string(u_errorName(errorCode)) + "'";
194         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
195         return nullptr;
196     }
197     return StringNormalize(thisStr, normalizer);
198 }
199 
StdCoreStringNormalizeNFKD(EtsString * thisStr)200 EtsString *StdCoreStringNormalizeNFKD(EtsString *thisStr)
201 {
202     UErrorCode errorCode = U_ZERO_ERROR;
203     auto normalizer = Normalizer2::getNFKDInstance(errorCode);
204     if (UNLIKELY(U_FAILURE(errorCode))) {
205         std::string message = "Cannot get NFKD normalizer: '" + std::string(u_errorName(errorCode)) + "'";
206         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
207         return nullptr;
208     }
209     return StringNormalize(thisStr, normalizer);
210 }
211 
StdCoreStringIsWellFormed(EtsString * thisStr)212 uint8_t StdCoreStringIsWellFormed(EtsString *thisStr)
213 {
214     if (!thisStr->IsUtf16()) {
215         return UINT8_C(1);
216     }
217     auto length = thisStr->GetUtf16Length();
218     auto codeUnits = Span<uint16_t>(thisStr->GetDataUtf16(), length);
219     for (size_t i = 0; i < length; ++i) {
220         uint16_t codeUnit = codeUnits[i];
221         if ((codeUnit & CHAR0X1FFC00) == CHAR0XD800) {
222             // Code unit is a leading surrogate
223             if (i == length - 1) {
224                 return UINT8_C(0);
225             }
226             // Is not trail surrogate
227             if ((codeUnits[i + 1] & CHAR0X1FFC00) != CHAR0XDC00) {
228                 return UINT8_C(0);
229             }
230             // Skip the paired trailing surrogate
231             ++i;
232             // Is trail surrogate
233         } else if ((codeUnit & CHAR0X1FFC00) == CHAR0XDC00) {
234             return UINT8_C(0);
235         }
236     }
237     return UINT8_C(1);
238 }
239 
ToLowerCase(EtsString * thisStr,const icu::Locale & locale)240 EtsString *ToLowerCase(EtsString *thisStr, const icu::Locale &locale)
241 {
242     auto coroutine = EtsCoroutine::GetCurrent();
243     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
244 
245     icu::UnicodeString utf16Str;
246     if (thisStr->IsUtf16()) {
247         utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
248     } else {
249         utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
250                                        static_cast<int32_t>(thisStr->GetLength())};
251     }
252     auto res = utf16Str.toLower(locale);
253     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(res.getTerminatedBuffer()), res.length());
254 }
255 
ToUpperCase(EtsString * thisStr,const icu::Locale & locale)256 EtsString *ToUpperCase(EtsString *thisStr, const icu::Locale &locale)
257 {
258     auto coroutine = EtsCoroutine::GetCurrent();
259     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
260 
261     icu::UnicodeString utf16Str;
262     if (thisStr->IsUtf16()) {
263         utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
264     } else {
265         utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
266                                        static_cast<int32_t>(thisStr->GetLength())};
267     }
268     auto res = utf16Str.toUpper(locale);
269     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(res.getTerminatedBuffer()), res.length());
270 }
271 
ParseSingleBCP47LanguageTag(EtsString * langTag,icu::Locale & locale)272 UErrorCode ParseSingleBCP47LanguageTag(EtsString *langTag, icu::Locale &locale)
273 {
274     PandaVector<uint8_t> buf;
275     std::string_view locTag = langTag->ConvertToStringView(&buf);
276     icu::StringPiece sp {locTag.data(), static_cast<int32_t>(locTag.size())};
277     UErrorCode status = U_ZERO_ERROR;
278     locale = icu::Locale::forLanguageTag(sp, status);
279     return status;
280 }
281 
StdCoreStringToUpperCase(EtsString * thisStr)282 EtsString *StdCoreStringToUpperCase(EtsString *thisStr)
283 {
284     return ToUpperCase(thisStr, icu::Locale::getDefault());
285 }
286 
StdCoreStringToLowerCase(EtsString * thisStr)287 EtsString *StdCoreStringToLowerCase(EtsString *thisStr)
288 {
289     return ToLowerCase(thisStr, icu::Locale::getDefault());
290 }
291 
StdCoreStringToLocaleUpperCase(EtsString * thisStr,EtsString * langTag)292 EtsString *StdCoreStringToLocaleUpperCase(EtsString *thisStr, EtsString *langTag)
293 {
294     ASSERT(langTag != nullptr);
295 
296     icu::Locale locale;
297     auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale);
298     if (UNLIKELY(U_FAILURE(localeParseStatus))) {
299         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
300         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
301         return nullptr;
302     }
303     return ToUpperCase(thisStr, locale);
304 }
305 
StdCoreStringToLocaleLowerCase(EtsString * thisStr,EtsString * langTag)306 EtsString *StdCoreStringToLocaleLowerCase(EtsString *thisStr, EtsString *langTag)
307 {
308     ASSERT(langTag != nullptr);
309 
310     icu::Locale locale;
311     auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale);
312     if (UNLIKELY(U_FAILURE(localeParseStatus))) {
313         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
314         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
315         return nullptr;
316     }
317     return ToLowerCase(thisStr, locale);
318 }
319 
StdCoreStringLocaleCmp(EtsString * thisStr,EtsString * cmpStr,EtsString * langTag)320 ets_short StdCoreStringLocaleCmp(EtsString *thisStr, EtsString *cmpStr, EtsString *langTag)
321 {
322     ASSERT(thisStr != nullptr && cmpStr != nullptr && langTag != nullptr);
323 
324     icu::Locale locale;
325     auto status = ParseSingleBCP47LanguageTag(langTag, locale);
326     if (UNLIKELY(U_FAILURE(status))) {
327         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
328         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
329         return 0;
330     }
331 
332     icu::UnicodeString source;
333     if (thisStr->IsUtf16()) {
334         source = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
335     } else {
336         source = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
337                                      static_cast<int32_t>(thisStr->GetLength())};
338     }
339     icu::UnicodeString target;
340     if (cmpStr->IsUtf16()) {
341         target = icu::UnicodeString {cmpStr->GetDataUtf16(), static_cast<int32_t>(cmpStr->GetUtf16Length())};
342     } else {
343         target =
344             icu::UnicodeString {utf::Mutf8AsCString(cmpStr->GetDataMUtf8()), static_cast<int32_t>(cmpStr->GetLength())};
345     }
346     status = U_ZERO_ERROR;
347     std::unique_ptr<icu::Collator> myCollator(icu::Collator::createInstance(locale, status));
348     if (UNLIKELY(U_FAILURE(status))) {
349         icu::UnicodeString dispName;
350         locale.getDisplayName(dispName);
351         std::string localeName;
352         dispName.toUTF8String(localeName);
353         LOG(FATAL, ETS) << "Failed to create the collator for " << localeName;
354     }
355     return myCollator->compare(source, target);
356 }
357 
StdCoreStringIndexOfAfter(EtsString * s,uint16_t ch,ets_int fromIndex)358 ets_int StdCoreStringIndexOfAfter(EtsString *s, uint16_t ch, ets_int fromIndex)
359 {
360     return panda::intrinsics::StringIndexOfU16(s, ch, fromIndex);
361 }
362 
StdCoreStringIndexOf(EtsString * s,uint16_t ch)363 ets_int StdCoreStringIndexOf(EtsString *s, uint16_t ch)
364 {
365     return StdCoreStringIndexOfAfter(s, ch, 0);
366 }
367 
StdCoreStringIndexOfString(EtsString * thisStr,EtsString * patternStr,ets_int fromIndex)368 ets_int StdCoreStringIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex)
369 {
370     ASSERT(thisStr != nullptr && patternStr != nullptr);
371     return thisStr->GetCoreType()->IndexOf(patternStr->GetCoreType(), fromIndex);
372 }
373 
StdCoreStringLastIndexOfString(EtsString * thisStr,EtsString * patternStr,ets_int fromIndex)374 ets_int StdCoreStringLastIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex)
375 {
376     ASSERT(thisStr != nullptr && patternStr != nullptr);
377     // "abc".lastIndexOf("ab", -10) will return 0
378     return thisStr->GetCoreType()->LastIndexOf(patternStr->GetCoreType(), std::max(fromIndex, 0));
379 }
380 
StdCoreStringCodePointToChar(ets_int codePoint)381 ets_char StdCoreStringCodePointToChar(ets_int codePoint)
382 {
383     icu::UnicodeString uniStr((UChar32)codePoint);
384     return static_cast<ets_char>(uniStr.charAt(0));
385 }
386 
StdCoreStringHashCode(EtsString * thisStr)387 int32_t StdCoreStringHashCode(EtsString *thisStr)
388 {
389     ASSERT(thisStr != nullptr);
390     return thisStr->GetCoreType()->GetHashcode();
391 }
392 
StdCoreStringIsCompressed(EtsString * thisStr)393 EtsBoolean StdCoreStringIsCompressed(EtsString *thisStr)
394 {
395     ASSERT(thisStr != nullptr);
396     return ToEtsBoolean(thisStr->GetCoreType()->IsMUtf8());
397 }
398 
399 }  // namespace panda::ets::intrinsics
400