• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <algorithm>
17 #include <cstdint>
18 #include <regex>
19 #include "include/mem/panda_string.h"
20 #include "intrinsics.h"
21 #include "libpandabase/utils/logger.h"
22 #include "macros.h"
23 #include "napi/ets_napi.h"
24 #include "runtime/handle_scope-inl.h"
25 #include "runtime/entrypoints/string_index_of.h"
26 #include "plugins/ets/runtime/types/ets_string.h"
27 #include "plugins/ets/runtime/ets_exceptions.h"
28 #include "plugins/ets/runtime/ets_language_context.h"
29 #include "plugins/ets/runtime/ets_panda_file_items.h"
30 
31 #include "unicode/locid.h"
32 #include "unicode/coll.h"
33 #include "unicode/unistr.h"
34 #include "unicode/normalizer2.h"
35 #include "utils/span.h"
36 
37 using icu::Normalizer2;
38 
39 namespace ark::ets::intrinsics {
40 
// Mask applied to a code unit before it is compared with the two surrogate prefixes below.
// NOTE(review): the code units tested with this mask are 16-bit values, so only the 0xfc00 part can match.
constexpr uint32_t CHAR0X1FFC00 = 0x1ffc00U;
// Masked value of a leading surrogate code unit.
constexpr uint16_t CHAR0XD800 = 0xd800U;
// Masked value of a trailing surrogate code unit.
constexpr uint16_t CHAR0XDC00 = 0xdc00U;
44 
StdCoreStringGetChars(EtsString * s,ets_int begin,ets_int end)45 EtsCharArray *StdCoreStringGetChars(EtsString *s, ets_int begin, ets_int end)
46 {
47     ASSERT(s != nullptr);
48     ets_int length = s->GetLength();
49     if (UNLIKELY(begin > end)) {
50         ark::ThrowStringIndexOutOfBoundsException(begin, length);
51         return nullptr;
52     }
53     if (UNLIKELY(begin > length || begin < 0)) {
54         ark::ThrowStringIndexOutOfBoundsException(begin, length);
55         return nullptr;
56     }
57     if (UNLIKELY(end > length)) {
58         ark::ThrowStringIndexOutOfBoundsException(end, length);
59         return nullptr;
60     }
61 
62     auto thread = ManagedThread::GetCurrent();
63     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
64     VMHandle<coretypes::String> sHandle(thread, s->GetCoreType());
65     ets_int n = end - begin;
66     EtsCharArray *charArray = EtsCharArray::Create(n);
67     if (charArray == nullptr || n == 0) {
68         return charArray;
69     }
70     Span<ets_char> out(charArray->GetData<ets_char>(), charArray->GetLength());
71     sHandle.GetPtr()->CopyDataRegionUtf16(&out[0], begin, charArray->GetLength(), sHandle.GetPtr()->GetLength());
72     return charArray;
73 }
74 
StdCoreStringSubstring(EtsString * str,ets_int begin,ets_int end)75 EtsString *StdCoreStringSubstring(EtsString *str, ets_int begin, ets_int end)
76 {
77     ASSERT(str != nullptr);
78     auto indexes = coretypes::String::NormalizeSubStringIndexes(begin, end, str->GetCoreType());
79     if (UNLIKELY(indexes.first == 0 && indexes.second == str->GetLength())) {
80         return str;
81     }
82     ets_int substrLength = indexes.second - indexes.first;
83     return EtsString::FastSubString(str, static_cast<uint32_t>(indexes.first), static_cast<uint32_t>(substrLength));
84 }
85 
StdCoreStringCharAt(EtsString * s,int32_t index)86 uint16_t StdCoreStringCharAt(EtsString *s, int32_t index)
87 {
88     ASSERT(s != nullptr);
89 
90     int32_t length = s->GetLength();
91     if (UNLIKELY(index >= length || index < 0)) {
92         ark::ThrowStringIndexOutOfBoundsException(index, length);
93         return 0;
94     }
95 
96     if (s->IsUtf16()) {
97         Span<uint16_t> sp(s->GetDataUtf16(), length);
98         return sp[index];
99     }
100 
101     Span<uint8_t> sp(s->GetDataMUtf8(), length);
102     return sp[index];
103 }
104 
StdCoreStringGetLength(EtsString * s)105 int32_t StdCoreStringGetLength(EtsString *s)
106 {
107     ASSERT(s != nullptr);
108     return s->GetLength();
109 }
110 
StdCoreStringLength(EtsString * s)111 double StdCoreStringLength(EtsString *s)
112 {
113     ASSERT(s != nullptr);
114     return static_cast<double>(s->GetLength());
115 }
116 
StdCoreStringIsEmpty(EtsString * s)117 EtsBoolean StdCoreStringIsEmpty(EtsString *s)
118 {
119     ASSERT(s != nullptr);
120     return ToEtsBoolean(s->IsEmpty());
121 }
122 
StdCoreStringEquals(EtsString * owner,EtsObject * s)123 uint8_t StdCoreStringEquals(EtsString *owner, EtsObject *s)
124 {
125     if ((owner->AsObject()) == s) {
126         return UINT8_C(1);
127     }
128     if (s == nullptr || !(s->GetClass()->IsStringClass())) {
129         return UINT8_C(0);
130     }
131     return static_cast<uint8_t>(owner->StringsAreEqual(s));
132 }
133 
StdCoreStringMatch(EtsString * thisStr,EtsString * reg)134 EtsString *StdCoreStringMatch(EtsString *thisStr, EtsString *reg)
135 {
136     PandaVector<uint8_t> buf;
137     auto thisS = std::string(thisStr->ConvertToStringView(&buf));
138     auto regex = std::string(reg->ConvertToStringView(&buf));
139 
140     std::regex e(regex);
141     return EtsString::CreateFromMUtf8(std::sregex_iterator(thisS.begin(), thisS.end(), e)->str().c_str());
142 }
143 
StringNormalize(EtsString * str,const Normalizer2 * normalizer)144 EtsString *StringNormalize(EtsString *str, const Normalizer2 *normalizer)
145 {
146     auto coroutine = EtsCoroutine::GetCurrent();
147     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
148 
149     icu::UnicodeString utf16Str;
150     if (str->IsUtf16()) {
151         utf16Str = icu::UnicodeString {str->GetDataUtf16(), static_cast<int32_t>(str->GetUtf16Length())};
152     } else {
153         utf16Str =
154             icu::UnicodeString {utf::Mutf8AsCString(str->GetDataMUtf8()), static_cast<int32_t>(str->GetLength())};
155     }
156 
157     UErrorCode errorCode = U_ZERO_ERROR;
158     utf16Str = normalizer->normalize(utf16Str, errorCode);
159 
160     if (UNLIKELY(U_FAILURE(errorCode))) {
161         std::string message = "Got error in process of normalization: '" + std::string(u_errorName(errorCode)) + "'";
162         ThrowEtsException(coroutine, panda_file_items::class_descriptors::RANGE_ERROR, message);
163         return nullptr;
164     }
165 
166     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(utf16Str.getTerminatedBuffer()),
167                                       utf16Str.length());
168 }
169 
StdCoreStringNormalizeNFC(EtsString * thisStr)170 EtsString *StdCoreStringNormalizeNFC(EtsString *thisStr)
171 {
172     UErrorCode errorCode = U_ZERO_ERROR;
173     auto normalizer = Normalizer2::getNFCInstance(errorCode);
174     if (UNLIKELY(U_FAILURE(errorCode))) {
175         std::string message = "Cannot get NFC normalizer: '" + std::string(u_errorName(errorCode)) + "'";
176         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
177         return nullptr;
178     }
179     return StringNormalize(thisStr, normalizer);
180 }
181 
StdCoreStringNormalizeNFD(EtsString * thisStr)182 EtsString *StdCoreStringNormalizeNFD(EtsString *thisStr)
183 {
184     UErrorCode errorCode = U_ZERO_ERROR;
185     auto normalizer = Normalizer2::getNFDInstance(errorCode);
186     if (UNLIKELY(U_FAILURE(errorCode))) {
187         std::string message = "Cannot get NFD normalizer: '" + std::string(u_errorName(errorCode)) + "'";
188         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
189         return nullptr;
190     }
191     return StringNormalize(thisStr, normalizer);
192 }
193 
StdCoreStringNormalizeNFKC(EtsString * thisStr)194 EtsString *StdCoreStringNormalizeNFKC(EtsString *thisStr)
195 {
196     UErrorCode errorCode = U_ZERO_ERROR;
197     auto normalizer = Normalizer2::getNFKCInstance(errorCode);
198     if (UNLIKELY(U_FAILURE(errorCode))) {
199         std::string message = "Cannot get NFKC normalizer: '" + std::string(u_errorName(errorCode)) + "'";
200         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
201         return nullptr;
202     }
203     return StringNormalize(thisStr, normalizer);
204 }
205 
StdCoreStringNormalizeNFKD(EtsString * thisStr)206 EtsString *StdCoreStringNormalizeNFKD(EtsString *thisStr)
207 {
208     UErrorCode errorCode = U_ZERO_ERROR;
209     auto normalizer = Normalizer2::getNFKDInstance(errorCode);
210     if (UNLIKELY(U_FAILURE(errorCode))) {
211         std::string message = "Cannot get NFKD normalizer: '" + std::string(u_errorName(errorCode)) + "'";
212         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
213         return nullptr;
214     }
215     return StringNormalize(thisStr, normalizer);
216 }
217 
StdCoreStringIsWellFormed(EtsString * thisStr)218 uint8_t StdCoreStringIsWellFormed(EtsString *thisStr)
219 {
220     if (!thisStr->IsUtf16()) {
221         return UINT8_C(1);
222     }
223     auto length = thisStr->GetUtf16Length();
224     auto codeUnits = Span<uint16_t>(thisStr->GetDataUtf16(), length);
225     for (size_t i = 0; i < length; ++i) {
226         uint16_t codeUnit = codeUnits[i];
227         if ((codeUnit & CHAR0X1FFC00) == CHAR0XD800) {
228             // Code unit is a leading surrogate
229             if (i == length - 1) {
230                 return UINT8_C(0);
231             }
232             // Is not trail surrogate
233             if ((codeUnits[i + 1] & CHAR0X1FFC00) != CHAR0XDC00) {
234                 return UINT8_C(0);
235             }
236             // Skip the paired trailing surrogate
237             ++i;
238             // Is trail surrogate
239         } else if ((codeUnit & CHAR0X1FFC00) == CHAR0XDC00) {
240             return UINT8_C(0);
241         }
242     }
243     return UINT8_C(1);
244 }
245 
ToLowerCase(EtsString * thisStr,const icu::Locale & locale)246 EtsString *ToLowerCase(EtsString *thisStr, const icu::Locale &locale)
247 {
248     auto coroutine = EtsCoroutine::GetCurrent();
249     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
250 
251     icu::UnicodeString utf16Str;
252     if (thisStr->IsUtf16()) {
253         utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
254     } else {
255         utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
256                                        static_cast<int32_t>(thisStr->GetLength())};
257     }
258     auto res = utf16Str.toLower(locale);
259     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(res.getTerminatedBuffer()), res.length());
260 }
261 
ToUpperCase(EtsString * thisStr,const icu::Locale & locale)262 EtsString *ToUpperCase(EtsString *thisStr, const icu::Locale &locale)
263 {
264     auto coroutine = EtsCoroutine::GetCurrent();
265     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
266 
267     icu::UnicodeString utf16Str;
268     if (thisStr->IsUtf16()) {
269         utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
270     } else {
271         utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
272                                        static_cast<int32_t>(thisStr->GetLength())};
273     }
274     auto res = utf16Str.toUpper(locale);
275     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(res.getTerminatedBuffer()), res.length());
276 }
277 
ParseSingleBCP47LanguageTag(EtsString * langTag,icu::Locale & locale)278 UErrorCode ParseSingleBCP47LanguageTag(EtsString *langTag, icu::Locale &locale)
279 {
280     if (langTag == nullptr) {
281         locale = icu::Locale::getDefault();
282         return U_ZERO_ERROR;
283     }
284 
285     PandaVector<uint8_t> buf;
286     std::string_view locTag = langTag->ConvertToStringView(&buf);
287     icu::StringPiece sp {locTag.data(), static_cast<int32_t>(locTag.size())};
288     UErrorCode status = U_ZERO_ERROR;
289     locale = icu::Locale::forLanguageTag(sp, status);
290     return status;
291 }
292 
StdCoreStringToUpperCase(EtsString * thisStr)293 EtsString *StdCoreStringToUpperCase(EtsString *thisStr)
294 {
295     return ToUpperCase(thisStr, icu::Locale::getDefault());
296 }
297 
StdCoreStringToLowerCase(EtsString * thisStr)298 EtsString *StdCoreStringToLowerCase(EtsString *thisStr)
299 {
300     return ToLowerCase(thisStr, icu::Locale::getDefault());
301 }
302 
StdCoreStringToLocaleUpperCase(EtsString * thisStr,EtsString * langTag)303 EtsString *StdCoreStringToLocaleUpperCase(EtsString *thisStr, EtsString *langTag)
304 {
305     ASSERT(langTag != nullptr);
306 
307     icu::Locale locale;
308     auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale);
309     if (UNLIKELY(U_FAILURE(localeParseStatus))) {
310         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
311         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
312         return nullptr;
313     }
314     return ToUpperCase(thisStr, locale);
315 }
316 
StdCoreStringToLocaleLowerCase(EtsString * thisStr,EtsString * langTag)317 EtsString *StdCoreStringToLocaleLowerCase(EtsString *thisStr, EtsString *langTag)
318 {
319     ASSERT(langTag != nullptr);
320 
321     icu::Locale locale;
322     auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale);
323     if (UNLIKELY(U_FAILURE(localeParseStatus))) {
324         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
325         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
326         return nullptr;
327     }
328     return ToLowerCase(thisStr, locale);
329 }
330 
StdCoreStringLocaleCmp(EtsString * thisStr,EtsString * cmpStr,EtsString * langTag)331 ets_double StdCoreStringLocaleCmp(EtsString *thisStr, EtsString *cmpStr, EtsString *langTag)
332 {
333     ASSERT(thisStr != nullptr && cmpStr != nullptr);
334 
335     icu::Locale locale;
336     auto status = ParseSingleBCP47LanguageTag(langTag, locale);
337     if (UNLIKELY(U_FAILURE(status))) {
338         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
339         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
340         return 0;
341     }
342 
343     icu::UnicodeString source;
344     if (thisStr->IsUtf16()) {
345         source = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
346     } else {
347         source = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
348                                      static_cast<int32_t>(thisStr->GetLength())};
349     }
350     icu::UnicodeString target;
351     if (cmpStr->IsUtf16()) {
352         target = icu::UnicodeString {cmpStr->GetDataUtf16(), static_cast<int32_t>(cmpStr->GetUtf16Length())};
353     } else {
354         target =
355             icu::UnicodeString {utf::Mutf8AsCString(cmpStr->GetDataMUtf8()), static_cast<int32_t>(cmpStr->GetLength())};
356     }
357     status = U_ZERO_ERROR;
358     std::unique_ptr<icu::Collator> myCollator(icu::Collator::createInstance(locale, status));
359     if (UNLIKELY(U_FAILURE(status))) {
360         icu::UnicodeString dispName;
361         locale.getDisplayName(dispName);
362         std::string localeName;
363         dispName.toUTF8String(localeName);
364         LOG(FATAL, ETS) << "Failed to create the collator for " << localeName;
365     }
366     return myCollator->compare(source, target);
367 }
368 
StdCoreStringIndexOfAfter(EtsString * s,uint16_t ch,ets_int fromIndex)369 ets_int StdCoreStringIndexOfAfter(EtsString *s, uint16_t ch, ets_int fromIndex)
370 {
371     return ark::intrinsics::StringIndexOfU16(s, ch, fromIndex);
372 }
373 
StdCoreStringIndexOf(EtsString * s,uint16_t ch)374 ets_int StdCoreStringIndexOf(EtsString *s, uint16_t ch)
375 {
376     return StdCoreStringIndexOfAfter(s, ch, 0);
377 }
378 
StdCoreStringIndexOfString(EtsString * thisStr,EtsString * patternStr,ets_int fromIndex)379 ets_int StdCoreStringIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex)
380 {
381     ASSERT(thisStr != nullptr && patternStr != nullptr);
382     return thisStr->GetCoreType()->IndexOf(patternStr->GetCoreType(), fromIndex);
383 }
384 
StdCoreStringLastIndexOfString(EtsString * thisStr,EtsString * patternStr,ets_int fromIndex)385 ets_int StdCoreStringLastIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex)
386 {
387     ASSERT(thisStr != nullptr && patternStr != nullptr);
388     // "abc".lastIndexOf("ab", -10) will return 0
389     return thisStr->GetCoreType()->LastIndexOf(patternStr->GetCoreType(), std::max(fromIndex, 0));
390 }
391 
StdCoreStringCodePointToChar(ets_int codePoint)392 ets_int StdCoreStringCodePointToChar(ets_int codePoint)
393 {
394     icu::UnicodeString uniStr((UChar32)codePoint);
395     uint32_t ret = bit_cast<uint16_t>(uniStr.charAt(0));
396     // if codepoint contains a surrogate pair
397     // encode it into int with higher bits being second char
398     if (uniStr.length() > 1) {
399         constexpr uint32_t BITS_IN_CHAR = 16;
400         ret |= static_cast<uint32_t>(bit_cast<uint16_t>(uniStr.charAt(1))) << BITS_IN_CHAR;
401     }
402     return bit_cast<ets_int>(ret);
403 }
404 
StdCoreStringHashCode(EtsString * thisStr)405 int32_t StdCoreStringHashCode(EtsString *thisStr)
406 {
407     ASSERT(thisStr != nullptr);
408     return thisStr->GetCoreType()->GetHashcode();
409 }
410 
StdCoreStringIsCompressed(EtsString * thisStr)411 EtsBoolean StdCoreStringIsCompressed(EtsString *thisStr)
412 {
413     ASSERT(thisStr != nullptr);
414     return ToEtsBoolean(thisStr->GetCoreType()->IsMUtf8());
415 }
416 
StdCoreStringConcat2(EtsString * str1,EtsString * str2)417 EtsString *StdCoreStringConcat2(EtsString *str1, EtsString *str2)
418 {
419     auto s1 = reinterpret_cast<coretypes::String *>(str1);
420     auto s2 = reinterpret_cast<coretypes::String *>(str2);
421     return reinterpret_cast<EtsString *>(CoreStringConcat2(s1, s2));
422 }
423 
StdCoreStringConcat3(EtsString * str1,EtsString * str2,EtsString * str3)424 EtsString *StdCoreStringConcat3(EtsString *str1, EtsString *str2, EtsString *str3)
425 {
426     auto s1 = reinterpret_cast<coretypes::String *>(str1);
427     auto s2 = reinterpret_cast<coretypes::String *>(str2);
428     auto s3 = reinterpret_cast<coretypes::String *>(str3);
429     return reinterpret_cast<EtsString *>(CoreStringConcat3(s1, s2, s3));
430 }
431 
StdCoreStringConcat4(EtsString * str1,EtsString * str2,EtsString * str3,EtsString * str4)432 EtsString *StdCoreStringConcat4(EtsString *str1, EtsString *str2, EtsString *str3, EtsString *str4)
433 {
434     auto s1 = reinterpret_cast<coretypes::String *>(str1);
435     auto s2 = reinterpret_cast<coretypes::String *>(str2);
436     auto s3 = reinterpret_cast<coretypes::String *>(str3);
437     auto s4 = reinterpret_cast<coretypes::String *>(str4);
438     return reinterpret_cast<EtsString *>(CoreStringConcat4(s1, s2, s3, s4));
439 }
440 
StdCoreStringCompareTo(EtsString * str1,EtsString * str2)441 ets_int StdCoreStringCompareTo(EtsString *str1, EtsString *str2)
442 {
443     /* corner cases */
444     if (str1->GetLength() == 0) {
445         return -str2->GetLength();
446     }
447     if (str2->GetLength() == 0) {
448         return str1->GetLength();
449     }
450 
451     /* use the default implementation otherwise */
452     return str1->GetCoreType()->Compare(str2->GetCoreType());
453 }
454 
StdCoreStringTrimLeft(EtsString * thisStr)455 EtsString *StdCoreStringTrimLeft(EtsString *thisStr)
456 {
457     return thisStr->TrimLeft();
458 }
459 
StdCoreStringTrimRight(EtsString * thisStr)460 EtsString *StdCoreStringTrimRight(EtsString *thisStr)
461 {
462     return thisStr->TrimRight();
463 }
464 
StdCoreStringTrim(EtsString * thisStr)465 EtsString *StdCoreStringTrim(EtsString *thisStr)
466 {
467     return thisStr->Trim();
468 }
469 
470 }  // namespace ark::ets::intrinsics
471