• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <regex>
#include "include/mem/panda_string.h"
#include "intrinsics.h"
#include "libpandabase/utils/logger.h"
#include "macros.h"
#include "napi/ets_napi.h"
#include "runtime/handle_scope-inl.h"
#include "runtime/entrypoints/string_index_of.h"
#include "runtime/arch/memory_helpers.h"
#include "plugins/ets/runtime/types/ets_string.h"
#include "plugins/ets/runtime/types/ets_escompat_array.h"
#include "plugins/ets/runtime/ets_exceptions.h"
#include "plugins/ets/runtime/ets_language_context.h"
#include "plugins/ets/runtime/ets_panda_file_items.h"

#include "unicode/locid.h"
#include "unicode/coll.h"
#include "unicode/unistr.h"
#include "unicode/normalizer2.h"
#include "utils/span.h"
38 
39 using icu::Normalizer2;
40 
41 namespace ark::ets::intrinsics {
42 
// Mask applied to a UTF-16 code unit before comparing it with the surrogate tags below.
// Only bits 10..15 can survive the mask, because a code unit is 16 bits wide.
constexpr uint32_t CHAR0X1FFC00 = 0x1ffc00;
// Masked value of a leading surrogate (code units 0xD800..0xDBFF).
constexpr uint16_t CHAR0XD800 = 0xd800;
// Masked value of a trailing surrogate (code units 0xDC00..0xDFFF).
constexpr uint16_t CHAR0XDC00 = 0xdc00;
46 
StdCoreStringDataAsArray(EtsString * s,ets_int begin,ets_int end,bool isUtf16)47 static ObjectHeader *StdCoreStringDataAsArray(EtsString *s, ets_int begin, ets_int end, bool isUtf16)
48 {
49     ASSERT(s != nullptr);
50     ets_int length = s->GetLength();
51     if (UNLIKELY(begin > end || begin > length || begin < 0)) {
52         ark::ThrowStringIndexOutOfBoundsException(begin, length);
53         return nullptr;
54     }
55     if (UNLIKELY(end > length)) {
56         ark::ThrowStringIndexOutOfBoundsException(end, length);
57         return nullptr;
58     }
59 
60     auto thread = ManagedThread::GetCurrent();
61     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
62     VMHandle<coretypes::String> sHandle(thread, s->GetCoreType());
63     ASSERT(sHandle.GetPtr() != nullptr);
64     ets_int n = end - begin;
65     void *array = nullptr;
66     if (isUtf16) {
67         array = EtsCharArray::Create(n);
68     } else {
69         array = EtsByteArray::Create(n);
70     }
71     if (array == nullptr || n == 0) {
72         return reinterpret_cast<ObjectHeader *>(array);
73     }
74     if (isUtf16) {
75         auto charArray = reinterpret_cast<EtsCharArray *>(array);
76         Span<ets_char> out(charArray->GetData<ets_char>(), charArray->GetLength());
77         sHandle.GetPtr()->CopyDataRegionUtf16(&out[0], begin, charArray->GetLength(), sHandle.GetPtr()->GetLength());
78     } else {
79         auto byteArray = reinterpret_cast<EtsByteArray *>(array);
80         Span<uint8_t> out(byteArray->GetData<uint8_t>(), byteArray->GetLength());
81 
82         /* as we need only one LSB no sophisticated conversion is needed */
83         if (sHandle.GetPtr()->IsUtf16()) {
84             auto in = sHandle.GetPtr()->GetDataUtf16();
85             for (int i = 0; i < n; ++i) {
86                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
87                 out[i] = in[i + begin];
88             }
89         } else {
90             auto in = sHandle.GetPtr()->GetDataMUtf8();
91             for (int i = 0; i < n; ++i) {
92                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
93                 out[i] = in[i + begin];
94             }
95         }
96     }
97     return reinterpret_cast<ObjectHeader *>(array);
98 }
99 
StdCoreStringGetChars(EtsString * s,ets_int begin,ets_int end)100 ObjectHeader *StdCoreStringGetChars(EtsString *s, ets_int begin, ets_int end)
101 {
102     return StdCoreStringDataAsArray(s, begin, end, true);
103 }
104 
StdCoreStringGetBytes(EtsString * s,ets_int begin,ets_int end)105 ObjectHeader *StdCoreStringGetBytes(EtsString *s, ets_int begin, ets_int end)
106 {
107     return StdCoreStringDataAsArray(s, begin, end, false);
108 }
109 
StdCoreStringSubstring(EtsString * str,ets_int begin,ets_int end)110 EtsString *StdCoreStringSubstring(EtsString *str, ets_int begin, ets_int end)
111 {
112     ASSERT(str != nullptr);
113     auto indexes = coretypes::String::NormalizeSubStringIndexes(begin, end, str->GetCoreType());
114     if (UNLIKELY(indexes.first == 0 && indexes.second == str->GetLength())) {
115         return str;
116     }
117     ets_int substrLength = indexes.second - indexes.first;
118     return EtsString::FastSubString(str, static_cast<uint32_t>(indexes.first), static_cast<uint32_t>(substrLength));
119 }
120 
StdCoreStringCharAt(EtsString * s,int32_t index)121 uint16_t StdCoreStringCharAt(EtsString *s, int32_t index)
122 {
123     ASSERT(s != nullptr);
124 
125     int32_t length = s->GetLength();
126     if (UNLIKELY(index >= length || index < 0)) {
127         ark::ThrowStringIndexOutOfBoundsException(index, length);
128         return 0;
129     }
130 
131     if (s->IsUtf16()) {
132         Span<uint16_t> sp(s->GetDataUtf16(), length);
133         return sp[index];
134     }
135 
136     Span<uint8_t> sp(s->GetDataMUtf8(), length);
137     return sp[index];
138 }
139 
StdCoreStringGetLength(EtsString * s)140 int32_t StdCoreStringGetLength(EtsString *s)
141 {
142     ASSERT(s != nullptr);
143     return s->GetLength();
144 }
145 
StdCoreStringLength(EtsString * s)146 double StdCoreStringLength(EtsString *s)
147 {
148     ASSERT(s != nullptr);
149     return static_cast<double>(s->GetLength());
150 }
151 
StdCoreStringIsEmpty(EtsString * s)152 EtsBoolean StdCoreStringIsEmpty(EtsString *s)
153 {
154     ASSERT(s != nullptr);
155     return ToEtsBoolean(s->IsEmpty());
156 }
157 
StdCoreStringEquals(EtsString * owner,EtsObject * s)158 uint8_t StdCoreStringEquals(EtsString *owner, EtsObject *s)
159 {
160     if ((owner->AsObject()) == s) {
161         return UINT8_C(1);
162     }
163     if (s == nullptr || !(s->GetClass()->IsStringClass())) {
164         return UINT8_C(0);
165     }
166     return static_cast<uint8_t>(owner->StringsAreEqual(s));
167 }
168 
StringNormalize(EtsString * str,const Normalizer2 * normalizer)169 EtsString *StringNormalize(EtsString *str, const Normalizer2 *normalizer)
170 {
171     auto coroutine = EtsCoroutine::GetCurrent();
172     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
173 
174     icu::UnicodeString utf16Str;
175     if (str->IsUtf16()) {
176         utf16Str = icu::UnicodeString {str->GetDataUtf16(), static_cast<int32_t>(str->GetUtf16Length())};
177     } else {
178         utf16Str =
179             icu::UnicodeString {utf::Mutf8AsCString(str->GetDataMUtf8()), static_cast<int32_t>(str->GetLength())};
180     }
181 
182     UErrorCode errorCode = U_ZERO_ERROR;
183     utf16Str = normalizer->normalize(utf16Str, errorCode);
184 
185     if (UNLIKELY(U_FAILURE(errorCode))) {
186         std::string message = "Got error in process of normalization: '" + std::string(u_errorName(errorCode)) + "'";
187         ThrowEtsException(coroutine, panda_file_items::class_descriptors::RANGE_ERROR, message);
188         return nullptr;
189     }
190 
191     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(utf16Str.getTerminatedBuffer()),
192                                       utf16Str.length());
193 }
194 
StdCoreStringNormalizeNFC(EtsString * thisStr)195 EtsString *StdCoreStringNormalizeNFC(EtsString *thisStr)
196 {
197     UErrorCode errorCode = U_ZERO_ERROR;
198     auto normalizer = Normalizer2::getNFCInstance(errorCode);
199     if (UNLIKELY(U_FAILURE(errorCode))) {
200         std::string message = "Cannot get NFC normalizer: '" + std::string(u_errorName(errorCode)) + "'";
201         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
202         return nullptr;
203     }
204     return StringNormalize(thisStr, normalizer);
205 }
206 
StdCoreStringNormalizeNFD(EtsString * thisStr)207 EtsString *StdCoreStringNormalizeNFD(EtsString *thisStr)
208 {
209     UErrorCode errorCode = U_ZERO_ERROR;
210     auto normalizer = Normalizer2::getNFDInstance(errorCode);
211     if (UNLIKELY(U_FAILURE(errorCode))) {
212         std::string message = "Cannot get NFD normalizer: '" + std::string(u_errorName(errorCode)) + "'";
213         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
214         return nullptr;
215     }
216     return StringNormalize(thisStr, normalizer);
217 }
218 
StdCoreStringNormalizeNFKC(EtsString * thisStr)219 EtsString *StdCoreStringNormalizeNFKC(EtsString *thisStr)
220 {
221     UErrorCode errorCode = U_ZERO_ERROR;
222     auto normalizer = Normalizer2::getNFKCInstance(errorCode);
223     if (UNLIKELY(U_FAILURE(errorCode))) {
224         std::string message = "Cannot get NFKC normalizer: '" + std::string(u_errorName(errorCode)) + "'";
225         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
226         return nullptr;
227     }
228     return StringNormalize(thisStr, normalizer);
229 }
230 
StdCoreStringNormalizeNFKD(EtsString * thisStr)231 EtsString *StdCoreStringNormalizeNFKD(EtsString *thisStr)
232 {
233     UErrorCode errorCode = U_ZERO_ERROR;
234     auto normalizer = Normalizer2::getNFKDInstance(errorCode);
235     if (UNLIKELY(U_FAILURE(errorCode))) {
236         std::string message = "Cannot get NFKD normalizer: '" + std::string(u_errorName(errorCode)) + "'";
237         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
238         return nullptr;
239     }
240     return StringNormalize(thisStr, normalizer);
241 }
242 
StdCoreStringIsWellFormed(EtsString * thisStr)243 uint8_t StdCoreStringIsWellFormed(EtsString *thisStr)
244 {
245     if (!thisStr->IsUtf16()) {
246         return UINT8_C(1);
247     }
248     auto length = thisStr->GetUtf16Length();
249     auto codeUnits = Span<uint16_t>(thisStr->GetDataUtf16(), length);
250     for (size_t i = 0; i < length; ++i) {
251         uint16_t codeUnit = codeUnits[i];
252         if ((codeUnit & CHAR0X1FFC00) == CHAR0XD800) {
253             // Code unit is a leading surrogate
254             if (i == length - 1) {
255                 return UINT8_C(0);
256             }
257             // Is not trail surrogate
258             if ((codeUnits[i + 1] & CHAR0X1FFC00) != CHAR0XDC00) {
259                 return UINT8_C(0);
260             }
261             // Skip the paired trailing surrogate
262             ++i;
263             // Is trail surrogate
264         } else if ((codeUnit & CHAR0X1FFC00) == CHAR0XDC00) {
265             return UINT8_C(0);
266         }
267     }
268     return UINT8_C(1);
269 }
270 
ToLowerCase(EtsString * thisStr,const icu::Locale & locale)271 EtsString *ToLowerCase(EtsString *thisStr, const icu::Locale &locale)
272 {
273     auto coroutine = EtsCoroutine::GetCurrent();
274     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
275 
276     icu::UnicodeString utf16Str;
277     if (thisStr->IsUtf16()) {
278         utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
279     } else {
280         utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
281                                        static_cast<int32_t>(thisStr->GetLength())};
282     }
283     auto res = utf16Str.toLower(locale);
284     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(res.getTerminatedBuffer()), res.length());
285 }
286 
ToUpperCase(EtsString * thisStr,const icu::Locale & locale)287 EtsString *ToUpperCase(EtsString *thisStr, const icu::Locale &locale)
288 {
289     auto coroutine = EtsCoroutine::GetCurrent();
290     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
291 
292     icu::UnicodeString utf16Str;
293     if (thisStr->IsUtf16()) {
294         utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast<int32_t>(thisStr->GetUtf16Length())};
295     } else {
296         utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()),
297                                        static_cast<int32_t>(thisStr->GetLength())};
298     }
299     auto res = utf16Str.toUpper(locale);
300     return EtsString::CreateFromUtf16(reinterpret_cast<const uint16_t *>(res.getTerminatedBuffer()), res.length());
301 }
302 
ParseSingleBCP47LanguageTag(EtsString * langTag,icu::Locale & locale)303 UErrorCode ParseSingleBCP47LanguageTag(EtsString *langTag, icu::Locale &locale)
304 {
305     if (langTag == nullptr) {
306         locale = icu::Locale::getDefault();
307         return U_ZERO_ERROR;
308     }
309 
310     PandaVector<uint8_t> buf;
311     std::string_view locTag = langTag->ConvertToStringView(&buf);
312     icu::StringPiece sp {locTag.data(), static_cast<int32_t>(locTag.size())};
313     UErrorCode status = U_ZERO_ERROR;
314     locale = icu::Locale::forLanguageTag(sp, status);
315     return status;
316 }
317 
StdCoreStringToUpperCase(EtsString * thisStr)318 EtsString *StdCoreStringToUpperCase(EtsString *thisStr)
319 {
320     return ToUpperCase(thisStr, icu::Locale::getDefault());
321 }
322 
StdCoreStringToLowerCase(EtsString * thisStr)323 EtsString *StdCoreStringToLowerCase(EtsString *thisStr)
324 {
325     return ToLowerCase(thisStr, icu::Locale::getDefault());
326 }
327 
StdCoreStringToLocaleUpperCase(EtsString * thisStr,EtsString * langTag)328 EtsString *StdCoreStringToLocaleUpperCase(EtsString *thisStr, EtsString *langTag)
329 {
330     ASSERT(langTag != nullptr);
331 
332     icu::Locale locale;
333     auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale);
334     if (UNLIKELY(U_FAILURE(localeParseStatus))) {
335         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
336         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
337         return nullptr;
338     }
339     return ToUpperCase(thisStr, locale);
340 }
341 
StdCoreStringToLocaleLowerCase(EtsString * thisStr,EtsString * langTag)342 EtsString *StdCoreStringToLocaleLowerCase(EtsString *thisStr, EtsString *langTag)
343 {
344     ASSERT(langTag != nullptr);
345 
346     icu::Locale locale;
347     auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale);
348     if (UNLIKELY(U_FAILURE(localeParseStatus))) {
349         auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported";
350         ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message);
351         return nullptr;
352     }
353     return ToLowerCase(thisStr, locale);
354 }
355 
StdCoreStringIndexOfAfter(EtsString * s,uint16_t ch,ets_int fromIndex)356 ets_int StdCoreStringIndexOfAfter(EtsString *s, uint16_t ch, ets_int fromIndex)
357 {
358     return ark::intrinsics::StringIndexOfU16(s, ch, fromIndex);
359 }
360 
StdCoreStringIndexOf(EtsString * s,uint16_t ch)361 ets_int StdCoreStringIndexOf(EtsString *s, uint16_t ch)
362 {
363     return StdCoreStringIndexOfAfter(s, ch, 0);
364 }
365 
StdCoreStringIndexOfString(EtsString * thisStr,EtsString * patternStr,ets_int fromIndex)366 ets_int StdCoreStringIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex)
367 {
368     ASSERT(thisStr != nullptr && patternStr != nullptr);
369     return thisStr->GetCoreType()->IndexOf(patternStr->GetCoreType(), fromIndex);
370 }
371 
StdCoreStringLastIndexOfString(EtsString * thisStr,EtsString * patternStr,ets_int fromIndex)372 ets_int StdCoreStringLastIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex)
373 {
374     ASSERT(thisStr != nullptr && patternStr != nullptr);
375     // "abc".lastIndexOf("ab", -10) will return 0
376     return thisStr->GetCoreType()->LastIndexOf(patternStr->GetCoreType(), std::max(fromIndex, 0));
377 }
378 
StdCoreStringCodePointToChar(ets_int codePoint)379 ets_int StdCoreStringCodePointToChar(ets_int codePoint)
380 {
381     icu::UnicodeString uniStr((UChar32)codePoint);
382     uint32_t ret = bit_cast<uint16_t>(uniStr.charAt(0));
383     // if codepoint contains a surrogate pair
384     // encode it into int with higher bits being second char
385     if (uniStr.length() > 1) {
386         constexpr uint32_t BITS_IN_CHAR = 16;
387         ret |= static_cast<uint32_t>(bit_cast<uint16_t>(uniStr.charAt(1))) << BITS_IN_CHAR;
388     }
389     return bit_cast<ets_int>(ret);
390 }
391 
StdCoreStringHashCode(EtsString * thisStr)392 int32_t StdCoreStringHashCode(EtsString *thisStr)
393 {
394     ASSERT(thisStr != nullptr);
395     return thisStr->GetCoreType()->GetHashcode();
396 }
397 
StdCoreStringIsCompressed(EtsString * thisStr)398 EtsBoolean StdCoreStringIsCompressed(EtsString *thisStr)
399 {
400     ASSERT(thisStr != nullptr);
401     return ToEtsBoolean(thisStr->GetCoreType()->IsMUtf8());
402 }
403 
StdCoreStringConcat2(EtsString * str1,EtsString * str2)404 EtsString *StdCoreStringConcat2(EtsString *str1, EtsString *str2)
405 {
406     auto s1 = reinterpret_cast<coretypes::String *>(str1);
407     auto s2 = reinterpret_cast<coretypes::String *>(str2);
408     return reinterpret_cast<EtsString *>(CoreStringConcat2(s1, s2));
409 }
410 
StdCoreStringConcat3(EtsString * str1,EtsString * str2,EtsString * str3)411 EtsString *StdCoreStringConcat3(EtsString *str1, EtsString *str2, EtsString *str3)
412 {
413     auto s1 = reinterpret_cast<coretypes::String *>(str1);
414     auto s2 = reinterpret_cast<coretypes::String *>(str2);
415     auto s3 = reinterpret_cast<coretypes::String *>(str3);
416     return reinterpret_cast<EtsString *>(CoreStringConcat3(s1, s2, s3));
417 }
418 
StdCoreStringConcat4(EtsString * str1,EtsString * str2,EtsString * str3,EtsString * str4)419 EtsString *StdCoreStringConcat4(EtsString *str1, EtsString *str2, EtsString *str3, EtsString *str4)
420 {
421     auto s1 = reinterpret_cast<coretypes::String *>(str1);
422     auto s2 = reinterpret_cast<coretypes::String *>(str2);
423     auto s3 = reinterpret_cast<coretypes::String *>(str3);
424     auto s4 = reinterpret_cast<coretypes::String *>(str4);
425     return reinterpret_cast<EtsString *>(CoreStringConcat4(s1, s2, s3, s4));
426 }
427 
StdCoreStringCompareTo(EtsString * str1,EtsString * str2)428 ets_int StdCoreStringCompareTo(EtsString *str1, EtsString *str2)
429 {
430     /* corner cases */
431     if (str1->GetLength() == 0) {
432         return -str2->GetLength();
433     }
434     if (str2->GetLength() == 0) {
435         return str1->GetLength();
436     }
437 
438     /* use the default implementation otherwise */
439     return str1->GetCoreType()->Compare(str2->GetCoreType());
440 }
441 
StdCoreStringTrimLeft(EtsString * thisStr)442 EtsString *StdCoreStringTrimLeft(EtsString *thisStr)
443 {
444     return thisStr->TrimLeft();
445 }
446 
StdCoreStringTrimRight(EtsString * thisStr)447 EtsString *StdCoreStringTrimRight(EtsString *thisStr)
448 {
449     return thisStr->TrimRight();
450 }
451 
StdCoreStringTrim(EtsString * thisStr)452 EtsString *StdCoreStringTrim(EtsString *thisStr)
453 {
454     return thisStr->Trim();
455 }
456 
StdCoreStringStartsWith(EtsString * thisStr,EtsString * prefix,EtsInt fromIndex)457 EtsBoolean StdCoreStringStartsWith(EtsString *thisStr, EtsString *prefix, EtsInt fromIndex)
458 {
459     ASSERT(thisStr != nullptr);
460     return thisStr->StartsWith(prefix, fromIndex);
461 }
462 
StdCoreStringEndsWith(EtsString * thisStr,EtsString * suffix,EtsInt endIndex)463 EtsBoolean StdCoreStringEndsWith(EtsString *thisStr, EtsString *suffix, EtsInt endIndex)
464 {
465     ASSERT(thisStr != nullptr);
466     return thisStr->EndsWith(suffix, endIndex);
467 }
468 
StdCoreStringFromCharCode(ObjectHeader * array)469 EtsString *StdCoreStringFromCharCode(ObjectHeader *array)
470 {
471     ASSERT(array != nullptr);
472     auto *charCodes = EtsBoxedDoubleArray::FromEtsObject(EtsObject::FromCoreType(array));
473     ASSERT(charCodes->GetData() != nullptr);
474     return EtsString::CreateNewStringFromCharCode(charCodes->GetData());
475 }
476 
StdCoreStringFromCharCodeSingle(EtsDouble charCode)477 EtsString *StdCoreStringFromCharCodeSingle(EtsDouble charCode)
478 {
479     if (LIKELY(Runtime::GetOptions().IsUseStringCaches())) {
480         constexpr double UTF16_CHAR_DIVIDER = 0x10000;
481         auto character = static_cast<uint16_t>(static_cast<int64_t>(std::fmod(charCode, UTF16_CHAR_DIVIDER)));
482         if (character < EtsPlatformTypes::ASCII_CHAR_TABLE_SIZE && coretypes::String::IsASCIICharacter(character)) {
483             auto *cache = PlatformTypes()->GetAsciiCacheTable();
484             return static_cast<EtsString *>(cache->Get(character));
485         }
486     }
487     return EtsString::CreateNewStringFromCharCode(charCode);
488 }
489 
490 /* the allocation routine to create an unitialized string of the given size */
AllocateStringObject(size_t length,bool compressed)491 extern "C" EtsString *AllocateStringObject(size_t length, bool compressed)
492 {
493     auto ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS);
494     auto vm = Runtime::GetCurrent()->GetPandaVM();
495     ASSERT(vm != nullptr);
496     auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
497     size_t size =
498         compressed ? coretypes::String::ComputeSizeMUtf8(length) : coretypes::String::ComputeSizeUtf16(length);
499     auto string = reinterpret_cast<EtsString *>(vm->GetHeapManager()->AllocateObject(
500         stringClass, size, DEFAULT_ALIGNMENT, nullptr, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT));
501     if (string != nullptr) {
502         // After setting length we should have a full barrier, so this write should happens-before barrier
503         TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
504         auto len = ToNativePtr<uint32_t>(ToUintPtr(string) + coretypes::String::GetLengthOffset());
505         auto hashcode = ToNativePtr<uint32_t>(ToUintPtr(string) + coretypes::String::GetHashcodeOffset());
506         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
507         len[0] = compressed ? (length << 1U) : (length << 1U) | 1U;
508         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
509         hashcode[0] = 0U;
510         TSAN_ANNOTATE_IGNORE_WRITES_END();
511         // Witout full memory barrier it is possible that architectures with
512         // weak memory order can try fetching string legth before it's set
513         arch::FullMemoryBarrier();
514     }
515     return string;
516 }
517 
StdCoreStringRepeat(EtsString * str,EtsInt count)518 EtsString *StdCoreStringRepeat(EtsString *str, EtsInt count)
519 {
520     auto length = str->GetLength();
521 
522     if (UNLIKELY(count < 0)) {
523         PandaString message = "repeat: count is negative";
524         auto coroutine = EtsCoroutine::GetCurrent();
525         ThrowEtsException(coroutine, panda_file_items::class_descriptors::RANGE_ERROR, message);
526         return nullptr;
527     }
528 
529     if (length == 0 || count == 0) {
530         return EtsString::CreateFromUtf8(nullptr, 0);
531     }
532 
533     auto thread = ManagedThread::GetCurrent();
534     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
535     VMHandle<coretypes::String> sHandle(thread, str->GetCoreType());
536 
537     int size = length * count;
538     auto compressed = str->GetCoreType()->IsMUtf8();
539     auto rep = AllocateStringObject(size, compressed);
540     if (UNLIKELY(rep == nullptr)) {
541         PandaString message = "repeat: memory allocation failed";
542         auto coroutine = EtsCoroutine::GetCurrent();
543         ThrowEtsException(coroutine, panda_file_items::class_descriptors::OUT_OF_MEMORY_ERROR, message);
544         return nullptr;
545     }
546 
547     if (compressed) {
548         auto strData = sHandle.GetPtr()->GetDataMUtf8();
549         auto repData = rep->GetDataMUtf8();
550         for (int i = 0; i < count; ++i) {
551             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
552             std::copy_n(strData, length, repData + i * length);
553         }
554     } else {
555         auto strData = sHandle.GetPtr()->GetDataUtf16();
556         auto repData = rep->GetDataUtf16();
557         for (int i = 0; i < count; ++i) {
558             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
559             std::copy_n(strData, length, repData + i * length);
560         }
561     }
562     return rep;
563 }
564 
StdCoreStringGet(EtsString * str,EtsInt index)565 uint16_t StdCoreStringGet(EtsString *str, EtsInt index)
566 {
567     return StdCoreStringCharAt(str, index);
568 }
569 
570 }  // namespace ark::ets::intrinsics
571