• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <cstddef>
17 #include <cstring>
18 #include <limits>
19 
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/hash.h"
22 #include "libpandabase/utils/span.h"
23 #include "runtime/arch/memory_helpers.h"
24 #include "runtime/include/coretypes/array.h"
25 #include "runtime/include/coretypes/string-inl.h"
26 #include "runtime/include/runtime.h"
27 #include "runtime/handle_base-inl.h"
28 #include "runtime/include/panda_vm.h"
29 
30 namespace ark::coretypes {
31 
32 bool String::compressedStringsEnabled_ = true;
33 
34 /* static */
CreateFromString(String * str,const LanguageContext & ctx,PandaVM * vm)35 String *String::CreateFromString(String *str, const LanguageContext &ctx, PandaVM *vm)
36 {
37     ASSERT(str != nullptr);
38     // allocator may trig gc and move str, need to hold it
39     auto thread = ManagedThread::GetCurrent();
40     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
41     VMHandle<String> strHandle(thread, str);
42     ASSERT(strHandle.GetPtr() != nullptr);
43     auto string = AllocStringObject(strHandle->GetLength(), !strHandle->IsUtf16(), ctx, vm);
44     if (string == nullptr) {
45         return nullptr;
46     }
47 
48     // retrive str after gc
49     str = strHandle.GetPtr();
50     string->hashcode_ = str->hashcode_;
51 
52     uint32_t length = str->GetLength();
53     // After memcpy we should have a full barrier, so this writes should happen-before barrier
54     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
55     if (str->IsUtf16()) {
56         memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), str->GetDataUtf16(),
57                  ComputeDataSizeUtf16(length));
58     } else {
59         memcpy_s(string->GetDataMUtf8(), string->GetLength(), str->GetDataMUtf8(), length);
60     }
61     TSAN_ANNOTATE_IGNORE_WRITES_END();
62     // String is supposed to be a constant object, so all its data should be visible by all threads
63     arch::FullMemoryBarrier();
64 
65     return string;
66 }
67 
68 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,size_t mutf8Length,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)69 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, size_t mutf8Length, uint32_t utf16Length,
70                                 bool canBeCompressed, const LanguageContext &ctx, PandaVM *vm, bool movable,
71                                 bool pinned)
72 {
73     auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable, pinned);
74     if (string == nullptr) {
75         return nullptr;
76     }
77 
78     ASSERT(string->hashcode_ == 0);
79     // After copying we should have a full barrier, so this writes should happen-before barrier
80     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
81     if (canBeCompressed) {
82         memcpy_s(string->GetDataMUtf8(), string->GetLength(), mutf8Data, utf16Length);
83     } else {
84         utf::ConvertMUtf8ToUtf16(mutf8Data, mutf8Length, string->GetDataUtf16());
85     }
86     TSAN_ANNOTATE_IGNORE_WRITES_END();
87     // String is supposed to be a constant object, so all its data should be visible by all threads
88     arch::FullMemoryBarrier();
89     return string;
90 }
91 
92 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)93 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, const LanguageContext &ctx, PandaVM *vm,
94                                 bool movable, bool pinned)
95 {
96     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
97     auto mutf8Length = utf::Mutf8Size(mutf8Data);
98     ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length));
99     return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
100 }
101 
102 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)103 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed,
104                                 const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned)
105 {
106     auto mutf8Length = utf::Mutf8Size(mutf8Data);
107     ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length));
108     ASSERT(canBeCompressed == CanBeCompressedMUtf8(mutf8Data));
109     return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
110 }
111 
112 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)113 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, const LanguageContext &ctx, PandaVM *vm, bool movable,
114                                 bool pinned)
115 {
116     size_t mutf8Length = utf::Mutf8Size(mutf8Data);
117     size_t utf16Length = utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length);
118     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
119     return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
120 }
121 
122 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)123 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint32_t utf16Length,
124                                 const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned)
125 {
126     ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length));
127     auto canBeCompressed = CanBeCompressedMUtf8(mutf8Data, mutf8Length);
128     return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
129 }
130 
131 /* static */
CreateFromUtf8(const uint8_t * utf8Data,uint32_t utf8Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)132 String *String::CreateFromUtf8(const uint8_t *utf8Data, uint32_t utf8Length, const LanguageContext &ctx, PandaVM *vm,
133                                bool movable, bool pinned)
134 {
135     coretypes::String *s = nullptr;
136     auto utf16Length = utf::Utf8ToUtf16Size(utf8Data, utf8Length);
137     if (CanBeCompressedMUtf8(utf8Data, utf8Length)) {
138         // ascii string have equal representation in utf8 and mutf8 formats
139         s = coretypes::String::CreateFromMUtf8(utf8Data, utf8Length, utf16Length, true, ctx, vm, movable, pinned);
140     } else {
141         PandaVector<uint16_t> tmpBuffer(utf16Length);
142         [[maybe_unused]] auto len =
143             utf::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Length, utf16Length, 0);
144         ASSERT(len == utf16Length);
145         s = coretypes::String::CreateFromUtf16(tmpBuffer.data(), utf16Length, ctx, vm, movable, pinned);
146     }
147     return s;
148 }
149 
150 /* static */
CreateFromUtf16(const uint16_t * utf16Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)151 String *String::CreateFromUtf16(const uint16_t *utf16Data, uint32_t utf16Length, bool canBeCompressed,
152                                 const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned)
153 {
154     ASSERT(canBeCompressed == CanBeCompressed(utf16Data, utf16Length));
155     auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable, pinned);
156     if (string == nullptr) {
157         return nullptr;
158     }
159 
160     ASSERT(string->hashcode_ == 0);
161     // After copying we should have a full barrier, so this writes should happen-before barrier
162     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
163     if (canBeCompressed) {
164         CopyUtf16AsMUtf8(utf16Data, string->GetDataMUtf8(), utf16Length);
165     } else {
166         memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), utf16Data, utf16Length << 1UL);
167     }
168     TSAN_ANNOTATE_IGNORE_WRITES_END();
169     // String is supposed to be a constant object, so all its data should be visible by all threads
170     arch::FullMemoryBarrier();
171     return string;
172 }
173 
174 /* static */
CreateFromUtf16(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)175 String *String::CreateFromUtf16(const uint16_t *utf16Data, uint32_t utf16Length, const LanguageContext &ctx,
176                                 PandaVM *vm, bool movable, bool pinned)
177 {
178     bool compressable = CanBeCompressed(utf16Data, utf16Length);
179     return CreateFromUtf16(utf16Data, utf16Length, compressable, ctx, vm, movable, pinned);
180 }
181 
182 /* static */
CreateEmptyString(const LanguageContext & ctx,PandaVM * vm)183 String *String::CreateEmptyString(const LanguageContext &ctx, PandaVM *vm)
184 {
185     uint16_t data = 0;
186     return CreateFromUtf16(&data, 0, ctx, vm);
187 }
188 
189 /* static */
CopyUtf16AsMUtf8(const uint16_t * utf16From,uint8_t * mutf8To,uint32_t utf16Length)190 void String::CopyUtf16AsMUtf8(const uint16_t *utf16From, uint8_t *mutf8To, uint32_t utf16Length)
191 {
192     Span<const uint16_t> from(utf16From, utf16Length);
193     Span<uint8_t> to(mutf8To, utf16Length);
194     for (uint32_t i = 0; i < utf16Length; i++) {
195         to[i] = from[i];
196     }
197 }
198 
199 // static
CreateNewStringFromChars(uint32_t offset,uint32_t length,Array * chararray,const LanguageContext & ctx,PandaVM * vm)200 String *String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array *chararray, const LanguageContext &ctx,
201                                          PandaVM *vm)
202 {
203     ASSERT(chararray != nullptr);
204     // allocator may trig gc and move array, need to hold it
205     auto thread = ManagedThread::GetCurrent();
206     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
207     VMHandle<Array> arrayHandle(thread, chararray);
208     ASSERT(arrayHandle.GetPtr() != nullptr);
209 
210     // There is a potential data race between read of src in CanBeCompressed and write of destination buf
211     // in CopyDataRegionUtf16. The src is a cast from chararray comming from managed object.
212     // Hence the race is reported on managed object, which has a synchronization on a high level.
213     // TSAN does not see such synchronization, thus we ignore such races here.
214     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
215     // NOLINTNEXTLINE(readability-identifier-naming)
216     const uint16_t *src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(chararray->GetData()) + (offset << 1UL));
217     bool canBeCompressed = CanBeCompressed(src, length);
218     TSAN_ANNOTATE_IGNORE_WRITES_END();
219     auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
220     if (string == nullptr) {
221         return nullptr;
222     }
223 
224     // retrieve src since gc may move it
225     src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + (offset << 1UL));
226     ASSERT(string->hashcode_ == 0);
227     // After copying we should have a full barrier, so this writes should happen-before barrier
228     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
229     if (canBeCompressed) {
230         CopyUtf16AsMUtf8(src, string->GetDataMUtf8(), length);
231     } else {
232         memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), src, length << 1UL);
233     }
234     TSAN_ANNOTATE_IGNORE_WRITES_END();
235     // String is supposed to be a constant object, so all its data should be visible by all threads
236     arch::FullMemoryBarrier();
237     return string;
238 }
239 
240 // static
CreateNewStringFromBytes(uint32_t offset,uint32_t length,uint32_t highByte,Array * bytearray,const LanguageContext & ctx,PandaVM * vm)241 String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint32_t highByte, Array *bytearray,
242                                          const LanguageContext &ctx, PandaVM *vm)
243 {
244     ASSERT(length != 0);
245     ASSERT(bytearray != nullptr);
246     // allocator may trig gc and move array, need to hold it
247     auto thread = ManagedThread::GetCurrent();
248     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
249     VMHandle<Array> arrayHandle(thread, bytearray);
250     ASSERT(arrayHandle.GetPtr() != nullptr);
251 
252     constexpr size_t BYTE_MASK = 0xFF;
253 
254     // NOLINTNEXTLINE(readability-identifier-naming)
255     const uint8_t *src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(bytearray->GetData()) + offset);
256     highByte &= BYTE_MASK;
257     bool canBeCompressed = CanBeCompressedMUtf8(src, length) && (highByte == 0);
258     auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
259     if (string == nullptr) {
260         return nullptr;
261     }
262 
263     // retrieve src since gc may move it
264     src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + offset);
265     ASSERT(string->hashcode_ == 0);
266     // After copying we should have a full barrier, so this writes should happen-before barrier
267     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
268     if (canBeCompressed) {
269         Span<const uint8_t> from(src, length);
270         Span<uint8_t> to(string->GetDataMUtf8(), length);
271         for (uint32_t i = 0; i < length; ++i) {
272             to[i] = (from[i] & BYTE_MASK);
273         }
274     } else {
275         Span<const uint8_t> from(src, length);
276         Span<uint16_t> to(string->GetDataUtf16(), length);
277         for (uint32_t i = 0; i < length; ++i) {
278             to[i] = (highByte << 8U) + (from[i] & BYTE_MASK);
279         }
280     }
281     TSAN_ANNOTATE_IGNORE_WRITES_END();
282 
283     // String is supposed to be a constant object, so all its data should be visible by all threads
284     arch::FullMemoryBarrier();
285     return string;
286 }
287 
288 template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)289 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
290 {
291     for (int32_t i = 0; i < count; ++i) {
292         int32_t charDiff = static_cast<int32_t>(lhsSp[i]) - static_cast<int32_t>(rhsSp[i]);
293         if (charDiff != 0) {
294             return charDiff;
295         }
296     }
297     return 0;
298 }
299 
/**
 * Compares the first minCount elements of two arrays, skipping over equal prefixes one machine word at a time.
 *
 * Words are loaded with memcpy instead of dereferencing a reinterpret_cast'ed size_t pointer, so the
 * function does not depend on the alignment of the inputs and does not alias them through an unrelated type.
 * Loop bounds are kept in elements, so no byte count has to fit into int32_t.
 *
 * @param lstrPt    left-hand elements, at least minCount of them
 * @param rstrPt    right-hand elements, at least minCount of them
 * @param minCount  number of elements to compare; values <= 0 compare nothing
 * @return the difference lstrPt[i] - rstrPt[i] at the first mismatching index, or 0 if there is none
 */
template <typename T>
int32_t CompareBytesBlock(T *lstrPt, T *rstrPt, int32_t minCount)
{
    constexpr size_t BLOCK_BYTES = sizeof(size_t);
    static_assert(BLOCK_BYTES >= sizeof(T));
    static_assert(BLOCK_BYTES % sizeof(T) == 0);
    constexpr auto BLOCK_ELEMENTS = static_cast<int32_t>(BLOCK_BYTES / sizeof(T));

    // Fast path: advance while whole words are equal; stop at the first word that differs
    int32_t pos = 0;
    while (minCount - pos >= BLOCK_ELEMENTS) {
        size_t lhsBlock = 0;
        size_t rhsBlock = 0;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&lhsBlock, lstrPt + pos, BLOCK_BYTES);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&rhsBlock, rstrPt + pos, BLOCK_BYTES);
        if (lhsBlock != rhsBlock) {
            break;
        }
        pos += BLOCK_ELEMENTS;
    }

    // Slow path: locate the exact mismatch inside the differing word, or compare the tail
    for (int32_t i = pos; i < minCount; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        int32_t charDiff = static_cast<int32_t>(lstrPt[i]) - static_cast<int32_t>(rstrPt[i]);
        if (charDiff != 0) {
            return charDiff;
        }
    }

    return 0;
}
330 
Compare(String * rstr)331 int32_t String::Compare(String *rstr)
332 {
333     String *lstr = this;
334     if (lstr == rstr) {
335         return 0;
336     }
337     ASSERT(lstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
338     ASSERT(rstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
339     auto lstrLeng = static_cast<int32_t>(lstr->GetLength());
340     auto rstrLeng = static_cast<int32_t>(rstr->GetLength());
341     int32_t lengRet = lstrLeng - rstrLeng;
342     int32_t minCount = (lengRet < 0) ? lstrLeng : rstrLeng;
343     bool lstrIsUtf16 = lstr->IsUtf16();
344     bool rstrIsUtf16 = rstr->IsUtf16();
345     if (!lstrIsUtf16 && !rstrIsUtf16) {
346         int32_t charDiff = CompareBytesBlock(lstr->GetDataMUtf8(), rstr->GetDataMUtf8(), minCount);
347         if (charDiff != 0) {
348             return charDiff;
349         }
350     } else if (!lstrIsUtf16) {
351         Span<uint8_t> lhsSp(lstr->GetDataMUtf8(), lstrLeng);
352         Span<uint16_t> rhsSp(rstr->GetDataUtf16(), rstrLeng);
353         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
354         if (charDiff != 0) {
355             return charDiff;
356         }
357     } else if (!rstrIsUtf16) {
358         Span<uint16_t> lhsSp(lstr->GetDataUtf16(), lstrLeng);
359         Span<uint8_t> rhsSp(rstr->GetDataMUtf8(), rstrLeng);
360         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
361         if (charDiff != 0) {
362             return charDiff;
363         }
364     } else {
365         int32_t charDiff = CompareBytesBlock(lstr->GetDataUtf16(), rstr->GetDataUtf16(), minCount);
366         if (charDiff != 0) {
367             return charDiff;
368         }
369     }
370     return lengRet;
371 }
372 
373 template <typename T1, typename T2>
SubstringEquals(Span<const T1> & string,Span<const T2> & pattern,int32_t pos)374 static inline ALWAYS_INLINE int32_t SubstringEquals(Span<const T1> &string, Span<const T2> &pattern, int32_t pos)
375 {
376     ASSERT(pos + pattern.size() <= string.size());
377     if constexpr (std::is_same_v<T1, T2>) {
378         return std::memcmp(string.begin() + pos, pattern.begin(), pattern.size()) == 0;
379     }
380     return std::equal(pattern.begin(), pattern.end(), string.begin() + pos);
381 }
382 
383 /*
384  * Tailed Substring method (based on D. Cantone and S. Faro: Searching for a substring with constant extra-space
385  * complexity). O(nm) worst-case but reported to have good performance both on random and natural language data
386  * Substring s of t is called tailed-substring, if the last character of s does not repeat elsewhere in s
387  */
388 /* static */
389 template <typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)390 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
391 {
392     int32_t maxTailedLen = 1;
393     auto tailedEnd = static_cast<int32_t>(rhsSp.size() - 1);
394     int32_t maxTailedEnd = tailedEnd;
395     // Phase 1: search in the beginning of string while computing maximal tailed-substring length
396     auto searchChar = rhsSp[tailedEnd];
397     auto *shiftedLhs = lhsSp.begin() + tailedEnd;
398     while (pos <= max) {
399         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
400         if (searchChar != shiftedLhs[pos]) {
401             pos++;
402             continue;
403         }
404         if (SubstringEquals(lhsSp, rhsSp, pos)) {
405             return pos;
406         }
407         auto tailedStart = tailedEnd - 1;
408         while (tailedStart >= 0 && rhsSp[tailedStart] != searchChar) {
409             tailedStart--;
410         }
411         if (maxTailedLen < tailedEnd - tailedStart) {
412             maxTailedLen = tailedEnd - tailedStart;
413             maxTailedEnd = tailedEnd;
414         }
415         if (maxTailedLen >= tailedEnd) {
416             break;
417         }
418         pos += tailedEnd - tailedStart;
419         tailedEnd--;
420         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
421         shiftedLhs--;
422         searchChar = rhsSp[tailedEnd];
423     }
424     // Phase 2: search in the remainder of string using computed maximal tailed-substring length
425     searchChar = rhsSp[maxTailedEnd];
426     shiftedLhs = lhsSp.begin() + maxTailedEnd;
427     while (pos <= max) {
428         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
429         if (searchChar != shiftedLhs[pos]) {
430             pos++;
431             continue;
432         }
433         if (SubstringEquals(lhsSp, rhsSp, pos)) {
434             return pos;
435         }
436         pos += maxTailedLen;
437     }
438     return -1;
439 }
440 
441 // Search of the last occurence is equivalent to search of the first occurence of
442 // reversed pattern in reversed string
443 template <typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)444 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
445 {
446     int32_t maxTailedLen = 1;
447     int32_t tailedStart = 0;
448     int32_t maxTailedStart = tailedStart;
449     auto patternSize = static_cast<int32_t>(rhsSp.size());
450     // Phase 1: search in the end of string while computing maximal tailed-substring length
451     auto searchChar = rhsSp[tailedStart];
452     auto *shiftedLhs = lhsSp.begin() + tailedStart;
453     while (pos >= 0) {
454         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
455         if (searchChar != shiftedLhs[pos]) {
456             pos--;
457             continue;
458         }
459         if (SubstringEquals(lhsSp, rhsSp, pos)) {
460             return pos;
461         }
462         auto tailedEnd = tailedStart + 1;
463         while (tailedEnd < patternSize && rhsSp[tailedEnd] != searchChar) {
464             tailedEnd++;
465         }
466         if (maxTailedLen < tailedEnd - tailedStart) {
467             maxTailedLen = tailedEnd - tailedStart;
468             maxTailedStart = tailedStart;
469         }
470         if (maxTailedLen >= patternSize - tailedStart) {
471             break;
472         }
473         pos -= tailedEnd - tailedStart;
474         tailedStart++;
475         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
476         shiftedLhs++;
477         searchChar = rhsSp[tailedStart];
478     }
479     // Phase 2: search in the remainder of string using computed maximal tailed-substring length
480     searchChar = rhsSp[maxTailedStart];
481     shiftedLhs = lhsSp.begin() + maxTailedStart;
482     while (pos >= 0) {
483         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
484         if (searchChar != shiftedLhs[pos]) {
485             pos--;
486             continue;
487         }
488         if (SubstringEquals(lhsSp, rhsSp, pos)) {
489             return pos;
490         }
491         pos -= maxTailedLen;
492     }
493     return -1;
494 }
495 
GetCompressionAndLength(ark::coretypes::String * string)496 static inline ALWAYS_INLINE std::pair<bool, int32_t> GetCompressionAndLength(ark::coretypes::String *string)
497 {
498     ASSERT(string->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
499     ASSERT(string != nullptr);
500     return {string->IsMUtf8(), static_cast<int32_t>(string->GetLength())};
501 }
502 
IndexOf(String * rhs,int32_t pos)503 int32_t String::IndexOf(String *rhs, int32_t pos)
504 {
505     String *lhs = this;
506     auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
507     auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
508 
509     if (pos < 0) {
510         pos = 0;
511     }
512 
513     if (rhs_count == 0) {
514         return std::min(lhs_count, pos);
515     }
516 
517     int32_t max = lhs_count - rhs_count;
518     // for pos > max IndexOf impl will return -1
519     if (lhs_utf8 && rhs_utf8) {
520         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
521         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
522         return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
523     } else if (!lhs_utf8 && !rhs_utf8) {  // NOLINT(readability-else-after-return)
524         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
525         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
526         return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
527     } else if (rhs_utf8) {
528         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
529         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
530         return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
531     } else {  // NOLINT(readability-else-after-return)
532         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
533         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
534         return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
535     }
536 }
537 
LastIndexOf(String * rhs,int32_t pos)538 int32_t String::LastIndexOf(String *rhs, int32_t pos)
539 {
540     String *lhs = this;
541     auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
542     auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
543 
544     int32_t max = lhs_count - rhs_count;
545 
546     if (pos > max) {
547         pos = max;
548     }
549 
550     if (pos < 0) {
551         return -1;
552     }
553 
554     if (rhs_count == 0) {
555         return pos;
556     }
557 
558     if (lhs_utf8 && rhs_utf8) {
559         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
560         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
561         return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
562     } else if (!lhs_utf8 && !rhs_utf8) {  // NOLINT(readability-else-after-return)
563         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
564         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
565         return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
566     } else if (rhs_utf8) {
567         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
568         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
569         return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
570     } else {  // NOLINT(readability-else-after-return)
571         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
572         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
573         return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
574     }
575 }
576 
577 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Length)578 bool String::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Length)
579 {
580     if (!compressedStringsEnabled_) {
581         return false;
582     }
583     bool isCompressed = true;
584     Span<const uint16_t> data(utf16Data, utf16Length);
585     for (uint32_t i = 0; i < utf16Length; i++) {
586         if (!IsASCIICharacter(data[i])) {
587             isCompressed = false;
588             break;
589         }
590     }
591     return isCompressed;
592 }
593 
594 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length)595 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length)
596 {
597     if (!compressedStringsEnabled_) {
598         return false;
599     }
600     bool isCompressed = true;
601     Span<const uint8_t> data(mutf8Data, mutf8Length);
602     for (uint32_t i = 0; i < mutf8Length; i++) {
603         if (!IsASCIICharacter(data[i])) {
604             isCompressed = false;
605             break;
606         }
607     }
608     return isCompressed;
609 }
610 
611 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data)612 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data)
613 {
614     return compressedStringsEnabled_ ? utf::IsMUtf8OnlySingleBytes(mutf8Data) : false;
615 }
616 
617 /* static */
CanBeCompressedUtf16(const uint16_t * utf16Data,uint32_t utf16Length,uint16_t non)618 bool String::CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non)
619 {
620     if (!compressedStringsEnabled_) {
621         return false;
622     }
623     bool isCompressed = true;
624     Span<const uint16_t> data(utf16Data, utf16Length);
625     for (uint32_t i = 0; i < utf16Length; i++) {
626         if (!IsASCIICharacter(data[i]) && data[i] != non) {
627             isCompressed = false;
628             break;
629         }
630     }
631     return isCompressed;
632 }
633 
634 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length,uint16_t non)635 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non)
636 {
637     if (!compressedStringsEnabled_) {
638         return false;
639     }
640     bool isCompressed = true;
641     Span<const uint8_t> data(mutf8Data, mutf8Length);
642     for (uint32_t i = 0; i < mutf8Length; i++) {
643         if (!IsASCIICharacter(data[i]) && data[i] != non) {
644             isCompressed = false;
645             break;
646         }
647     }
648     return isCompressed;
649 }
650 
651 /* static */
StringsAreEqual(String * str1,String * str2)652 bool String::StringsAreEqual(String *str1, String *str2)
653 {
654     ASSERT(str1 != nullptr);
655     ASSERT(str2 != nullptr);
656 
657     if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength())) {
658         return false;
659     }
660 
661     if (str1->IsUtf16()) {
662         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
663         Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
664         return String::StringsAreEquals(data1, data2);
665     } else {  // NOLINT(readability-else-after-return)
666         Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
667         Span<const uint8_t> data2(str2->GetDataMUtf8(), str1->GetLength());
668         return String::StringsAreEquals(data1, data2);
669     }
670 }
671 
672 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length)673 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length)
674 {
675     ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data));
676     if (str1->GetLength() != utf16Length) {
677         return false;
678     }
679     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
680     return StringsAreEqualMUtf8(str1, mutf8Data, utf16Length, canBeCompressed);
681 }
682 
683 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)684 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
685 {
686     bool result = true;
687     if (str1->GetLength() != utf16Length) {
688         result = false;
689     } else {
690         bool str1CanBeCompressed = !str1->IsUtf16();
691         bool data2CanBeCompressed = canBeCompressed;
692         if (str1CanBeCompressed != data2CanBeCompressed) {
693             return false;
694         }
695 
696         ASSERT(str1CanBeCompressed == data2CanBeCompressed);
697         if (str1CanBeCompressed) {
698             Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
699             Span<const uint8_t> data2(mutf8Data, utf16Length);
700             result = String::StringsAreEquals(data1, data2);
701         } else {
702             result = IsMutf8EqualsUtf16(mutf8Data, str1->GetDataUtf16(), str1->GetLength());
703         }
704     }
705     return result;
706 }
707 
708 /* static */
StringsAreEqualUtf16(String * str1,const uint16_t * utf16Data,uint32_t utf16DataLength)709 bool String::StringsAreEqualUtf16(String *str1, const uint16_t *utf16Data, uint32_t utf16DataLength)
710 {
711     bool result = true;
712     if (str1->GetLength() != utf16DataLength) {
713         result = false;
714     } else if (!str1->IsUtf16()) {
715         result = IsMutf8EqualsUtf16(str1->GetDataMUtf8(), str1->GetLength(), utf16Data, utf16DataLength);
716     } else {
717         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
718         Span<const uint16_t> data2(utf16Data, utf16DataLength);
719         result = String::StringsAreEquals(data1, data2);
720     }
721     return result;
722 }
723 
724 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,uint32_t utf8DataLength,const uint16_t * utf16Data,uint32_t utf16DataLength)725 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8DataLength, const uint16_t *utf16Data,
726                                 uint32_t utf16DataLength)
727 {
728     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
729     auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
730     [[maybe_unused]] auto convertedStringSize =
731         utf::ConvertRegionMUtf8ToUtf16(utf8Data, tmpBuffer, utf8DataLength, utf16DataLength, 0);
732     ASSERT(convertedStringSize == utf16DataLength);
733 
734     Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
735     Span<const uint16_t> data2(utf16Data, utf16DataLength);
736     bool result = String::StringsAreEquals(data1, data2);
737     allocator->Delete(tmpBuffer);
738     return result;
739 }
740 
741 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,const uint16_t * utf16Data,uint32_t utf16DataLength)742 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16DataLength)
743 {
744     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
745     auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
746     utf::ConvertMUtf8ToUtf16(utf8Data, utf::Mutf8Size(utf8Data), tmpBuffer);
747 
748     Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
749     Span<const uint16_t> data2(utf16Data, utf16DataLength);
750     bool result = String::StringsAreEquals(data1, data2);
751     allocator->Delete(tmpBuffer);
752     return result;
753 }
754 
755 /* static */
756 template <typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)757 bool String::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
758 {
759     return 0 == std::memcmp(str1.Data(), str2.Data(), str1.SizeBytes());
760 }
761 
ToCharArray(const LanguageContext & ctx)762 Array *String::ToCharArray(const LanguageContext &ctx)
763 {
764     // allocator may trig gc and move 'this', need to hold it
765     auto thread = ManagedThread::GetCurrent();
766     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
767     VMHandle<String> str(thread, this);
768     auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
769     Array *array = Array::Create(klass, GetLength());
770     if (array == nullptr) {
771         return nullptr;
772     }
773 
774     if (str->IsUtf16()) {
775         Span<uint16_t> sp(str->GetDataUtf16(), str->GetLength());
776         for (size_t i = 0; i < sp.size(); i++) {
777             array->Set<uint16_t>(i, sp[i]);
778         }
779     } else {
780         Span<uint8_t> sp(str->GetDataMUtf8(), str->GetLength());
781         for (size_t i = 0; i < sp.size(); i++) {
782             array->Set<uint16_t>(i, sp[i]);
783         }
784     }
785 
786     return array;
787 }
788 
789 /* static */
GetChars(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx)790 Array *String::GetChars(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx)
791 {
792     // allocator may trig gc and move 'src', need to hold it
793     auto thread = ManagedThread::GetCurrent();
794     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
795     VMHandle<String> str(thread, src);
796     auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
797     Array *array = Array::Create(klass, utf16Length);
798     if (array == nullptr) {
799         return nullptr;
800     }
801 
802     if (str->IsUtf16()) {
803         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
804         Span<uint16_t> sp(str->GetDataUtf16() + start, utf16Length);
805         for (size_t i = 0; i < sp.size(); i++) {
806             array->Set<uint16_t>(i, sp[i]);
807         }
808     } else {
809         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
810         Span<uint8_t> sp(str->GetDataMUtf8() + start, utf16Length);
811         for (size_t i = 0; i < sp.size(); i++) {
812             array->Set<uint16_t>(i, sp[i]);
813         }
814     }
815 
816     return array;
817 }
818 
/**
 * Computes the polynomial hash h = h * 31 + c over size elements of data, using wrapping 32-bit arithmetic.
 * The raw pointer is indexed directly, so the function does not depend on compiler-specific diagnostic pragmas
 * and builds the same way with every compiler.
 *
 * @param data first element to hash; not dereferenced when size is 0
 * @param size number of elements to hash
 * @return the accumulated hash reinterpreted as int32_t (0 for empty input)
 */
template <class T>
static int32_t ComputeHashForData(const T *data, size_t size)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    for (size_t i = 0; i < size; ++i) {
        // (hash << 5) - hash is hash * 31
        hash = (hash << SHIFT) - hash + data[i];  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    }
    return static_cast<int32_t>(hash);
}
835 
/**
 * Computes the polynomial hash h = h * 31 + b over the bytes of mutf8Data up to, but not including, the first
 * zero byte, using wrapping 32-bit arithmetic.
 *
 * @param mutf8Data zero-terminated byte sequence
 * @return the accumulated hash reinterpreted as int32_t (0 when the first byte is the terminator)
 */
static int32_t ComputeHashForMutf8(const uint8_t *mutf8Data)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cursor = mutf8Data; *cursor != '\0'; ++cursor) {
        // (hash << 5) - hash is hash * 31
        hash = (hash << SHIFT) - hash + *cursor;
    }
    return static_cast<int32_t>(hash);
}
845 
ComputeHashcode()846 uint32_t String::ComputeHashcode()
847 {
848     uint32_t hash;
849     if (compressedStringsEnabled_) {
850         if (!IsUtf16()) {
851             hash = static_cast<uint32_t>(ComputeHashForData(GetDataMUtf8(), GetLength()));
852         } else {
853             hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
854         }
855     } else {
856         ASSERT(static_cast<size_t>(GetLength()) < (std::numeric_limits<size_t>::max() >> 1U));
857         hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
858     }
859     return hash;
860 }
861 
862 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length)863 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length)
864 {
865     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
866     return ComputeHashcodeMutf8(mutf8Data, utf16Length, canBeCompressed);
867 }
868 
869 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)870 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
871 {
872     uint32_t hash;
873     if (canBeCompressed) {
874         hash = static_cast<uint32_t>(ComputeHashForMutf8(mutf8Data));
875     } else {
876         // NOTE(alovkov): optimize it without allocation a temporary buffer
877         auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
878         auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16Length);
879         utf::ConvertMUtf8ToUtf16(mutf8Data, utf::Mutf8Size(mutf8Data), tmpBuffer);
880         hash = static_cast<uint32_t>(ComputeHashForData(tmpBuffer, utf16Length));
881         allocator->Delete(tmpBuffer);
882     }
883     return hash;
884 }
885 
886 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)887 uint32_t String::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
888 {
889     return ComputeHashForData(utf16Data, length);
890 }
891 
892 /* static */
DoReplace(String * src,uint16_t oldC,uint16_t newC,const LanguageContext & ctx,PandaVM * vm)893 String *String::DoReplace(String *src, uint16_t oldC, uint16_t newC, const LanguageContext &ctx, PandaVM *vm)
894 {
895     ASSERT(src != nullptr);
896     auto length = static_cast<int32_t>(src->GetLength());
897     bool canBeCompressed = IsASCIICharacter(newC);
898     if (src->IsUtf16()) {
899         canBeCompressed = canBeCompressed && CanBeCompressedUtf16(src->GetDataUtf16(), length, oldC);
900     } else {
901         canBeCompressed = canBeCompressed && CanBeCompressedMUtf8(src->GetDataMUtf8(), length, oldC);
902     }
903 
904     // allocator may trig gc and move src, need to hold it
905     auto thread = ManagedThread::GetCurrent();
906     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
907     VMHandle<String> srcHandle(thread, src);
908     ASSERT(srcHandle.GetPtr() != nullptr);
909     auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
910     if (string == nullptr) {
911         return nullptr;
912     }
913 
914     // retrieve src after gc
915     src = srcHandle.GetPtr();
916     ASSERT(string->hashcode_ == 0);
917 
918     // After replacing we should have a full barrier, so this writes should happen-before barrier
919     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
920     if (src->IsUtf16()) {
921         if (canBeCompressed) {
922             auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
923             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
924             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataMUtf8(), replace);
925         } else {
926             auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
927             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
928             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataUtf16(), replace);
929         }
930     } else {
931         if (canBeCompressed) {
932             auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
933             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
934             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataMUtf8(), replace);
935         } else {
936             auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
937             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
938             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataUtf16(), replace);
939         }
940     }
941     TSAN_ANNOTATE_IGNORE_WRITES_END();
942     // String is supposed to be a constant object, so all its data should be visible by all threads
943     arch::FullMemoryBarrier();
944     return string;
945 }
946 
947 /* static */
FastSubString(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm)948 String *String::FastSubString(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx,
949                               PandaVM *vm)
950 {
951     ASSERT(src != nullptr);
952     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
953     bool canBeCompressed = !src->IsUtf16() || CanBeCompressed(src->GetDataUtf16() + start, utf16Length);
954 
955     // allocator may trig gc and move src, need to hold it
956     auto thread = ManagedThread::GetCurrent();
957     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
958     VMHandle<String> srcHandle(thread, src);
959     ASSERT(srcHandle.GetPtr() != nullptr);
960     auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm);
961     if (string == nullptr) {
962         return nullptr;
963     }
964 
965     // retrieve src after gc
966     src = srcHandle.GetPtr();
967     ASSERT(string->hashcode_ == 0);
968 
969     // After copying we should have a full barrier, so this writes should happen-before barrier
970     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
971     if (src->IsUtf16()) {
972         if (canBeCompressed) {
973             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
974             CopyUtf16AsMUtf8(src->GetDataUtf16() + start, string->GetDataMUtf8(), utf16Length);
975         } else {
976             memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()),
977                      // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
978                      src->GetDataUtf16() + start, utf16Length << 1UL);
979         }
980     } else {
981         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
982         memcpy_s(string->GetDataMUtf8(), string->GetLength(), src->GetDataMUtf8() + start, utf16Length);
983     }
984     TSAN_ANNOTATE_IGNORE_WRITES_END();
985     // String is supposed to be a constant object, so all its data should be visible by all threads
986     arch::FullMemoryBarrier();
987     return string;
988 }
989 
990 /* static */
Concat(String * string1,String * string2,const LanguageContext & ctx,PandaVM * vm)991 String *String::Concat(String *string1, String *string2, const LanguageContext &ctx, PandaVM *vm)
992 {
993     ASSERT(string1 != nullptr);
994     ASSERT(string2 != nullptr);
995     // allocator may trig gc and move src, need to hold it
996     auto thread = ManagedThread::GetCurrent();
997     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
998     VMHandle<String> str1Handle(thread, string1);
999     VMHandle<String> str2Handle(thread, string2);
1000     ASSERT(str1Handle.GetPtr() != nullptr);
1001     ASSERT(str2Handle.GetPtr() != nullptr);
1002     uint32_t length1 = string1->GetLength();
1003     uint32_t length2 = string2->GetLength();
1004     uint32_t newLength = length1 + length2;
1005     bool compressed = compressedStringsEnabled_ && (!string1->IsUtf16() && !string2->IsUtf16());
1006     auto newString = AllocStringObject(newLength, compressed, ctx, vm);
1007     if (UNLIKELY(newString == nullptr)) {
1008         return nullptr;
1009     }
1010 
1011     ASSERT(newString->hashcode_ == 0);
1012 
1013     // retrieve strings after gc
1014     string1 = str1Handle.GetPtr();
1015     string2 = str2Handle.GetPtr();
1016 
1017     // After copying we should have a full barrier, so this writes should happen-before barrier
1018     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
1019     if (compressed) {
1020         Span<uint8_t> sp(newString->GetDataMUtf8(), newLength);
1021         memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataMUtf8(), length1);
1022         sp = sp.SubSpan(length1);
1023         memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataMUtf8(), length2);
1024     } else {
1025         Span<uint16_t> sp(newString->GetDataUtf16(), newLength);
1026         if (!string1->IsUtf16()) {
1027             for (uint32_t i = 0; i < length1; ++i) {
1028                 sp[i] = string1->At<false>(i);
1029             }
1030         } else {
1031             memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataUtf16(), length1 << 1U);
1032         }
1033         sp = sp.SubSpan(length1);
1034         if (!string2->IsUtf16()) {
1035             for (uint32_t i = 0; i < length2; ++i) {
1036                 sp[i] = string2->At<false>(i);
1037             }
1038         } else {
1039             memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataUtf16(), length2 << 1U);
1040         }
1041     }
1042     TSAN_ANNOTATE_IGNORE_WRITES_END();
1043     // String is supposed to be a constant object, so all its data should be visible by all threads
1044     arch::FullMemoryBarrier();
1045 
1046     return newString;
1047 }
1048 
1049 /* static */
AllocStringObject(size_t length,bool compressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)1050 String *String::AllocStringObject(size_t length, bool compressed, const LanguageContext &ctx, PandaVM *vm, bool movable,
1051                                   bool pinned)
1052 {
1053     ASSERT(vm != nullptr);
1054     auto *thread = ManagedThread::GetCurrent();
1055     auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
1056     size_t size = compressed ? String::ComputeSizeMUtf8(length) : String::ComputeSizeUtf16(length);
1057     auto string =
1058         movable
1059             ? reinterpret_cast<String *>(
1060                   vm->GetHeapManager()->AllocateObject(stringClass, size, DEFAULT_ALIGNMENT, thread,
1061                                                        mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT, pinned))
1062             : reinterpret_cast<String *>(vm->GetHeapManager()->AllocateNonMovableObject(
1063                   // CC-OFFNXT(G.FMT.06) project code style
1064                   stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT));
1065     if (string != nullptr) {
1066         // After setting length we should have a full barrier, so this write should happens-before barrier
1067         TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
1068         string->SetLength(length, compressed);
1069         string->SetHashcode(0);
1070         TSAN_ANNOTATE_IGNORE_WRITES_END();
1071         // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string
1072         // legth before it's set
1073         arch::FullMemoryBarrier();
1074     }
1075     return string;
1076 }
1077 
1078 }  // namespace ark::coretypes
1079