• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <cstddef>
17 #include <cstring>
18 #include <limits>
19 
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/hash.h"
22 #include "libpandabase/utils/span.h"
23 #include "runtime/arch/memory_helpers.h"
24 #include "runtime/include/coretypes/array.h"
25 #include "runtime/include/coretypes/string-inl.h"
26 #include "runtime/include/runtime.h"
27 #include "runtime/handle_base-inl.h"
28 #include "runtime/include/panda_vm.h"
29 
30 namespace panda::coretypes {
31 
// Static switch read by the CanBeCompressed* helpers in this file: while it is false they all
// answer "false", so no string is created in the compressed (one byte per character) form.
bool String::compressedStringsEnabled_ = true;
33 
34 /* static */
CreateFromString(String * str,const LanguageContext & ctx,PandaVM * vm)35 String *String::CreateFromString(String *str, const LanguageContext &ctx, PandaVM *vm)
36 {
37     ASSERT(str != nullptr);
38     // allocator may trig gc and move str, need to hold it
39     auto thread = ManagedThread::GetCurrent();
40     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
41     VMHandle<String> strHandle(thread, str);
42     auto string = AllocStringObject(strHandle->GetLength(), !strHandle->IsUtf16(), ctx, vm);
43     if (string == nullptr) {
44         return nullptr;
45     }
46 
47     // retrive str after gc
48     str = strHandle.GetPtr();
49     string->hashcode_ = str->hashcode_;
50 
51     uint32_t length = str->GetLength();
52     // After memcpy we should have a full barrier, so this writes should happen-before barrier
53     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
54     if (str->IsUtf16()) {
55         std::copy_n(reinterpret_cast<uint8_t *>(str->GetDataUtf16()), ComputeDataSizeUtf16(length),
56                     reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
57     } else {
58         std::copy_n(str->GetDataMUtf8(), length, string->GetDataMUtf8());
59     }
60     TSAN_ANNOTATE_IGNORE_WRITES_END();
61     // String is supposed to be a constant object, so all its data should be visible by all threads
62     arch::FullMemoryBarrier();
63 
64     return string;
65 }
66 
67 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,size_t mutf8Length,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable)68 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, size_t mutf8Length, uint32_t utf16Length,
69                                 bool canBeCompressed, const LanguageContext &ctx, PandaVM *vm, bool movable)
70 {
71     auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable);
72     if (string == nullptr) {
73         return nullptr;
74     }
75 
76     ASSERT(string->hashcode_ == 0);
77     // After copying we should have a full barrier, so this writes should happen-before barrier
78     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
79     if (canBeCompressed) {
80         std::copy_n(mutf8Data, utf16Length, string->GetDataMUtf8());
81     } else {
82         utf::ConvertMUtf8ToUtf16(mutf8Data, mutf8Length, string->GetDataUtf16());
83     }
84     TSAN_ANNOTATE_IGNORE_WRITES_END();
85     // String is supposed to be a constant object, so all its data should be visible by all threads
86     arch::FullMemoryBarrier();
87     return string;
88 }
89 
90 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable)91 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, const LanguageContext &ctx, PandaVM *vm,
92                                 bool movable)
93 {
94     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
95     return CreateFromMUtf8(mutf8Data, utf::Mutf8Size(mutf8Data), utf16Length, canBeCompressed, ctx, vm, movable);
96 }
97 
98 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable)99 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed,
100                                 const LanguageContext &ctx, PandaVM *vm, bool movable)
101 {
102     return CreateFromMUtf8(mutf8Data, utf::Mutf8Size(mutf8Data), utf16Length, canBeCompressed, ctx, vm, movable);
103 }
104 
105 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,const LanguageContext & ctx,PandaVM * vm,bool movable)106 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, const LanguageContext &ctx, PandaVM *vm, bool movable)
107 {
108     size_t mutf8Length = utf::Mutf8Size(mutf8Data);
109     size_t utf16Length = utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length);
110     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
111     return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable);
112 }
113 
114 /* static */
CreateFromUtf8(const uint8_t * utf8Data,uint32_t utf8Length,const LanguageContext & ctx,PandaVM * vm,bool movable)115 String *String::CreateFromUtf8(const uint8_t *utf8Data, uint32_t utf8Length, const LanguageContext &ctx, PandaVM *vm,
116                                bool movable)
117 {
118     coretypes::String *s = nullptr;
119     auto utf16Length = utf::Utf8ToUtf16Size(utf8Data, utf8Length);
120     if (CanBeCompressedMUtf8(utf8Data, utf8Length)) {
121         // ascii string have equal representation in utf8 and mutf8 formats
122         s = coretypes::String::CreateFromMUtf8(utf8Data, utf8Length, utf16Length, true, ctx, vm, movable);
123     } else {
124         PandaVector<uint16_t> tmpBuffer(utf16Length);
125         [[maybe_unused]] auto len =
126             utf::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Length, utf16Length, 0);
127         ASSERT(len == utf16Length);
128         s = coretypes::String::CreateFromUtf16(tmpBuffer.data(), utf16Length, ctx, vm, movable);
129     }
130     return s;
131 }
132 
133 /* static */
CreateFromUtf16(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable)134 String *String::CreateFromUtf16(const uint16_t *utf16Data, uint32_t utf16Length, const LanguageContext &ctx,
135                                 PandaVM *vm, bool movable)
136 {
137     bool canBeCompressed = CanBeCompressed(utf16Data, utf16Length);
138     auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable);
139     if (string == nullptr) {
140         return nullptr;
141     }
142 
143     ASSERT(string->hashcode_ == 0);
144     // After copying we should have a full barrier, so this writes should happen-before barrier
145     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
146     if (canBeCompressed) {
147         CopyUtf16AsMUtf8(utf16Data, string->GetDataMUtf8(), utf16Length);
148     } else {
149         std::copy_n(reinterpret_cast<const uint8_t *>(utf16Data), utf16Length << 1UL,
150                     reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
151     }
152     TSAN_ANNOTATE_IGNORE_WRITES_END();
153     // String is supposed to be a constant object, so all its data should be visible by all threads
154     arch::FullMemoryBarrier();
155     return string;
156 }
157 
158 /* static */
CreateEmptyString(const LanguageContext & ctx,PandaVM * vm)159 String *String::CreateEmptyString(const LanguageContext &ctx, PandaVM *vm)
160 {
161     uint16_t data = 0;
162     return CreateFromUtf16(&data, 0, ctx, vm);
163 }
164 
165 /* static */
CopyUtf16AsMUtf8(const uint16_t * utf16From,uint8_t * mutf8To,uint32_t utf16Length)166 void String::CopyUtf16AsMUtf8(const uint16_t *utf16From, uint8_t *mutf8To, uint32_t utf16Length)
167 {
168     Span<const uint16_t> from(utf16From, utf16Length);
169     Span<uint8_t> to(mutf8To, utf16Length);
170     for (uint32_t i = 0; i < utf16Length; i++) {
171         to[i] = from[i];
172     }
173 }
174 
175 // static
CreateNewStringFromChars(uint32_t offset,uint32_t length,Array * chararray,const LanguageContext & ctx,PandaVM * vm)176 String *String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array *chararray, const LanguageContext &ctx,
177                                          PandaVM *vm)
178 {
179     ASSERT(chararray != nullptr);
180     // allocator may trig gc and move array, need to hold it
181     auto thread = ManagedThread::GetCurrent();
182     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
183     VMHandle<Array> arrayHandle(thread, chararray);
184 
185     // There is a potential data race between read of src in CanBeCompressed and write of destination buf
186     // in CopyDataRegionUtf16. The src is a cast from chararray comming from managed object.
187     // Hence the race is reported on managed object, which has a synchronization on a high level.
188     // TSAN does not see such synchronization, thus we ignore such races here.
189     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
190     // NOLINTNEXTLINE(readability-identifier-naming)
191     const uint16_t *src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(chararray->GetData()) + (offset << 1UL));
192     bool canBeCompressed = CanBeCompressed(src, length);
193     TSAN_ANNOTATE_IGNORE_WRITES_END();
194     auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
195     if (string == nullptr) {
196         return nullptr;
197     }
198 
199     // retrieve src since gc may move it
200     src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + (offset << 1UL));
201     ASSERT(string->hashcode_ == 0);
202     // After copying we should have a full barrier, so this writes should happen-before barrier
203     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
204     if (canBeCompressed) {
205         CopyUtf16AsMUtf8(src, string->GetDataMUtf8(), length);
206     } else {
207         std::copy_n(reinterpret_cast<const uint8_t *>(src), length << 1UL,
208                     reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
209     }
210     TSAN_ANNOTATE_IGNORE_WRITES_END();
211     // String is supposed to be a constant object, so all its data should be visible by all threads
212     arch::FullMemoryBarrier();
213     return string;
214 }
215 
216 // static
CreateNewStringFromBytes(uint32_t offset,uint32_t length,uint32_t highByte,Array * bytearray,const LanguageContext & ctx,PandaVM * vm)217 String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint32_t highByte, Array *bytearray,
218                                          const LanguageContext &ctx, PandaVM *vm)
219 {
220     ASSERT(length != 0);
221     ASSERT(bytearray != nullptr);
222     // allocator may trig gc and move array, need to hold it
223     auto thread = ManagedThread::GetCurrent();
224     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
225     VMHandle<Array> arrayHandle(thread, bytearray);
226 
227     constexpr size_t BYTE_MASK = 0xFF;
228 
229     // NOLINTNEXTLINE(readability-identifier-naming)
230     const uint8_t *src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(bytearray->GetData()) + offset);
231     highByte &= BYTE_MASK;
232     bool canBeCompressed = CanBeCompressedMUtf8(src, length) && (highByte == 0);
233     auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
234     if (string == nullptr) {
235         return nullptr;
236     }
237 
238     // retrieve src since gc may move it
239     src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + offset);
240     ASSERT(string->hashcode_ == 0);
241     // After copying we should have a full barrier, so this writes should happen-before barrier
242     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
243     if (canBeCompressed) {
244         Span<const uint8_t> from(src, length);
245         Span<uint8_t> to(string->GetDataMUtf8(), length);
246         for (uint32_t i = 0; i < length; ++i) {
247             to[i] = (from[i] & BYTE_MASK);
248         }
249     } else {
250         Span<const uint8_t> from(src, length);
251         Span<uint16_t> to(string->GetDataUtf16(), length);
252         for (uint32_t i = 0; i < length; ++i) {
253             to[i] = (highByte << 8U) + (from[i] & BYTE_MASK);
254         }
255     }
256     TSAN_ANNOTATE_IGNORE_WRITES_END();
257 
258     // String is supposed to be a constant object, so all its data should be visible by all threads
259     arch::FullMemoryBarrier();
260     return string;
261 }
262 
263 template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)264 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
265 {
266     for (int32_t i = 0; i < count; ++i) {
267         int32_t charDiff = static_cast<int32_t>(lhsSp[i]) - static_cast<int32_t>(rhsSp[i]);
268         if (charDiff != 0) {
269             return charDiff;
270         }
271     }
272     return 0;
273 }
274 
// Compares the first minCount elements of two arrays of the same character type.
// Equal prefixes are skipped one machine word (sizeof(size_t) bytes) at a time; the remaining
// elements, starting with the first word that differs, are compared one by one.
// The words are loaded with std::memcpy, so the inputs need no particular alignment and the
// character data is never accessed through an lvalue of an unrelated type.
// Returns the difference (lhs - rhs) of the first pair of unequal elements, or 0 if all are equal.
template <typename T>
int32_t CompareBytesBlock(T *lstrPt, T *rstrPt, int32_t minCount)
{
    constexpr int32_t BYTES_CNT = sizeof(size_t);
    static_assert(BYTES_CNT >= sizeof(T));
    static_assert(BYTES_CNT % sizeof(T) == 0);
    int32_t totalBytes = minCount * sizeof(T);
    auto *lhsBytes = reinterpret_cast<const uint8_t *>(lstrPt);
    auto *rhsBytes = reinterpret_cast<const uint8_t *>(rstrPt);
    int32_t curBytePos = 0;
    while (curBytePos + BYTES_CNT <= totalBytes) {
        size_t lhsWord = 0;
        size_t rhsWord = 0;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&lhsWord, lhsBytes + curBytePos, BYTES_CNT);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&rhsWord, rhsBytes + curBytePos, BYTES_CNT);
        if (lhsWord != rhsWord) {
            break;
        }
        curBytePos += BYTES_CNT;
    }
    // BYTES_CNT is a multiple of sizeof(T), so the byte position always lies on an element boundary
    int32_t curElementPos = curBytePos / sizeof(T);
    for (int32_t i = curElementPos; i < minCount; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        int32_t charDiff = static_cast<int32_t>(lstrPt[i]) - static_cast<int32_t>(rstrPt[i]);
        if (charDiff != 0) {
            return charDiff;
        }
    }

    return 0;
}
305 
Compare(String * rstr)306 int32_t String::Compare(String *rstr)
307 {
308     String *lstr = this;
309     if (lstr == rstr) {
310         return 0;
311     }
312     ASSERT(lstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
313     ASSERT(rstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
314     auto lstrLeng = static_cast<int32_t>(lstr->GetLength());
315     auto rstrLeng = static_cast<int32_t>(rstr->GetLength());
316     int32_t lengRet = lstrLeng - rstrLeng;
317     int32_t minCount = (lengRet < 0) ? lstrLeng : rstrLeng;
318     bool lstrIsUtf16 = lstr->IsUtf16();
319     bool rstrIsUtf16 = rstr->IsUtf16();
320     if (!lstrIsUtf16 && !rstrIsUtf16) {
321         int32_t charDiff = CompareBytesBlock(lstr->GetDataMUtf8(), rstr->GetDataMUtf8(), minCount);
322         if (charDiff != 0) {
323             return charDiff;
324         }
325     } else if (!lstrIsUtf16) {
326         Span<uint8_t> lhsSp(lstr->GetDataMUtf8(), lstrLeng);
327         Span<uint16_t> rhsSp(rstr->GetDataUtf16(), rstrLeng);
328         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
329         if (charDiff != 0) {
330             return charDiff;
331         }
332     } else if (!rstrIsUtf16) {
333         Span<uint16_t> lhsSp(lstr->GetDataUtf16(), lstrLeng);
334         Span<uint8_t> rhsSp(rstr->GetDataMUtf8(), rstrLeng);
335         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
336         if (charDiff != 0) {
337             return charDiff;
338         }
339     } else {
340         int32_t charDiff = CompareBytesBlock(lstr->GetDataUtf16(), rstr->GetDataUtf16(), minCount);
341         if (charDiff != 0) {
342             return charDiff;
343         }
344     }
345     return lengRet;
346 }
347 
348 template <typename T1, typename T2>
SubstringEquals(Span<const T1> & string,Span<const T2> & pattern,int32_t pos)349 static inline ALWAYS_INLINE int32_t SubstringEquals(Span<const T1> &string, Span<const T2> &pattern, int32_t pos)
350 {
351     ASSERT(pos + pattern.size() <= string.size());
352     if constexpr (std::is_same_v<T1, T2>) {
353         return std::memcmp(string.begin() + pos, pattern.begin(), pattern.size()) == 0;
354     }
355     return std::equal(pattern.begin(), pattern.end(), string.begin() + pos);
356 }
357 
358 /*
359  * Tailed Substring method (based on D. Cantone and S. Faro: Searching for a substring with constant extra-space
360  * complexity). O(nm) worst-case but reported to have good performance both on random and natural language data
361  * Substring s of t is called tailed-substring, if the last character of s does not repeat elsewhere in s
362  */
363 /* static */
// Finds the first occurrence of the pattern rhsSp in lhsSp at a start index in [pos, max].
// lhsSp  - text to search in
// rhsSp  - pattern; it is indexed at size() - 1 unconditionally, so it must not be empty
// pos    - first candidate start index
// max    - last candidate start index (the visible caller passes text length - pattern length)
// Returns the start index of the match, or -1 if there is none.
template <typename T1, typename T2>
static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
{
    // Largest shift found so far and the pattern index of the probe character it belongs to
    int32_t maxTailedLen = 1;
    int32_t tailedEnd = rhsSp.size() - 1;
    int32_t maxTailedEnd = tailedEnd;
    // Phase 1: search in the beginning of string while computing maximal tailed-substring length
    auto searchChar = rhsSp[tailedEnd];
    // shiftedLhs[pos] is the text element aligned with rhsSp[tailedEnd] when the pattern starts at pos
    auto *shiftedLhs = lhsSp.begin() + tailedEnd;
    while (pos <= max) {
        // Cheap filter: only candidates whose probe element matches are compared in full
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos++;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        // Look backwards in the pattern for the previous occurrence of the probe character;
        // tailedStart ends up at its index, or at -1 if there is none
        auto tailedStart = tailedEnd - 1;
        while (tailedStart >= 0 && rhsSp[tailedStart] != searchChar) {
            tailedStart--;
        }
        // Remember the probe position that gives the largest shift
        if (maxTailedLen < tailedEnd - tailedStart) {
            maxTailedLen = tailedEnd - tailedStart;
            maxTailedEnd = tailedEnd;
        }
        // Leave phase 1 once the best shift is at least as large as the current probe index
        if (maxTailedLen >= tailedEnd) {
            break;
        }
        // Skip by the distance to the previous occurrence and move the probe one element to the left
        pos += tailedEnd - tailedStart;
        tailedEnd--;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        shiftedLhs--;
        searchChar = rhsSp[tailedEnd];
    }
    // Phase 2: search in the remainder of string using computed maximal tailed-substring length
    searchChar = rhsSp[maxTailedEnd];
    shiftedLhs = lhsSp.begin() + maxTailedEnd;
    while (pos <= max) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos++;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        // Probe matched but the pattern did not: skip by the shift computed in phase 1
        pos += maxTailedLen;
    }
    return -1;
}
415 
// Search of the last occurrence is equivalent to search of the first occurrence of
// reversed pattern in reversed string
//
// Finds the last occurrence of the pattern rhsSp in lhsSp at a start index in [0, pos].
// lhsSp  - text to search in
// rhsSp  - pattern; it is indexed at 0 unconditionally, so it must not be empty
// pos    - highest candidate start index (the visible caller clamps it to text length - pattern length)
// Returns the start index of the match, or -1 if there is none.
template <typename T1, typename T2>
static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
{
    // Largest shift found so far and the pattern index of the probe character it belongs to
    int32_t maxTailedLen = 1;
    int32_t tailedStart = 0;
    int32_t maxTailedStart = tailedStart;
    auto patternSize = static_cast<int32_t>(rhsSp.size());
    // Phase 1: search in the end of string while computing maximal tailed-substring length
    auto searchChar = rhsSp[tailedStart];
    // shiftedLhs[pos] is the text element aligned with rhsSp[tailedStart] when the pattern starts at pos
    auto *shiftedLhs = lhsSp.begin() + tailedStart;
    while (pos >= 0) {
        // Cheap filter: only candidates whose probe element matches are compared in full
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos--;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        // Look forwards in the pattern for the next occurrence of the probe character;
        // tailedEnd ends up at its index, or at patternSize if there is none
        auto tailedEnd = tailedStart + 1;
        while (tailedEnd < patternSize && rhsSp[tailedEnd] != searchChar) {
            tailedEnd++;
        }
        // Remember the probe position that gives the largest shift
        if (maxTailedLen < tailedEnd - tailedStart) {
            maxTailedLen = tailedEnd - tailedStart;
            maxTailedStart = tailedStart;
        }
        // Leave phase 1 once the best shift covers the rest of the pattern after the probe
        if (maxTailedLen >= patternSize - tailedStart) {
            break;
        }
        // Skip by the distance to the next occurrence and move the probe one element to the right
        pos -= tailedEnd - tailedStart;
        tailedStart++;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        shiftedLhs++;
        searchChar = rhsSp[tailedStart];
    }
    // Phase 2: search in the remainder of string using computed maximal tailed-substring length
    searchChar = rhsSp[maxTailedStart];
    shiftedLhs = lhsSp.begin() + maxTailedStart;
    while (pos >= 0) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos--;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        // Probe matched but the pattern did not: skip by the shift computed in phase 1
        pos -= maxTailedLen;
    }
    return -1;
}
470 
GetCompressionAndLength(panda::coretypes::String * string)471 static inline ALWAYS_INLINE std::pair<bool, int32_t> GetCompressionAndLength(panda::coretypes::String *string)
472 {
473     ASSERT(string->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
474     ASSERT(string != nullptr);
475     return {string->IsMUtf8(), static_cast<int32_t>(string->GetLength())};
476 }
477 
IndexOf(String * rhs,int32_t pos)478 int32_t String::IndexOf(String *rhs, int32_t pos)
479 {
480     String *lhs = this;
481     auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
482     auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
483 
484     if (pos < 0) {
485         pos = 0;
486     }
487 
488     if (rhs_count == 0) {
489         return std::min(lhs_count, pos);
490     }
491 
492     int32_t max = lhs_count - rhs_count;
493     // for pos > max IndexOf impl will return -1
494     if (lhs_utf8 && rhs_utf8) {
495         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
496         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
497         return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
498     } else if (!lhs_utf8 && !rhs_utf8) {  // NOLINT(readability-else-after-return)
499         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
500         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
501         return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
502     } else if (rhs_utf8) {
503         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
504         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
505         return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
506     } else {  // NOLINT(readability-else-after-return)
507         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
508         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
509         return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
510     }
511 }
512 
LastIndexOf(String * rhs,int32_t pos)513 int32_t String::LastIndexOf(String *rhs, int32_t pos)
514 {
515     String *lhs = this;
516     auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
517     auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
518 
519     int32_t max = lhs_count - rhs_count;
520 
521     if (pos > max) {
522         pos = max;
523     }
524 
525     if (pos < 0) {
526         return -1;
527     }
528 
529     if (rhs_count == 0) {
530         return pos;
531     }
532 
533     if (lhs_utf8 && rhs_utf8) {
534         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
535         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
536         return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
537     } else if (!lhs_utf8 && !rhs_utf8) {  // NOLINT(readability-else-after-return)
538         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
539         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
540         return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
541     } else if (rhs_utf8) {
542         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
543         Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
544         return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
545     } else {  // NOLINT(readability-else-after-return)
546         Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
547         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
548         return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
549     }
550 }
551 
552 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Length)553 bool String::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Length)
554 {
555     if (!compressedStringsEnabled_) {
556         return false;
557     }
558     bool isCompressed = true;
559     Span<const uint16_t> data(utf16Data, utf16Length);
560     for (uint32_t i = 0; i < utf16Length; i++) {
561         if (!IsASCIICharacter(data[i])) {
562             isCompressed = false;
563             break;
564         }
565     }
566     return isCompressed;
567 }
568 
569 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length)570 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length)
571 {
572     if (!compressedStringsEnabled_) {
573         return false;
574     }
575     bool isCompressed = true;
576     Span<const uint8_t> data(mutf8Data, mutf8Length);
577     for (uint32_t i = 0; i < mutf8Length; i++) {
578         if (!IsASCIICharacter(data[i])) {
579             isCompressed = false;
580             break;
581         }
582     }
583     return isCompressed;
584 }
585 
586 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data)587 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data)
588 {
589     return compressedStringsEnabled_ ? utf::IsMUtf8OnlySingleBytes(mutf8Data) : false;
590 }
591 
592 /* static */
CanBeCompressedUtf16(const uint16_t * utf16Data,uint32_t utf16Length,uint16_t non)593 bool String::CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non)
594 {
595     if (!compressedStringsEnabled_) {
596         return false;
597     }
598     bool isCompressed = true;
599     Span<const uint16_t> data(utf16Data, utf16Length);
600     for (uint32_t i = 0; i < utf16Length; i++) {
601         if (!IsASCIICharacter(data[i]) && data[i] != non) {
602             isCompressed = false;
603             break;
604         }
605     }
606     return isCompressed;
607 }
608 
609 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length,uint16_t non)610 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non)
611 {
612     if (!compressedStringsEnabled_) {
613         return false;
614     }
615     bool isCompressed = true;
616     Span<const uint8_t> data(mutf8Data, mutf8Length);
617     for (uint32_t i = 0; i < mutf8Length; i++) {
618         if (!IsASCIICharacter(data[i]) && data[i] != non) {
619             isCompressed = false;
620             break;
621         }
622     }
623     return isCompressed;
624 }
625 
626 /* static */
StringsAreEqual(String * str1,String * str2)627 bool String::StringsAreEqual(String *str1, String *str2)
628 {
629     ASSERT(str1 != nullptr);
630     ASSERT(str2 != nullptr);
631 
632     if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength())) {
633         return false;
634     }
635 
636     if (str1->IsUtf16()) {
637         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
638         Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
639         return String::StringsAreEquals(data1, data2);
640     } else {  // NOLINT(readability-else-after-return)
641         Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
642         Span<const uint8_t> data2(str2->GetDataMUtf8(), str1->GetLength());
643         return String::StringsAreEquals(data1, data2);
644     }
645 }
646 
647 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length)648 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length)
649 {
650     if (str1->GetLength() != utf16Length) {
651         return false;
652     }
653     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
654     return StringsAreEqualMUtf8(str1, mutf8Data, utf16Length, canBeCompressed);
655 }
656 
657 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)658 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
659 {
660     bool result = true;
661     if (str1->GetLength() != utf16Length) {
662         result = false;
663     } else {
664         bool str1CanBeCompressed = !str1->IsUtf16();
665         bool data2CanBeCompressed = canBeCompressed;
666         if (str1CanBeCompressed != data2CanBeCompressed) {
667             return false;
668         }
669 
670         ASSERT(str1CanBeCompressed == data2CanBeCompressed);
671         if (str1CanBeCompressed) {
672             Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
673             Span<const uint8_t> data2(mutf8Data, utf16Length);
674             result = String::StringsAreEquals(data1, data2);
675         } else {
676             result = IsMutf8EqualsUtf16(mutf8Data, str1->GetDataUtf16(), str1->GetLength());
677         }
678     }
679     return result;
680 }
681 
682 /* static */
StringsAreEqualUtf16(String * str1,const uint16_t * utf16Data,uint32_t utf16DataLength)683 bool String::StringsAreEqualUtf16(String *str1, const uint16_t *utf16Data, uint32_t utf16DataLength)
684 {
685     bool result = true;
686     if (str1->GetLength() != utf16DataLength) {
687         result = false;
688     } else if (!str1->IsUtf16()) {
689         result = IsMutf8EqualsUtf16(str1->GetDataMUtf8(), str1->GetLength(), utf16Data, utf16DataLength);
690     } else {
691         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
692         Span<const uint16_t> data2(utf16Data, utf16DataLength);
693         result = String::StringsAreEquals(data1, data2);
694     }
695     return result;
696 }
697 
698 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,uint32_t utf8DataLength,const uint16_t * utf16Data,uint32_t utf16DataLength)699 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8DataLength, const uint16_t *utf16Data,
700                                 uint32_t utf16DataLength)
701 {
702     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
703     auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
704     [[maybe_unused]] auto convertedStringSize =
705         utf::ConvertRegionMUtf8ToUtf16(utf8Data, tmpBuffer, utf8DataLength, utf16DataLength, 0);
706     ASSERT(convertedStringSize == utf16DataLength);
707 
708     Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
709     Span<const uint16_t> data2(utf16Data, utf16DataLength);
710     bool result = String::StringsAreEquals(data1, data2);
711     allocator->Delete(tmpBuffer);
712     return result;
713 }
714 
715 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,const uint16_t * utf16Data,uint32_t utf16DataLength)716 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16DataLength)
717 {
718     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
719     auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
720     utf::ConvertMUtf8ToUtf16(utf8Data, utf::Mutf8Size(utf8Data), tmpBuffer);
721 
722     Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
723     Span<const uint16_t> data2(utf16Data, utf16DataLength);
724     bool result = String::StringsAreEquals(data1, data2);
725     allocator->Delete(tmpBuffer);
726     return result;
727 }
728 
729 /* static */
730 template <typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)731 bool String::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
732 {
733     return 0 == std::memcmp(str1.Data(), str2.Data(), str1.SizeBytes());
734 }
735 
ToCharArray(const LanguageContext & ctx)736 Array *String::ToCharArray(const LanguageContext &ctx)
737 {
738     // allocator may trig gc and move 'this', need to hold it
739     auto thread = ManagedThread::GetCurrent();
740     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
741     VMHandle<String> str(thread, this);
742     auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
743     Array *array = Array::Create(klass, GetLength());
744     if (array == nullptr) {
745         return nullptr;
746     }
747 
748     if (str->IsUtf16()) {
749         Span<uint16_t> sp(str->GetDataUtf16(), str->GetLength());
750         for (size_t i = 0; i < sp.size(); i++) {
751             array->Set<uint16_t>(i, sp[i]);
752         }
753     } else {
754         Span<uint8_t> sp(str->GetDataMUtf8(), str->GetLength());
755         for (size_t i = 0; i < sp.size(); i++) {
756             array->Set<uint16_t>(i, sp[i]);
757         }
758     }
759 
760     return array;
761 }
762 
763 /* static */
GetChars(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx)764 Array *String::GetChars(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx)
765 {
766     // allocator may trig gc and move 'src', need to hold it
767     auto thread = ManagedThread::GetCurrent();
768     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
769     VMHandle<String> str(thread, src);
770     auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
771     Array *array = Array::Create(klass, utf16Length);
772     if (array == nullptr) {
773         return nullptr;
774     }
775 
776     if (str->IsUtf16()) {
777         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
778         Span<uint16_t> sp(str->GetDataUtf16() + start, utf16Length);
779         for (size_t i = 0; i < sp.size(); i++) {
780             array->Set<uint16_t>(i, sp[i]);
781         }
782     } else {
783         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
784         Span<uint8_t> sp(str->GetDataMUtf8() + start, utf16Length);
785         for (size_t i = 0; i < sp.size(); i++) {
786             array->Set<uint16_t>(i, sp[i]);
787         }
788     }
789 
790     return array;
791 }
792 
// Computes the string hash over 'size' elements of 'data': hash = hash * 31 + element, evaluated in
// wrapping 32-bit unsigned arithmetic and returned reinterpreted as int32_t.
//
// The elements are read through a plain indexed loop. The previous version declared its Span inside an
// '#if defined(__GNUC__)' block while using it unconditionally, so it could not compile on compilers that
// do not define __GNUC__; iterating directly also removes the need for the diagnostic pragmas.
template <class T>
static int32_t ComputeHashForData(const T *data, size_t size)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    for (size_t i = 0; i < size; ++i) {
        // (hash << 5) - hash is hash * 31
        hash = (hash << SHIFT) - hash + data[i];  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    }
    return static_cast<int32_t>(hash);
}
809 
// Computes the hash (hash = hash * 31 + byte, in wrapping 32-bit unsigned arithmetic) over the bytes of
// a zero-terminated buffer, stopping at the first zero byte.
static int32_t ComputeHashForMutf8(const uint8_t *mutf8Data)
{
    constexpr size_t SHIFT = 5;
    uint32_t acc = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cursor = mutf8Data; *cursor != '\0'; ++cursor) {
        acc = (acc << SHIFT) - acc + *cursor;
    }
    return static_cast<int32_t>(acc);
}
819 
ComputeHashcode()820 uint32_t String::ComputeHashcode()
821 {
822     uint32_t hash;
823     if (compressedStringsEnabled_) {
824         if (!IsUtf16()) {
825             hash = static_cast<uint32_t>(ComputeHashForData(GetDataMUtf8(), GetLength()));
826         } else {
827             hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
828         }
829     } else {
830         ASSERT(static_cast<size_t>(GetLength()) < (std::numeric_limits<size_t>::max() >> 1U));
831         hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
832     }
833     return hash;
834 }
835 
836 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length)837 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length)
838 {
839     bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
840     return ComputeHashcodeMutf8(mutf8Data, utf16Length, canBeCompressed);
841 }
842 
843 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)844 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
845 {
846     uint32_t hash;
847     if (canBeCompressed) {
848         hash = static_cast<uint32_t>(ComputeHashForMutf8(mutf8Data));
849     } else {
850         // NOTE(alovkov): optimize it without allocation a temporary buffer
851         auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
852         auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16Length);
853         utf::ConvertMUtf8ToUtf16(mutf8Data, utf::Mutf8Size(mutf8Data), tmpBuffer);
854         hash = static_cast<uint32_t>(ComputeHashForData(tmpBuffer, utf16Length));
855         allocator->Delete(tmpBuffer);
856     }
857     return hash;
858 }
859 
860 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)861 uint32_t String::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
862 {
863     return ComputeHashForData(utf16Data, length);
864 }
865 
866 /* static */
DoReplace(String * src,uint16_t oldC,uint16_t newC,const LanguageContext & ctx,PandaVM * vm)867 String *String::DoReplace(String *src, uint16_t oldC, uint16_t newC, const LanguageContext &ctx, PandaVM *vm)
868 {
869     ASSERT(src != nullptr);
870     auto length = static_cast<int32_t>(src->GetLength());
871     bool canBeCompressed = IsASCIICharacter(newC);
872     if (src->IsUtf16()) {
873         canBeCompressed = canBeCompressed && CanBeCompressedUtf16(src->GetDataUtf16(), length, oldC);
874     } else {
875         canBeCompressed = canBeCompressed && CanBeCompressedMUtf8(src->GetDataMUtf8(), length, oldC);
876     }
877 
878     // allocator may trig gc and move src, need to hold it
879     auto thread = ManagedThread::GetCurrent();
880     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
881     VMHandle<String> srcHandle(thread, src);
882     auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
883     if (string == nullptr) {
884         return nullptr;
885     }
886 
887     // retrieve src after gc
888     src = srcHandle.GetPtr();
889     ASSERT(string->hashcode_ == 0);
890 
891     // After replacing we should have a full barrier, so this writes should happen-before barrier
892     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
893     if (src->IsUtf16()) {
894         if (canBeCompressed) {
895             auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
896             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
897             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataMUtf8(), replace);
898         } else {
899             auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
900             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
901             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataUtf16(), replace);
902         }
903     } else {
904         if (canBeCompressed) {
905             auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
906             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
907             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataMUtf8(), replace);
908         } else {
909             auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
910             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
911             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataUtf16(), replace);
912         }
913     }
914     TSAN_ANNOTATE_IGNORE_WRITES_END();
915     // String is supposed to be a constant object, so all its data should be visible by all threads
916     arch::FullMemoryBarrier();
917     return string;
918 }
919 
920 /* static */
FastSubString(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm)921 String *String::FastSubString(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx,
922                               PandaVM *vm)
923 {
924     ASSERT(src != nullptr);
925     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
926     bool canBeCompressed = !src->IsUtf16() || CanBeCompressed(src->GetDataUtf16() + start, utf16Length);
927 
928     // allocator may trig gc and move src, need to hold it
929     auto thread = ManagedThread::GetCurrent();
930     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
931     VMHandle<String> srcHandle(thread, src);
932     auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm);
933     if (string == nullptr) {
934         return nullptr;
935     }
936 
937     // retrieve src after gc
938     src = srcHandle.GetPtr();
939     ASSERT(string->hashcode_ == 0);
940 
941     // After copying we should have a full barrier, so this writes should happen-before barrier
942     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
943     if (src->IsUtf16()) {
944         if (canBeCompressed) {
945             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
946             CopyUtf16AsMUtf8(src->GetDataUtf16() + start, string->GetDataMUtf8(), utf16Length);
947         } else {
948             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
949             std::copy_n(reinterpret_cast<const uint8_t *>(src->GetDataUtf16() + start), utf16Length << 1UL,
950                         reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
951         }
952     } else {
953         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
954         std::copy_n(src->GetDataMUtf8() + start, utf16Length, string->GetDataMUtf8());
955     }
956     TSAN_ANNOTATE_IGNORE_WRITES_END();
957     // String is supposed to be a constant object, so all its data should be visible by all threads
958     arch::FullMemoryBarrier();
959     return string;
960 }
961 
962 /* static */
Concat(String * string1,String * string2,const LanguageContext & ctx,PandaVM * vm)963 String *String::Concat(String *string1, String *string2, const LanguageContext &ctx, PandaVM *vm)
964 {
965     ASSERT(string1 != nullptr);
966     ASSERT(string2 != nullptr);
967     // allocator may trig gc and move src, need to hold it
968     auto thread = ManagedThread::GetCurrent();
969     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
970     VMHandle<String> str1Handle(thread, string1);
971     VMHandle<String> str2Handle(thread, string2);
972 
973     uint32_t length1 = string1->GetLength();
974     uint32_t length2 = string2->GetLength();
975     uint32_t newLength = length1 + length2;
976     bool compressed = compressedStringsEnabled_ && (!string1->IsUtf16() && !string2->IsUtf16());
977     auto newString = AllocStringObject(newLength, compressed, ctx, vm);
978     if (UNLIKELY(newString == nullptr)) {
979         return nullptr;
980     }
981 
982     ASSERT(newString->hashcode_ == 0);
983 
984     // retrieve strings after gc
985     string1 = str1Handle.GetPtr();
986     string2 = str2Handle.GetPtr();
987 
988     // After copying we should have a full barrier, so this writes should happen-before barrier
989     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
990     if (compressed) {
991         Span<uint8_t> sp(newString->GetDataMUtf8(), newLength);
992         std::copy_n(string1->GetDataMUtf8(), length1, sp.Data());
993         sp = sp.SubSpan(length1);
994         std::copy_n(string2->GetDataMUtf8(), length2, sp.Data());
995     } else {
996         Span<uint16_t> sp(newString->GetDataUtf16(), newLength);
997         if (!string1->IsUtf16()) {
998             for (uint32_t i = 0; i < length1; ++i) {
999                 sp[i] = string1->At<false>(i);
1000             }
1001         } else {
1002             std::copy_n(reinterpret_cast<uint8_t *>(string1->GetDataUtf16()), length1 << 1U,
1003                         reinterpret_cast<uint8_t *>(sp.Data()));
1004         }
1005         sp = sp.SubSpan(length1);
1006         if (!string2->IsUtf16()) {
1007             for (uint32_t i = 0; i < length2; ++i) {
1008                 sp[i] = string2->At<false>(i);
1009             }
1010         } else {
1011             std::copy_n(reinterpret_cast<uint8_t *>(string2->GetDataUtf16()), length2 << 1U,
1012                         reinterpret_cast<uint8_t *>(sp.Data()));
1013         }
1014     }
1015     TSAN_ANNOTATE_IGNORE_WRITES_END();
1016     // String is supposed to be a constant object, so all its data should be visible by all threads
1017     arch::FullMemoryBarrier();
1018 
1019     return newString;
1020 }
1021 
1022 /* static */
AllocStringObject(size_t length,bool compressed,const LanguageContext & ctx,PandaVM * vm,bool movable)1023 String *String::AllocStringObject(size_t length, bool compressed, const LanguageContext &ctx, PandaVM *vm, bool movable)
1024 {
1025     ASSERT(vm != nullptr);
1026     auto *thread = ManagedThread::GetCurrent();
1027     auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
1028     size_t size = compressed ? String::ComputeSizeMUtf8(length) : String::ComputeSizeUtf16(length);
1029     auto string =
1030         movable
1031             ? reinterpret_cast<String *>(vm->GetHeapManager()->AllocateObject(
1032                   stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT))
1033             : reinterpret_cast<String *>(vm->GetHeapManager()->AllocateNonMovableObject(
1034                   stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT));
1035     if (string != nullptr) {
1036         // After setting length we should have a full barrier, so this write should happens-before barrier
1037         TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
1038         string->SetLength(length, compressed);
1039         string->SetHashcode(0);
1040         TSAN_ANNOTATE_IGNORE_WRITES_END();
1041         // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string
1042         // legth before it's set
1043         arch::FullMemoryBarrier();
1044     }
1045     return string;
1046 }
1047 
1048 }  // namespace panda::coretypes
1049