1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <cstddef>
17 #include <cstring>
18 #include <limits>
19
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/hash.h"
22 #include "libpandabase/utils/span.h"
23 #include "runtime/arch/memory_helpers.h"
24 #include "runtime/include/coretypes/array.h"
25 #include "runtime/include/coretypes/string-inl.h"
26 #include "runtime/include/runtime.h"
27 #include "runtime/handle_base-inl.h"
28 #include "runtime/include/panda_vm.h"
29
30 namespace panda::coretypes {
31
// Switch read by the CanBeCompressed* helpers and by ComputeHashcode() in this file:
// while it is false, every CanBeCompressed* helper reports "not compressible".
bool String::compressedStringsEnabled_ = true;
33
34 /* static */
CreateFromString(String * str,const LanguageContext & ctx,PandaVM * vm)35 String *String::CreateFromString(String *str, const LanguageContext &ctx, PandaVM *vm)
36 {
37 ASSERT(str != nullptr);
38 // allocator may trig gc and move str, need to hold it
39 auto thread = ManagedThread::GetCurrent();
40 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
41 VMHandle<String> strHandle(thread, str);
42 auto string = AllocStringObject(strHandle->GetLength(), !strHandle->IsUtf16(), ctx, vm);
43 if (string == nullptr) {
44 return nullptr;
45 }
46
47 // retrive str after gc
48 str = strHandle.GetPtr();
49 string->hashcode_ = str->hashcode_;
50
51 uint32_t length = str->GetLength();
52 // After memcpy we should have a full barrier, so this writes should happen-before barrier
53 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
54 if (str->IsUtf16()) {
55 std::copy_n(reinterpret_cast<uint8_t *>(str->GetDataUtf16()), ComputeDataSizeUtf16(length),
56 reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
57 } else {
58 std::copy_n(str->GetDataMUtf8(), length, string->GetDataMUtf8());
59 }
60 TSAN_ANNOTATE_IGNORE_WRITES_END();
61 // String is supposed to be a constant object, so all its data should be visible by all threads
62 arch::FullMemoryBarrier();
63
64 return string;
65 }
66
67 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,size_t mutf8Length,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable)68 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, size_t mutf8Length, uint32_t utf16Length,
69 bool canBeCompressed, const LanguageContext &ctx, PandaVM *vm, bool movable)
70 {
71 auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable);
72 if (string == nullptr) {
73 return nullptr;
74 }
75
76 ASSERT(string->hashcode_ == 0);
77 // After copying we should have a full barrier, so this writes should happen-before barrier
78 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
79 if (canBeCompressed) {
80 std::copy_n(mutf8Data, utf16Length, string->GetDataMUtf8());
81 } else {
82 utf::ConvertMUtf8ToUtf16(mutf8Data, mutf8Length, string->GetDataUtf16());
83 }
84 TSAN_ANNOTATE_IGNORE_WRITES_END();
85 // String is supposed to be a constant object, so all its data should be visible by all threads
86 arch::FullMemoryBarrier();
87 return string;
88 }
89
90 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable)91 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, const LanguageContext &ctx, PandaVM *vm,
92 bool movable)
93 {
94 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
95 return CreateFromMUtf8(mutf8Data, utf::Mutf8Size(mutf8Data), utf16Length, canBeCompressed, ctx, vm, movable);
96 }
97
98 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable)99 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed,
100 const LanguageContext &ctx, PandaVM *vm, bool movable)
101 {
102 return CreateFromMUtf8(mutf8Data, utf::Mutf8Size(mutf8Data), utf16Length, canBeCompressed, ctx, vm, movable);
103 }
104
105 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,const LanguageContext & ctx,PandaVM * vm,bool movable)106 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, const LanguageContext &ctx, PandaVM *vm, bool movable)
107 {
108 size_t mutf8Length = utf::Mutf8Size(mutf8Data);
109 size_t utf16Length = utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length);
110 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
111 return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable);
112 }
113
114 /* static */
CreateFromUtf8(const uint8_t * utf8Data,uint32_t utf8Length,const LanguageContext & ctx,PandaVM * vm,bool movable)115 String *String::CreateFromUtf8(const uint8_t *utf8Data, uint32_t utf8Length, const LanguageContext &ctx, PandaVM *vm,
116 bool movable)
117 {
118 coretypes::String *s = nullptr;
119 auto utf16Length = utf::Utf8ToUtf16Size(utf8Data, utf8Length);
120 if (CanBeCompressedMUtf8(utf8Data, utf8Length)) {
121 // ascii string have equal representation in utf8 and mutf8 formats
122 s = coretypes::String::CreateFromMUtf8(utf8Data, utf8Length, utf16Length, true, ctx, vm, movable);
123 } else {
124 PandaVector<uint16_t> tmpBuffer(utf16Length);
125 [[maybe_unused]] auto len =
126 utf::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Length, utf16Length, 0);
127 ASSERT(len == utf16Length);
128 s = coretypes::String::CreateFromUtf16(tmpBuffer.data(), utf16Length, ctx, vm, movable);
129 }
130 return s;
131 }
132
133 /* static */
CreateFromUtf16(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable)134 String *String::CreateFromUtf16(const uint16_t *utf16Data, uint32_t utf16Length, const LanguageContext &ctx,
135 PandaVM *vm, bool movable)
136 {
137 bool canBeCompressed = CanBeCompressed(utf16Data, utf16Length);
138 auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable);
139 if (string == nullptr) {
140 return nullptr;
141 }
142
143 ASSERT(string->hashcode_ == 0);
144 // After copying we should have a full barrier, so this writes should happen-before barrier
145 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
146 if (canBeCompressed) {
147 CopyUtf16AsMUtf8(utf16Data, string->GetDataMUtf8(), utf16Length);
148 } else {
149 std::copy_n(reinterpret_cast<const uint8_t *>(utf16Data), utf16Length << 1UL,
150 reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
151 }
152 TSAN_ANNOTATE_IGNORE_WRITES_END();
153 // String is supposed to be a constant object, so all its data should be visible by all threads
154 arch::FullMemoryBarrier();
155 return string;
156 }
157
158 /* static */
CreateEmptyString(const LanguageContext & ctx,PandaVM * vm)159 String *String::CreateEmptyString(const LanguageContext &ctx, PandaVM *vm)
160 {
161 uint16_t data = 0;
162 return CreateFromUtf16(&data, 0, ctx, vm);
163 }
164
165 /* static */
CopyUtf16AsMUtf8(const uint16_t * utf16From,uint8_t * mutf8To,uint32_t utf16Length)166 void String::CopyUtf16AsMUtf8(const uint16_t *utf16From, uint8_t *mutf8To, uint32_t utf16Length)
167 {
168 Span<const uint16_t> from(utf16From, utf16Length);
169 Span<uint8_t> to(mutf8To, utf16Length);
170 for (uint32_t i = 0; i < utf16Length; i++) {
171 to[i] = from[i];
172 }
173 }
174
175 // static
CreateNewStringFromChars(uint32_t offset,uint32_t length,Array * chararray,const LanguageContext & ctx,PandaVM * vm)176 String *String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array *chararray, const LanguageContext &ctx,
177 PandaVM *vm)
178 {
179 ASSERT(chararray != nullptr);
180 // allocator may trig gc and move array, need to hold it
181 auto thread = ManagedThread::GetCurrent();
182 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
183 VMHandle<Array> arrayHandle(thread, chararray);
184
185 // There is a potential data race between read of src in CanBeCompressed and write of destination buf
186 // in CopyDataRegionUtf16. The src is a cast from chararray comming from managed object.
187 // Hence the race is reported on managed object, which has a synchronization on a high level.
188 // TSAN does not see such synchronization, thus we ignore such races here.
189 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
190 // NOLINTNEXTLINE(readability-identifier-naming)
191 const uint16_t *src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(chararray->GetData()) + (offset << 1UL));
192 bool canBeCompressed = CanBeCompressed(src, length);
193 TSAN_ANNOTATE_IGNORE_WRITES_END();
194 auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
195 if (string == nullptr) {
196 return nullptr;
197 }
198
199 // retrieve src since gc may move it
200 src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + (offset << 1UL));
201 ASSERT(string->hashcode_ == 0);
202 // After copying we should have a full barrier, so this writes should happen-before barrier
203 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
204 if (canBeCompressed) {
205 CopyUtf16AsMUtf8(src, string->GetDataMUtf8(), length);
206 } else {
207 std::copy_n(reinterpret_cast<const uint8_t *>(src), length << 1UL,
208 reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
209 }
210 TSAN_ANNOTATE_IGNORE_WRITES_END();
211 // String is supposed to be a constant object, so all its data should be visible by all threads
212 arch::FullMemoryBarrier();
213 return string;
214 }
215
216 // static
CreateNewStringFromBytes(uint32_t offset,uint32_t length,uint32_t highByte,Array * bytearray,const LanguageContext & ctx,PandaVM * vm)217 String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint32_t highByte, Array *bytearray,
218 const LanguageContext &ctx, PandaVM *vm)
219 {
220 ASSERT(length != 0);
221 ASSERT(bytearray != nullptr);
222 // allocator may trig gc and move array, need to hold it
223 auto thread = ManagedThread::GetCurrent();
224 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
225 VMHandle<Array> arrayHandle(thread, bytearray);
226
227 constexpr size_t BYTE_MASK = 0xFF;
228
229 // NOLINTNEXTLINE(readability-identifier-naming)
230 const uint8_t *src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(bytearray->GetData()) + offset);
231 highByte &= BYTE_MASK;
232 bool canBeCompressed = CanBeCompressedMUtf8(src, length) && (highByte == 0);
233 auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
234 if (string == nullptr) {
235 return nullptr;
236 }
237
238 // retrieve src since gc may move it
239 src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + offset);
240 ASSERT(string->hashcode_ == 0);
241 // After copying we should have a full barrier, so this writes should happen-before barrier
242 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
243 if (canBeCompressed) {
244 Span<const uint8_t> from(src, length);
245 Span<uint8_t> to(string->GetDataMUtf8(), length);
246 for (uint32_t i = 0; i < length; ++i) {
247 to[i] = (from[i] & BYTE_MASK);
248 }
249 } else {
250 Span<const uint8_t> from(src, length);
251 Span<uint16_t> to(string->GetDataUtf16(), length);
252 for (uint32_t i = 0; i < length; ++i) {
253 to[i] = (highByte << 8U) + (from[i] & BYTE_MASK);
254 }
255 }
256 TSAN_ANNOTATE_IGNORE_WRITES_END();
257
258 // String is supposed to be a constant object, so all its data should be visible by all threads
259 arch::FullMemoryBarrier();
260 return string;
261 }
262
263 template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)264 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
265 {
266 for (int32_t i = 0; i < count; ++i) {
267 int32_t charDiff = static_cast<int32_t>(lhsSp[i]) - static_cast<int32_t>(rhsSp[i]);
268 if (charDiff != 0) {
269 return charDiff;
270 }
271 }
272 return 0;
273 }
274
/**
 * Compares the first `minCount` elements of two arrays of the same element type.
 *
 * Equal leading data is skipped in blocks of sizeof(size_t) bytes; the remaining elements
 * (starting with the first block that differs) are then compared one by one.
 *
 * The block comparison uses std::memcmp on byte pointers instead of dereferencing the data
 * through `size_t *`: reading uint8_t/uint16_t storage through a size_t lvalue violates the
 * aliasing rules and requires size_t alignment, which the callers' pointers need not have.
 * Byte offsets are kept in size_t so that `minCount * sizeof(T)` cannot overflow int32_t.
 *
 * @param lstrPt   left-hand data, at least `minCount` elements
 * @param rstrPt   right-hand data, at least `minCount` elements
 * @param minCount number of elements to compare; values <= 0 compare nothing
 * @return difference (left - right) of the first pair of unequal elements, or 0 if none differ
 */
template <typename T>
int32_t CompareBytesBlock(T *lstrPt, T *rstrPt, int32_t minCount)
{
    constexpr size_t BLOCK_SIZE = sizeof(size_t);
    static_assert(BLOCK_SIZE >= sizeof(T));
    static_assert(BLOCK_SIZE % sizeof(T) == 0);
    if (minCount <= 0) {
        return 0;
    }

    const auto elementCount = static_cast<size_t>(minCount);
    const size_t totalBytes = elementCount * sizeof(T);
    const auto *lhsBytes = reinterpret_cast<const unsigned char *>(lstrPt);
    const auto *rhsBytes = reinterpret_cast<const unsigned char *>(rstrPt);

    // Skip whole blocks while they are byte-wise identical
    size_t curBytePos = 0;
    while (curBytePos + BLOCK_SIZE <= totalBytes &&
           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
           std::memcmp(lhsBytes + curBytePos, rhsBytes + curBytePos, BLOCK_SIZE) == 0) {
        curBytePos += BLOCK_SIZE;
    }

    // BLOCK_SIZE is a multiple of sizeof(T), so curBytePos always falls on an element boundary
    for (size_t i = curBytePos / sizeof(T); i < elementCount; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        int32_t charDiff = static_cast<int32_t>(lstrPt[i]) - static_cast<int32_t>(rstrPt[i]);
        if (charDiff != 0) {
            return charDiff;
        }
    }

    return 0;
}
305
Compare(String * rstr)306 int32_t String::Compare(String *rstr)
307 {
308 String *lstr = this;
309 if (lstr == rstr) {
310 return 0;
311 }
312 ASSERT(lstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
313 ASSERT(rstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
314 auto lstrLeng = static_cast<int32_t>(lstr->GetLength());
315 auto rstrLeng = static_cast<int32_t>(rstr->GetLength());
316 int32_t lengRet = lstrLeng - rstrLeng;
317 int32_t minCount = (lengRet < 0) ? lstrLeng : rstrLeng;
318 bool lstrIsUtf16 = lstr->IsUtf16();
319 bool rstrIsUtf16 = rstr->IsUtf16();
320 if (!lstrIsUtf16 && !rstrIsUtf16) {
321 int32_t charDiff = CompareBytesBlock(lstr->GetDataMUtf8(), rstr->GetDataMUtf8(), minCount);
322 if (charDiff != 0) {
323 return charDiff;
324 }
325 } else if (!lstrIsUtf16) {
326 Span<uint8_t> lhsSp(lstr->GetDataMUtf8(), lstrLeng);
327 Span<uint16_t> rhsSp(rstr->GetDataUtf16(), rstrLeng);
328 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
329 if (charDiff != 0) {
330 return charDiff;
331 }
332 } else if (!rstrIsUtf16) {
333 Span<uint16_t> lhsSp(lstr->GetDataUtf16(), lstrLeng);
334 Span<uint8_t> rhsSp(rstr->GetDataMUtf8(), rstrLeng);
335 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
336 if (charDiff != 0) {
337 return charDiff;
338 }
339 } else {
340 int32_t charDiff = CompareBytesBlock(lstr->GetDataUtf16(), rstr->GetDataUtf16(), minCount);
341 if (charDiff != 0) {
342 return charDiff;
343 }
344 }
345 return lengRet;
346 }
347
348 template <typename T1, typename T2>
SubstringEquals(Span<const T1> & string,Span<const T2> & pattern,int32_t pos)349 static inline ALWAYS_INLINE int32_t SubstringEquals(Span<const T1> &string, Span<const T2> &pattern, int32_t pos)
350 {
351 ASSERT(pos + pattern.size() <= string.size());
352 if constexpr (std::is_same_v<T1, T2>) {
353 return std::memcmp(string.begin() + pos, pattern.begin(), pattern.size()) == 0;
354 }
355 return std::equal(pattern.begin(), pattern.end(), string.begin() + pos);
356 }
357
358 /*
359 * Tailed Substring method (based on D. Cantone and S. Faro: Searching for a substring with constant extra-space
360 * complexity). O(nm) worst-case but reported to have good performance both on random and natural language data
361 * Substring s of t is called tailed-substring, if the last character of s does not repeat elsewhere in s
362 */
363 /* static */
364 template <typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)365 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
366 {
367 int32_t maxTailedLen = 1;
368 int32_t tailedEnd = rhsSp.size() - 1;
369 int32_t maxTailedEnd = tailedEnd;
370 // Phase 1: search in the beginning of string while computing maximal tailed-substring length
371 auto searchChar = rhsSp[tailedEnd];
372 auto *shiftedLhs = lhsSp.begin() + tailedEnd;
373 while (pos <= max) {
374 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
375 if (searchChar != shiftedLhs[pos]) {
376 pos++;
377 continue;
378 }
379 if (SubstringEquals(lhsSp, rhsSp, pos)) {
380 return pos;
381 }
382 auto tailedStart = tailedEnd - 1;
383 while (tailedStart >= 0 && rhsSp[tailedStart] != searchChar) {
384 tailedStart--;
385 }
386 if (maxTailedLen < tailedEnd - tailedStart) {
387 maxTailedLen = tailedEnd - tailedStart;
388 maxTailedEnd = tailedEnd;
389 }
390 if (maxTailedLen >= tailedEnd) {
391 break;
392 }
393 pos += tailedEnd - tailedStart;
394 tailedEnd--;
395 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
396 shiftedLhs--;
397 searchChar = rhsSp[tailedEnd];
398 }
399 // Phase 2: search in the remainder of string using computed maximal tailed-substring length
400 searchChar = rhsSp[maxTailedEnd];
401 shiftedLhs = lhsSp.begin() + maxTailedEnd;
402 while (pos <= max) {
403 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
404 if (searchChar != shiftedLhs[pos]) {
405 pos++;
406 continue;
407 }
408 if (SubstringEquals(lhsSp, rhsSp, pos)) {
409 return pos;
410 }
411 pos += maxTailedLen;
412 }
413 return -1;
414 }
415
416 // Search of the last occurence is equivalent to search of the first occurence of
417 // reversed pattern in reversed string
418 template <typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)419 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
420 {
421 int32_t maxTailedLen = 1;
422 int32_t tailedStart = 0;
423 int32_t maxTailedStart = tailedStart;
424 auto patternSize = static_cast<int32_t>(rhsSp.size());
425 // Phase 1: search in the end of string while computing maximal tailed-substring length
426 auto searchChar = rhsSp[tailedStart];
427 auto *shiftedLhs = lhsSp.begin() + tailedStart;
428 while (pos >= 0) {
429 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
430 if (searchChar != shiftedLhs[pos]) {
431 pos--;
432 continue;
433 }
434 if (SubstringEquals(lhsSp, rhsSp, pos)) {
435 return pos;
436 }
437 auto tailedEnd = tailedStart + 1;
438 while (tailedEnd < patternSize && rhsSp[tailedEnd] != searchChar) {
439 tailedEnd++;
440 }
441 if (maxTailedLen < tailedEnd - tailedStart) {
442 maxTailedLen = tailedEnd - tailedStart;
443 maxTailedStart = tailedStart;
444 }
445 if (maxTailedLen >= patternSize - tailedStart) {
446 break;
447 }
448 pos -= tailedEnd - tailedStart;
449 tailedStart++;
450 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
451 shiftedLhs++;
452 searchChar = rhsSp[tailedStart];
453 }
454 // Phase 2: search in the remainder of string using computed maximal tailed-substring length
455 searchChar = rhsSp[maxTailedStart];
456 shiftedLhs = lhsSp.begin() + maxTailedStart;
457 while (pos >= 0) {
458 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
459 if (searchChar != shiftedLhs[pos]) {
460 pos--;
461 continue;
462 }
463 if (SubstringEquals(lhsSp, rhsSp, pos)) {
464 return pos;
465 }
466 pos -= maxTailedLen;
467 }
468 return -1;
469 }
470
GetCompressionAndLength(panda::coretypes::String * string)471 static inline ALWAYS_INLINE std::pair<bool, int32_t> GetCompressionAndLength(panda::coretypes::String *string)
472 {
473 ASSERT(string->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
474 ASSERT(string != nullptr);
475 return {string->IsMUtf8(), static_cast<int32_t>(string->GetLength())};
476 }
477
IndexOf(String * rhs,int32_t pos)478 int32_t String::IndexOf(String *rhs, int32_t pos)
479 {
480 String *lhs = this;
481 auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
482 auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
483
484 if (pos < 0) {
485 pos = 0;
486 }
487
488 if (rhs_count == 0) {
489 return std::min(lhs_count, pos);
490 }
491
492 int32_t max = lhs_count - rhs_count;
493 // for pos > max IndexOf impl will return -1
494 if (lhs_utf8 && rhs_utf8) {
495 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
496 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
497 return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
498 } else if (!lhs_utf8 && !rhs_utf8) { // NOLINT(readability-else-after-return)
499 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
500 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
501 return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
502 } else if (rhs_utf8) {
503 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
504 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
505 return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
506 } else { // NOLINT(readability-else-after-return)
507 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
508 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
509 return panda::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
510 }
511 }
512
LastIndexOf(String * rhs,int32_t pos)513 int32_t String::LastIndexOf(String *rhs, int32_t pos)
514 {
515 String *lhs = this;
516 auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
517 auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
518
519 int32_t max = lhs_count - rhs_count;
520
521 if (pos > max) {
522 pos = max;
523 }
524
525 if (pos < 0) {
526 return -1;
527 }
528
529 if (rhs_count == 0) {
530 return pos;
531 }
532
533 if (lhs_utf8 && rhs_utf8) {
534 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
535 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
536 return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
537 } else if (!lhs_utf8 && !rhs_utf8) { // NOLINT(readability-else-after-return)
538 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
539 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
540 return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
541 } else if (rhs_utf8) {
542 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
543 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
544 return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
545 } else { // NOLINT(readability-else-after-return)
546 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
547 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
548 return panda::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
549 }
550 }
551
552 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Length)553 bool String::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Length)
554 {
555 if (!compressedStringsEnabled_) {
556 return false;
557 }
558 bool isCompressed = true;
559 Span<const uint16_t> data(utf16Data, utf16Length);
560 for (uint32_t i = 0; i < utf16Length; i++) {
561 if (!IsASCIICharacter(data[i])) {
562 isCompressed = false;
563 break;
564 }
565 }
566 return isCompressed;
567 }
568
569 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length)570 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length)
571 {
572 if (!compressedStringsEnabled_) {
573 return false;
574 }
575 bool isCompressed = true;
576 Span<const uint8_t> data(mutf8Data, mutf8Length);
577 for (uint32_t i = 0; i < mutf8Length; i++) {
578 if (!IsASCIICharacter(data[i])) {
579 isCompressed = false;
580 break;
581 }
582 }
583 return isCompressed;
584 }
585
586 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data)587 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data)
588 {
589 return compressedStringsEnabled_ ? utf::IsMUtf8OnlySingleBytes(mutf8Data) : false;
590 }
591
592 /* static */
CanBeCompressedUtf16(const uint16_t * utf16Data,uint32_t utf16Length,uint16_t non)593 bool String::CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non)
594 {
595 if (!compressedStringsEnabled_) {
596 return false;
597 }
598 bool isCompressed = true;
599 Span<const uint16_t> data(utf16Data, utf16Length);
600 for (uint32_t i = 0; i < utf16Length; i++) {
601 if (!IsASCIICharacter(data[i]) && data[i] != non) {
602 isCompressed = false;
603 break;
604 }
605 }
606 return isCompressed;
607 }
608
609 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length,uint16_t non)610 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non)
611 {
612 if (!compressedStringsEnabled_) {
613 return false;
614 }
615 bool isCompressed = true;
616 Span<const uint8_t> data(mutf8Data, mutf8Length);
617 for (uint32_t i = 0; i < mutf8Length; i++) {
618 if (!IsASCIICharacter(data[i]) && data[i] != non) {
619 isCompressed = false;
620 break;
621 }
622 }
623 return isCompressed;
624 }
625
626 /* static */
StringsAreEqual(String * str1,String * str2)627 bool String::StringsAreEqual(String *str1, String *str2)
628 {
629 ASSERT(str1 != nullptr);
630 ASSERT(str2 != nullptr);
631
632 if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength())) {
633 return false;
634 }
635
636 if (str1->IsUtf16()) {
637 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
638 Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
639 return String::StringsAreEquals(data1, data2);
640 } else { // NOLINT(readability-else-after-return)
641 Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
642 Span<const uint8_t> data2(str2->GetDataMUtf8(), str1->GetLength());
643 return String::StringsAreEquals(data1, data2);
644 }
645 }
646
647 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length)648 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length)
649 {
650 if (str1->GetLength() != utf16Length) {
651 return false;
652 }
653 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
654 return StringsAreEqualMUtf8(str1, mutf8Data, utf16Length, canBeCompressed);
655 }
656
657 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)658 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
659 {
660 bool result = true;
661 if (str1->GetLength() != utf16Length) {
662 result = false;
663 } else {
664 bool str1CanBeCompressed = !str1->IsUtf16();
665 bool data2CanBeCompressed = canBeCompressed;
666 if (str1CanBeCompressed != data2CanBeCompressed) {
667 return false;
668 }
669
670 ASSERT(str1CanBeCompressed == data2CanBeCompressed);
671 if (str1CanBeCompressed) {
672 Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
673 Span<const uint8_t> data2(mutf8Data, utf16Length);
674 result = String::StringsAreEquals(data1, data2);
675 } else {
676 result = IsMutf8EqualsUtf16(mutf8Data, str1->GetDataUtf16(), str1->GetLength());
677 }
678 }
679 return result;
680 }
681
682 /* static */
StringsAreEqualUtf16(String * str1,const uint16_t * utf16Data,uint32_t utf16DataLength)683 bool String::StringsAreEqualUtf16(String *str1, const uint16_t *utf16Data, uint32_t utf16DataLength)
684 {
685 bool result = true;
686 if (str1->GetLength() != utf16DataLength) {
687 result = false;
688 } else if (!str1->IsUtf16()) {
689 result = IsMutf8EqualsUtf16(str1->GetDataMUtf8(), str1->GetLength(), utf16Data, utf16DataLength);
690 } else {
691 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
692 Span<const uint16_t> data2(utf16Data, utf16DataLength);
693 result = String::StringsAreEquals(data1, data2);
694 }
695 return result;
696 }
697
698 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,uint32_t utf8DataLength,const uint16_t * utf16Data,uint32_t utf16DataLength)699 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8DataLength, const uint16_t *utf16Data,
700 uint32_t utf16DataLength)
701 {
702 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
703 auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
704 [[maybe_unused]] auto convertedStringSize =
705 utf::ConvertRegionMUtf8ToUtf16(utf8Data, tmpBuffer, utf8DataLength, utf16DataLength, 0);
706 ASSERT(convertedStringSize == utf16DataLength);
707
708 Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
709 Span<const uint16_t> data2(utf16Data, utf16DataLength);
710 bool result = String::StringsAreEquals(data1, data2);
711 allocator->Delete(tmpBuffer);
712 return result;
713 }
714
715 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,const uint16_t * utf16Data,uint32_t utf16DataLength)716 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16DataLength)
717 {
718 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
719 auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
720 utf::ConvertMUtf8ToUtf16(utf8Data, utf::Mutf8Size(utf8Data), tmpBuffer);
721
722 Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
723 Span<const uint16_t> data2(utf16Data, utf16DataLength);
724 bool result = String::StringsAreEquals(data1, data2);
725 allocator->Delete(tmpBuffer);
726 return result;
727 }
728
729 /* static */
730 template <typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)731 bool String::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
732 {
733 return 0 == std::memcmp(str1.Data(), str2.Data(), str1.SizeBytes());
734 }
735
ToCharArray(const LanguageContext & ctx)736 Array *String::ToCharArray(const LanguageContext &ctx)
737 {
738 // allocator may trig gc and move 'this', need to hold it
739 auto thread = ManagedThread::GetCurrent();
740 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
741 VMHandle<String> str(thread, this);
742 auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
743 Array *array = Array::Create(klass, GetLength());
744 if (array == nullptr) {
745 return nullptr;
746 }
747
748 if (str->IsUtf16()) {
749 Span<uint16_t> sp(str->GetDataUtf16(), str->GetLength());
750 for (size_t i = 0; i < sp.size(); i++) {
751 array->Set<uint16_t>(i, sp[i]);
752 }
753 } else {
754 Span<uint8_t> sp(str->GetDataMUtf8(), str->GetLength());
755 for (size_t i = 0; i < sp.size(); i++) {
756 array->Set<uint16_t>(i, sp[i]);
757 }
758 }
759
760 return array;
761 }
762
763 /* static */
GetChars(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx)764 Array *String::GetChars(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx)
765 {
766 // allocator may trig gc and move 'src', need to hold it
767 auto thread = ManagedThread::GetCurrent();
768 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
769 VMHandle<String> str(thread, src);
770 auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
771 Array *array = Array::Create(klass, utf16Length);
772 if (array == nullptr) {
773 return nullptr;
774 }
775
776 if (str->IsUtf16()) {
777 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
778 Span<uint16_t> sp(str->GetDataUtf16() + start, utf16Length);
779 for (size_t i = 0; i < sp.size(); i++) {
780 array->Set<uint16_t>(i, sp[i]);
781 }
782 } else {
783 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
784 Span<uint8_t> sp(str->GetDataMUtf8() + start, utf16Length);
785 for (size_t i = 0; i < sp.size(); i++) {
786 array->Set<uint16_t>(i, sp[i]);
787 }
788 }
789
790 return array;
791 }
792
793 template <class T>
ComputeHashForData(const T * data,size_t size)794 static int32_t ComputeHashForData(const T *data, size_t size)
795 {
796 uint32_t hash = 0;
797 #if defined(__GNUC__)
798 #pragma GCC diagnostic push
799 #pragma GCC diagnostic ignored "-Wignored-attributes"
800 Span<const T> sp(data, size);
801 #pragma GCC diagnostic pop
802 #endif
803 for (auto c : sp) {
804 constexpr size_t SHIFT = 5;
805 hash = (hash << SHIFT) - hash + c;
806 }
807 return static_cast<int32_t>(hash);
808 }
809
/**
 * Computes the 31-based polynomial hash (hash = hash * 31 + byte) over the bytes of a
 * zero-terminated buffer. The terminating zero byte is not included in the hash.
 *
 * @param mutf8Data pointer to a zero-terminated byte sequence
 * @return the hash reinterpreted as a signed 32-bit value; 0 for an empty sequence
 */
static int32_t ComputeHashForMutf8(const uint8_t *mutf8Data)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cursor = mutf8Data; *cursor != '\0'; ++cursor) {
        hash = (hash << SHIFT) - hash + *cursor;
    }
    return static_cast<int32_t>(hash);
}
819
ComputeHashcode()820 uint32_t String::ComputeHashcode()
821 {
822 uint32_t hash;
823 if (compressedStringsEnabled_) {
824 if (!IsUtf16()) {
825 hash = static_cast<uint32_t>(ComputeHashForData(GetDataMUtf8(), GetLength()));
826 } else {
827 hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
828 }
829 } else {
830 ASSERT(static_cast<size_t>(GetLength()) < (std::numeric_limits<size_t>::max() >> 1U));
831 hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
832 }
833 return hash;
834 }
835
836 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length)837 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length)
838 {
839 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
840 return ComputeHashcodeMutf8(mutf8Data, utf16Length, canBeCompressed);
841 }
842
843 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)844 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
845 {
846 uint32_t hash;
847 if (canBeCompressed) {
848 hash = static_cast<uint32_t>(ComputeHashForMutf8(mutf8Data));
849 } else {
850 // NOTE(alovkov): optimize it without allocation a temporary buffer
851 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
852 auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16Length);
853 utf::ConvertMUtf8ToUtf16(mutf8Data, utf::Mutf8Size(mutf8Data), tmpBuffer);
854 hash = static_cast<uint32_t>(ComputeHashForData(tmpBuffer, utf16Length));
855 allocator->Delete(tmpBuffer);
856 }
857 return hash;
858 }
859
860 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)861 uint32_t String::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
862 {
863 return ComputeHashForData(utf16Data, length);
864 }
865
866 /* static */
DoReplace(String * src,uint16_t oldC,uint16_t newC,const LanguageContext & ctx,PandaVM * vm)867 String *String::DoReplace(String *src, uint16_t oldC, uint16_t newC, const LanguageContext &ctx, PandaVM *vm)
868 {
869 ASSERT(src != nullptr);
870 auto length = static_cast<int32_t>(src->GetLength());
871 bool canBeCompressed = IsASCIICharacter(newC);
872 if (src->IsUtf16()) {
873 canBeCompressed = canBeCompressed && CanBeCompressedUtf16(src->GetDataUtf16(), length, oldC);
874 } else {
875 canBeCompressed = canBeCompressed && CanBeCompressedMUtf8(src->GetDataMUtf8(), length, oldC);
876 }
877
878 // allocator may trig gc and move src, need to hold it
879 auto thread = ManagedThread::GetCurrent();
880 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
881 VMHandle<String> srcHandle(thread, src);
882 auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
883 if (string == nullptr) {
884 return nullptr;
885 }
886
887 // retrieve src after gc
888 src = srcHandle.GetPtr();
889 ASSERT(string->hashcode_ == 0);
890
891 // After replacing we should have a full barrier, so this writes should happen-before barrier
892 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
893 if (src->IsUtf16()) {
894 if (canBeCompressed) {
895 auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
896 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
897 std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataMUtf8(), replace);
898 } else {
899 auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
900 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
901 std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataUtf16(), replace);
902 }
903 } else {
904 if (canBeCompressed) {
905 auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
906 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
907 std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataMUtf8(), replace);
908 } else {
909 auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
910 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
911 std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataUtf16(), replace);
912 }
913 }
914 TSAN_ANNOTATE_IGNORE_WRITES_END();
915 // String is supposed to be a constant object, so all its data should be visible by all threads
916 arch::FullMemoryBarrier();
917 return string;
918 }
919
920 /* static */
FastSubString(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm)921 String *String::FastSubString(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx,
922 PandaVM *vm)
923 {
924 ASSERT(src != nullptr);
925 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
926 bool canBeCompressed = !src->IsUtf16() || CanBeCompressed(src->GetDataUtf16() + start, utf16Length);
927
928 // allocator may trig gc and move src, need to hold it
929 auto thread = ManagedThread::GetCurrent();
930 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
931 VMHandle<String> srcHandle(thread, src);
932 auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm);
933 if (string == nullptr) {
934 return nullptr;
935 }
936
937 // retrieve src after gc
938 src = srcHandle.GetPtr();
939 ASSERT(string->hashcode_ == 0);
940
941 // After copying we should have a full barrier, so this writes should happen-before barrier
942 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
943 if (src->IsUtf16()) {
944 if (canBeCompressed) {
945 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
946 CopyUtf16AsMUtf8(src->GetDataUtf16() + start, string->GetDataMUtf8(), utf16Length);
947 } else {
948 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
949 std::copy_n(reinterpret_cast<const uint8_t *>(src->GetDataUtf16() + start), utf16Length << 1UL,
950 reinterpret_cast<uint8_t *>(string->GetDataUtf16()));
951 }
952 } else {
953 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
954 std::copy_n(src->GetDataMUtf8() + start, utf16Length, string->GetDataMUtf8());
955 }
956 TSAN_ANNOTATE_IGNORE_WRITES_END();
957 // String is supposed to be a constant object, so all its data should be visible by all threads
958 arch::FullMemoryBarrier();
959 return string;
960 }
961
962 /* static */
Concat(String * string1,String * string2,const LanguageContext & ctx,PandaVM * vm)963 String *String::Concat(String *string1, String *string2, const LanguageContext &ctx, PandaVM *vm)
964 {
965 ASSERT(string1 != nullptr);
966 ASSERT(string2 != nullptr);
967 // allocator may trig gc and move src, need to hold it
968 auto thread = ManagedThread::GetCurrent();
969 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
970 VMHandle<String> str1Handle(thread, string1);
971 VMHandle<String> str2Handle(thread, string2);
972
973 uint32_t length1 = string1->GetLength();
974 uint32_t length2 = string2->GetLength();
975 uint32_t newLength = length1 + length2;
976 bool compressed = compressedStringsEnabled_ && (!string1->IsUtf16() && !string2->IsUtf16());
977 auto newString = AllocStringObject(newLength, compressed, ctx, vm);
978 if (UNLIKELY(newString == nullptr)) {
979 return nullptr;
980 }
981
982 ASSERT(newString->hashcode_ == 0);
983
984 // retrieve strings after gc
985 string1 = str1Handle.GetPtr();
986 string2 = str2Handle.GetPtr();
987
988 // After copying we should have a full barrier, so this writes should happen-before barrier
989 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
990 if (compressed) {
991 Span<uint8_t> sp(newString->GetDataMUtf8(), newLength);
992 std::copy_n(string1->GetDataMUtf8(), length1, sp.Data());
993 sp = sp.SubSpan(length1);
994 std::copy_n(string2->GetDataMUtf8(), length2, sp.Data());
995 } else {
996 Span<uint16_t> sp(newString->GetDataUtf16(), newLength);
997 if (!string1->IsUtf16()) {
998 for (uint32_t i = 0; i < length1; ++i) {
999 sp[i] = string1->At<false>(i);
1000 }
1001 } else {
1002 std::copy_n(reinterpret_cast<uint8_t *>(string1->GetDataUtf16()), length1 << 1U,
1003 reinterpret_cast<uint8_t *>(sp.Data()));
1004 }
1005 sp = sp.SubSpan(length1);
1006 if (!string2->IsUtf16()) {
1007 for (uint32_t i = 0; i < length2; ++i) {
1008 sp[i] = string2->At<false>(i);
1009 }
1010 } else {
1011 std::copy_n(reinterpret_cast<uint8_t *>(string2->GetDataUtf16()), length2 << 1U,
1012 reinterpret_cast<uint8_t *>(sp.Data()));
1013 }
1014 }
1015 TSAN_ANNOTATE_IGNORE_WRITES_END();
1016 // String is supposed to be a constant object, so all its data should be visible by all threads
1017 arch::FullMemoryBarrier();
1018
1019 return newString;
1020 }
1021
1022 /* static */
AllocStringObject(size_t length,bool compressed,const LanguageContext & ctx,PandaVM * vm,bool movable)1023 String *String::AllocStringObject(size_t length, bool compressed, const LanguageContext &ctx, PandaVM *vm, bool movable)
1024 {
1025 ASSERT(vm != nullptr);
1026 auto *thread = ManagedThread::GetCurrent();
1027 auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
1028 size_t size = compressed ? String::ComputeSizeMUtf8(length) : String::ComputeSizeUtf16(length);
1029 auto string =
1030 movable
1031 ? reinterpret_cast<String *>(vm->GetHeapManager()->AllocateObject(
1032 stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT))
1033 : reinterpret_cast<String *>(vm->GetHeapManager()->AllocateNonMovableObject(
1034 stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT));
1035 if (string != nullptr) {
1036 // After setting length we should have a full barrier, so this write should happens-before barrier
1037 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
1038 string->SetLength(length, compressed);
1039 string->SetHashcode(0);
1040 TSAN_ANNOTATE_IGNORE_WRITES_END();
1041 // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string
1042 // legth before it's set
1043 arch::FullMemoryBarrier();
1044 }
1045 return string;
1046 }
1047
1048 } // namespace panda::coretypes
1049