1 /**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <cstddef>
17 #include <cstring>
18 #include <limits>
19
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/hash.h"
22 #include "libpandabase/utils/span.h"
23 #include "runtime/arch/memory_helpers.h"
24 #include "runtime/include/coretypes/array.h"
25 #include "runtime/include/coretypes/string-inl.h"
26 #include "runtime/include/runtime.h"
27 #include "runtime/handle_base-inl.h"
28 #include "runtime/include/panda_vm.h"
29
30 namespace ark::coretypes {
31
// Switch consulted by the CanBeCompressed* helpers in this file: while it is false they report
// every input as not compressible. Defaults to enabled.
bool String::compressedStringsEnabled_ = true;
33
34 /* static */
CreateFromString(String * str,const LanguageContext & ctx,PandaVM * vm)35 String *String::CreateFromString(String *str, const LanguageContext &ctx, PandaVM *vm)
36 {
37 ASSERT(str != nullptr);
38 // allocator may trig gc and move str, need to hold it
39 auto thread = ManagedThread::GetCurrent();
40 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
41 VMHandle<String> strHandle(thread, str);
42 auto string = AllocStringObject(strHandle->GetLength(), !strHandle->IsUtf16(), ctx, vm);
43 if (string == nullptr) {
44 return nullptr;
45 }
46
47 // retrive str after gc
48 str = strHandle.GetPtr();
49 string->hashcode_ = str->hashcode_;
50
51 uint32_t length = str->GetLength();
52 // After memcpy we should have a full barrier, so this writes should happen-before barrier
53 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
54 if (str->IsUtf16()) {
55 memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), str->GetDataUtf16(),
56 ComputeDataSizeUtf16(length));
57 } else {
58 memcpy_s(string->GetDataMUtf8(), string->GetLength(), str->GetDataMUtf8(), length);
59 }
60 TSAN_ANNOTATE_IGNORE_WRITES_END();
61 // String is supposed to be a constant object, so all its data should be visible by all threads
62 arch::FullMemoryBarrier();
63
64 return string;
65 }
66
67 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,size_t mutf8Length,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)68 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, size_t mutf8Length, uint32_t utf16Length,
69 bool canBeCompressed, const LanguageContext &ctx, PandaVM *vm, bool movable,
70 bool pinned)
71 {
72 auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable, pinned);
73 if (string == nullptr) {
74 return nullptr;
75 }
76
77 ASSERT(string->hashcode_ == 0);
78 // After copying we should have a full barrier, so this writes should happen-before barrier
79 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
80 if (canBeCompressed) {
81 memcpy_s(string->GetDataMUtf8(), string->GetLength(), mutf8Data, utf16Length);
82 } else {
83 utf::ConvertMUtf8ToUtf16(mutf8Data, mutf8Length, string->GetDataUtf16());
84 }
85 TSAN_ANNOTATE_IGNORE_WRITES_END();
86 // String is supposed to be a constant object, so all its data should be visible by all threads
87 arch::FullMemoryBarrier();
88 return string;
89 }
90
91 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)92 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, const LanguageContext &ctx, PandaVM *vm,
93 bool movable, bool pinned)
94 {
95 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
96 auto mutf8Length = utf::Mutf8Size(mutf8Data);
97 ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length));
98 return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
99 }
100
101 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)102 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed,
103 const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned)
104 {
105 auto mutf8Length = utf::Mutf8Size(mutf8Data);
106 ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length));
107 ASSERT(canBeCompressed == CanBeCompressedMUtf8(mutf8Data));
108 return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
109 }
110
111 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)112 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, const LanguageContext &ctx, PandaVM *vm, bool movable,
113 bool pinned)
114 {
115 size_t mutf8Length = utf::Mutf8Size(mutf8Data);
116 size_t utf16Length = utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length);
117 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
118 return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
119 }
120
121 /* static */
CreateFromMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)122 String *String::CreateFromMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint32_t utf16Length,
123 const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned)
124 {
125 ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data, mutf8Length));
126 auto canBeCompressed = CanBeCompressedMUtf8(mutf8Data, mutf8Length);
127 return CreateFromMUtf8(mutf8Data, mutf8Length, utf16Length, canBeCompressed, ctx, vm, movable, pinned);
128 }
129
130 /* static */
CreateFromUtf8(const uint8_t * utf8Data,uint32_t utf8Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)131 String *String::CreateFromUtf8(const uint8_t *utf8Data, uint32_t utf8Length, const LanguageContext &ctx, PandaVM *vm,
132 bool movable, bool pinned)
133 {
134 coretypes::String *s = nullptr;
135 auto utf16Length = utf::Utf8ToUtf16Size(utf8Data, utf8Length);
136 if (CanBeCompressedMUtf8(utf8Data, utf8Length)) {
137 // ascii string have equal representation in utf8 and mutf8 formats
138 s = coretypes::String::CreateFromMUtf8(utf8Data, utf8Length, utf16Length, true, ctx, vm, movable, pinned);
139 } else {
140 PandaVector<uint16_t> tmpBuffer(utf16Length);
141 [[maybe_unused]] auto len =
142 utf::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Length, utf16Length, 0);
143 ASSERT(len == utf16Length);
144 s = coretypes::String::CreateFromUtf16(tmpBuffer.data(), utf16Length, ctx, vm, movable, pinned);
145 }
146 return s;
147 }
148
149 /* static */
CreateFromUtf16(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)150 String *String::CreateFromUtf16(const uint16_t *utf16Data, uint32_t utf16Length, const LanguageContext &ctx,
151 PandaVM *vm, bool movable, bool pinned)
152 {
153 bool canBeCompressed = CanBeCompressed(utf16Data, utf16Length);
154 auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm, movable, pinned);
155 if (string == nullptr) {
156 return nullptr;
157 }
158
159 ASSERT(string->hashcode_ == 0);
160 // After copying we should have a full barrier, so this writes should happen-before barrier
161 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
162 if (canBeCompressed) {
163 CopyUtf16AsMUtf8(utf16Data, string->GetDataMUtf8(), utf16Length);
164 } else {
165 memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), utf16Data, utf16Length << 1UL);
166 }
167 TSAN_ANNOTATE_IGNORE_WRITES_END();
168 // String is supposed to be a constant object, so all its data should be visible by all threads
169 arch::FullMemoryBarrier();
170 return string;
171 }
172
173 /* static */
CreateEmptyString(const LanguageContext & ctx,PandaVM * vm)174 String *String::CreateEmptyString(const LanguageContext &ctx, PandaVM *vm)
175 {
176 uint16_t data = 0;
177 return CreateFromUtf16(&data, 0, ctx, vm);
178 }
179
180 /* static */
CopyUtf16AsMUtf8(const uint16_t * utf16From,uint8_t * mutf8To,uint32_t utf16Length)181 void String::CopyUtf16AsMUtf8(const uint16_t *utf16From, uint8_t *mutf8To, uint32_t utf16Length)
182 {
183 Span<const uint16_t> from(utf16From, utf16Length);
184 Span<uint8_t> to(mutf8To, utf16Length);
185 for (uint32_t i = 0; i < utf16Length; i++) {
186 to[i] = from[i];
187 }
188 }
189
190 // static
CreateNewStringFromChars(uint32_t offset,uint32_t length,Array * chararray,const LanguageContext & ctx,PandaVM * vm)191 String *String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array *chararray, const LanguageContext &ctx,
192 PandaVM *vm)
193 {
194 ASSERT(chararray != nullptr);
195 // allocator may trig gc and move array, need to hold it
196 auto thread = ManagedThread::GetCurrent();
197 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
198 VMHandle<Array> arrayHandle(thread, chararray);
199
200 // There is a potential data race between read of src in CanBeCompressed and write of destination buf
201 // in CopyDataRegionUtf16. The src is a cast from chararray comming from managed object.
202 // Hence the race is reported on managed object, which has a synchronization on a high level.
203 // TSAN does not see such synchronization, thus we ignore such races here.
204 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
205 // NOLINTNEXTLINE(readability-identifier-naming)
206 const uint16_t *src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(chararray->GetData()) + (offset << 1UL));
207 bool canBeCompressed = CanBeCompressed(src, length);
208 TSAN_ANNOTATE_IGNORE_WRITES_END();
209 auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
210 if (string == nullptr) {
211 return nullptr;
212 }
213
214 // retrieve src since gc may move it
215 src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + (offset << 1UL));
216 ASSERT(string->hashcode_ == 0);
217 // After copying we should have a full barrier, so this writes should happen-before barrier
218 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
219 if (canBeCompressed) {
220 CopyUtf16AsMUtf8(src, string->GetDataMUtf8(), length);
221 } else {
222 memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), src, length << 1UL);
223 }
224 TSAN_ANNOTATE_IGNORE_WRITES_END();
225 // String is supposed to be a constant object, so all its data should be visible by all threads
226 arch::FullMemoryBarrier();
227 return string;
228 }
229
230 // static
CreateNewStringFromBytes(uint32_t offset,uint32_t length,uint32_t highByte,Array * bytearray,const LanguageContext & ctx,PandaVM * vm)231 String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint32_t highByte, Array *bytearray,
232 const LanguageContext &ctx, PandaVM *vm)
233 {
234 ASSERT(length != 0);
235 ASSERT(bytearray != nullptr);
236 // allocator may trig gc and move array, need to hold it
237 auto thread = ManagedThread::GetCurrent();
238 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
239 VMHandle<Array> arrayHandle(thread, bytearray);
240
241 constexpr size_t BYTE_MASK = 0xFF;
242
243 // NOLINTNEXTLINE(readability-identifier-naming)
244 const uint8_t *src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(bytearray->GetData()) + offset);
245 highByte &= BYTE_MASK;
246 bool canBeCompressed = CanBeCompressedMUtf8(src, length) && (highByte == 0);
247 auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
248 if (string == nullptr) {
249 return nullptr;
250 }
251
252 // retrieve src since gc may move it
253 src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(arrayHandle->GetData()) + offset);
254 ASSERT(string->hashcode_ == 0);
255 // After copying we should have a full barrier, so this writes should happen-before barrier
256 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
257 if (canBeCompressed) {
258 Span<const uint8_t> from(src, length);
259 Span<uint8_t> to(string->GetDataMUtf8(), length);
260 for (uint32_t i = 0; i < length; ++i) {
261 to[i] = (from[i] & BYTE_MASK);
262 }
263 } else {
264 Span<const uint8_t> from(src, length);
265 Span<uint16_t> to(string->GetDataUtf16(), length);
266 for (uint32_t i = 0; i < length; ++i) {
267 to[i] = (highByte << 8U) + (from[i] & BYTE_MASK);
268 }
269 }
270 TSAN_ANNOTATE_IGNORE_WRITES_END();
271
272 // String is supposed to be a constant object, so all its data should be visible by all threads
273 arch::FullMemoryBarrier();
274 return string;
275 }
276
277 template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)278 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
279 {
280 for (int32_t i = 0; i < count; ++i) {
281 int32_t charDiff = static_cast<int32_t>(lhsSp[i]) - static_cast<int32_t>(rhsSp[i]);
282 if (charDiff != 0) {
283 return charDiff;
284 }
285 }
286 return 0;
287 }
288
/**
 * Compares the first minCount elements of two arrays of the same element type.
 * The common prefix is skipped one machine word (sizeof(size_t) bytes) at a time; the remaining
 * elements, starting at the first word that differs, are then compared one by one.
 * @param lstrPt   left-hand array
 * @param rstrPt   right-hand array
 * @param minCount number of elements to compare; a non-positive count compares nothing
 * @return 0 if the compared elements are equal, otherwise lstrPt[i] - rstrPt[i] for the first differing index i
 */
template <typename T>
int32_t CompareBytesBlock(T *lstrPt, T *rstrPt, int32_t minCount)
{
    constexpr int32_t BYTES_CNT = sizeof(size_t);
    static_assert(BYTES_CNT >= sizeof(T));
    static_assert(BYTES_CNT % sizeof(T) == 0);
    // Counting in elements instead of bytes avoids narrowing minCount * sizeof(T) to int32_t
    constexpr int32_t ELEMENTS_PER_BLOCK = BYTES_CNT / sizeof(T);

    int32_t curElementPos = 0;
    while (minCount - curElementPos >= ELEMENTS_PER_BLOCK) {
        // Load the words with memcpy: the data is not guaranteed to be size_t-aligned, and reading
        // it through a size_t pointer would violate the aliasing rules
        size_t lhsBlock = 0;
        size_t rhsBlock = 0;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&lhsBlock, lstrPt + curElementPos, BYTES_CNT);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&rhsBlock, rstrPt + curElementPos, BYTES_CNT);
        if (lhsBlock != rhsBlock) {
            break;
        }
        curElementPos += ELEMENTS_PER_BLOCK;
    }

    // Element-wise pass over the differing word (if any) and the tail shorter than one word
    for (int32_t i = curElementPos; i < minCount; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        int32_t charDiff = static_cast<int32_t>(lstrPt[i]) - static_cast<int32_t>(rstrPt[i]);
        if (charDiff != 0) {
            return charDiff;
        }
    }

    return 0;
}
319
Compare(String * rstr)320 int32_t String::Compare(String *rstr)
321 {
322 String *lstr = this;
323 if (lstr == rstr) {
324 return 0;
325 }
326 ASSERT(lstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
327 ASSERT(rstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
328 auto lstrLeng = static_cast<int32_t>(lstr->GetLength());
329 auto rstrLeng = static_cast<int32_t>(rstr->GetLength());
330 int32_t lengRet = lstrLeng - rstrLeng;
331 int32_t minCount = (lengRet < 0) ? lstrLeng : rstrLeng;
332 bool lstrIsUtf16 = lstr->IsUtf16();
333 bool rstrIsUtf16 = rstr->IsUtf16();
334 if (!lstrIsUtf16 && !rstrIsUtf16) {
335 int32_t charDiff = CompareBytesBlock(lstr->GetDataMUtf8(), rstr->GetDataMUtf8(), minCount);
336 if (charDiff != 0) {
337 return charDiff;
338 }
339 } else if (!lstrIsUtf16) {
340 Span<uint8_t> lhsSp(lstr->GetDataMUtf8(), lstrLeng);
341 Span<uint16_t> rhsSp(rstr->GetDataUtf16(), rstrLeng);
342 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
343 if (charDiff != 0) {
344 return charDiff;
345 }
346 } else if (!rstrIsUtf16) {
347 Span<uint16_t> lhsSp(lstr->GetDataUtf16(), lstrLeng);
348 Span<uint8_t> rhsSp(rstr->GetDataMUtf8(), rstrLeng);
349 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
350 if (charDiff != 0) {
351 return charDiff;
352 }
353 } else {
354 int32_t charDiff = CompareBytesBlock(lstr->GetDataUtf16(), rstr->GetDataUtf16(), minCount);
355 if (charDiff != 0) {
356 return charDiff;
357 }
358 }
359 return lengRet;
360 }
361
362 template <typename T1, typename T2>
SubstringEquals(Span<const T1> & string,Span<const T2> & pattern,int32_t pos)363 static inline ALWAYS_INLINE int32_t SubstringEquals(Span<const T1> &string, Span<const T2> &pattern, int32_t pos)
364 {
365 ASSERT(pos + pattern.size() <= string.size());
366 if constexpr (std::is_same_v<T1, T2>) {
367 return std::memcmp(string.begin() + pos, pattern.begin(), pattern.size()) == 0;
368 }
369 return std::equal(pattern.begin(), pattern.end(), string.begin() + pos);
370 }
371
/*
 * Tailed Substring method (based on D. Cantone and S. Faro: Searching for a substring with constant extra-space
 * complexity). O(nm) worst-case but reported to have good performance both on random and natural language data
 * Substring s of t is called tailed-substring, if the last character of s does not repeat elsewhere in s
 *
 * Searches lhsSp for rhsSp, trying start positions from pos up to and including max.
 * Returns the start index of a match, or -1 if no tried position matches.
 * rhsSp must be non-empty: its last element is read unconditionally.
 */
/* static */
template <typename T1, typename T2>
static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
{
    // Largest shift found so far and the pattern index of the character it belongs to
    int32_t maxTailedLen = 1;
    auto tailedEnd = static_cast<int32_t>(rhsSp.size() - 1);
    int32_t maxTailedEnd = tailedEnd;
    // Phase 1: search in the beginning of string while computing maximal tailed-substring length
    auto searchChar = rhsSp[tailedEnd];
    // Pre-shifted so that shiftedLhs[pos] is the text character aligned with rhsSp[tailedEnd]
    auto *shiftedLhs = lhsSp.begin() + tailedEnd;
    while (pos <= max) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos++;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        // Find the previous occurrence of searchChar in the pattern (-1 if there is none)
        auto tailedStart = tailedEnd - 1;
        while (tailedStart >= 0 && rhsSp[tailedStart] != searchChar) {
            tailedStart--;
        }
        if (maxTailedLen < tailedEnd - tailedStart) {
            maxTailedLen = tailedEnd - tailedStart;
            maxTailedEnd = tailedEnd;
        }
        // No remaining candidate can give a longer shift, so switch to phase 2
        if (maxTailedLen >= tailedEnd) {
            break;
        }
        pos += tailedEnd - tailedStart;
        // Move on to the preceding pattern character as the next probe
        tailedEnd--;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        shiftedLhs--;
        searchChar = rhsSp[tailedEnd];
    }
    // Phase 2: search in the remainder of string using computed maximal tailed-substring length
    searchChar = rhsSp[maxTailedEnd];
    shiftedLhs = lhsSp.begin() + maxTailedEnd;
    while (pos <= max) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos++;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        pos += maxTailedLen;
    }
    return -1;
}
429
// Search of the last occurrence is equivalent to search of the first occurrence of
// reversed pattern in reversed string.
// Tries start positions from pos down to 0 and returns the start index of a match, or -1 if no
// tried position matches. rhsSp must be non-empty: its first element is read unconditionally.
template <typename T1, typename T2>
static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
{
    // Largest shift found so far and the pattern index of the character it belongs to
    int32_t maxTailedLen = 1;
    int32_t tailedStart = 0;
    int32_t maxTailedStart = tailedStart;
    auto patternSize = static_cast<int32_t>(rhsSp.size());
    // Phase 1: search in the end of string while computing maximal tailed-substring length
    auto searchChar = rhsSp[tailedStart];
    // Pre-shifted so that shiftedLhs[pos] is the text character aligned with rhsSp[tailedStart]
    auto *shiftedLhs = lhsSp.begin() + tailedStart;
    while (pos >= 0) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos--;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        // Find the next occurrence of searchChar in the pattern (patternSize if there is none)
        auto tailedEnd = tailedStart + 1;
        while (tailedEnd < patternSize && rhsSp[tailedEnd] != searchChar) {
            tailedEnd++;
        }
        if (maxTailedLen < tailedEnd - tailedStart) {
            maxTailedLen = tailedEnd - tailedStart;
            maxTailedStart = tailedStart;
        }
        // No remaining candidate can give a longer shift, so switch to phase 2
        if (maxTailedLen >= patternSize - tailedStart) {
            break;
        }
        pos -= tailedEnd - tailedStart;
        // Move on to the following pattern character as the next probe
        tailedStart++;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        shiftedLhs++;
        searchChar = rhsSp[tailedStart];
    }
    // Phase 2: search in the remainder of string using computed maximal tailed-substring length
    searchChar = rhsSp[maxTailedStart];
    shiftedLhs = lhsSp.begin() + maxTailedStart;
    while (pos >= 0) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (searchChar != shiftedLhs[pos]) {
            pos--;
            continue;
        }
        if (SubstringEquals(lhsSp, rhsSp, pos)) {
            return pos;
        }
        pos -= maxTailedLen;
    }
    return -1;
}
484
GetCompressionAndLength(ark::coretypes::String * string)485 static inline ALWAYS_INLINE std::pair<bool, int32_t> GetCompressionAndLength(ark::coretypes::String *string)
486 {
487 ASSERT(string->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
488 ASSERT(string != nullptr);
489 return {string->IsMUtf8(), static_cast<int32_t>(string->GetLength())};
490 }
491
IndexOf(String * rhs,int32_t pos)492 int32_t String::IndexOf(String *rhs, int32_t pos)
493 {
494 String *lhs = this;
495 auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
496 auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
497
498 if (pos < 0) {
499 pos = 0;
500 }
501
502 if (rhs_count == 0) {
503 return std::min(lhs_count, pos);
504 }
505
506 int32_t max = lhs_count - rhs_count;
507 // for pos > max IndexOf impl will return -1
508 if (lhs_utf8 && rhs_utf8) {
509 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
510 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
511 return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
512 } else if (!lhs_utf8 && !rhs_utf8) { // NOLINT(readability-else-after-return)
513 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
514 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
515 return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
516 } else if (rhs_utf8) {
517 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
518 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
519 return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
520 } else { // NOLINT(readability-else-after-return)
521 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
522 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
523 return ark::coretypes::IndexOf(lhsSp, rhsSp, pos, max);
524 }
525 }
526
LastIndexOf(String * rhs,int32_t pos)527 int32_t String::LastIndexOf(String *rhs, int32_t pos)
528 {
529 String *lhs = this;
530 auto [lhs_utf8, lhs_count] = GetCompressionAndLength(lhs);
531 auto [rhs_utf8, rhs_count] = GetCompressionAndLength(rhs);
532
533 int32_t max = lhs_count - rhs_count;
534
535 if (pos > max) {
536 pos = max;
537 }
538
539 if (pos < 0) {
540 return -1;
541 }
542
543 if (rhs_count == 0) {
544 return pos;
545 }
546
547 if (lhs_utf8 && rhs_utf8) {
548 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
549 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
550 return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
551 } else if (!lhs_utf8 && !rhs_utf8) { // NOLINT(readability-else-after-return)
552 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
553 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
554 return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
555 } else if (rhs_utf8) {
556 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhs_count);
557 Span<const uint8_t> rhsSp(rhs->GetDataMUtf8(), rhs_count);
558 return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
559 } else { // NOLINT(readability-else-after-return)
560 Span<const uint8_t> lhsSp(lhs->GetDataMUtf8(), lhs_count);
561 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhs_count);
562 return ark::coretypes::LastIndexOf(lhsSp, rhsSp, pos);
563 }
564 }
565
566 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Length)567 bool String::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Length)
568 {
569 if (!compressedStringsEnabled_) {
570 return false;
571 }
572 bool isCompressed = true;
573 Span<const uint16_t> data(utf16Data, utf16Length);
574 for (uint32_t i = 0; i < utf16Length; i++) {
575 if (!IsASCIICharacter(data[i])) {
576 isCompressed = false;
577 break;
578 }
579 }
580 return isCompressed;
581 }
582
583 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length)584 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length)
585 {
586 if (!compressedStringsEnabled_) {
587 return false;
588 }
589 bool isCompressed = true;
590 Span<const uint8_t> data(mutf8Data, mutf8Length);
591 for (uint32_t i = 0; i < mutf8Length; i++) {
592 if (!IsASCIICharacter(data[i])) {
593 isCompressed = false;
594 break;
595 }
596 }
597 return isCompressed;
598 }
599
600 // static
CanBeCompressedMUtf8(const uint8_t * mutf8Data)601 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data)
602 {
603 return compressedStringsEnabled_ ? utf::IsMUtf8OnlySingleBytes(mutf8Data) : false;
604 }
605
606 /* static */
CanBeCompressedUtf16(const uint16_t * utf16Data,uint32_t utf16Length,uint16_t non)607 bool String::CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non)
608 {
609 if (!compressedStringsEnabled_) {
610 return false;
611 }
612 bool isCompressed = true;
613 Span<const uint16_t> data(utf16Data, utf16Length);
614 for (uint32_t i = 0; i < utf16Length; i++) {
615 if (!IsASCIICharacter(data[i]) && data[i] != non) {
616 isCompressed = false;
617 break;
618 }
619 }
620 return isCompressed;
621 }
622
623 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8Data,uint32_t mutf8Length,uint16_t non)624 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non)
625 {
626 if (!compressedStringsEnabled_) {
627 return false;
628 }
629 bool isCompressed = true;
630 Span<const uint8_t> data(mutf8Data, mutf8Length);
631 for (uint32_t i = 0; i < mutf8Length; i++) {
632 if (!IsASCIICharacter(data[i]) && data[i] != non) {
633 isCompressed = false;
634 break;
635 }
636 }
637 return isCompressed;
638 }
639
640 /* static */
StringsAreEqual(String * str1,String * str2)641 bool String::StringsAreEqual(String *str1, String *str2)
642 {
643 ASSERT(str1 != nullptr);
644 ASSERT(str2 != nullptr);
645
646 if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength())) {
647 return false;
648 }
649
650 if (str1->IsUtf16()) {
651 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
652 Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
653 return String::StringsAreEquals(data1, data2);
654 } else { // NOLINT(readability-else-after-return)
655 Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
656 Span<const uint8_t> data2(str2->GetDataMUtf8(), str1->GetLength());
657 return String::StringsAreEquals(data1, data2);
658 }
659 }
660
661 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length)662 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length)
663 {
664 ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data));
665 if (str1->GetLength() != utf16Length) {
666 return false;
667 }
668 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
669 return StringsAreEqualMUtf8(str1, mutf8Data, utf16Length, canBeCompressed);
670 }
671
672 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)673 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
674 {
675 bool result = true;
676 if (str1->GetLength() != utf16Length) {
677 result = false;
678 } else {
679 bool str1CanBeCompressed = !str1->IsUtf16();
680 bool data2CanBeCompressed = canBeCompressed;
681 if (str1CanBeCompressed != data2CanBeCompressed) {
682 return false;
683 }
684
685 ASSERT(str1CanBeCompressed == data2CanBeCompressed);
686 if (str1CanBeCompressed) {
687 Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
688 Span<const uint8_t> data2(mutf8Data, utf16Length);
689 result = String::StringsAreEquals(data1, data2);
690 } else {
691 result = IsMutf8EqualsUtf16(mutf8Data, str1->GetDataUtf16(), str1->GetLength());
692 }
693 }
694 return result;
695 }
696
697 /* static */
StringsAreEqualUtf16(String * str1,const uint16_t * utf16Data,uint32_t utf16DataLength)698 bool String::StringsAreEqualUtf16(String *str1, const uint16_t *utf16Data, uint32_t utf16DataLength)
699 {
700 bool result = true;
701 if (str1->GetLength() != utf16DataLength) {
702 result = false;
703 } else if (!str1->IsUtf16()) {
704 result = IsMutf8EqualsUtf16(str1->GetDataMUtf8(), str1->GetLength(), utf16Data, utf16DataLength);
705 } else {
706 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
707 Span<const uint16_t> data2(utf16Data, utf16DataLength);
708 result = String::StringsAreEquals(data1, data2);
709 }
710 return result;
711 }
712
713 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,uint32_t utf8DataLength,const uint16_t * utf16Data,uint32_t utf16DataLength)714 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8DataLength, const uint16_t *utf16Data,
715 uint32_t utf16DataLength)
716 {
717 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
718 auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
719 [[maybe_unused]] auto convertedStringSize =
720 utf::ConvertRegionMUtf8ToUtf16(utf8Data, tmpBuffer, utf8DataLength, utf16DataLength, 0);
721 ASSERT(convertedStringSize == utf16DataLength);
722
723 Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
724 Span<const uint16_t> data2(utf16Data, utf16DataLength);
725 bool result = String::StringsAreEquals(data1, data2);
726 allocator->Delete(tmpBuffer);
727 return result;
728 }
729
730 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8Data,const uint16_t * utf16Data,uint32_t utf16DataLength)731 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16DataLength)
732 {
733 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
734 auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16DataLength);
735 utf::ConvertMUtf8ToUtf16(utf8Data, utf::Mutf8Size(utf8Data), tmpBuffer);
736
737 Span<const uint16_t> data1(tmpBuffer, utf16DataLength);
738 Span<const uint16_t> data2(utf16Data, utf16DataLength);
739 bool result = String::StringsAreEquals(data1, data2);
740 allocator->Delete(tmpBuffer);
741 return result;
742 }
743
744 /* static */
745 template <typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)746 bool String::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
747 {
748 return 0 == std::memcmp(str1.Data(), str2.Data(), str1.SizeBytes());
749 }
750
ToCharArray(const LanguageContext & ctx)751 Array *String::ToCharArray(const LanguageContext &ctx)
752 {
753 // allocator may trig gc and move 'this', need to hold it
754 auto thread = ManagedThread::GetCurrent();
755 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
756 VMHandle<String> str(thread, this);
757 auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
758 Array *array = Array::Create(klass, GetLength());
759 if (array == nullptr) {
760 return nullptr;
761 }
762
763 if (str->IsUtf16()) {
764 Span<uint16_t> sp(str->GetDataUtf16(), str->GetLength());
765 for (size_t i = 0; i < sp.size(); i++) {
766 array->Set<uint16_t>(i, sp[i]);
767 }
768 } else {
769 Span<uint8_t> sp(str->GetDataMUtf8(), str->GetLength());
770 for (size_t i = 0; i < sp.size(); i++) {
771 array->Set<uint16_t>(i, sp[i]);
772 }
773 }
774
775 return array;
776 }
777
778 /* static */
GetChars(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx)779 Array *String::GetChars(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx)
780 {
781 // allocator may trig gc and move 'src', need to hold it
782 auto thread = ManagedThread::GetCurrent();
783 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
784 VMHandle<String> str(thread, src);
785 auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
786 Array *array = Array::Create(klass, utf16Length);
787 if (array == nullptr) {
788 return nullptr;
789 }
790
791 if (str->IsUtf16()) {
792 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
793 Span<uint16_t> sp(str->GetDataUtf16() + start, utf16Length);
794 for (size_t i = 0; i < sp.size(); i++) {
795 array->Set<uint16_t>(i, sp[i]);
796 }
797 } else {
798 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
799 Span<uint8_t> sp(str->GetDataMUtf8() + start, utf16Length);
800 for (size_t i = 0; i < sp.size(); i++) {
801 array->Set<uint16_t>(i, sp[i]);
802 }
803 }
804
805 return array;
806 }
807
// Computes the polynomial hash h = 31 * h + c over `size` elements of `data`
// (written as (h << 5) - h + c), using wrapping 32-bit unsigned arithmetic.
// The result is reinterpreted as int32_t. An empty range hashes to 0.
//
// The elements are walked directly through the pointer/size pair: the range object was previously
// declared inside an `#if defined(__GNUC__)` section, which left it undeclared (and the function
// uncompilable) on compilers that do not define __GNUC__.
template <class T>
static int32_t ComputeHashForData(const T *data, size_t size)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    for (size_t i = 0; i < size; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        hash = (hash << SHIFT) - hash + data[i];
    }
    return static_cast<int32_t>(hash);
}
824
// Hashes a zero-terminated byte sequence with h = (h << 5) - h + byte (i.e. 31 * h + byte),
// using wrapping 32-bit unsigned arithmetic; the terminator itself is not hashed.
static int32_t ComputeHashForMutf8(const uint8_t *mutf8Data)
{
    constexpr size_t SHIFT = 5;
    uint32_t acc = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cursor = mutf8Data; *cursor != '\0'; ++cursor) {
        acc = (acc << SHIFT) - acc + *cursor;
    }
    return static_cast<int32_t>(acc);
}
834
ComputeHashcode()835 uint32_t String::ComputeHashcode()
836 {
837 uint32_t hash;
838 if (compressedStringsEnabled_) {
839 if (!IsUtf16()) {
840 hash = static_cast<uint32_t>(ComputeHashForData(GetDataMUtf8(), GetLength()));
841 } else {
842 hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
843 }
844 } else {
845 ASSERT(static_cast<size_t>(GetLength()) < (std::numeric_limits<size_t>::max() >> 1U));
846 hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
847 }
848 return hash;
849 }
850
851 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length)852 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length)
853 {
854 bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data);
855 return ComputeHashcodeMutf8(mutf8Data, utf16Length, canBeCompressed);
856 }
857
858 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed)859 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length, bool canBeCompressed)
860 {
861 uint32_t hash;
862 if (canBeCompressed) {
863 hash = static_cast<uint32_t>(ComputeHashForMutf8(mutf8Data));
864 } else {
865 // NOTE(alovkov): optimize it without allocation a temporary buffer
866 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
867 auto tmpBuffer = allocator->AllocArray<uint16_t>(utf16Length);
868 utf::ConvertMUtf8ToUtf16(mutf8Data, utf::Mutf8Size(mutf8Data), tmpBuffer);
869 hash = static_cast<uint32_t>(ComputeHashForData(tmpBuffer, utf16Length));
870 allocator->Delete(tmpBuffer);
871 }
872 return hash;
873 }
874
875 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)876 uint32_t String::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
877 {
878 return ComputeHashForData(utf16Data, length);
879 }
880
881 /* static */
DoReplace(String * src,uint16_t oldC,uint16_t newC,const LanguageContext & ctx,PandaVM * vm)882 String *String::DoReplace(String *src, uint16_t oldC, uint16_t newC, const LanguageContext &ctx, PandaVM *vm)
883 {
884 ASSERT(src != nullptr);
885 auto length = static_cast<int32_t>(src->GetLength());
886 bool canBeCompressed = IsASCIICharacter(newC);
887 if (src->IsUtf16()) {
888 canBeCompressed = canBeCompressed && CanBeCompressedUtf16(src->GetDataUtf16(), length, oldC);
889 } else {
890 canBeCompressed = canBeCompressed && CanBeCompressedMUtf8(src->GetDataMUtf8(), length, oldC);
891 }
892
893 // allocator may trig gc and move src, need to hold it
894 auto thread = ManagedThread::GetCurrent();
895 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
896 VMHandle<String> srcHandle(thread, src);
897 auto string = AllocStringObject(length, canBeCompressed, ctx, vm);
898 if (string == nullptr) {
899 return nullptr;
900 }
901
902 // retrieve src after gc
903 src = srcHandle.GetPtr();
904 ASSERT(string->hashcode_ == 0);
905
906 // After replacing we should have a full barrier, so this writes should happen-before barrier
907 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
908 if (src->IsUtf16()) {
909 if (canBeCompressed) {
910 auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
911 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
912 std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataMUtf8(), replace);
913 } else {
914 auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
915 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
916 std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataUtf16(), replace);
917 }
918 } else {
919 if (canBeCompressed) {
920 auto replace = [oldC, newC](uint16_t c) { return static_cast<uint8_t>((oldC != c) ? c : newC); };
921 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
922 std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataMUtf8(), replace);
923 } else {
924 auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; };
925 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
926 std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataUtf16(), replace);
927 }
928 }
929 TSAN_ANNOTATE_IGNORE_WRITES_END();
930 // String is supposed to be a constant object, so all its data should be visible by all threads
931 arch::FullMemoryBarrier();
932 return string;
933 }
934
935 /* static */
FastSubString(String * src,uint32_t start,uint32_t utf16Length,const LanguageContext & ctx,PandaVM * vm)936 String *String::FastSubString(String *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx,
937 PandaVM *vm)
938 {
939 ASSERT(src != nullptr);
940 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
941 bool canBeCompressed = !src->IsUtf16() || CanBeCompressed(src->GetDataUtf16() + start, utf16Length);
942
943 // allocator may trig gc and move src, need to hold it
944 auto thread = ManagedThread::GetCurrent();
945 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
946 VMHandle<String> srcHandle(thread, src);
947 auto string = AllocStringObject(utf16Length, canBeCompressed, ctx, vm);
948 if (string == nullptr) {
949 return nullptr;
950 }
951
952 // retrieve src after gc
953 src = srcHandle.GetPtr();
954 ASSERT(string->hashcode_ == 0);
955
956 // After copying we should have a full barrier, so this writes should happen-before barrier
957 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
958 if (src->IsUtf16()) {
959 if (canBeCompressed) {
960 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
961 CopyUtf16AsMUtf8(src->GetDataUtf16() + start, string->GetDataMUtf8(), utf16Length);
962 } else {
963 memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()),
964 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
965 src->GetDataUtf16() + start, utf16Length << 1UL);
966 }
967 } else {
968 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
969 memcpy_s(string->GetDataMUtf8(), string->GetLength(), src->GetDataMUtf8() + start, utf16Length);
970 }
971 TSAN_ANNOTATE_IGNORE_WRITES_END();
972 // String is supposed to be a constant object, so all its data should be visible by all threads
973 arch::FullMemoryBarrier();
974 return string;
975 }
976
977 /* static */
Concat(String * string1,String * string2,const LanguageContext & ctx,PandaVM * vm)978 String *String::Concat(String *string1, String *string2, const LanguageContext &ctx, PandaVM *vm)
979 {
980 ASSERT(string1 != nullptr);
981 ASSERT(string2 != nullptr);
982 // allocator may trig gc and move src, need to hold it
983 auto thread = ManagedThread::GetCurrent();
984 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
985 VMHandle<String> str1Handle(thread, string1);
986 VMHandle<String> str2Handle(thread, string2);
987
988 uint32_t length1 = string1->GetLength();
989 uint32_t length2 = string2->GetLength();
990 uint32_t newLength = length1 + length2;
991 bool compressed = compressedStringsEnabled_ && (!string1->IsUtf16() && !string2->IsUtf16());
992 auto newString = AllocStringObject(newLength, compressed, ctx, vm);
993 if (UNLIKELY(newString == nullptr)) {
994 return nullptr;
995 }
996
997 ASSERT(newString->hashcode_ == 0);
998
999 // retrieve strings after gc
1000 string1 = str1Handle.GetPtr();
1001 string2 = str2Handle.GetPtr();
1002
1003 // After copying we should have a full barrier, so this writes should happen-before barrier
1004 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
1005 if (compressed) {
1006 Span<uint8_t> sp(newString->GetDataMUtf8(), newLength);
1007 memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataMUtf8(), length1);
1008 sp = sp.SubSpan(length1);
1009 memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataMUtf8(), length2);
1010 } else {
1011 Span<uint16_t> sp(newString->GetDataUtf16(), newLength);
1012 if (!string1->IsUtf16()) {
1013 for (uint32_t i = 0; i < length1; ++i) {
1014 sp[i] = string1->At<false>(i);
1015 }
1016 } else {
1017 memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataUtf16(), length1 << 1U);
1018 }
1019 sp = sp.SubSpan(length1);
1020 if (!string2->IsUtf16()) {
1021 for (uint32_t i = 0; i < length2; ++i) {
1022 sp[i] = string2->At<false>(i);
1023 }
1024 } else {
1025 memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataUtf16(), length2 << 1U);
1026 }
1027 }
1028 TSAN_ANNOTATE_IGNORE_WRITES_END();
1029 // String is supposed to be a constant object, so all its data should be visible by all threads
1030 arch::FullMemoryBarrier();
1031
1032 return newString;
1033 }
1034
1035 /* static */
AllocStringObject(size_t length,bool compressed,const LanguageContext & ctx,PandaVM * vm,bool movable,bool pinned)1036 String *String::AllocStringObject(size_t length, bool compressed, const LanguageContext &ctx, PandaVM *vm, bool movable,
1037 bool pinned)
1038 {
1039 ASSERT(vm != nullptr);
1040 auto *thread = ManagedThread::GetCurrent();
1041 auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
1042 size_t size = compressed ? String::ComputeSizeMUtf8(length) : String::ComputeSizeUtf16(length);
1043 auto string =
1044 movable
1045 ? reinterpret_cast<String *>(
1046 vm->GetHeapManager()->AllocateObject(stringClass, size, DEFAULT_ALIGNMENT, thread,
1047 mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT, pinned))
1048 : reinterpret_cast<String *>(vm->GetHeapManager()->AllocateNonMovableObject(
1049 // CC-OFFNXT(G.FMT.06) project code style
1050 stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT));
1051 if (string != nullptr) {
1052 // After setting length we should have a full barrier, so this write should happens-before barrier
1053 TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
1054 string->SetLength(length, compressed);
1055 string->SetHashcode(0);
1056 TSAN_ANNOTATE_IGNORE_WRITES_END();
1057 // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string
1058 // legth before it's set
1059 arch::FullMemoryBarrier();
1060 }
1061 return string;
1062 }
1063
1064 } // namespace ark::coretypes
1065