/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 
16 #include <cstddef>
17 #include <cstring>
18 #include <limits>
19 
20 #include "libpandabase/utils/hash.h"
21 #include "libpandabase/utils/span.h"
22 #include "runtime/arch/memory_helpers.h"
23 #include "runtime/include/coretypes/array.h"
24 #include "runtime/include/coretypes/string-inl.h"
25 #include "runtime/include/runtime.h"
26 #include "runtime/handle_base-inl.h"
27 #include "runtime/include/panda_vm.h"
28 
29 namespace panda::coretypes {
30 
// Switch consulted by the CanBeCompressed* helpers and by ComputeHashcode() in this file:
// while it is false every CanBeCompressed* helper returns false and ComputeHashcode()
// hashes the data as UTF-16. It is initialised to true here.
31 bool String::compressed_strings_enabled = true;
32 
33 /* static */
CreateFromString(String * str,const LanguageContext & ctx,PandaVM * vm)34 String *String::CreateFromString(String *str, const LanguageContext &ctx, PandaVM *vm)
35 {
36     ASSERT(str != nullptr);
37     // allocator may trig gc and move str, need to hold it
38     auto thread = ManagedThread::GetCurrent();
39     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
40     VMHandle<String> str_handle(thread, str);
41     auto string = AllocStringObject(str_handle->GetLength(), !str_handle->IsUtf16(), ctx, vm);
42     if (string == nullptr) {
43         return nullptr;
44     }
45 
46     // retrive str after gc
47     str = str_handle.GetPtr();
48     string->length_ = str->length_;
49     string->hashcode_ = str->hashcode_;
50 
51     uint32_t length = str->GetLength();
52     // After memcpy we should have a full barrier, so this writes should happen-before barrier
53     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
54     if (str->IsUtf16()) {
55         memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), str->GetDataUtf16(),
56                  ComputeDataSizeUtf16(length));
57     } else {
58         memcpy_s(string->GetDataMUtf8(), string->GetLength(), str->GetDataMUtf8(), length);
59     }
60     TSAN_ANNOTATE_IGNORE_WRITES_END();
61     // String is supposed to be a constant object, so all its data should be visible by all threads
62     arch::FullMemoryBarrier();
63 
64     return string;
65 }
66 
67 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,size_t mutf8_length,uint32_t utf16_length,bool can_be_compressed,const LanguageContext & ctx,PandaVM * vm,bool movable)68 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, size_t mutf8_length, uint32_t utf16_length,
69                                 bool can_be_compressed, const LanguageContext &ctx, PandaVM *vm, bool movable)
70 {
71     auto string = AllocStringObject(utf16_length, can_be_compressed, ctx, vm, movable);
72     if (string == nullptr) {
73         return nullptr;
74     }
75 
76     ASSERT(string->hashcode_ == 0);
77     // After copying we should have a full barrier, so this writes should happen-before barrier
78     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
79     if (can_be_compressed) {
80         memcpy_s(string->GetDataMUtf8(), string->GetLength(), mutf8_data, utf16_length);
81     } else {
82         utf::ConvertMUtf8ToUtf16(mutf8_data, mutf8_length, string->GetDataUtf16());
83     }
84     TSAN_ANNOTATE_IGNORE_WRITES_END();
85     // String is supposed to be a constant object, so all its data should be visible by all threads
86     arch::FullMemoryBarrier();
87     return string;
88 }
89 
90 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,uint32_t utf16_length,const LanguageContext & ctx,PandaVM * vm,bool movable)91 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, uint32_t utf16_length, const LanguageContext &ctx,
92                                 PandaVM *vm, bool movable)
93 {
94     bool can_be_compressed = CanBeCompressedMUtf8(mutf8_data);
95     return CreateFromMUtf8(mutf8_data, utf::Mutf8Size(mutf8_data), utf16_length, can_be_compressed, ctx, vm, movable);
96 }
97 
98 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,uint32_t utf16_length,bool can_be_compressed,const LanguageContext & ctx,PandaVM * vm,bool movable)99 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, uint32_t utf16_length, bool can_be_compressed,
100                                 const LanguageContext &ctx, PandaVM *vm, bool movable)
101 {
102     return CreateFromMUtf8(mutf8_data, utf::Mutf8Size(mutf8_data), utf16_length, can_be_compressed, ctx, vm, movable);
103 }
104 
105 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,const LanguageContext & ctx,PandaVM * vm,bool movable)106 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, const LanguageContext &ctx, PandaVM *vm, bool movable)
107 {
108     size_t mutf8_length = utf::Mutf8Size(mutf8_data);
109     size_t utf16_length = utf::MUtf8ToUtf16Size(mutf8_data, mutf8_length);
110     bool can_be_compressed = CanBeCompressedMUtf8(mutf8_data);
111     return CreateFromMUtf8(mutf8_data, mutf8_length, utf16_length, can_be_compressed, ctx, vm, movable);
112 }
113 
114 /* static */
CreateFromUtf16(const uint16_t * utf16_data,uint32_t utf16_length,const LanguageContext & ctx,PandaVM * vm,bool movable)115 String *String::CreateFromUtf16(const uint16_t *utf16_data, uint32_t utf16_length, const LanguageContext &ctx,
116                                 PandaVM *vm, bool movable)
117 {
118     bool can_be_compressed = CanBeCompressed(utf16_data, utf16_length);
119     auto string = AllocStringObject(utf16_length, can_be_compressed, ctx, vm, movable);
120     if (string == nullptr) {
121         return nullptr;
122     }
123 
124     ASSERT(string->hashcode_ == 0);
125     // After copying we should have a full barrier, so this writes should happen-before barrier
126     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
127     if (can_be_compressed) {
128         CopyUtf16AsMUtf8(utf16_data, string->GetDataMUtf8(), utf16_length);
129     } else {
130         memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), utf16_data, utf16_length << 1UL);
131     }
132     TSAN_ANNOTATE_IGNORE_WRITES_END();
133     // String is supposed to be a constant object, so all its data should be visible by all threads
134     arch::FullMemoryBarrier();
135     return string;
136 }
137 
138 /* static */
CreateEmptyString(const LanguageContext & ctx,PandaVM * vm)139 String *String::CreateEmptyString(const LanguageContext &ctx, PandaVM *vm)
140 {
141     uint16_t data = 0;
142     return CreateFromUtf16(&data, 0, ctx, vm);
143 }
144 
145 /* static */
CopyUtf16AsMUtf8(const uint16_t * utf16_from,uint8_t * mutf8_to,uint32_t utf16_length)146 void String::CopyUtf16AsMUtf8(const uint16_t *utf16_from, uint8_t *mutf8_to, uint32_t utf16_length)
147 {
148     Span<const uint16_t> from(utf16_from, utf16_length);
149     Span<uint8_t> to(mutf8_to, utf16_length);
150     for (uint32_t i = 0; i < utf16_length; i++) {
151         to[i] = from[i];
152     }
153 }
154 
155 // static
CreateNewStringFromChars(uint32_t offset,uint32_t length,Array * chararray,const LanguageContext & ctx,PandaVM * vm)156 String *String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array *chararray, const LanguageContext &ctx,
157                                          PandaVM *vm)
158 {
159     ASSERT(chararray != nullptr);
160     // allocator may trig gc and move array, need to hold it
161     auto thread = ManagedThread::GetCurrent();
162     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
163     VMHandle<Array> array_handle(thread, chararray);
164 
165     // NOLINTNEXTLINE(readability-identifier-naming)
166     const uint16_t *src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(chararray->GetData()) + (offset << 1UL));
167     bool can_be_compressed = CanBeCompressed(src, length);
168     auto string = AllocStringObject(length, can_be_compressed, ctx, vm);
169     if (string == nullptr) {
170         return nullptr;
171     }
172 
173     // retrieve src since gc may move it
174     src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(array_handle->GetData()) + (offset << 1UL));
175     ASSERT(string->hashcode_ == 0);
176     // After copying we should have a full barrier, so this writes should happen-before barrier
177     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
178     if (can_be_compressed) {
179         CopyUtf16AsMUtf8(src, string->GetDataMUtf8(), length);
180     } else {
181         memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), src, length << 1UL);
182     }
183     TSAN_ANNOTATE_IGNORE_WRITES_END();
184     // String is supposed to be a constant object, so all its data should be visible by all threads
185     arch::FullMemoryBarrier();
186     return string;
187 }
188 
189 // static
CreateNewStringFromBytes(uint32_t offset,uint32_t length,uint32_t high_byte,Array * bytearray,const LanguageContext & ctx,PandaVM * vm)190 String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint32_t high_byte, Array *bytearray,
191                                          const LanguageContext &ctx, PandaVM *vm)
192 {
193     ASSERT(length != 0);
194     ASSERT(bytearray != nullptr);
195     // allocator may trig gc and move array, need to hold it
196     auto thread = ManagedThread::GetCurrent();
197     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
198     VMHandle<Array> array_handle(thread, bytearray);
199 
200     constexpr size_t BYTE_MASK = 0xFF;
201 
202     // NOLINTNEXTLINE(readability-identifier-naming)
203     const uint8_t *src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(bytearray->GetData()) + offset);
204     high_byte &= BYTE_MASK;
205     bool can_be_compressed = CanBeCompressedMUtf8(src, length) && (high_byte == 0);
206     auto string = AllocStringObject(length, can_be_compressed, ctx, vm);
207     if (string == nullptr) {
208         return nullptr;
209     }
210 
211     // retrieve src since gc may move it
212     src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(array_handle->GetData()) + offset);
213     ASSERT(string->hashcode_ == 0);
214     // After copying we should have a full barrier, so this writes should happen-before barrier
215     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
216     if (can_be_compressed) {
217         Span<const uint8_t> from(src, length);
218         Span<uint8_t> to(string->GetDataMUtf8(), length);
219         for (uint32_t i = 0; i < length; ++i) {
220             to[i] = (from[i] & BYTE_MASK);
221         }
222     } else {
223         Span<const uint8_t> from(src, length);
224         Span<uint16_t> to(string->GetDataUtf16(), length);
225         for (uint32_t i = 0; i < length; ++i) {
226             to[i] = (high_byte << 8U) + (from[i] & BYTE_MASK);
227         }
228     }
229     TSAN_ANNOTATE_IGNORE_WRITES_END();
230 
231     // String is supposed to be a constant object, so all its data should be visible by all threads
232     arch::FullMemoryBarrier();
233     return string;
234 }
235 
236 template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhs_sp,Span<T2> & rhs_sp,int32_t count)237 int32_t CompareStringSpan(Span<T1> &lhs_sp, Span<T2> &rhs_sp, int32_t count)
238 {
239     for (int32_t i = 0; i < count; ++i) {
240         int32_t char_diff = static_cast<int32_t>(lhs_sp[i]) - static_cast<int32_t>(rhs_sp[i]);
241         if (char_diff != 0) {
242             return char_diff;
243         }
244     }
245     return 0;
246 }
247 
/**
 * Compares the first @p min_count elements of two character arrays of the same element type.
 *
 * Equal prefixes are skipped one machine word (sizeof(size_t) bytes) at a time. Once a word
 * differs, or fewer bytes than a word remain, the elements are compared one by one.
 * Words are loaded with std::memcpy instead of dereferencing a reinterpret_cast'ed size_t
 * pointer, so the function neither requires size_t alignment of the data nor violates strict
 * aliasing. Byte offsets are kept in size_t so they cannot overflow for large counts.
 *
 * @param lstr_pt    left-hand data, at least @p min_count elements
 * @param rstr_pt    right-hand data, at least @p min_count elements
 * @param min_count  number of elements to compare; values <= 0 compare nothing
 * @return 0 if all compared elements are equal, otherwise lhs - rhs for the first pair that differs
 */
template <typename T>
int32_t CompareBytesBlock(T *lstr_pt, T *rstr_pt, int32_t min_count)
{
    constexpr size_t BLOCK_BYTES = sizeof(size_t);
    static_assert(BLOCK_BYTES >= sizeof(T));
    static_assert(BLOCK_BYTES % sizeof(T) == 0);
    if (min_count <= 0) {
        return 0;
    }

    const size_t total_bytes = static_cast<size_t>(min_count) * sizeof(T);
    auto lhs_bytes = reinterpret_cast<const unsigned char *>(lstr_pt);
    auto rhs_bytes = reinterpret_cast<const unsigned char *>(rstr_pt);
    size_t cur_byte_pos = 0;
    while (cur_byte_pos + BLOCK_BYTES <= total_bytes) {
        size_t lhs_block = 0;
        size_t rhs_block = 0;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&lhs_block, lhs_bytes + cur_byte_pos, BLOCK_BYTES);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        std::memcpy(&rhs_block, rhs_bytes + cur_byte_pos, BLOCK_BYTES);
        if (lhs_block != rhs_block) {
            break;
        }
        cur_byte_pos += BLOCK_BYTES;
    }

    // Resume element-wise at the first element of the word that differed (or of the tail)
    for (auto i = static_cast<int32_t>(cur_byte_pos / sizeof(T)); i < min_count; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        int32_t char_diff = static_cast<int32_t>(lstr_pt[i]) - static_cast<int32_t>(rstr_pt[i]);
        if (char_diff != 0) {
            return char_diff;
        }
    }

    return 0;
}
278 
Compare(String * rstr)279 int32_t String::Compare(String *rstr)
280 {
281     String *lstr = this;
282     if (lstr == rstr) {
283         return 0;
284     }
285     ASSERT(lstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
286     ASSERT(rstr->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
287     auto lstr_leng = static_cast<int32_t>(lstr->GetLength());
288     auto rstr_leng = static_cast<int32_t>(rstr->GetLength());
289     int32_t leng_ret = lstr_leng - rstr_leng;
290     int32_t min_count = (leng_ret < 0) ? lstr_leng : rstr_leng;
291     bool lstr_isUtf16 = lstr->IsUtf16();
292     bool rstr_isUtf16 = rstr->IsUtf16();
293     if (!lstr_isUtf16 && !rstr_isUtf16) {
294         int32_t char_diff = CompareBytesBlock(lstr->GetDataMUtf8(), rstr->GetDataMUtf8(), min_count);
295         if (char_diff != 0) {
296             return char_diff;
297         }
298     } else if (!lstr_isUtf16) {
299         Span<uint8_t> lhs_sp(lstr->GetDataMUtf8(), lstr_leng);
300         Span<uint16_t> rhs_sp(rstr->GetDataUtf16(), rstr_leng);
301         int32_t char_diff = CompareStringSpan(lhs_sp, rhs_sp, min_count);
302         if (char_diff != 0) {
303             return char_diff;
304         }
305     } else if (!rstr_isUtf16) {
306         Span<uint16_t> lhs_sp(lstr->GetDataUtf16(), lstr_leng);
307         Span<uint8_t> rhs_sp(rstr->GetDataMUtf8(), rstr_leng);
308         int32_t char_diff = CompareStringSpan(lhs_sp, rhs_sp, min_count);
309         if (char_diff != 0) {
310             return char_diff;
311         }
312     } else {
313         int32_t char_diff = CompareBytesBlock(lstr->GetDataUtf16(), rstr->GetDataUtf16(), min_count);
314         if (char_diff != 0) {
315             return char_diff;
316         }
317     }
318     return leng_ret;
319 }
320 
321 /* static */
322 template <typename T1, typename T2>
IndexOf(Span<const T1> & lhs_sp,Span<const T2> & rhs_sp,int32_t pos,int32_t max)323 int32_t String::IndexOf(Span<const T1> &lhs_sp, Span<const T2> &rhs_sp, int32_t pos, int32_t max)
324 {
325     auto first = static_cast<int32_t>(rhs_sp[0]);
326     for (int32_t i = pos; i <= max; i++) {
327         if (static_cast<int32_t>(lhs_sp[i]) != first) {
328             i++;
329             while (i <= max && static_cast<int32_t>(lhs_sp[i]) != first) {
330                 i++;
331             }
332         }
333         /* Found first character, now look at the rest of rhs_sp */
334         if (i <= max) {
335             int j = i + 1;
336             int end = j + rhs_sp.size() - 1;
337 
338             for (int k = 1; j < end && static_cast<int32_t>(lhs_sp[j]) == static_cast<int32_t>(rhs_sp[k]); j++, k++) {
339             }
340             if (j == end) {
341                 /* Found whole string. */
342                 return i;
343             }
344         }
345     }
346     return -1;
347 }
348 
IndexOf(String * rhs,int32_t pos)349 int32_t String::IndexOf(String *rhs, int32_t pos)
350 {
351     if (rhs == nullptr) {
352         return -1;
353     }
354     String *lhs = this;
355     ASSERT(lhs->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
356     ASSERT(rhs->GetLength() <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
357     auto lhs_count = static_cast<int32_t>(lhs->GetLength());
358     auto rhs_count = static_cast<int32_t>(rhs->GetLength());
359 
360     if (rhs_count == 0) {
361         return pos;
362     }
363 
364     if (pos >= lhs_count) {
365         return -1;
366     }
367 
368     if (pos < 0) {
369         pos = 0;
370     }
371 
372     int32_t max = lhs_count - rhs_count;
373     if (rhs->IsMUtf8() && lhs->IsMUtf8()) {
374         Span<const uint8_t> lhs_sp(lhs->GetDataMUtf8(), lhs_count);
375         Span<const uint8_t> rhs_sp(rhs->GetDataMUtf8(), rhs_count);
376         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
377     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
378         Span<const uint16_t> lhs_sp(lhs->GetDataUtf16(), lhs_count);
379         Span<const uint16_t> rhs_sp(rhs->GetDataUtf16(), rhs_count);
380         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
381     } else if (rhs->IsUtf16()) {
382         Span<const uint8_t> lhs_sp(lhs->GetDataMUtf8(), lhs_count);
383         Span<const uint16_t> rhs_sp(rhs->GetDataUtf16(), rhs_count);
384         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
385     } else {  // NOLINT(readability-else-after-return)
386         Span<const uint16_t> lhs_sp(lhs->GetDataUtf16(), lhs_count);
387         Span<const uint8_t> rhs_sp(rhs->GetDataMUtf8(), rhs_count);
388         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
389     }
390 
391     return -1;
392 }
393 
394 /* static */
CanBeCompressed(const uint16_t * utf16_data,uint32_t utf16_length)395 bool String::CanBeCompressed(const uint16_t *utf16_data, uint32_t utf16_length)
396 {
397     if (!compressed_strings_enabled) {
398         return false;
399     }
400     bool is_compressed = true;
401     Span<const uint16_t> data(utf16_data, utf16_length);
402     for (uint32_t i = 0; i < utf16_length; i++) {
403         if (!IsASCIICharacter(data[i])) {
404             is_compressed = false;
405             break;
406         }
407     }
408     return is_compressed;
409 }
410 
411 // static
CanBeCompressedMUtf8(const uint8_t * mutf8_data,uint32_t mutf8_length)412 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8_data, uint32_t mutf8_length)
413 {
414     if (!compressed_strings_enabled) {
415         return false;
416     }
417     bool is_compressed = true;
418     Span<const uint8_t> data(mutf8_data, mutf8_length);
419     for (uint32_t i = 0; i < mutf8_length; i++) {
420         if (!IsASCIICharacter(data[i])) {
421             is_compressed = false;
422             break;
423         }
424     }
425     return is_compressed;
426 }
427 
428 // static
CanBeCompressedMUtf8(const uint8_t * mutf8_data)429 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8_data)
430 {
431     return compressed_strings_enabled ? utf::IsMUtf8OnlySingleBytes(mutf8_data) : false;
432 }
433 
434 /* static */
CanBeCompressedUtf16(const uint16_t * utf16_data,uint32_t utf16_length,uint16_t non)435 bool String::CanBeCompressedUtf16(const uint16_t *utf16_data, uint32_t utf16_length, uint16_t non)
436 {
437     if (!compressed_strings_enabled) {
438         return false;
439     }
440     bool is_compressed = true;
441     Span<const uint16_t> data(utf16_data, utf16_length);
442     for (uint32_t i = 0; i < utf16_length; i++) {
443         if (!IsASCIICharacter(data[i]) && data[i] != non) {
444             is_compressed = false;
445             break;
446         }
447     }
448     return is_compressed;
449 }
450 
451 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8_data,uint32_t mutf8_length,uint16_t non)452 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8_data, uint32_t mutf8_length, uint16_t non)
453 {
454     if (!compressed_strings_enabled) {
455         return false;
456     }
457     bool is_compressed = true;
458     Span<const uint8_t> data(mutf8_data, mutf8_length);
459     for (uint32_t i = 0; i < mutf8_length; i++) {
460         if (!IsASCIICharacter(data[i]) && data[i] != non) {
461             is_compressed = false;
462             break;
463         }
464     }
465     return is_compressed;
466 }
467 
468 /* static */
StringsAreEqual(String * str1,String * str2)469 bool String::StringsAreEqual(String *str1, String *str2)
470 {
471     ASSERT(str1 != nullptr);
472     ASSERT(str2 != nullptr);
473 
474     if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength())) {
475         return false;
476     }
477 
478     if (str1->IsUtf16()) {
479         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
480         Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
481         return String::StringsAreEquals(data1, data2);
482     } else {  // NOLINT(readability-else-after-return)
483         Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
484         Span<const uint8_t> data2(str2->GetDataMUtf8(), str1->GetLength());
485         return String::StringsAreEquals(data1, data2);
486     }
487 }
488 
489 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8_data,uint32_t utf16_length)490 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8_data, uint32_t utf16_length)
491 {
492     if (str1->GetLength() != utf16_length) {
493         return false;
494     }
495     bool can_be_compressed = CanBeCompressedMUtf8(mutf8_data);
496     return StringsAreEqualMUtf8(str1, mutf8_data, utf16_length, can_be_compressed);
497 }
498 
499 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8_data,uint32_t utf16_length,bool can_be_compressed)500 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8_data, uint32_t utf16_length,
501                                   bool can_be_compressed)
502 {
503     bool result = true;
504     if (str1->GetLength() != utf16_length) {
505         result = false;
506     } else {
507         bool str1_can_be_compressed = !str1->IsUtf16();
508         bool data2_can_be_compressed = can_be_compressed;
509         if (str1_can_be_compressed != data2_can_be_compressed) {
510             return false;
511         }
512 
513         ASSERT(str1_can_be_compressed == data2_can_be_compressed);
514         if (str1_can_be_compressed) {
515             Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
516             Span<const uint8_t> data2(mutf8_data, utf16_length);
517             result = String::StringsAreEquals(data1, data2);
518         } else {
519             result = IsMutf8EqualsUtf16(mutf8_data, str1->GetDataUtf16(), str1->GetLength());
520         }
521     }
522     return result;
523 }
524 
525 /* static */
StringsAreEqualUtf16(String * str1,const uint16_t * utf16_data,uint32_t utf16_data_length)526 bool String::StringsAreEqualUtf16(String *str1, const uint16_t *utf16_data, uint32_t utf16_data_length)
527 {
528     bool result = true;
529     if (str1->GetLength() != utf16_data_length) {
530         result = false;
531     } else if (!str1->IsUtf16()) {
532         result = IsMutf8EqualsUtf16(str1->GetDataMUtf8(), str1->GetLength(), utf16_data, utf16_data_length);
533     } else {
534         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
535         Span<const uint16_t> data2(utf16_data, utf16_data_length);
536         result = String::StringsAreEquals(data1, data2);
537     }
538     return result;
539 }
540 
541 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8_data,uint32_t utf8_data_length,const uint16_t * utf16_data,uint32_t utf16_data_length)542 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8_data, uint32_t utf8_data_length, const uint16_t *utf16_data,
543                                 uint32_t utf16_data_length)
544 {
545     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
546     auto tmp_buffer = allocator->AllocArray<uint16_t>(utf16_data_length);
547     [[maybe_unused]] auto converted_string_size =
548         utf::ConvertRegionMUtf8ToUtf16(utf8_data, tmp_buffer, utf8_data_length, utf16_data_length, 0);
549     ASSERT(converted_string_size == utf16_data_length);
550 
551     Span<const uint16_t> data1(tmp_buffer, utf16_data_length);
552     Span<const uint16_t> data2(utf16_data, utf16_data_length);
553     bool result = String::StringsAreEquals(data1, data2);
554     allocator->Delete(tmp_buffer);
555     return result;
556 }
557 
558 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8_data,const uint16_t * utf16_data,uint32_t utf16_data_length)559 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8_data, const uint16_t *utf16_data, uint32_t utf16_data_length)
560 {
561     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
562     auto tmp_buffer = allocator->AllocArray<uint16_t>(utf16_data_length);
563     utf::ConvertMUtf8ToUtf16(utf8_data, utf::Mutf8Size(utf8_data), tmp_buffer);
564 
565     Span<const uint16_t> data1(tmp_buffer, utf16_data_length);
566     Span<const uint16_t> data2(utf16_data, utf16_data_length);
567     bool result = String::StringsAreEquals(data1, data2);
568     allocator->Delete(tmp_buffer);
569     return result;
570 }
571 
572 /* static */
573 template <typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)574 bool String::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
575 {
576     return std::memcmp(str1.Data(), str2.Data(), str1.SizeBytes()) == 0;
577 }
578 
ToCharArray(const LanguageContext & ctx)579 Array *String::ToCharArray(const LanguageContext &ctx)
580 {
581     // allocator may trig gc and move 'this', need to hold it
582     auto thread = ManagedThread::GetCurrent();
583     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
584     VMHandle<String> str(thread, this);
585     auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
586     Array *array = Array::Create(klass, GetLength());
587     if (array == nullptr) {
588         return nullptr;
589     }
590 
591     if (str->IsUtf16()) {
592         Span<uint16_t> sp(str->GetDataUtf16(), str->GetLength());
593         for (size_t i = 0; i < sp.size(); i++) {
594             array->Set<uint16_t>(i, sp[i]);
595         }
596     } else {
597         Span<uint8_t> sp(str->GetDataMUtf8(), str->GetLength());
598         for (size_t i = 0; i < sp.size(); i++) {
599             array->Set<uint16_t>(i, sp[i]);
600         }
601     }
602 
603     return array;
604 }
605 
606 template <class T>
ComputeHashForData(const T * data,size_t size)607 static int32_t ComputeHashForData(const T *data, size_t size)
608 {
609     uint32_t hash = 0;
610 #if defined(__GNUC__)
611 #pragma GCC diagnostic push
612 #pragma GCC diagnostic ignored "-Wignored-attributes"
613     Span<const T> sp(data, size);
614 #pragma GCC diagnostic pop
615 #endif
616     for (auto c : sp) {
617         constexpr size_t SHIFT = 5;
618         hash = (hash << SHIFT) - hash + c;
619     }
620     return static_cast<int32_t>(hash);
621 }
622 
/**
 * Computes the hash of a zero-terminated byte sequence: starting from 0, every byte c before
 * the terminator updates the hash to hash * 31 + c in 32-bit unsigned arithmetic.
 *
 * @param mutf8_data  zero-terminated bytes to hash
 * @return the hash reinterpreted as int32_t
 */
static int32_t ComputeHashForMutf8(const uint8_t *mutf8_data)
{
    // (hash << 5) - hash, written as a multiplication
    constexpr uint32_t MULTIPLIER = 31;
    uint32_t hash = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cur = mutf8_data; *cur != '\0'; ++cur) {
        hash = hash * MULTIPLIER + *cur;
    }
    return static_cast<int32_t>(hash);
}
632 
ComputeHashcode()633 uint32_t String::ComputeHashcode()
634 {
635     uint32_t hash;
636     if (compressed_strings_enabled) {
637         if (!IsUtf16()) {
638             hash = static_cast<uint32_t>(ComputeHashForData(GetDataMUtf8(), GetLength()));
639         } else {
640             hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
641         }
642     } else {
643         ASSERT(static_cast<size_t>(GetLength()) > (std::numeric_limits<size_t>::max() >> 1U));
644         hash = static_cast<uint32_t>(ComputeHashForData(GetDataUtf16(), GetLength()));
645     }
646     return hash;
647 }
648 
649 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8_data,uint32_t utf16_length)650 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8_data, uint32_t utf16_length)
651 {
652     bool can_be_compressed = CanBeCompressedMUtf8(mutf8_data);
653     return ComputeHashcodeMutf8(mutf8_data, utf16_length, can_be_compressed);
654 }
655 
656 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8_data,uint32_t utf16_length,bool can_be_compressed)657 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8_data, uint32_t utf16_length, bool can_be_compressed)
658 {
659     uint32_t hash;
660     if (can_be_compressed) {
661         hash = static_cast<uint32_t>(ComputeHashForMutf8(mutf8_data));
662     } else {
663         // TODO(alovkov): optimize it without allocation a temporary buffer
664         auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
665         auto tmp_buffer = allocator->AllocArray<uint16_t>(utf16_length);
666         utf::ConvertMUtf8ToUtf16(mutf8_data, utf::Mutf8Size(mutf8_data), tmp_buffer);
667         hash = static_cast<uint32_t>(ComputeHashForData(tmp_buffer, utf16_length));
668         allocator->Delete(tmp_buffer);
669     }
670     return hash;
671 }
672 
673 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16_data,uint32_t length)674 uint32_t String::ComputeHashcodeUtf16(const uint16_t *utf16_data, uint32_t length)
675 {
676     return ComputeHashForData(utf16_data, length);
677 }
678 
679 /* static */
DoReplace(String * src,uint16_t old_c,uint16_t new_c,const LanguageContext & ctx,PandaVM * vm)680 String *String::DoReplace(String *src, uint16_t old_c, uint16_t new_c, const LanguageContext &ctx, PandaVM *vm)
681 {
682     ASSERT(src != nullptr);
683     auto length = static_cast<int32_t>(src->GetLength());
684     bool can_be_compressed = IsASCIICharacter(new_c);
685     if (src->IsUtf16()) {
686         can_be_compressed = can_be_compressed && CanBeCompressedUtf16(src->GetDataUtf16(), length, old_c);
687     } else {
688         can_be_compressed = can_be_compressed && CanBeCompressedMUtf8(src->GetDataMUtf8(), length, old_c);
689     }
690 
691     // allocator may trig gc and move src, need to hold it
692     auto thread = ManagedThread::GetCurrent();
693     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
694     VMHandle<String> src_handle(thread, src);
695     auto string = AllocStringObject(length, can_be_compressed, ctx, vm);
696     if (string == nullptr) {
697         return nullptr;
698     }
699 
700     // retrieve src after gc
701     src = src_handle.GetPtr();
702     ASSERT(string->hashcode_ == 0);
703 
704     // After replacing we should have a full barrier, so this writes should happen-before barrier
705     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
706     if (src->IsUtf16()) {
707         if (can_be_compressed) {
708             auto replace = [old_c, new_c](uint16_t c) { return static_cast<uint8_t>((old_c != c) ? c : new_c); };
709             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
710             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataMUtf8(), replace);
711         } else {
712             auto replace = [old_c, new_c](uint16_t c) { return (old_c != c) ? c : new_c; };
713             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
714             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataUtf16(), replace);
715         }
716     } else {
717         if (can_be_compressed) {
718             auto replace = [old_c, new_c](uint16_t c) { return static_cast<uint8_t>((old_c != c) ? c : new_c); };
719             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
720             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataMUtf8(), replace);
721         } else {
722             auto replace = [old_c, new_c](uint16_t c) { return (old_c != c) ? c : new_c; };
723             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
724             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataUtf16(), replace);
725         }
726     }
727     TSAN_ANNOTATE_IGNORE_WRITES_END();
728     // String is supposed to be a constant object, so all its data should be visible by all threads
729     arch::FullMemoryBarrier();
730     return string;
731 }
732 
733 /* static */
FastSubString(String * src,uint32_t start,uint32_t utf16_length,const LanguageContext & ctx,PandaVM * vm)734 String *String::FastSubString(String *src, uint32_t start, uint32_t utf16_length, const LanguageContext &ctx,
735                               PandaVM *vm)
736 {
737     ASSERT(src != nullptr);
738     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
739     bool can_be_compressed = !src->IsUtf16() || CanBeCompressed(src->GetDataUtf16() + start, utf16_length);
740 
741     // allocator may trig gc and move src, need to hold it
742     auto thread = ManagedThread::GetCurrent();
743     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
744     VMHandle<String> src_handle(thread, src);
745     auto string = AllocStringObject(utf16_length, can_be_compressed, ctx, vm);
746     if (string == nullptr) {
747         return nullptr;
748     }
749 
750     // retrieve src after gc
751     src = src_handle.GetPtr();
752     ASSERT(string->hashcode_ == 0);
753 
754     // After copying we should have a full barrier, so this writes should happen-before barrier
755     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
756     if (src->IsUtf16()) {
757         if (can_be_compressed) {
758             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
759             CopyUtf16AsMUtf8(src->GetDataUtf16() + start, string->GetDataMUtf8(), utf16_length);
760         } else {
761             memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()),
762                      // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
763                      src->GetDataUtf16() + start, utf16_length << 1UL);
764         }
765     } else {
766         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
767         memcpy_s(string->GetDataMUtf8(), string->GetLength(), src->GetDataMUtf8() + start, utf16_length);
768     }
769     TSAN_ANNOTATE_IGNORE_WRITES_END();
770     // String is supposed to be a constant object, so all its data should be visible by all threads
771     arch::FullMemoryBarrier();
772     return string;
773 }
774 
775 /* static */
Concat(String * string1,String * string2,const LanguageContext & ctx,PandaVM * vm)776 String *String::Concat(String *string1, String *string2, const LanguageContext &ctx, PandaVM *vm)
777 {
778     ASSERT(string1 != nullptr);
779     ASSERT(string2 != nullptr);
780     // allocator may trig gc and move src, need to hold it
781     auto thread = ManagedThread::GetCurrent();
782     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
783     VMHandle<String> str1_handle(thread, string1);
784     VMHandle<String> str2_handle(thread, string2);
785 
786     uint32_t length1 = string1->GetLength();
787     uint32_t length2 = string2->GetLength();
788     uint32_t new_length = length1 + length2;
789     bool compressed = compressed_strings_enabled && (!string1->IsUtf16() && !string2->IsUtf16());
790     auto new_string = AllocStringObject(new_length, compressed, ctx, vm);
791     if (UNLIKELY(new_string == nullptr)) {
792         return nullptr;
793     }
794 
795     ASSERT(new_string->hashcode_ == 0);
796 
797     // retrieve strings after gc
798     string1 = str1_handle.GetPtr();
799     string2 = str2_handle.GetPtr();
800 
801     // After copying we should have a full barrier, so this writes should happen-before barrier
802     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
803     if (compressed) {
804         Span<uint8_t> sp(new_string->GetDataMUtf8(), new_length);
805         memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataMUtf8(), length1);
806         sp = sp.SubSpan(length1);
807         memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataMUtf8(), length2);
808     } else {
809         Span<uint16_t> sp(new_string->GetDataUtf16(), new_length);
810         if (!string1->IsUtf16()) {
811             for (uint32_t i = 0; i < length1; ++i) {
812                 sp[i] = string1->At<false>(i);
813             }
814         } else {
815             memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataUtf16(), length1 << 1U);
816         }
817         sp = sp.SubSpan(length1);
818         if (!string2->IsUtf16()) {
819             for (uint32_t i = 0; i < length2; ++i) {
820                 sp[i] = string2->At<false>(i);
821             }
822         } else {
823             memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataUtf16(), length2 << 1U);
824         }
825     }
826     TSAN_ANNOTATE_IGNORE_WRITES_END();
827     // String is supposed to be a constant object, so all its data should be visible by all threads
828     arch::FullMemoryBarrier();
829 
830     return new_string;
831 }
832 
833 /* static */
AllocStringObject(size_t length,bool compressed,const LanguageContext & ctx,PandaVM * vm,bool movable)834 String *String::AllocStringObject(size_t length, bool compressed, const LanguageContext &ctx, PandaVM *vm, bool movable)
835 {
836     ASSERT(vm != nullptr);
837     auto *string_class = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
838     size_t size = compressed ? String::ComputeSizeMUtf8(length) : String::ComputeSizeUtf16(length);
839     auto string = movable
840                       ? reinterpret_cast<String *>(vm->GetHeapManager()->AllocateObject(string_class, size))
841                       : reinterpret_cast<String *>(vm->GetHeapManager()->AllocateNonMovableObject(string_class, size));
842     if (string != nullptr) {
843         // After setting length we should have a full barrier, so this write should happens-before barrier
844         TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
845         string->SetLength(length, compressed);
846         TSAN_ANNOTATE_IGNORE_WRITES_END();
847         // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string
848         // legth before it's set
849         arch::FullMemoryBarrier();
850     }
851     return string;
852 }
853 
854 }  // namespace panda::coretypes
855