• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <cstddef>
17 #include <cstring>
18 #include <limits>
19 
20 #include "libpandabase/utils/hash.h"
21 #include "libpandabase/utils/span.h"
22 #include "runtime/arch/memory_helpers.h"
23 #include "runtime/include/coretypes/array.h"
24 #include "runtime/include/coretypes/string-inl.h"
25 #include "runtime/include/runtime.h"
26 #include "runtime/handle_base-inl.h"
27 #include "runtime/include/panda_vm.h"
28 
29 namespace panda::coretypes {
30 
// Global switch read by the CanBeCompressed* helpers and ComputeHashcode in this file;
// while it is false those helpers report that nothing can be stored in compressed form.
31 bool String::compressed_strings_enabled = true;
32 
33 /* static */
CreateFromString(String * str,LanguageContext ctx,PandaVM * vm)34 String *String::CreateFromString(String *str, LanguageContext ctx, PandaVM *vm)
35 {
36     // Allocator may trig gc and move str, need to hold it
37     auto thread = ManagedThread::GetCurrent();
38     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
39     VMHandle<String> str_handle(thread, str);
40     auto string = AllocStringObject(str_handle->GetLength(), !str_handle->IsUtf16(), ctx, vm);
41     if (string == nullptr) {
42         return nullptr;
43     }
44 
45     // Retrieve str after gc
46     str = str_handle.GetPtr();
47     string->length_ = str->length_;
48     string->hashcode_ = str->hashcode_;
49 
50     uint32_t length = str->GetLength();
51     // After memcpy we should have a full barrier, so this writes should happen-before barrier
52     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
53     if (str->IsUtf16()) {
54         if (memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), str->GetDataUtf16(),
55                      ComputeDataSizeUtf16(length)) != EOK) {
56             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
57             UNREACHABLE();
58         }
59     } else {
60         if (memcpy_s(string->GetDataMUtf8(), string->GetLength(), str->GetDataMUtf8(), length) != EOK) {
61             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
62             UNREACHABLE();
63         }
64     }
65     TSAN_ANNOTATE_IGNORE_WRITES_END();
66     // String is supposed to be a constant object, so all its data should be visible to all threads
67     arch::FullMemoryBarrier();
68 
69     return string;
70 }
71 
72 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,size_t mutf8_length,uint32_t utf16_length,bool can_be_compressed,LanguageContext ctx,PandaVM * vm,bool movable)73 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, size_t mutf8_length, uint32_t utf16_length,
74                                 bool can_be_compressed, LanguageContext ctx, PandaVM *vm, bool movable)
75 {
76     auto string = AllocStringObject(utf16_length, can_be_compressed, ctx, vm, movable);
77     if (string == nullptr) {
78         return nullptr;
79     }
80 
81     ASSERT(string->hashcode_ == 0);
82     // After copying we should have a full barrier, so this writes should happen-before barrier
83     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
84     if (can_be_compressed) {
85         if (utf16_length != 0 &&
86             memcpy_s(string->GetDataMUtf8(), string->GetLength(), mutf8_data, utf16_length) != EOK) {
87             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
88             UNREACHABLE();
89         }
90     } else {
91         utf::ConvertMUtf8ToUtf16(mutf8_data, mutf8_length, string->GetDataUtf16());
92     }
93     TSAN_ANNOTATE_IGNORE_WRITES_END();
94     // String is supposed to be a constant object, so all its data should be visible to all threads
95     arch::FullMemoryBarrier();
96     return string;
97 }
98 
99 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,uint32_t utf16_length,LanguageContext ctx,PandaVM * vm,bool movable)100 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, uint32_t utf16_length, LanguageContext ctx, PandaVM *vm,
101                                 bool movable)
102 {
103     bool can_be_compressed = CanBeCompressedMUtf8(mutf8_data);
104     return CreateFromMUtf8(mutf8_data, utf::Mutf8Size(mutf8_data), utf16_length, can_be_compressed, ctx, vm, movable);
105 }
106 
107 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,uint32_t utf16_length,bool can_be_compressed,LanguageContext ctx,PandaVM * vm,bool movable)108 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, uint32_t utf16_length, bool can_be_compressed,
109                                 LanguageContext ctx, PandaVM *vm, bool movable)
110 {
111     return CreateFromMUtf8(mutf8_data, utf::Mutf8Size(mutf8_data), utf16_length, can_be_compressed, ctx, vm, movable);
112 }
113 
114 /* static */
CreateFromMUtf8(const uint8_t * mutf8_data,LanguageContext ctx,PandaVM * vm,bool movable)115 String *String::CreateFromMUtf8(const uint8_t *mutf8_data, LanguageContext ctx, PandaVM *vm, bool movable)
116 {
117     size_t mutf8_length = utf::Mutf8Size(mutf8_data);
118     size_t utf16_length = utf::MUtf8ToUtf16Size(mutf8_data, mutf8_length);
119     bool can_be_compressed = CanBeCompressedMUtf8(mutf8_data);
120     return CreateFromMUtf8(mutf8_data, mutf8_length, utf16_length, can_be_compressed, ctx, vm, movable);
121 }
122 
123 /* static */
CreateFromUtf16(const uint16_t * utf16_data,uint32_t utf16_length,LanguageContext ctx,PandaVM * vm,bool movable)124 String *String::CreateFromUtf16(const uint16_t *utf16_data, uint32_t utf16_length, LanguageContext ctx, PandaVM *vm,
125                                 bool movable)
126 {
127     bool can_be_compressed = CanBeCompressed(utf16_data, utf16_length);
128     auto string = AllocStringObject(utf16_length, can_be_compressed, ctx, vm, movable);
129     if (string == nullptr) {
130         return nullptr;
131     }
132 
133     ASSERT(string->hashcode_ == 0);
134     // After copying we should have a full barrier, so this writes should happen-before barrier
135     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
136     if (can_be_compressed) {
137         CopyUtf16AsMUtf8(utf16_data, string->GetDataMUtf8(), utf16_length);
138     } else {
139         if (utf16_length != 0 && memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), utf16_data,
140                                           utf16_length << 1UL) != EOK) {
141             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
142             UNREACHABLE();
143         }
144     }
145     TSAN_ANNOTATE_IGNORE_WRITES_END();
146     // String is supposed to be a constant object, so all its data should be visible to all threads
147     arch::FullMemoryBarrier();
148     return string;
149 }
150 
151 /* static */
CreateEmptyString(LanguageContext ctx,PandaVM * vm)152 String *String::CreateEmptyString(LanguageContext ctx, PandaVM *vm)
153 {
154     uint16_t data = 0;
155     return CreateFromUtf16(&data, 0, ctx, vm);
156 }
157 
158 /* static */
CopyUtf16AsMUtf8(const uint16_t * utf16_from,uint8_t * mutf8_to,uint32_t utf16_length)159 void String::CopyUtf16AsMUtf8(const uint16_t *utf16_from, uint8_t *mutf8_to, uint32_t utf16_length)
160 {
161     Span<const uint16_t> from(utf16_from, utf16_length);
162     Span<uint8_t> to(mutf8_to, utf16_length);
163     for (uint32_t i = 0; i < utf16_length; i++) {
164         to[i] = from[i];
165     }
166 }
167 
168 // static
CreateNewStringFromChars(uint32_t offset,uint32_t length,Array * chararray,LanguageContext ctx,PandaVM * vm)169 String *String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array *chararray, LanguageContext ctx,
170                                          PandaVM *vm)
171 {
172     // Allocator may trig gc and move array, need to hold it
173     auto thread = ManagedThread::GetCurrent();
174     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
175     VMHandle<Array> array_handle(thread, chararray);
176 
177     // NOLINTNEXTLINE(readability-identifier-naming)
178     const uint16_t *src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(chararray->GetData()) + (offset << 1UL));
179     bool can_be_compressed = CanBeCompressed(src, length);
180     auto string = AllocStringObject(length, can_be_compressed, ctx, vm);
181     if (string == nullptr) {
182         return nullptr;
183     }
184 
185     // Retrieve src since gc may move it
186     src = reinterpret_cast<uint16_t *>(ToUintPtr<uint32_t>(array_handle->GetData()) + (offset << 1UL));
187     ASSERT(string->hashcode_ == 0);
188     // After copying we should have a full barrier, so this writes should happen-before barrier
189     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
190     if (can_be_compressed) {
191         CopyUtf16AsMUtf8(src, string->GetDataMUtf8(), length);
192     } else {
193         if (memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()), src, length << 1UL) != EOK) {
194             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
195             UNREACHABLE();
196         }
197     }
198     TSAN_ANNOTATE_IGNORE_WRITES_END();
199     // String is supposed to be a constant object, so all its data should be visible to all threads
200     arch::FullMemoryBarrier();
201     return string;
202 }
203 
204 // static
CreateNewStringFromBytes(uint32_t offset,uint32_t length,uint32_t high_byte,Array * bytearray,LanguageContext ctx,PandaVM * vm)205 String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint32_t high_byte, Array *bytearray,
206                                          LanguageContext ctx, PandaVM *vm)
207 {
208     // Allocator may trig gc and move array, need to hold it
209     auto thread = ManagedThread::GetCurrent();
210     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
211     VMHandle<Array> array_handle(thread, bytearray);
212 
213     constexpr size_t BYTE_MASK = 0xFF;
214 
215     // NOLINTNEXTLINE(readability-identifier-naming)
216     const uint8_t *src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(bytearray->GetData()) + offset);
217     high_byte &= BYTE_MASK;
218     bool can_be_compressed = CanBeCompressedMUtf8(src, length) && (high_byte == 0);
219     auto string = AllocStringObject(length, can_be_compressed, ctx, vm);
220     if (string == nullptr) {
221         return nullptr;
222     }
223 
224     // Retrieve src since gc may move it
225     src = reinterpret_cast<uint8_t *>(ToUintPtr<uint32_t>(array_handle->GetData()) + offset);
226     ASSERT(string->hashcode_ == 0);
227     // After copying we should have a full barrier, so this writes should happen-before barrier
228     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
229     if (can_be_compressed) {
230         Span<const uint8_t> from(src, length);
231         Span<uint8_t> to(string->GetDataMUtf8(), length);
232         for (uint32_t i = 0; i < length; ++i) {
233             to[i] = (from[i] & BYTE_MASK);
234         }
235     } else {
236         Span<const uint8_t> from(src, length);
237         Span<uint16_t> to(string->GetDataUtf16(), length);
238         for (uint32_t i = 0; i < length; ++i) {
239             to[i] = (high_byte << 8U) + (from[i] & BYTE_MASK);
240         }
241     }
242     TSAN_ANNOTATE_IGNORE_WRITES_END();
243 
244     // String is supposed to be a constant object, so all its data should be visible to all threads
245     arch::FullMemoryBarrier();
246     return string;
247 }
248 
249 template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhs_sp,Span<T2> & rhs_sp,int32_t count)250 int32_t CompareStringSpan(Span<T1> &lhs_sp, Span<T2> &rhs_sp, int32_t count)
251 {
252     for (int32_t i = 0; i < count; ++i) {
253         int32_t char_diff = static_cast<int32_t>(lhs_sp[i]) - static_cast<int32_t>(rhs_sp[i]);
254         if (char_diff != 0) {
255             return char_diff;
256         }
257     }
258     return 0;
259 }
260 
Compare(String * rstr)261 int32_t String::Compare(String *rstr)
262 {
263     String *lstr = this;
264     if (lstr == rstr) {
265         return 0;
266     }
267     int32_t lstr_leng = lstr->GetLength();
268     int32_t rstr_leng = rstr->GetLength();
269     int32_t leng_ret = lstr_leng - rstr_leng;
270     int32_t min_count = (leng_ret < 0) ? lstr_leng : rstr_leng;
271     if (!lstr->IsUtf16() && !rstr->IsUtf16()) {
272         Span<uint8_t> lhs_sp(lstr->GetDataMUtf8(), lstr_leng);
273         Span<uint8_t> rhs_sp(rstr->GetDataMUtf8(), rstr_leng);
274         int32_t char_diff = CompareStringSpan(lhs_sp, rhs_sp, min_count);
275         if (char_diff != 0) {
276             return char_diff;
277         }
278     } else if (!lstr->IsUtf16()) {
279         Span<uint8_t> lhs_sp(lstr->GetDataMUtf8(), lstr_leng);
280         Span<uint16_t> rhs_sp(rstr->GetDataUtf16(), rstr_leng);
281         int32_t char_diff = CompareStringSpan(lhs_sp, rhs_sp, min_count);
282         if (char_diff != 0) {
283             return char_diff;
284         }
285     } else if (!rstr->IsUtf16()) {
286         Span<uint16_t> lhs_sp(lstr->GetDataUtf16(), rstr_leng);
287         Span<uint8_t> rhs_sp(rstr->GetDataMUtf8(), lstr_leng);
288         int32_t char_diff = CompareStringSpan(lhs_sp, rhs_sp, min_count);
289         if (char_diff != 0) {
290             return char_diff;
291         }
292     } else {
293         Span<uint16_t> lhs_sp(lstr->GetDataUtf16(), lstr_leng);
294         Span<uint16_t> rhs_sp(rstr->GetDataUtf16(), rstr_leng);
295         int32_t char_diff = CompareStringSpan(lhs_sp, rhs_sp, min_count);
296         if (char_diff != 0) {
297             return char_diff;
298         }
299     }
300     return leng_ret;
301 }
302 
303 /* static */
304 template <typename T1, typename T2>
IndexOf(Span<const T1> & lhs_sp,Span<const T2> & rhs_sp,int32_t pos,int32_t max)305 int32_t String::IndexOf(Span<const T1> &lhs_sp, Span<const T2> &rhs_sp, int32_t pos, int32_t max)
306 {
307     auto first = static_cast<int32_t>(rhs_sp[0]);
308     for (int32_t i = pos; i <= max; i++) {
309         if (static_cast<int32_t>(lhs_sp[i]) != first) {
310             i++;
311             while (i <= max && static_cast<int32_t>(lhs_sp[i]) != first) {
312                 i++;
313             }
314         }
315         /* Found the first character, now look at the rest of rhs_sp */
316         if (i <= max) {
317             int j = i + 1;
318             int end = j + rhs_sp.size() - 1;
319 
320             for (int k = 1; j < end && static_cast<int32_t>(lhs_sp[j]) == static_cast<int32_t>(rhs_sp[k]); j++, k++) {
321             }
322             if (j == end) {
323                 /* Found whole string. */
324                 return i;
325             }
326         }
327     }
328     return -1;
329 }
330 
IndexOf(String * rhs,int32_t pos)331 int32_t String::IndexOf(String *rhs, int32_t pos)
332 {
333     if (rhs == nullptr) {
334         return -1;
335     }
336     String *lhs = this;
337     int32_t lhs_count = lhs->GetLength();
338     int32_t rhs_count = rhs->GetLength();
339 
340     if (rhs_count == 0) {
341         return pos;
342     }
343 
344     if (pos >= lhs_count) {
345         return -1;
346     }
347 
348     if (pos < 0) {
349         pos = 0;
350     }
351 
352     int32_t max = lhs_count - rhs_count;
353     if (rhs->IsMUtf8() && lhs->IsMUtf8()) {
354         Span<const uint8_t> lhs_sp(lhs->GetDataMUtf8(), lhs_count);
355         Span<const uint8_t> rhs_sp(rhs->GetDataMUtf8(), rhs_count);
356         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
357     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
358         Span<const uint16_t> lhs_sp(lhs->GetDataUtf16(), lhs_count);
359         Span<const uint16_t> rhs_sp(rhs->GetDataUtf16(), rhs_count);
360         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
361     } else if (rhs->IsUtf16()) {
362         Span<const uint8_t> lhs_sp(lhs->GetDataMUtf8(), lhs_count);
363         Span<const uint16_t> rhs_sp(rhs->GetDataUtf16(), rhs_count);
364         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
365     } else {  // NOLINT(readability-else-after-return)
366         Span<const uint16_t> lhs_sp(lhs->GetDataUtf16(), lhs_count);
367         Span<const uint8_t> rhs_sp(rhs->GetDataMUtf8(), rhs_count);
368         return String::IndexOf(lhs_sp, rhs_sp, pos, max);
369     }
370 
371     return -1;
372 }
373 
374 /* static */
CanBeCompressed(const uint16_t * utf16_data,uint32_t utf16_length)375 bool String::CanBeCompressed(const uint16_t *utf16_data, uint32_t utf16_length)
376 {
377     if (!compressed_strings_enabled) {
378         return false;
379     }
380     bool is_compressed = true;
381     Span<const uint16_t> data(utf16_data, utf16_length);
382     for (uint32_t i = 0; i < utf16_length; i++) {
383         if (!IsASCIICharacter(data[i])) {
384             is_compressed = false;
385             break;
386         }
387     }
388     return is_compressed;
389 }
390 
391 // static
CanBeCompressedMUtf8(const uint8_t * mutf8_data,uint32_t mutf8_length)392 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8_data, uint32_t mutf8_length)
393 {
394     if (!compressed_strings_enabled) {
395         return false;
396     }
397     bool is_compressed = true;
398     Span<const uint8_t> data(mutf8_data, mutf8_length);
399     for (uint32_t i = 0; i < mutf8_length; i++) {
400         if (!IsASCIICharacter(data[i])) {
401             is_compressed = false;
402             break;
403         }
404     }
405     return is_compressed;
406 }
407 
408 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8_data)409 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8_data)
410 {
411     return compressed_strings_enabled ? utf::IsMUtf8OnlySingleBytes(mutf8_data) : false;
412 }
413 
414 /* static */
CanBeCompressedUtf16(const uint16_t * utf16_data,uint32_t utf16_length,uint16_t non)415 bool String::CanBeCompressedUtf16(const uint16_t *utf16_data, uint32_t utf16_length, uint16_t non)
416 {
417     if (!compressed_strings_enabled) {
418         return false;
419     }
420     bool is_compressed = true;
421     Span<const uint16_t> data(utf16_data, utf16_length);
422     for (uint32_t i = 0; i < utf16_length; i++) {
423         if (!IsASCIICharacter(data[i]) && data[i] != non) {
424             is_compressed = false;
425             break;
426         }
427     }
428     return is_compressed;
429 }
430 
431 /* static */
CanBeCompressedMUtf8(const uint8_t * mutf8_data,uint32_t mutf8_length,uint16_t non)432 bool String::CanBeCompressedMUtf8(const uint8_t *mutf8_data, uint32_t mutf8_length, uint16_t non)
433 {
434     if (!compressed_strings_enabled) {
435         return false;
436     }
437     bool is_compressed = true;
438     Span<const uint8_t> data(mutf8_data, mutf8_length);
439     for (uint32_t i = 0; i < mutf8_length; i++) {
440         if (!IsASCIICharacter(data[i]) && data[i] != non) {
441             is_compressed = false;
442             break;
443         }
444     }
445     return is_compressed;
446 }
447 
448 /* static */
StringsAreEqual(String * str1,String * str2)449 bool String::StringsAreEqual(String *str1, String *str2)
450 {
451     if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength())) {
452         return false;
453     }
454 
455     if (str1->IsUtf16()) {
456         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
457         Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
458         return String::StringsAreEquals(data1, data2);
459     } else {  // NOLINT(readability-else-after-return)
460         Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
461         Span<const uint8_t> data2(str2->GetDataMUtf8(), str1->GetLength());
462         return String::StringsAreEquals(data1, data2);
463     }
464 }
465 
466 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8_data,uint32_t utf16_length)467 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8_data, uint32_t utf16_length)
468 {
469     if (str1->GetLength() != utf16_length) {
470         return false;
471     }
472     return StringsAreEqualMUtf8(str1, mutf8_data, utf16_length, CanBeCompressedMUtf8(mutf8_data));
473 }
474 
475 /* static */
StringsAreEqualMUtf8(String * str1,const uint8_t * mutf8_data,uint32_t utf16_length,bool can_be_compressed)476 bool String::StringsAreEqualMUtf8(String *str1, const uint8_t *mutf8_data, uint32_t utf16_length,
477                                   bool can_be_compressed)
478 {
479     bool result = true;
480     if (str1->GetLength() != utf16_length) {
481         result = false;
482     } else {
483         bool str1_can_be_compressed = !str1->IsUtf16();
484         if (str1_can_be_compressed != can_be_compressed) {
485             return false;
486         }
487 
488         ASSERT(str1_can_be_compressed == can_be_compressed);
489         if (str1_can_be_compressed) {
490             Span<const uint8_t> data1(str1->GetDataMUtf8(), str1->GetLength());
491             Span<const uint8_t> data2(mutf8_data, utf16_length);
492             result = String::StringsAreEquals(data1, data2);
493         } else {
494             result = IsMutf8EqualsUtf16(mutf8_data, str1->GetDataUtf16(), str1->GetLength());
495         }
496     }
497     return result;
498 }
499 
500 /* static */
StringsAreEqualUtf16(String * str1,const uint16_t * utf16_data,uint32_t utf16_data_length)501 bool String::StringsAreEqualUtf16(String *str1, const uint16_t *utf16_data, uint32_t utf16_data_length)
502 {
503     bool result = true;
504     if (str1->GetLength() != utf16_data_length) {
505         result = false;
506     } else if (!str1->IsUtf16()) {
507         result = IsMutf8EqualsUtf16(str1->GetDataMUtf8(), str1->GetLength(), utf16_data, utf16_data_length);
508     } else {
509         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
510         Span<const uint16_t> data2(utf16_data, utf16_data_length);
511         result = String::StringsAreEquals(data1, data2);
512     }
513     return result;
514 }
515 
516 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8_data,uint32_t utf8_data_length,const uint16_t * utf16_data,uint32_t utf16_data_length)517 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8_data, uint32_t utf8_data_length, const uint16_t *utf16_data,
518                                 uint32_t utf16_data_length)
519 {
520     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
521     auto tmp_buffer = allocator->AllocArray<uint16_t>(utf16_data_length);
522     [[maybe_unused]] auto converted_string_size =
523         utf::ConvertRegionMUtf8ToUtf16(utf8_data, tmp_buffer, utf8_data_length, utf16_data_length, 0);
524     ASSERT(converted_string_size == utf16_data_length);
525 
526     Span<const uint16_t> data1(tmp_buffer, utf16_data_length);
527     Span<const uint16_t> data2(utf16_data, utf16_data_length);
528     bool result = String::StringsAreEquals(data1, data2);
529     allocator->Delete(tmp_buffer);
530     return result;
531 }
532 
533 /* static */
IsMutf8EqualsUtf16(const uint8_t * utf8_data,const uint16_t * utf16_data,uint32_t utf16_data_length)534 bool String::IsMutf8EqualsUtf16(const uint8_t *utf8_data, const uint16_t *utf16_data, uint32_t utf16_data_length)
535 {
536     auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
537     auto tmp_buffer = allocator->AllocArray<uint16_t>(utf16_data_length);
538     utf::ConvertMUtf8ToUtf16(utf8_data, utf::Mutf8Size(utf8_data), tmp_buffer);
539 
540     Span<const uint16_t> data1(tmp_buffer, utf16_data_length);
541     Span<const uint16_t> data2(utf16_data, utf16_data_length);
542     bool result = String::StringsAreEquals(data1, data2);
543     allocator->Delete(tmp_buffer);
544     return result;
545 }
546 
547 /* static */
548 template <typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)549 bool String::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
550 {
551     for (size_t i = 0; i < str1.Size(); i++) {
552         if (str1[i] != str2[i]) {
553             return false;
554         }
555     }
556     return true;
557 }
558 
ToCharArray(LanguageContext ctx)559 Array *String::ToCharArray(LanguageContext ctx)
560 {
561     // allocator may trig gc and move 'this', need to hold it
562     auto thread = ManagedThread::GetCurrent();
563     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
564     VMHandle<String> str(thread, this);
565     auto *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16);
566     Array *array = Array::Create(klass, GetLength());
567     if (array == nullptr) {
568         return nullptr;
569     }
570 
571     if (str->IsUtf16()) {
572         Span<uint16_t> sp(str->GetDataUtf16(), str->GetLength());
573         for (size_t i = 0; i < sp.size(); i++) {
574             array->Set<uint16_t>(i, sp[i]);
575         }
576     } else {
577         Span<uint8_t> sp(str->GetDataMUtf8(), str->GetLength());
578         for (size_t i = 0; i < sp.size(); i++) {
579             array->Set<uint16_t>(i, sp[i]);
580         }
581     }
582 
583     return array;
584 }
585 
586 // We need to use java compatible hash algorithm as javac relies on it
587 // when compiles switch-case statement with strings
588 template <class T>
ComputeHashForData(const T * data,size_t size)589 static int32_t ComputeHashForData(const T *data, size_t size)
590 {
591     uint32_t hash = 0;
592 #if defined(__GNUC__)
593 #pragma GCC diagnostic push
594 #pragma GCC diagnostic ignored "-Wignored-attributes"
595     Span<const T> sp(data, size);
596 #pragma GCC diagnostic pop
597 #endif
598     for (auto c : sp) {
599         constexpr size_t SHIFT = 5;
600         hash = (hash << SHIFT) - hash + c;
601     }
602     return static_cast<int32_t>(hash);
603 }
604 
/**
 * Computes hash = 31 * hash + byte (written as (hash << 5) - hash) over a zero-terminated byte sequence,
 * using wrapping 32-bit unsigned arithmetic.
 *
 * @param mutf8_data  zero-terminated input; the terminator is not hashed
 * @return the accumulated hash reinterpreted as int32_t; 0 for an empty input
 */
static int32_t ComputeHashForMutf8(const uint8_t *mutf8_data)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cursor = mutf8_data; *cursor != '\0'; ++cursor) {
        hash = (hash << SHIFT) - hash + *cursor;
    }
    return static_cast<int32_t>(hash);
}
614 
ComputeHashcode()615 uint32_t String::ComputeHashcode()
616 {
617     uint32_t hash;
618     if (compressed_strings_enabled) {
619         if (!IsUtf16()) {
620             hash = ComputeHashForData(GetDataMUtf8(), GetLength());
621         } else {
622             hash = ComputeHashForData(GetDataUtf16(), GetLength());
623         }
624     } else {
625         ASSERT(static_cast<size_t>(GetLength()) > (std::numeric_limits<size_t>::max() >> 1U));
626         hash = ComputeHashForData(GetDataUtf16(), GetLength());
627     }
628     return hash;
629 }
630 
631 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8_data,uint32_t utf16_length)632 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8_data, uint32_t utf16_length)
633 {
634     return ComputeHashcodeMutf8(mutf8_data, utf16_length, CanBeCompressedMUtf8(mutf8_data));
635 }
636 
637 /* static */
ComputeHashcodeMutf8(const uint8_t * mutf8_data,uint32_t utf16_length,bool can_be_compressed)638 uint32_t String::ComputeHashcodeMutf8(const uint8_t *mutf8_data, uint32_t utf16_length, bool can_be_compressed)
639 {
640     uint32_t hash;
641     if (can_be_compressed) {
642         hash = ComputeHashForMutf8(mutf8_data);
643     } else {
644         auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
645         auto tmp_buffer = allocator->AllocArray<uint16_t>(utf16_length);
646         utf::ConvertMUtf8ToUtf16(mutf8_data, utf::Mutf8Size(mutf8_data), tmp_buffer);
647         hash = ComputeHashForData(tmp_buffer, utf16_length);
648         allocator->Delete(tmp_buffer);
649     }
650     return hash;
651 }
652 
653 /* static */
ComputeHashcodeUtf16(uint16_t * utf16_data,uint32_t length)654 uint32_t String::ComputeHashcodeUtf16(uint16_t *utf16_data, uint32_t length)
655 {
656     return ComputeHashForData(utf16_data, length);
657 }
658 
659 /* static */
DoReplace(String * src,uint16_t old_c,uint16_t new_c,LanguageContext ctx,PandaVM * vm)660 String *String::DoReplace(String *src, uint16_t old_c, uint16_t new_c, LanguageContext ctx, PandaVM *vm)
661 {
662     int32_t length = src->GetLength();
663     bool can_be_compressed = IsASCIICharacter(new_c);
664     if (src->IsUtf16()) {
665         can_be_compressed = can_be_compressed && CanBeCompressedUtf16(src->GetDataUtf16(), length, old_c);
666     } else {
667         can_be_compressed = can_be_compressed && CanBeCompressedMUtf8(src->GetDataMUtf8(), length, old_c);
668     }
669 
670     // allocator may trig gc and move src, need to hold it
671     auto thread = ManagedThread::GetCurrent();
672     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
673     VMHandle<String> src_handle(thread, src);
674     auto string = AllocStringObject(length, can_be_compressed, ctx, vm);
675     if (string == nullptr) {
676         return nullptr;
677     }
678 
679     // Retrieve src after gc
680     src = src_handle.GetPtr();
681     ASSERT(string->hashcode_ == 0);
682 
683     // After replacing we should have a full barrier, so this writes should happen-before barrier
684     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
685     if (src->IsUtf16()) {
686         if (can_be_compressed) {
687             auto replace = [old_c, new_c](uint16_t c) { return static_cast<uint8_t>((old_c != c) ? c : new_c); };
688             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
689             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataMUtf8(), replace);
690         } else {
691             auto replace = [old_c, new_c](uint16_t c) { return (old_c != c) ? c : new_c; };
692             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
693             std::transform(src->GetDataUtf16(), src->GetDataUtf16() + length, string->GetDataUtf16(), replace);
694         }
695     } else {
696         if (can_be_compressed) {
697             auto replace = [old_c, new_c](uint16_t c) { return static_cast<uint8_t>((old_c != c) ? c : new_c); };
698             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
699             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataMUtf8(), replace);
700         } else {
701             auto replace = [old_c, new_c](uint16_t c) { return (old_c != c) ? c : new_c; };
702             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
703             std::transform(src->GetDataMUtf8(), src->GetDataMUtf8() + length, string->GetDataUtf16(), replace);
704         }
705     }
706     TSAN_ANNOTATE_IGNORE_WRITES_END();
707     // String is supposed to be a constant object, so all its data should be visible to all threads
708     arch::FullMemoryBarrier();
709     return string;
710 }
711 
712 /* static */
FastSubString(String * src,uint32_t start,uint32_t utf16_length,LanguageContext ctx,PandaVM * vm)713 String *String::FastSubString(String *src, uint32_t start, uint32_t utf16_length, LanguageContext ctx, PandaVM *vm)
714 {
715     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
716     bool can_be_compressed = !src->IsUtf16() || CanBeCompressed(src->GetDataUtf16() + start, utf16_length);
717 
718     // allocator may trig gc and move src, need to hold it
719     auto thread = ManagedThread::GetCurrent();
720     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
721     VMHandle<String> src_handle(thread, src);
722     auto string = AllocStringObject(utf16_length, can_be_compressed, ctx, vm);
723     if (string == nullptr) {
724         return nullptr;
725     }
726 
727     // Retrieve src after gc
728     src = src_handle.GetPtr();
729     ASSERT(string->hashcode_ == 0);
730 
731     // After copying we should have a full barrier, so this writes should happen-before barrier
732     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
733     if (src->IsUtf16()) {
734         if (can_be_compressed) {
735             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
736             CopyUtf16AsMUtf8(src->GetDataUtf16() + start, string->GetDataMUtf8(), utf16_length);
737         } else {
738             if (memcpy_s(string->GetDataUtf16(), ComputeDataSizeUtf16(string->GetLength()),
739                          // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
740                          src->GetDataUtf16() + start, utf16_length << 1UL) != EOK) {
741                 LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
742                 UNREACHABLE();
743             }
744         }
745     } else {
746         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
747         if (memcpy_s(string->GetDataMUtf8(), string->GetLength(), src->GetDataMUtf8() + start, utf16_length) != EOK) {
748             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
749             UNREACHABLE();
750         }
751     }
752     TSAN_ANNOTATE_IGNORE_WRITES_END();
753     // String is supposed to be a constant object, so all its data should be visible to all threads
754     arch::FullMemoryBarrier();
755     return string;
756 }
757 
758 /* static */
Concat(String * string1,String * string2,LanguageContext ctx,PandaVM * vm)759 String *String::Concat(String *string1, String *string2, LanguageContext ctx, PandaVM *vm)
760 {
761     // allocator may trig gc and move src, need to hold it
762     auto thread = ManagedThread::GetCurrent();
763     [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
764     VMHandle<String> str1_handle(thread, string1);
765     VMHandle<String> str2_handle(thread, string2);
766 
767     uint32_t length1 = string1->GetLength();
768     uint32_t length2 = string2->GetLength();
769     uint32_t new_length = length1 + length2;
770     bool compressed = compressed_strings_enabled && (!string1->IsUtf16() && !string2->IsUtf16());
771     auto new_string = AllocStringObject(new_length, compressed, ctx, vm);
772     if (UNLIKELY(new_string == nullptr)) {
773         return nullptr;
774     }
775 
776     ASSERT(new_string->hashcode_ == 0);
777 
778     // Retrieve strings after gc
779     string1 = str1_handle.GetPtr();
780     string2 = str2_handle.GetPtr();
781 
782     // After copying we should have a full barrier, so this writes should happen-before barrier
783     TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
784     if (compressed) {
785         Span<uint8_t> sp(new_string->GetDataMUtf8(), new_length);
786         if (memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataMUtf8(), length1) != EOK) {
787             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
788             UNREACHABLE();
789         }
790         sp = sp.SubSpan(length1);
791         if (memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataMUtf8(), length2) != EOK) {
792             LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
793             UNREACHABLE();
794         }
795     } else {
796         Span<uint16_t> sp(new_string->GetDataUtf16(), new_length);
797         if (!string1->IsUtf16()) {
798             for (uint32_t i = 0; i < length1; ++i) {
799                 sp[i] = string1->At<false>(i);
800             }
801         } else {
802             if (memcpy_s(sp.Data(), sp.SizeBytes(), string1->GetDataUtf16(), length1 << 1U) != EOK) {
803                 LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
804                 UNREACHABLE();
805             }
806         }
807         sp = sp.SubSpan(length1);
808         if (!string2->IsUtf16()) {
809             for (uint32_t i = 0; i < length2; ++i) {
810                 sp[i] = string2->At<false>(i);
811             }
812         } else {
813             if (memcpy_s(sp.Data(), sp.SizeBytes(), string2->GetDataUtf16(), length2 << 1U) != EOK) {
814                 LOG(FATAL, RUNTIME) << __func__ << " memcpy_s failed";
815                 UNREACHABLE();
816             }
817         }
818     }
819     TSAN_ANNOTATE_IGNORE_WRITES_END();
820     // String is supposed to be a constant object, so all its data should be visible to all threads
821     arch::FullMemoryBarrier();
822 
823     return new_string;
824 }
825 
826 /* static */
AllocStringObject(size_t length,bool compressed,LanguageContext ctx,PandaVM * vm,bool movable)827 String *String::AllocStringObject(size_t length, bool compressed, LanguageContext ctx, PandaVM *vm, bool movable)
828 {
829     ASSERT(vm != nullptr);
830     auto *string_class = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING);
831     size_t size = compressed ? String::ComputeSizeMUtf8(length) : String::ComputeSizeUtf16(length);
832     auto string = movable
833                       ? reinterpret_cast<String *>(vm->GetHeapManager()->AllocateObject(string_class, size))
834                       : reinterpret_cast<String *>(vm->GetHeapManager()->AllocateNonMovableObject(string_class, size));
835     if (string != nullptr) {
836         // After setting length we should have a full barrier, so this write should happens-before barrier
837         TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
838         string->SetLength(length, compressed);
839         TSAN_ANNOTATE_IGNORE_WRITES_END();
840         // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string
841         // legth before it's set
842         arch::FullMemoryBarrier();
843     }
844     return string;
845 }
846 
847 }  // namespace panda::coretypes
848