1 /**
2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "libpandabase/utils/utils.h"
17 #include "libpandabase/utils/utf.h"
18 #include "runtime/arch/memory_helpers.h"
19 #include "runtime/include/runtime.h"
20 #include "plugins/ets/runtime/ets_coroutine.h"
21 #include "plugins/ets/runtime/ets_handle.h"
22 #include "plugins/ets/runtime/ets_handle_scope.h"
23 #include "plugins/ets/runtime/types/ets_primitives.h"
24 #include "plugins/ets/runtime/types/ets_string.h"
25 #include "plugins/ets/runtime/types/ets_array.h"
26 #include "plugins/ets/runtime/types/ets_string_builder.h"
27 #include "plugins/ets/runtime/intrinsics/helpers/ets_intrinsics_helpers.h"
28 #include "plugins/ets/runtime/intrinsics/helpers/ets_to_string_cache.h"
29 #include <cstdint>
30 #include <cmath>
31
32 namespace ark::ets {
33
34 /// StringBuilder fields offsets
35 static constexpr uint32_t SB_BUFFER_OFFSET = ark::ObjectHeader::ObjectHeaderSize();
36 static constexpr uint32_t SB_INDEX_OFFSET = SB_BUFFER_OFFSET + ark::OBJECT_POINTER_SIZE;
37 static constexpr uint32_t SB_LENGTH_OFFSET = SB_INDEX_OFFSET + sizeof(int32_t);
38 static constexpr uint32_t SB_COMPRESS_OFFSET = SB_LENGTH_OFFSET + sizeof(int32_t);
39
/// "null", "true" and "fals" packed into 64-bit words as four 16-bit chars each,
/// with the first char in the lowest 16 bits; 'e' completes "false"
static constexpr uint64_t NULL_CODE = 0x006C'006C'0075'006EULL;
static constexpr uint64_t TRUE_CODE = 0x0065'0075'0072'0074ULL;
static constexpr uint64_t FALS_CODE = 0x0073'006C'0061'0066ULL;
static constexpr uint16_t E_CODE = 0x0065U;
45
46 static_assert(std::is_same_v<EtsBoolean, uint8_t>);
47 static_assert(std::is_same_v<EtsChar, uint16_t> &&
48 std::is_same_v<EtsCharArray, EtsPrimitiveArray<EtsChar, EtsClassRoot::CHAR_ARRAY>>);
49
50 // The following implementation is based on ObjectHeader::ShallowCopy
ReallocateBuffer(EtsHandle<EtsObjectArray> & bufHandle)51 static EtsObjectArray *ReallocateBuffer(EtsHandle<EtsObjectArray> &bufHandle)
52 {
53 uint32_t bufLen = bufHandle->GetLength();
54 ASSERT(bufLen < (UINT_MAX >> 1U));
55 // Allocate the new buffer - may trigger GC
56 auto *newBuf = EtsObjectArray::Create(bufHandle->GetClass(), 2 * bufLen);
57 ASSERT(newBuf != nullptr);
58 // Copy the old buffer data
59 bufHandle->CopyDataTo(newBuf);
60 EVENT_SB_BUFFER_REALLOC(ManagedThread::GetCurrent()->GetId(), newBuf, newBuf->GetLength(), newBuf->GetElementSize(),
61 newBuf->ObjectSize());
62 return newBuf;
63 }
64
65 // A string representations of nullptr, bool, short, int, long, float and double
66 // do not contain uncompressable chars. So we may skip 'compress' check in these cases.
67 template <bool CHECK_IF_COMPRESSABLE = true>
AppendCharArrayToBuffer(VMHandle<EtsObject> & sbHandle,EtsCharArray * arr)68 ObjectHeader *AppendCharArrayToBuffer(VMHandle<EtsObject> &sbHandle, EtsCharArray *arr)
69 {
70 auto *sb = sbHandle.GetPtr();
71 auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
72 auto index = sb->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
73 auto *buf = reinterpret_cast<EtsObjectArray *>(sb->GetFieldObject(SB_BUFFER_OFFSET));
74
75 // Check the case of the buf overflow
76 uint32_t bufLen = buf->GetLength();
77 if (index >= bufLen) {
78 auto *coroutine = EtsCoroutine::GetCurrent();
79 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
80 EtsHandle<EtsCharArray> arrHandle(coroutine, arr);
81 EtsHandle<EtsObjectArray> bufHandle(coroutine, buf);
82 // May trigger GC
83 buf = ReallocateBuffer(bufHandle);
84 // Update sb and arr as corresponding objects might be moved by GC
85 sb = sbHandle.GetPtr();
86 arr = arrHandle.GetPtr();
87 // Remember the new buffer
88 sb->SetFieldObject(SB_BUFFER_OFFSET, reinterpret_cast<EtsObject *>(buf));
89 }
90
91 // Append array to the buf
92 buf->Set(index, reinterpret_cast<EtsObject *>(arr));
93 // Increment the index
94 sb->SetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET, index + 1U);
95 // Increase the length
96 // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
97 sb->SetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET, length + arr->GetLength());
98 // If string compression is disabled in the runtime, then set 'StringBuilder.compress' to 'false',
99 // as by default 'StringBuilder.compress' is 'true'.
100 if (!Runtime::GetCurrent()->GetOptions().IsRuntimeCompressedStringsEnabled()) {
101 if (sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET)) {
102 sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
103 }
104 } else if (CHECK_IF_COMPRESSABLE && sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET)) {
105 // Set the compress field to false if the array contains not compressable chars
106 auto n = arr->GetLength();
107 for (uint32_t i = 0; i < n; ++i) {
108 if (!ark::coretypes::String::IsASCIICharacter(arr->Get(i))) {
109 sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
110 break;
111 }
112 }
113 }
114 return sb->GetCoreType();
115 }
116
ReconstructStringAsMUtf8(EtsString * dstString,EtsObjectArray * buffer,uint32_t index,uint32_t length,EtsClass * stringKlass)117 static void ReconstructStringAsMUtf8(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length,
118 EtsClass *stringKlass)
119 {
120 // All strings in the buf are MUtf8
121 uint8_t *dstData = dstString->GetDataMUtf8();
122 for (uint32_t i = 0; i < index; ++i) {
123 EtsObject *obj = buffer->Get(i);
124 if (obj->IsInstanceOf(stringKlass)) {
125 coretypes::String *srcString = reinterpret_cast<EtsString *>(obj)->GetCoreType();
126 uint32_t n = srcString->CopyDataRegionMUtf8(dstData, 0, srcString->GetLength(), length);
127 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
128 length -= n;
129 } else {
130 // obj is an array of chars
131 coretypes::Array *srcArray = reinterpret_cast<EtsArray *>(obj)->GetCoreType();
132 uint32_t n = srcArray->GetLength();
133 for (uint32_t j = 0; j < n; ++j) {
134 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
135 dstData[j] = srcArray->GetPrimitive<uint16_t>(sizeof(uint16_t) * j);
136 }
137 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138 length -= n;
139 }
140 }
141 }
142
ReconstructStringAsUtf16(EtsString * dstString,EtsObjectArray * buffer,uint32_t index,uint32_t length,EtsClass * stringKlass)143 static void ReconstructStringAsUtf16(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length,
144 EtsClass *stringKlass)
145 {
146 // Some strings in the buf are Utf16
147 uint16_t *dstData = dstString->GetDataUtf16();
148 for (uint32_t i = 0; i < index; ++i) {
149 EtsObject *obj = buffer->Get(i);
150 if (obj->IsInstanceOf(stringKlass)) {
151 coretypes::String *srcString = reinterpret_cast<EtsString *>(obj)->GetCoreType();
152 uint32_t n = srcString->CopyDataRegionUtf16(dstData, 0, srcString->GetLength(), length);
153 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
154 length -= n;
155 } else {
156 // obj is an array of chars
157 coretypes::Array *srcArray = reinterpret_cast<EtsCharArray *>(obj)->GetCoreType();
158 auto *srcData = reinterpret_cast<EtsChar *>(srcArray->GetData());
159 uint32_t n = srcArray->GetLength();
160 ASSERT(IsAligned(ToUintPtr(srcData), sizeof(uint64_t)));
161 auto bytes = n << 1UL;
162 // equals to 2^(k + 1) when n is 2^k AND dst is aligned by 2^(k + 1)
163 auto bytesAndAligned = bytes | (ToUintPtr(dstData) & (bytes - 1));
164 switch (bytesAndAligned) {
165 case 2U: // 2 bytes
166 *dstData = *reinterpret_cast<EtsChar *>(srcData);
167 break;
168 case 4U: // 4 bytes
169 *reinterpret_cast<uint32_t *>(dstData) = *reinterpret_cast<uint32_t *>(srcData);
170 break;
171 case 8U: // 8 bytes
172 *reinterpret_cast<uint64_t *>(dstData) = *reinterpret_cast<uint64_t *>(srcData);
173 break;
174 default:
175 std::copy_n(srcData, n, dstData);
176 }
177 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178 length -= n;
179 }
180 }
181 }
182
NullToCharArray()183 static inline EtsCharArray *NullToCharArray()
184 {
185 EtsCharArray *arr = EtsCharArray::Create(std::char_traits<char>::length("null"));
186 *reinterpret_cast<uint64_t *>(arr->GetData<EtsChar>()) = NULL_CODE;
187 return arr;
188 }
189
BoolToCharArray(EtsBoolean v)190 static inline EtsCharArray *BoolToCharArray(EtsBoolean v)
191 {
192 auto arrLen = v != 0U ? std::char_traits<char>::length("true") : std::char_traits<char>::length("false");
193 EtsCharArray *arr = EtsCharArray::Create(arrLen);
194 auto *data = reinterpret_cast<uint64_t *>(arr->GetData<EtsChar>());
195 if (v != 0U) {
196 *data = TRUE_CODE;
197 } else {
198 *data = FALS_CODE;
199 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
200 *reinterpret_cast<EtsChar *>(data + 1) = E_CODE;
201 }
202 return arr;
203 }
204
CharToCharArray(EtsChar v)205 static inline EtsCharArray *CharToCharArray(EtsChar v)
206 {
207 EtsCharArray *arr = EtsCharArray::Create(1U);
208 *(reinterpret_cast<EtsChar *>(arr->GetData<EtsChar>())) = v;
209 return arr;
210 }
211
StringBuilderAppendNullString(ObjectHeader * sb)212 ObjectHeader *StringBuilderAppendNullString(ObjectHeader *sb)
213 {
214 ASSERT(sb != nullptr);
215 auto *coroutine = EtsCoroutine::GetCurrent();
216 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
217 VMHandle<EtsObject> sbHandle(coroutine, sb);
218 // May trigger GC
219 EtsCharArray *arr = NullToCharArray();
220 return AppendCharArrayToBuffer<false>(sbHandle, arr);
221 }
222
223 /**
224 * Implementation of public native append(s: String): StringBuilder.
225 * Inserts the string 's' into a free buffer slot:
226 *
227 * buf[index] = s;
228 * index++;
229 * length += s.length
230 * compress &= s.IsMUtf8()
231 *
232 * In case of the buf overflow, we create a new buffer of a larger size
233 * and copy the data from the old buffer.
234 */
StringBuilderAppendString(ObjectHeader * sb,EtsString * str)235 ObjectHeader *StringBuilderAppendString(ObjectHeader *sb, EtsString *str)
236 {
237 ASSERT(sb != nullptr);
238
239 if (str == nullptr) {
240 return StringBuilderAppendNullString(sb);
241 }
242 if (str->GetLength() == 0) {
243 return sb;
244 }
245
246 auto index = sb->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
247 auto *buf = reinterpret_cast<EtsObjectArray *>(sb->GetFieldObject(SB_BUFFER_OFFSET));
248 // Check buf overflow
249 if (index >= buf->GetLength()) {
250 auto *coroutine = EtsCoroutine::GetCurrent();
251 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
252 VMHandle<EtsObject> sbHandle(coroutine, sb);
253 EtsHandle<EtsString> strHandle(coroutine, str);
254 EtsHandle<EtsObjectArray> bufHandle(coroutine, buf);
255 // May trigger GC
256 buf = ReallocateBuffer(bufHandle);
257 // Update sb and s as corresponding objects might be moved by GC
258 sb = sbHandle->GetCoreType();
259 str = strHandle.GetPtr();
260 // Remember the new buffer
261 sb->SetFieldObject(SB_BUFFER_OFFSET, reinterpret_cast<ObjectHeader *>(buf));
262 }
263 // Append string to the buf
264 // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
265 buf->Set(index, reinterpret_cast<EtsObject *>(str));
266 // Increment the index
267 sb->SetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET, index + 1U);
268 // Increase the length
269 auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
270 sb->SetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET, length + str->GetLength());
271 // Set the compress field to false if the string is not compressable
272 if (sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET) && str->IsUtf16()) {
273 sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
274 }
275
276 return sb;
277 }
278
StringBuilderAppendChar(ObjectHeader * sb,EtsChar v)279 ObjectHeader *StringBuilderAppendChar(ObjectHeader *sb, EtsChar v)
280 {
281 ASSERT(sb != nullptr);
282
283 auto *coroutine = EtsCoroutine::GetCurrent();
284 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
285 VMHandle<EtsObject> sbHandle(coroutine, sb);
286
287 // May trigger GC
288 auto *arr = CharToCharArray(v);
289 return AppendCharArrayToBuffer(sbHandle, arr);
290 }
291
StringBuilderAppendBool(ObjectHeader * sb,EtsBoolean v)292 ObjectHeader *StringBuilderAppendBool(ObjectHeader *sb, EtsBoolean v)
293 {
294 ASSERT(sb != nullptr);
295
296 auto *coroutine = EtsCoroutine::GetCurrent();
297 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
298 VMHandle<EtsObject> sbHandle(coroutine, sb);
299
300 // May trigger GC
301 auto *arr = BoolToCharArray(v);
302 return AppendCharArrayToBuffer<false>(sbHandle, arr);
303 }
304
StringBuilderAppendLong(ObjectHeader * sb,EtsLong v)305 ObjectHeader *StringBuilderAppendLong(ObjectHeader *sb, EtsLong v)
306 {
307 ASSERT(sb != nullptr);
308
309 auto *coroutine = EtsCoroutine::GetCurrent();
310 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
311 VMHandle<EtsObject> sbHandle(coroutine, sb);
312
313 // May trigger GC
314 auto *cache = PandaEtsVM::GetCurrent()->GetLongToStringCache();
315 ASSERT(cache != nullptr);
316 auto *str = cache->GetOrCache(EtsCoroutine::GetCurrent(), v);
317 return StringBuilderAppendString(sbHandle->GetCoreType(), str);
318 }
319
320 template <typename FpType, std::enable_if_t<std::is_floating_point_v<FpType>, bool> = true>
FloatingPointToCharArray(FpType number)321 static inline EtsCharArray *FloatingPointToCharArray(FpType number)
322 {
323 return intrinsics::helpers::FpToStringDecimalRadix(number, [](std::string_view str) {
324 auto *arr = EtsCharArray::Create(str.length());
325 Span<uint16_t> data(reinterpret_cast<uint16_t *>(arr->GetData<EtsChar>()), str.length());
326 for (size_t i = 0; i < str.length(); ++i) {
327 ASSERT(ark::coretypes::String::IsASCIICharacter(str[i]));
328 data[i] = static_cast<uint16_t>(str[i]);
329 }
330 return arr;
331 });
332 }
333
StringBuilderAppendFloat(ObjectHeader * sb,EtsFloat v)334 ObjectHeader *StringBuilderAppendFloat(ObjectHeader *sb, EtsFloat v)
335 {
336 ASSERT(sb != nullptr);
337
338 auto *coroutine = EtsCoroutine::GetCurrent();
339 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
340 VMHandle<EtsObject> sbHandle(coroutine, sb);
341
342 auto *cache = PandaEtsVM::GetCurrent()->GetFloatToStringCache();
343 ASSERT(cache != nullptr);
344 auto *str = cache->GetOrCache(EtsCoroutine::GetCurrent(), v);
345 return StringBuilderAppendString(sbHandle->GetCoreType(), str);
346 }
347
StringBuilderAppendDouble(ObjectHeader * sb,EtsDouble v)348 ObjectHeader *StringBuilderAppendDouble(ObjectHeader *sb, EtsDouble v)
349 {
350 ASSERT(sb != nullptr);
351
352 auto *coroutine = EtsCoroutine::GetCurrent();
353 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
354 VMHandle<EtsObject> sbHandle(coroutine, sb);
355
356 auto *cache = PandaEtsVM::GetCurrent()->GetDoubleToStringCache();
357 ASSERT(cache != nullptr);
358 auto *str = cache->GetOrCache(EtsCoroutine::GetCurrent(), v);
359 return StringBuilderAppendString(sbHandle->GetCoreType(), str);
360 }
361
StringBuilderToString(ObjectHeader * sb)362 EtsString *StringBuilderToString(ObjectHeader *sb)
363 {
364 ASSERT(sb != nullptr);
365
366 auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
367 if (length == 0) {
368 return EtsString::CreateNewEmptyString();
369 }
370
371 auto *coroutine = EtsCoroutine::GetCurrent();
372 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
373 VMHandle<EtsObject> sbHandle(coroutine, sb);
374
375 auto index = sbHandle->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
376 auto compress = sbHandle->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET);
377 EtsString *s = EtsString::AllocateNonInitializedString(length, compress);
378 EtsClass *sKlass = EtsClass::FromRuntimeClass(s->GetCoreType()->ClassAddr<Class>());
379 auto *buf = reinterpret_cast<EtsObjectArray *>(sbHandle->GetFieldObject(SB_BUFFER_OFFSET));
380 if (compress) {
381 ReconstructStringAsMUtf8(s, buf, index, length, sKlass);
382 } else {
383 ReconstructStringAsUtf16(s, buf, index, length, sKlass);
384 }
385 return s;
386 }
387
388 } // namespace ark::ets
389