• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "libpandabase/utils/utils.h"
17 #include "libpandabase/utils/utf.h"
18 #include "runtime/arch/memory_helpers.h"
19 #include "runtime/include/runtime.h"
20 #include "plugins/ets/runtime/ets_coroutine.h"
21 #include "plugins/ets/runtime/ets_handle.h"
22 #include "plugins/ets/runtime/ets_handle_scope.h"
23 #include "plugins/ets/runtime/types/ets_primitives.h"
24 #include "plugins/ets/runtime/types/ets_string.h"
25 #include "plugins/ets/runtime/types/ets_array.h"
26 #include "plugins/ets/runtime/types/ets_string_builder.h"
27 #include "plugins/ets/runtime/intrinsics/helpers/ets_intrinsics_helpers.h"
28 #include "plugins/ets/runtime/intrinsics/helpers/ets_to_string_cache.h"
29 #include <cstdint>
30 #include <cmath>
31 
32 namespace ark::ets {
33 
34 /// StringBuilder fields offsets
35 static constexpr uint32_t SB_BUFFER_OFFSET = ark::ObjectHeader::ObjectHeaderSize();
36 static constexpr uint32_t SB_INDEX_OFFSET = SB_BUFFER_OFFSET + ark::OBJECT_POINTER_SIZE;
37 static constexpr uint32_t SB_LENGTH_OFFSET = SB_INDEX_OFFSET + sizeof(int32_t);
38 static constexpr uint32_t SB_COMPRESS_OFFSET = SB_LENGTH_OFFSET + sizeof(int32_t);
39 
/// Packs four UTF-16 code units into a 64-bit word: c0 occupies the least significant 16 bits, c3 the most significant.
static constexpr uint64_t PackUtf16Units(char16_t c0, char16_t c1, char16_t c2, char16_t c3)
{
    constexpr uint32_t UNIT_BITS = 16U;
    return static_cast<uint64_t>(c0) | (static_cast<uint64_t>(c1) << UNIT_BITS) |
           (static_cast<uint64_t>(c2) << (2U * UNIT_BITS)) | (static_cast<uint64_t>(c3) << (3U * UNIT_BITS));
}

/// "null", "true" and the first four chars of "false" packed to integral types; 'e' completes "false"
static constexpr uint64_t NULL_CODE = PackUtf16Units(u'n', u'u', u'l', u'l');
static constexpr uint64_t TRUE_CODE = PackUtf16Units(u't', u'r', u'u', u'e');
static constexpr uint64_t FALS_CODE = PackUtf16Units(u'f', u'a', u'l', u's');
static constexpr uint16_t E_CODE = u'e';
45 
46 static_assert(std::is_same_v<EtsBoolean, uint8_t>);
47 static_assert(std::is_same_v<EtsChar, uint16_t> &&
48               std::is_same_v<EtsCharArray, EtsPrimitiveArray<EtsChar, EtsClassRoot::CHAR_ARRAY>>);
49 
50 // The following implementation is based on ObjectHeader::ShallowCopy
ReallocateBuffer(EtsHandle<EtsObjectArray> & bufHandle)51 static EtsObjectArray *ReallocateBuffer(EtsHandle<EtsObjectArray> &bufHandle)
52 {
53     uint32_t bufLen = bufHandle->GetLength();
54     ASSERT(bufLen < (UINT_MAX >> 1U));
55     // Allocate the new buffer - may trigger GC
56     auto *newBuf = EtsObjectArray::Create(bufHandle->GetClass(), 2 * bufLen);
57     ASSERT(newBuf != nullptr);
58     // Copy the old buffer data
59     bufHandle->CopyDataTo(newBuf);
60     EVENT_SB_BUFFER_REALLOC(ManagedThread::GetCurrent()->GetId(), newBuf, newBuf->GetLength(), newBuf->GetElementSize(),
61                             newBuf->ObjectSize());
62     return newBuf;
63 }
64 
65 // A string representations of nullptr, bool, short, int, long, float and double
66 // do not contain uncompressable chars. So we may skip 'compress' check in these cases.
67 template <bool CHECK_IF_COMPRESSABLE = true>
AppendCharArrayToBuffer(VMHandle<EtsObject> & sbHandle,EtsCharArray * arr)68 ObjectHeader *AppendCharArrayToBuffer(VMHandle<EtsObject> &sbHandle, EtsCharArray *arr)
69 {
70     auto *sb = sbHandle.GetPtr();
71     auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
72     auto index = sb->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
73     auto *buf = reinterpret_cast<EtsObjectArray *>(sb->GetFieldObject(SB_BUFFER_OFFSET));
74 
75     // Check the case of the buf overflow
76     uint32_t bufLen = buf->GetLength();
77     if (index >= bufLen) {
78         auto *coroutine = EtsCoroutine::GetCurrent();
79         [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
80         EtsHandle<EtsCharArray> arrHandle(coroutine, arr);
81         EtsHandle<EtsObjectArray> bufHandle(coroutine, buf);
82         // May trigger GC
83         buf = ReallocateBuffer(bufHandle);
84         // Update sb and arr as corresponding objects might be moved by GC
85         sb = sbHandle.GetPtr();
86         arr = arrHandle.GetPtr();
87         // Remember the new buffer
88         sb->SetFieldObject(SB_BUFFER_OFFSET, reinterpret_cast<EtsObject *>(buf));
89     }
90 
91     // Append array to the buf
92     buf->Set(index, reinterpret_cast<EtsObject *>(arr));
93     // Increment the index
94     sb->SetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET, index + 1U);
95     // Increase the length
96     // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
97     sb->SetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET, length + arr->GetLength());
98     // If string compression is disabled in the runtime, then set 'StringBuilder.compress' to 'false',
99     // as by default 'StringBuilder.compress' is 'true'.
100     if (!Runtime::GetCurrent()->GetOptions().IsRuntimeCompressedStringsEnabled()) {
101         if (sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET)) {
102             sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
103         }
104     } else if (CHECK_IF_COMPRESSABLE && sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET)) {
105         // Set the compress field to false if the array contains not compressable chars
106         auto n = arr->GetLength();
107         for (uint32_t i = 0; i < n; ++i) {
108             if (!ark::coretypes::String::IsASCIICharacter(arr->Get(i))) {
109                 sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
110                 break;
111             }
112         }
113     }
114     return sb->GetCoreType();
115 }
116 
ReconstructStringAsMUtf8(EtsString * dstString,EtsObjectArray * buffer,uint32_t index,uint32_t length,EtsClass * stringKlass)117 static void ReconstructStringAsMUtf8(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length,
118                                      EtsClass *stringKlass)
119 {
120     // All strings in the buf are MUtf8
121     uint8_t *dstData = dstString->GetDataMUtf8();
122     for (uint32_t i = 0; i < index; ++i) {
123         EtsObject *obj = buffer->Get(i);
124         if (obj->IsInstanceOf(stringKlass)) {
125             coretypes::String *srcString = reinterpret_cast<EtsString *>(obj)->GetCoreType();
126             uint32_t n = srcString->CopyDataRegionMUtf8(dstData, 0, srcString->GetLength(), length);
127             dstData += n;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
128             length -= n;
129         } else {
130             // obj is an array of chars
131             coretypes::Array *srcArray = reinterpret_cast<EtsArray *>(obj)->GetCoreType();
132             uint32_t n = srcArray->GetLength();
133             for (uint32_t j = 0; j < n; ++j) {
134                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
135                 dstData[j] = srcArray->GetPrimitive<uint16_t>(sizeof(uint16_t) * j);
136             }
137             dstData += n;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138             length -= n;
139         }
140     }
141 }
142 
ReconstructStringAsUtf16(EtsString * dstString,EtsObjectArray * buffer,uint32_t index,uint32_t length,EtsClass * stringKlass)143 static void ReconstructStringAsUtf16(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length,
144                                      EtsClass *stringKlass)
145 {
146     // Some strings in the buf are Utf16
147     uint16_t *dstData = dstString->GetDataUtf16();
148     for (uint32_t i = 0; i < index; ++i) {
149         EtsObject *obj = buffer->Get(i);
150         if (obj->IsInstanceOf(stringKlass)) {
151             coretypes::String *srcString = reinterpret_cast<EtsString *>(obj)->GetCoreType();
152             uint32_t n = srcString->CopyDataRegionUtf16(dstData, 0, srcString->GetLength(), length);
153             dstData += n;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
154             length -= n;
155         } else {
156             // obj is an array of chars
157             coretypes::Array *srcArray = reinterpret_cast<EtsCharArray *>(obj)->GetCoreType();
158             auto *srcData = reinterpret_cast<EtsChar *>(srcArray->GetData());
159             uint32_t n = srcArray->GetLength();
160             ASSERT(IsAligned(ToUintPtr(srcData), sizeof(uint64_t)));
161             auto bytes = n << 1UL;
162             // equals to 2^(k + 1) when n is 2^k AND dst is aligned by 2^(k + 1)
163             auto bytesAndAligned = bytes | (ToUintPtr(dstData) & (bytes - 1));
164             switch (bytesAndAligned) {
165                 case 2U:  // 2 bytes
166                     *dstData = *reinterpret_cast<EtsChar *>(srcData);
167                     break;
168                 case 4U:  // 4 bytes
169                     *reinterpret_cast<uint32_t *>(dstData) = *reinterpret_cast<uint32_t *>(srcData);
170                     break;
171                 case 8U:  // 8 bytes
172                     *reinterpret_cast<uint64_t *>(dstData) = *reinterpret_cast<uint64_t *>(srcData);
173                     break;
174                 default:
175                     std::copy_n(srcData, n, dstData);
176             }
177             dstData += n;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178             length -= n;
179         }
180     }
181 }
182 
NullToCharArray()183 static inline EtsCharArray *NullToCharArray()
184 {
185     EtsCharArray *arr = EtsCharArray::Create(std::char_traits<char>::length("null"));
186     *reinterpret_cast<uint64_t *>(arr->GetData<EtsChar>()) = NULL_CODE;
187     return arr;
188 }
189 
BoolToCharArray(EtsBoolean v)190 static inline EtsCharArray *BoolToCharArray(EtsBoolean v)
191 {
192     auto arrLen = v != 0U ? std::char_traits<char>::length("true") : std::char_traits<char>::length("false");
193     EtsCharArray *arr = EtsCharArray::Create(arrLen);
194     auto *data = reinterpret_cast<uint64_t *>(arr->GetData<EtsChar>());
195     if (v != 0U) {
196         *data = TRUE_CODE;
197     } else {
198         *data = FALS_CODE;
199         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
200         *reinterpret_cast<EtsChar *>(data + 1) = E_CODE;
201     }
202     return arr;
203 }
204 
CharToCharArray(EtsChar v)205 static inline EtsCharArray *CharToCharArray(EtsChar v)
206 {
207     EtsCharArray *arr = EtsCharArray::Create(1U);
208     *(reinterpret_cast<EtsChar *>(arr->GetData<EtsChar>())) = v;
209     return arr;
210 }
211 
StringBuilderAppendNullString(ObjectHeader * sb)212 ObjectHeader *StringBuilderAppendNullString(ObjectHeader *sb)
213 {
214     ASSERT(sb != nullptr);
215     auto *coroutine = EtsCoroutine::GetCurrent();
216     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
217     VMHandle<EtsObject> sbHandle(coroutine, sb);
218     // May trigger GC
219     EtsCharArray *arr = NullToCharArray();
220     return AppendCharArrayToBuffer<false>(sbHandle, arr);
221 }
222 
223 /**
224  * Implementation of public native append(s: String): StringBuilder.
225  * Inserts the string 's' into a free buffer slot:
226  *
227  *    buf[index] = s;
228  *    index++;
229  *    length += s.length
230  *    compress &= s.IsMUtf8()
231  *
232  * In case of the buf overflow, we create a new buffer of a larger size
233  * and copy the data from the old buffer.
234  */
StringBuilderAppendString(ObjectHeader * sb,EtsString * str)235 ObjectHeader *StringBuilderAppendString(ObjectHeader *sb, EtsString *str)
236 {
237     ASSERT(sb != nullptr);
238 
239     if (str == nullptr) {
240         return StringBuilderAppendNullString(sb);
241     }
242     if (str->GetLength() == 0) {
243         return sb;
244     }
245 
246     auto index = sb->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
247     auto *buf = reinterpret_cast<EtsObjectArray *>(sb->GetFieldObject(SB_BUFFER_OFFSET));
248     // Check buf overflow
249     if (index >= buf->GetLength()) {
250         auto *coroutine = EtsCoroutine::GetCurrent();
251         [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
252         VMHandle<EtsObject> sbHandle(coroutine, sb);
253         EtsHandle<EtsString> strHandle(coroutine, str);
254         EtsHandle<EtsObjectArray> bufHandle(coroutine, buf);
255         // May trigger GC
256         buf = ReallocateBuffer(bufHandle);
257         // Update sb and s as corresponding objects might be moved by GC
258         sb = sbHandle->GetCoreType();
259         str = strHandle.GetPtr();
260         // Remember the new buffer
261         sb->SetFieldObject(SB_BUFFER_OFFSET, reinterpret_cast<ObjectHeader *>(buf));
262     }
263     // Append string to the buf
264     // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
265     buf->Set(index, reinterpret_cast<EtsObject *>(str));
266     // Increment the index
267     sb->SetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET, index + 1U);
268     // Increase the length
269     auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
270     sb->SetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET, length + str->GetLength());
271     // Set the compress field to false if the string is not compressable
272     if (sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET) && str->IsUtf16()) {
273         sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
274     }
275 
276     return sb;
277 }
278 
StringBuilderAppendChar(ObjectHeader * sb,EtsChar v)279 ObjectHeader *StringBuilderAppendChar(ObjectHeader *sb, EtsChar v)
280 {
281     ASSERT(sb != nullptr);
282 
283     auto *coroutine = EtsCoroutine::GetCurrent();
284     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
285     VMHandle<EtsObject> sbHandle(coroutine, sb);
286 
287     // May trigger GC
288     auto *arr = CharToCharArray(v);
289     return AppendCharArrayToBuffer(sbHandle, arr);
290 }
291 
StringBuilderAppendBool(ObjectHeader * sb,EtsBoolean v)292 ObjectHeader *StringBuilderAppendBool(ObjectHeader *sb, EtsBoolean v)
293 {
294     ASSERT(sb != nullptr);
295 
296     auto *coroutine = EtsCoroutine::GetCurrent();
297     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
298     VMHandle<EtsObject> sbHandle(coroutine, sb);
299 
300     // May trigger GC
301     auto *arr = BoolToCharArray(v);
302     return AppendCharArrayToBuffer<false>(sbHandle, arr);
303 }
304 
StringBuilderAppendLong(ObjectHeader * sb,EtsLong v)305 ObjectHeader *StringBuilderAppendLong(ObjectHeader *sb, EtsLong v)
306 {
307     ASSERT(sb != nullptr);
308 
309     auto *coroutine = EtsCoroutine::GetCurrent();
310     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
311     VMHandle<EtsObject> sbHandle(coroutine, sb);
312 
313     // May trigger GC
314     auto *cache = PandaEtsVM::GetCurrent()->GetLongToStringCache();
315     ASSERT(cache != nullptr);
316     auto *str = cache->GetOrCache(EtsCoroutine::GetCurrent(), v);
317     return StringBuilderAppendString(sbHandle->GetCoreType(), str);
318 }
319 
320 template <typename FpType, std::enable_if_t<std::is_floating_point_v<FpType>, bool> = true>
FloatingPointToCharArray(FpType number)321 static inline EtsCharArray *FloatingPointToCharArray(FpType number)
322 {
323     return intrinsics::helpers::FpToStringDecimalRadix(number, [](std::string_view str) {
324         auto *arr = EtsCharArray::Create(str.length());
325         Span<uint16_t> data(reinterpret_cast<uint16_t *>(arr->GetData<EtsChar>()), str.length());
326         for (size_t i = 0; i < str.length(); ++i) {
327             ASSERT(ark::coretypes::String::IsASCIICharacter(str[i]));
328             data[i] = static_cast<uint16_t>(str[i]);
329         }
330         return arr;
331     });
332 }
333 
StringBuilderAppendFloat(ObjectHeader * sb,EtsFloat v)334 ObjectHeader *StringBuilderAppendFloat(ObjectHeader *sb, EtsFloat v)
335 {
336     ASSERT(sb != nullptr);
337 
338     auto *coroutine = EtsCoroutine::GetCurrent();
339     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
340     VMHandle<EtsObject> sbHandle(coroutine, sb);
341 
342     auto *cache = PandaEtsVM::GetCurrent()->GetFloatToStringCache();
343     ASSERT(cache != nullptr);
344     auto *str = cache->GetOrCache(EtsCoroutine::GetCurrent(), v);
345     return StringBuilderAppendString(sbHandle->GetCoreType(), str);
346 }
347 
StringBuilderAppendDouble(ObjectHeader * sb,EtsDouble v)348 ObjectHeader *StringBuilderAppendDouble(ObjectHeader *sb, EtsDouble v)
349 {
350     ASSERT(sb != nullptr);
351 
352     auto *coroutine = EtsCoroutine::GetCurrent();
353     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
354     VMHandle<EtsObject> sbHandle(coroutine, sb);
355 
356     auto *cache = PandaEtsVM::GetCurrent()->GetDoubleToStringCache();
357     ASSERT(cache != nullptr);
358     auto *str = cache->GetOrCache(EtsCoroutine::GetCurrent(), v);
359     return StringBuilderAppendString(sbHandle->GetCoreType(), str);
360 }
361 
StringBuilderToString(ObjectHeader * sb)362 EtsString *StringBuilderToString(ObjectHeader *sb)
363 {
364     ASSERT(sb != nullptr);
365 
366     auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
367     if (length == 0) {
368         return EtsString::CreateNewEmptyString();
369     }
370 
371     auto *coroutine = EtsCoroutine::GetCurrent();
372     [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
373     VMHandle<EtsObject> sbHandle(coroutine, sb);
374 
375     auto index = sbHandle->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
376     auto compress = sbHandle->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET);
377     EtsString *s = EtsString::AllocateNonInitializedString(length, compress);
378     EtsClass *sKlass = EtsClass::FromRuntimeClass(s->GetCoreType()->ClassAddr<Class>());
379     auto *buf = reinterpret_cast<EtsObjectArray *>(sbHandle->GetFieldObject(SB_BUFFER_OFFSET));
380     if (compress) {
381         ReconstructStringAsMUtf8(s, buf, index, length, sKlass);
382     } else {
383         ReconstructStringAsUtf16(s, buf, index, length, sKlass);
384     }
385     return s;
386 }
387 
388 }  // namespace ark::ets
389