1 /**
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "libpandabase/utils/utils.h"
17 #include "libpandabase/utils/utf.h"
18 #include "runtime/arch/memory_helpers.h"
19 #include "plugins/ets/runtime/ets_coroutine.h"
20 #include "plugins/ets/runtime/ets_handle.h"
21 #include "plugins/ets/runtime/ets_handle_scope.h"
22 #include "plugins/ets/runtime/types/ets_primitives.h"
23 #include "plugins/ets/runtime/types/ets_string.h"
24 #include "plugins/ets/runtime/types/ets_array.h"
25 #include "plugins/ets/runtime/types/ets_string_builder.h"
26 #include <cstdint>
27
28 namespace panda::ets {
29
30 /// StringBuilder fields offsets
31 static constexpr uint32_t SB_BUFFER_OFFSET = panda::ObjectHeader::ObjectHeaderSize();
32 static constexpr uint32_t SB_INDEX_OFFSET = SB_BUFFER_OFFSET + panda::OBJECT_POINTER_SIZE;
33 static constexpr uint32_t SB_LENGTH_OFFSET = SB_INDEX_OFFSET + sizeof(int32_t);
34 static constexpr uint32_t SB_COMPRESS_OFFSET = SB_LENGTH_OFFSET + sizeof(int32_t);
35
/// UTF-16 code units of "null", "true" and "fals" (the first four chars of "false"),
/// each packed into one 64-bit value with the first char in the least significant 16 bits.
/// E_CODE is the trailing 'e' that completes "false".
static constexpr uint64_t NULL_CODE = 0x006C'006C'0075'006EULL;
static constexpr uint64_t TRUE_CODE = 0x0065'0075'0072'0074ULL;
static constexpr uint64_t FALS_CODE = 0x0073'006C'0061'0066ULL;
static constexpr uint16_t E_CODE = 0x0065U;
41
42 static_assert(std::is_same_v<EtsBoolean, uint8_t>);
43 static_assert(std::is_same_v<EtsChar, uint16_t> &&
44 std::is_same_v<EtsCharArray, EtsPrimitiveArray<EtsChar, EtsClassRoot::CHAR_ARRAY>>);
45
46 // The following implementation is based on ObjectHeader::ShallowCopy
ReallocateBuffer(EtsHandle<EtsObjectArray> & bufHandle)47 static EtsObjectArray *ReallocateBuffer(EtsHandle<EtsObjectArray> &bufHandle)
48 {
49 uint32_t bufLen = bufHandle->GetLength();
50 ASSERT(bufLen < (UINT_MAX >> 1U));
51 // Allocate the new buffer - may trigger GC
52 auto *newBuf = EtsObjectArray::Create(bufHandle->GetClass(), 2 * bufLen);
53 // Copy the old buffer data
54 bufHandle->CopyDataTo(newBuf);
55 return newBuf;
56 }
57
AppendCharArrayToBuffer(VMHandle<EtsObject> & sbHandle,EtsCharArray * arr)58 static ObjectHeader *AppendCharArrayToBuffer(VMHandle<EtsObject> &sbHandle, EtsCharArray *arr)
59 {
60 auto *sb = sbHandle.GetPtr();
61 auto compress = sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET);
62 auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
63 auto index = sb->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
64 auto *buf = reinterpret_cast<EtsObjectArray *>(sb->GetFieldObject(SB_BUFFER_OFFSET));
65
66 // Check the case of the buf overflow
67 uint32_t bufLen = buf->GetLength();
68 if (index >= bufLen) {
69 auto *coroutine = EtsCoroutine::GetCurrent();
70 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
71 EtsHandle<EtsCharArray> arrHandle(coroutine, arr);
72 EtsHandle<EtsObjectArray> bufHandle(coroutine, buf);
73 // May trigger GC
74 buf = ReallocateBuffer(bufHandle);
75 // Update sb and arr as corresponding objects might be moved by GC
76 sb = sbHandle.GetPtr();
77 arr = arrHandle.GetPtr();
78 // Remember the new buffer
79 sb->SetFieldObject(SB_BUFFER_OFFSET, reinterpret_cast<EtsObject *>(buf));
80 }
81
82 // Append array to the buf
83 buf->Set(index, reinterpret_cast<EtsObject *>(arr));
84 // Increment the index
85 sb->SetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET, index + 1U);
86 // Increase the length
87 // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
88 sb->SetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET, length + arr->GetLength());
89 // Set the compress field to false if the array contains not compressable chars
90 if (compress) {
91 auto n = arr->GetLength();
92 for (uint32_t i = 0; i < n; ++i) {
93 if (!panda::coretypes::String::IsASCIICharacter(arr->Get(i))) {
94 sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
95 break;
96 }
97 }
98 }
99 return sb->GetCoreType();
100 }
101
ReconstructStringAsMUtf8(EtsString * dstString,EtsObjectArray * buffer,uint32_t index,uint32_t length,EtsClass * stringKlass)102 static void ReconstructStringAsMUtf8(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length,
103 EtsClass *stringKlass)
104 {
105 // All strings in the buf are MUtf8
106 uint8_t *dstData = dstString->GetDataMUtf8();
107 for (uint32_t i = 0; i < index; ++i) {
108 EtsObject *obj = buffer->Get(i);
109 if (obj->IsInstanceOf(stringKlass)) {
110 coretypes::String *srcString = reinterpret_cast<EtsString *>(obj)->GetCoreType();
111 uint32_t n = srcString->CopyDataRegionMUtf8(dstData, 0, srcString->GetLength(), length);
112 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
113 length -= n;
114 } else {
115 // obj is an array of chars
116 coretypes::Array *srcArray = reinterpret_cast<EtsArray *>(obj)->GetCoreType();
117 uint32_t n = srcArray->GetLength();
118 for (uint32_t j = 0; j < n; ++j) {
119 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
120 dstData[j] = srcArray->GetPrimitive<uint16_t>(sizeof(uint16_t) * j);
121 }
122 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
123 length -= n;
124 }
125 }
126 }
127
ReconstructStringAsUtf16(EtsString * dstString,EtsObjectArray * buffer,uint32_t index,uint32_t length,EtsClass * stringKlass)128 static void ReconstructStringAsUtf16(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length,
129 EtsClass *stringKlass)
130 {
131 // Some strings in the buf are Utf16
132 uint16_t *dstData = dstString->GetDataUtf16();
133 for (uint32_t i = 0; i < index; ++i) {
134 EtsObject *obj = buffer->Get(i);
135 if (obj->IsInstanceOf(stringKlass)) {
136 coretypes::String *srcString = reinterpret_cast<EtsString *>(obj)->GetCoreType();
137 uint32_t n = srcString->CopyDataRegionUtf16(dstData, 0, srcString->GetLength(), length);
138 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
139 length -= n;
140 } else {
141 // obj is an array of chars
142 coretypes::Array *srcArray = reinterpret_cast<EtsCharArray *>(obj)->GetCoreType();
143 auto *srcData = reinterpret_cast<EtsChar *>(srcArray->GetData());
144 uint32_t n = srcArray->GetLength();
145 switch (n) {
146 case 1U: // 2 bytes
147 *dstData = *reinterpret_cast<EtsChar *>(srcData);
148 break;
149 case 2U: // 4 bytes
150 *reinterpret_cast<uint32_t *>(dstData) = *reinterpret_cast<uint32_t *>(srcData);
151 break;
152 case 4U: // 8 bytes
153 *reinterpret_cast<uint64_t *>(dstData) = *reinterpret_cast<uint64_t *>(srcData);
154 break;
155 default:
156 memcpy_s(static_cast<void *>(dstData), n << 1UL, static_cast<void *>(srcData), n << 1UL);
157 }
158 dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
159 length -= n;
160 }
161 }
162 }
163
NullToCharArray()164 static inline EtsCharArray *NullToCharArray()
165 {
166 EtsCharArray *arr = EtsCharArray::Create(std::char_traits<char>::length("null"));
167 *reinterpret_cast<uint64_t *>(arr->GetData<EtsChar>()) = NULL_CODE;
168 return arr;
169 }
170
BoolToCharArray(EtsBoolean v)171 static inline EtsCharArray *BoolToCharArray(EtsBoolean v)
172 {
173 auto arrLen = v != 0U ? std::char_traits<char>::length("true") : std::char_traits<char>::length("false");
174 EtsCharArray *arr = EtsCharArray::Create(arrLen);
175 auto *data = reinterpret_cast<uint64_t *>(arr->GetData<EtsChar>());
176 if (v != 0U) {
177 *data = TRUE_CODE;
178 } else {
179 *data = FALS_CODE;
180 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
181 *reinterpret_cast<EtsChar *>(data + 1) = E_CODE;
182 }
183 return arr;
184 }
185
CharToCharArray(EtsChar v)186 static inline EtsCharArray *CharToCharArray(EtsChar v)
187 {
188 EtsCharArray *arr = EtsCharArray::Create(1U);
189 *(reinterpret_cast<EtsChar *>(arr->GetData<EtsChar>())) = v;
190 return arr;
191 }
192
LongToCharArray(EtsLong v)193 static inline EtsCharArray *LongToCharArray(EtsLong v)
194 {
195 auto sign = static_cast<uint32_t>(v < 0);
196 auto nDigits = CountDigits(std::abs(v)) + sign;
197 EtsCharArray *arr = EtsCharArray::Create(nDigits);
198 auto *arrData = reinterpret_cast<EtsChar *>(arr->GetData<EtsChar>());
199 utf::UInt64ToUtf16Array(std::abs(v), arrData, nDigits, sign != 0U);
200 return arr;
201 }
202
203 /**
204 * Implementation of public native append(s: String): StringBuilder.
205 * Inserts the string 's' into a free buffer slot:
206 *
207 * buf[index] = s;
208 * index++;
209 * length += s.length
210 * compress &= s.IsMUtf8()
211 *
212 * In case of the buf overflow, we create a new buffer of a larger size
213 * and copy the data from the old buffer.
214 */
StringBuilderAppendString(ObjectHeader * sb,EtsString * str)215 ObjectHeader *StringBuilderAppendString(ObjectHeader *sb, EtsString *str)
216 {
217 ASSERT(sb != nullptr);
218
219 if (str == nullptr) {
220 auto *coroutine = EtsCoroutine::GetCurrent();
221 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
222 VMHandle<EtsObject> sbHandle(coroutine, sb);
223 // May trigger GC
224 EtsCharArray *arr = NullToCharArray();
225 return AppendCharArrayToBuffer(sbHandle, arr);
226 }
227 if (str->GetLength() == 0) {
228 return sb;
229 }
230
231 auto index = sb->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
232 auto *buf = reinterpret_cast<EtsObjectArray *>(sb->GetFieldObject(SB_BUFFER_OFFSET));
233 // Check buf overflow
234 if (index >= buf->GetLength()) {
235 auto *coroutine = EtsCoroutine::GetCurrent();
236 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
237 VMHandle<EtsObject> sbHandle(coroutine, sb);
238 EtsHandle<EtsString> strHandle(coroutine, str);
239 EtsHandle<EtsObjectArray> bufHandle(coroutine, buf);
240 // May trigger GC
241 buf = ReallocateBuffer(bufHandle);
242 // Update sb and s as corresponding objects might be moved by GC
243 sb = sbHandle->GetCoreType();
244 str = strHandle.GetPtr();
245 // Remember the new buffer
246 sb->SetFieldObject(SB_BUFFER_OFFSET, reinterpret_cast<ObjectHeader *>(buf));
247 }
248 // Append string to the buf
249 // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
250 buf->Set(index, reinterpret_cast<EtsObject *>(str));
251 // Increment the index
252 sb->SetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET, index + 1U);
253 // Increase the length
254 auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
255 sb->SetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET, length + str->GetLength());
256 // Set the compress field to false if the string is not compressable
257 if (sb->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET) && str->IsUtf16()) {
258 sb->SetFieldPrimitive<bool>(SB_COMPRESS_OFFSET, false);
259 }
260
261 return sb;
262 }
263
StringBuilderAppendBool(ObjectHeader * sb,EtsBoolean v)264 ObjectHeader *StringBuilderAppendBool(ObjectHeader *sb, EtsBoolean v)
265 {
266 ASSERT(sb != nullptr);
267
268 auto *coroutine = EtsCoroutine::GetCurrent();
269 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
270 VMHandle<EtsObject> sbHandle(coroutine, sb);
271
272 // May trigger GC
273 auto *arr = BoolToCharArray(v);
274 return AppendCharArrayToBuffer(sbHandle, arr);
275 }
276
StringBuilderAppendChar(ObjectHeader * sb,EtsChar v)277 ObjectHeader *StringBuilderAppendChar(ObjectHeader *sb, EtsChar v)
278 {
279 ASSERT(sb != nullptr);
280
281 auto *coroutine = EtsCoroutine::GetCurrent();
282 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
283 VMHandle<EtsObject> sbHandle(coroutine, sb);
284
285 // May trigger GC
286 auto *arr = CharToCharArray(v);
287 return AppendCharArrayToBuffer(sbHandle, arr);
288 }
289
StringBuilderAppendLong(ObjectHeader * sb,EtsLong v)290 ObjectHeader *StringBuilderAppendLong(ObjectHeader *sb, EtsLong v)
291 {
292 ASSERT(sb != nullptr);
293
294 auto *coroutine = EtsCoroutine::GetCurrent();
295 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
296 VMHandle<EtsObject> sbHandle(coroutine, sb);
297
298 // May trigger GC
299 auto *arr = LongToCharArray(v);
300 return AppendCharArrayToBuffer(sbHandle, arr);
301 }
302
StringBuilderToString(ObjectHeader * sb)303 EtsString *StringBuilderToString(ObjectHeader *sb)
304 {
305 ASSERT(sb != nullptr);
306
307 auto length = sb->GetFieldPrimitive<uint32_t>(SB_LENGTH_OFFSET);
308 if (length == 0) {
309 return EtsString::CreateNewEmptyString();
310 }
311
312 auto *coroutine = EtsCoroutine::GetCurrent();
313 [[maybe_unused]] HandleScope<ObjectHeader *> scope(coroutine);
314 VMHandle<EtsObject> sbHandle(coroutine, sb);
315
316 auto index = sbHandle->GetFieldPrimitive<uint32_t>(SB_INDEX_OFFSET);
317 auto compress = sbHandle->GetFieldPrimitive<bool>(SB_COMPRESS_OFFSET);
318 EtsString *s = EtsString::AllocateNonInitializedString(length, compress);
319 EtsClass *sKlass = EtsClass::FromRuntimeClass(s->GetCoreType()->ClassAddr<Class>());
320 auto *buf = reinterpret_cast<EtsObjectArray *>(sbHandle->GetFieldObject(SB_BUFFER_OFFSET));
321 if (compress) {
322 ReconstructStringAsMUtf8(s, buf, index, length, sKlass);
323 } else {
324 ReconstructStringAsUtf16(s, buf, index, length, sKlass);
325 }
326 return s;
327 }
328
329 } // namespace panda::ets
330