/*
 * Copyright (c) 2021 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16 #ifndef ECMASCRIPT_STRING_INL_H
17 #define ECMASCRIPT_STRING_INL_H
18
19 #include "ecmascript/ecma_string.h"
20 #include "ecmascript/base/string_helper.h"
21 #include "ecmascript/ecma_vm.h"
22 #include "ecmascript/js_handle.h"
23 #include "ecmascript/js_tagged_value-inl.h"
24 #include "ecmascript/object_factory-inl.h"
25
26 namespace panda::ecmascript {
27 /* static */
CreateEmptyString(const EcmaVM * vm)28 inline EcmaString *EcmaString::CreateEmptyString(const EcmaVM *vm)
29 {
30 auto string = vm->GetFactory()->AllocNonMovableLineStringObject(EcmaString::SIZE);
31 string->SetLength(0, true);
32 string->SetRawHashcode(0);
33 return string;
34 }
35
36 /* static */
CreateFromUtf8(const EcmaVM * vm,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress,MemSpaceType type,bool isConstantString,uint32_t idOffset)37 inline EcmaString *EcmaString::CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
38 bool canBeCompress, MemSpaceType type, bool isConstantString,
39 uint32_t idOffset)
40 {
41 if (utf8Len == 0) {
42 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
43 }
44 EcmaString *string = nullptr;
45 if (canBeCompress) {
46 if (isConstantString) {
47 string = CreateConstantString(vm, utf8Data, utf8Len, canBeCompress, type, idOffset);
48 } else {
49 string = CreateLineStringWithSpaceType(vm, utf8Len, true, type);
50 ASSERT(string != nullptr);
51
52 if (memcpy_s(string->GetDataUtf8Writable(), utf8Len, utf8Data, utf8Len) != EOK) {
53 LOG_FULL(FATAL) << "memcpy_s failed";
54 UNREACHABLE();
55 }
56 }
57 } else {
58 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
59 string = CreateLineStringWithSpaceType(vm, utf16Len, false, type);
60 ASSERT(string != nullptr);
61
62 [[maybe_unused]] auto len =
63 base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len, 0);
64 ASSERT(len == utf16Len);
65 }
66
67 ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
68 return string;
69 }
70
CreateFromUtf16(const EcmaVM * vm,const uint16_t * utf16Data,uint32_t utf16Len,bool canBeCompress,MemSpaceType type)71 inline EcmaString *EcmaString::CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
72 bool canBeCompress, MemSpaceType type)
73 {
74 if (utf16Len == 0) {
75 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
76 }
77 auto string = CreateLineStringWithSpaceType(vm, utf16Len, canBeCompress, type);
78 ASSERT(string != nullptr);
79
80 if (canBeCompress) {
81 CopyChars(string->GetDataUtf8Writable(), utf16Data, utf16Len);
82 } else {
83 uint32_t len = utf16Len * (sizeof(uint16_t) / sizeof(uint8_t));
84 if (memcpy_s(string->GetDataUtf16Writable(), len, utf16Data, len) != EOK) {
85 LOG_FULL(FATAL) << "memcpy_s failed";
86 UNREACHABLE();
87 }
88 }
89
90 ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
91 return string;
92 }
93
94 /* static */
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)95 inline EcmaString *EcmaString::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
96 {
97 size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
98 auto string = vm->GetFactory()->AllocLineStringObject(size);
99 string->SetLength(length, compressed);
100 string->SetRawHashcode(0);
101 return string;
102 }
103
104 /* static */
CreateLineStringNoGC(const EcmaVM * vm,size_t length,bool compressed)105 inline EcmaString *EcmaString::CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed)
106 {
107 size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
108 size = AlignUp(size, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT));
109 auto object = reinterpret_cast<TaggedObject *>(vm->GetHeap()->GetOldSpace()->Allocate(size, false));
110 object->SetClass(JSHClass::Cast(vm->GetJSThread()->GlobalConstants()->GetLineStringClass().GetTaggedObject()));
111 auto string = EcmaString::Cast(object);
112 string->SetLength(length, compressed);
113 string->SetRawHashcode(0);
114 return string;
115 }
116
117 /* static */
CreateLineStringWithSpaceType(const EcmaVM * vm,size_t length,bool compressed,MemSpaceType type)118 inline EcmaString *EcmaString::CreateLineStringWithSpaceType(const EcmaVM *vm, size_t length, bool compressed,
119 MemSpaceType type)
120 {
121 size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
122 EcmaString *string = nullptr;
123 switch (type) {
124 case MemSpaceType::SEMI_SPACE:
125 string = vm->GetFactory()->AllocLineStringObject(size);
126 break;
127 case MemSpaceType::OLD_SPACE:
128 string = vm->GetFactory()->AllocOldSpaceLineStringObject(size);
129 break;
130 case MemSpaceType::NON_MOVABLE:
131 string = vm->GetFactory()->AllocNonMovableLineStringObject(size);
132 break;
133 default:
134 LOG_ECMA(FATAL) << "this branch is unreachable";
135 UNREACHABLE();
136 }
137 string->SetLength(length, compressed);
138 string->SetRawHashcode(0);
139 return string;
140 }
141
CreateConstantString(const EcmaVM * vm,const uint8_t * utf8Data,size_t length,bool compressed,MemSpaceType type,uint32_t idOffset)142 inline EcmaString *EcmaString::CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
143 size_t length, bool compressed, MemSpaceType type, uint32_t idOffset)
144 {
145 auto string = ConstantString::Cast(vm->GetFactory()->AllocConstantStringObject(type));
146 string->SetLength(length, compressed);
147 string->SetRawHashcode(0);
148 string->SetConstantData(const_cast<uint8_t *>(utf8Data));
149 // The string might be serialized, the const data will be replaced by index in the panda file.
150 string->SetEntityId(idOffset);
151 return string;
152 }
153
CreateTreeString(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t length,bool compressed)154 inline EcmaString *EcmaString::CreateTreeString(const EcmaVM *vm,
155 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed)
156 {
157 ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length);
158 auto thread = vm->GetJSThread();
159 auto string = TreeEcmaString::Cast(vm->GetFactory()->AllocTreeStringObject());
160 string->SetLength(length, compressed);
161 string->SetRawHashcode(0);
162 string->SetFirst(thread, left.GetTaggedValue());
163 string->SetSecond(thread, right.GetTaggedValue());
164 return string;
165 }
166
167 /* static */
FastSubUtf8String(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)168 EcmaString *EcmaString::FastSubUtf8String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
169 uint32_t length)
170 {
171 ASSERT(src->IsLineOrConstantString());
172 auto string = CreateLineString(vm, length, true);
173 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
174 Span<uint8_t> dst(string->GetDataUtf8Writable(), length);
175 Span<const uint8_t> source(src->GetDataUtf8() + start, length);
176 EcmaString::MemCopyChars(dst, length, source, length);
177
178 ASSERT_PRINT(CanBeCompressed(string), "canBeCompresse does not match the real value!");
179 return string;
180 }
181
182 /* static */
FastSubUtf16String(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)183 EcmaString *EcmaString::FastSubUtf16String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
184 uint32_t length)
185 {
186 ASSERT(src->IsLineOrConstantString());
187 bool canBeCompressed = CanBeCompressed(src->GetDataUtf16() + start, length);
188 auto string = CreateLineString(vm, length, canBeCompressed);
189 if (canBeCompressed) {
190 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
191 CopyChars(string->GetDataUtf8Writable(), src->GetDataUtf16() + start, length);
192 } else {
193 uint32_t len = length * (sizeof(uint16_t) / sizeof(uint8_t));
194 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
195 Span<uint16_t> dst(string->GetDataUtf16Writable(), length);
196 Span<const uint16_t> source(src->GetDataUtf16() + start, length);
197 EcmaString::MemCopyChars(dst, len, source, len);
198 }
199 ASSERT_PRINT(canBeCompressed == CanBeCompressed(string), "canBeCompresse does not match the real value!");
200 return string;
201 }
202
GetData()203 inline uint16_t *EcmaString::GetData() const
204 {
205 ASSERT_PRINT(IsLineString(), "EcmaString: Read data from not LineString");
206 return LineEcmaString::Cast(this)->GetData();
207 }
208
GetDataUtf8()209 inline const uint8_t *EcmaString::GetDataUtf8() const
210 {
211 ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
212 if (IsConstantString()) {
213 return ConstantString::Cast(this)->GetConstantData();
214 }
215 return reinterpret_cast<uint8_t *>(GetData());
216 }
217
GetDataUtf16()218 inline const uint16_t *EcmaString::GetDataUtf16() const
219 {
220 LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
221 return GetData();
222 }
223
GetDataUtf8Writable()224 inline uint8_t *EcmaString::GetDataUtf8Writable()
225 {
226 ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
227 if (IsConstantString()) {
228 return ConstantString::Cast(this)->GetConstantData();
229 }
230 return reinterpret_cast<uint8_t *>(GetData());
231 }
232
GetDataUtf16Writable()233 inline uint16_t *EcmaString::GetDataUtf16Writable()
234 {
235 LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
236 return GetData();
237 }
238
GetUtf8Length(bool modify)239 inline size_t EcmaString::GetUtf8Length(bool modify) const
240 {
241 ASSERT(IsLineOrConstantString());
242 if (!IsUtf16()) {
243 return GetLength() + 1; // add place for zero in the end
244 }
245 return base::utf_helper::Utf16ToUtf8Size(GetData(), GetLength(), modify);
246 }
247
248 template<bool verify>
At(int32_t index)249 inline uint16_t EcmaString::At(int32_t index) const
250 {
251 int32_t length = static_cast<int32_t>(GetLength());
252 if (verify) {
253 if ((index < 0) || (index >= length)) {
254 return 0;
255 }
256 }
257 if (IsLineString()) {
258 return LineEcmaString::Cast(this)->Get<verify>(index);
259 } else if (IsConstantString()) {
260 return ConstantString::Cast(this)->Get<verify>(index);
261 } else {
262 return TreeEcmaString::Cast(this)->Get<verify>(index);
263 }
264 }
265
WriteData(uint32_t index,uint16_t src)266 inline void EcmaString::WriteData(uint32_t index, uint16_t src)
267 {
268 ASSERT(index < GetLength());
269 ASSERT(IsLineString());
270 LineEcmaString::Cast(this)->Set(index, src);
271 }
272
IsFlat()273 inline bool EcmaString::IsFlat() const
274 {
275 if (!JSTaggedValue(this).IsTreeString()) {
276 return true;
277 }
278 return TreeEcmaString::Cast(this)->IsFlat();
279 }
280
281 template <typename Char>
WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)282 void EcmaString::WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
283 {
284 DISALLOW_GARBAGE_COLLECTION;
285 uint32_t length = src->GetLength();
286 if (length == 0) {
287 return;
288 }
289 while (true) {
290 ASSERT(length <= maxLength && length > 0);
291 ASSERT(length <= src->GetLength());
292 switch (src->GetStringType()) {
293 case JSType::LINE_STRING: {
294 if (src->IsUtf8()) {
295 CopyChars(buf, src->GetDataUtf8(), length);
296 } else {
297 CopyChars(buf, src->GetDataUtf16(), length);
298 }
299 return;
300 }
301 case JSType::CONSTANT_STRING: {
302 ASSERT(src->IsUtf8());
303 CopyChars(buf, src->GetDataUtf8(), length);
304 return;
305 }
306 case JSType::TREE_STRING: {
307 TreeEcmaString *treeSrc = TreeEcmaString::Cast(src);
308 EcmaString *first = EcmaString::Cast(treeSrc->GetFirst());
309 EcmaString *second = EcmaString::Cast(treeSrc->GetSecond());
310 uint32_t firstLength = first->GetLength();
311 uint32_t secondLength = second->GetLength();
312 if (secondLength >= firstLength) {
313 // second string is longer. So recurse over first.
314 WriteToFlat(first, buf, maxLength);
315 if (first == second) {
316 CopyChars(buf + firstLength, buf, firstLength);
317 return;
318 }
319 buf += firstLength;
320 maxLength -= firstLength;
321 src = second;
322 length -= firstLength;
323 } else {
324 // first string is longer. So recurse over second.
325 if (secondLength > 0) {
326 if (secondLength == 1) {
327 buf[firstLength] = static_cast<Char>(second->At<false>(0));
328 } else if ((second->IsLineOrConstantString()) && second->IsUtf8()) {
329 CopyChars(buf + firstLength, second->GetDataUtf8(), secondLength);
330 } else {
331 WriteToFlat(second, buf + firstLength, maxLength - firstLength);
332 }
333 }
334 maxLength = firstLength;
335 src = first;
336 length -= secondLength;
337 }
338 continue;
339 }
340 default:
341 LOG_ECMA(FATAL) << "this branch is unreachable";
342 UNREACHABLE();
343 }
344 }
345 }
346
GetDataUtf8()347 inline const uint8_t *EcmaStringAccessor::GetDataUtf8()
348 {
349 return string_->GetDataUtf8();
350 }
351
GetDataUtf16()352 inline const uint16_t *EcmaStringAccessor::GetDataUtf16()
353 {
354 return string_->GetDataUtf16();
355 }
356
GetUtf8Length()357 inline size_t EcmaStringAccessor::GetUtf8Length() const
358 {
359 return string_->GetUtf8Length();
360 }
361
ReadData(EcmaString * dst,EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)362 inline void EcmaStringAccessor::ReadData(EcmaString *dst, EcmaString *src,
363 uint32_t start, uint32_t destSize, uint32_t length)
364 {
365 dst->WriteData(src, start, destSize, length);
366 }
367 } // namespace panda::ecmascript
368 #endif
369