1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef ECMASCRIPT_STRING_INL_H
17 #define ECMASCRIPT_STRING_INL_H
18
19 #include "ecmascript/ecma_string.h"
20 #include "ecmascript/base/string_helper.h"
21 #include "ecmascript/ecma_vm.h"
22 #include "ecmascript/js_handle.h"
23 #include "ecmascript/js_tagged_value-inl.h"
24 #include "ecmascript/object_factory-inl.h"
25
26 namespace panda::ecmascript {
27 /* static */
CreateEmptyString(const EcmaVM * vm)28 inline EcmaString *EcmaString::CreateEmptyString(const EcmaVM *vm)
29 {
30 auto string = vm->GetFactory()->AllocNonMovableLineStringObject(EcmaString::SIZE);
31 string->SetLength(0, true);
32 string->SetRawHashcode(0);
33 return string;
34 }
35
36 /* static */
CreateFromUtf8(const EcmaVM * vm,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress,MemSpaceType type,bool isConstantString,uint32_t idOffset)37 inline EcmaString *EcmaString::CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
38 bool canBeCompress, MemSpaceType type, bool isConstantString,
39 uint32_t idOffset)
40 {
41 if (utf8Len == 0) {
42 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
43 }
44 EcmaString *string = nullptr;
45 if (canBeCompress) {
46 if (isConstantString) {
47 string = CreateConstantString(vm, utf8Data, utf8Len, canBeCompress, type, idOffset);
48 } else {
49 string = CreateLineStringWithSpaceType(vm, utf8Len, true, type);
50 ASSERT(string != nullptr);
51
52 if (memcpy_s(string->GetDataUtf8Writable(), utf8Len, utf8Data, utf8Len) != EOK) {
53 LOG_FULL(FATAL) << "memcpy_s failed";
54 UNREACHABLE();
55 }
56 }
57 } else {
58 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
59 string = CreateLineStringWithSpaceType(vm, utf16Len, false, type);
60 ASSERT(string != nullptr);
61
62 [[maybe_unused]] auto len =
63 base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len, 0);
64 ASSERT(len == utf16Len);
65 }
66
67 ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
68 return string;
69 }
70
CreateUtf16StringFromUtf8(const EcmaVM * vm,const uint8_t * utf8Data,uint32_t utf16Len,MemSpaceType type)71 inline EcmaString *EcmaString::CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf16Len,
72 MemSpaceType type)
73 {
74 if (utf16Len == 0) {
75 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
76 }
77 auto string = CreateLineStringWithSpaceType(vm, utf16Len, false, type);
78 ASSERT(string != nullptr);
79 auto len = utf::ConvertRegionMUtf8ToUtf16(
80 utf8Data, string->GetDataUtf16Writable(), utf::Mutf8Size(utf8Data), utf16Len, 0);
81 if (len < utf16Len) {
82 string->TrimLineString(vm->GetJSThread(), len);
83 }
84 ASSERT_PRINT(false == CanBeCompressed(string), "Bad input canBeCompress!");
85 return string;
86 }
87
TrimLineString(const JSThread * thread,uint32_t newLength)88 inline void EcmaString::TrimLineString(const JSThread *thread, uint32_t newLength)
89 {
90 ASSERT(IsLineString());
91 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
92 uint32_t oldLength = GetLength();
93 ASSERT(oldLength > newLength);
94 size_t trimBytes = (oldLength - newLength) * (IsUtf8() ? sizeof(uint8_t) : sizeof(uint16_t));
95 size_t size = IsUtf8() ? LineEcmaString::ComputeSizeUtf8(newLength) : LineEcmaString::ComputeSizeUtf16(newLength);
96 factory->FillFreeObject(ToUintPtr(this) + size, trimBytes, RemoveSlots::YES, ToUintPtr(this));
97 SetLength(newLength, CanBeCompressed(this));
98 }
99
CreateFromUtf16(const EcmaVM * vm,const uint16_t * utf16Data,uint32_t utf16Len,bool canBeCompress,MemSpaceType type)100 inline EcmaString *EcmaString::CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
101 bool canBeCompress, MemSpaceType type)
102 {
103 if (utf16Len == 0) {
104 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
105 }
106 auto string = CreateLineStringWithSpaceType(vm, utf16Len, canBeCompress, type);
107 ASSERT(string != nullptr);
108
109 if (canBeCompress) {
110 CopyChars(string->GetDataUtf8Writable(), utf16Data, utf16Len);
111 } else {
112 uint32_t len = utf16Len * (sizeof(uint16_t) / sizeof(uint8_t));
113 if (memcpy_s(string->GetDataUtf16Writable(), len, utf16Data, len) != EOK) {
114 LOG_FULL(FATAL) << "memcpy_s failed";
115 UNREACHABLE();
116 }
117 }
118
119 ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
120 return string;
121 }
122
123 /* static */
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)124 inline EcmaString *EcmaString::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
125 {
126 size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
127 auto string = vm->GetFactory()->AllocLineStringObject(size);
128 string->SetLength(length, compressed);
129 string->SetRawHashcode(0);
130 return string;
131 }
132
133 /* static */
CreateLineStringNoGC(const EcmaVM * vm,size_t length,bool compressed)134 inline EcmaString *EcmaString::CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed)
135 {
136 size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
137 size = AlignUp(size, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT));
138 auto object = reinterpret_cast<TaggedObject *>(vm->GetHeap()->GetOldSpace()->Allocate(size, false));
139 auto thread = vm->GetJSThread();
140 object->SetClass(thread, JSHClass::Cast(thread->GlobalConstants()->GetLineStringClass().GetTaggedObject()));
141 auto string = EcmaString::Cast(object);
142 string->SetLength(length, compressed);
143 string->SetRawHashcode(0);
144 return string;
145 }
146
147 /* static */
CreateLineStringWithSpaceType(const EcmaVM * vm,size_t length,bool compressed,MemSpaceType type)148 inline EcmaString *EcmaString::CreateLineStringWithSpaceType(const EcmaVM *vm, size_t length, bool compressed,
149 MemSpaceType type)
150 {
151 size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
152 EcmaString *string = nullptr;
153 switch (type) {
154 case MemSpaceType::SEMI_SPACE:
155 string = vm->GetFactory()->AllocLineStringObject(size);
156 break;
157 case MemSpaceType::OLD_SPACE:
158 string = vm->GetFactory()->AllocOldSpaceLineStringObject(size);
159 break;
160 case MemSpaceType::NON_MOVABLE:
161 string = vm->GetFactory()->AllocNonMovableLineStringObject(size);
162 break;
163 default:
164 LOG_ECMA(FATAL) << "this branch is unreachable";
165 UNREACHABLE();
166 }
167 string->SetLength(length, compressed);
168 string->SetRawHashcode(0);
169 return string;
170 }
171
CreateSlicedString(const EcmaVM * vm,MemSpaceType type)172 inline SlicedString *EcmaString::CreateSlicedString(const EcmaVM *vm, MemSpaceType type)
173 {
174 auto slicedString = SlicedString::Cast(vm->GetFactory()->AllocSlicedStringObject(type));
175 slicedString->SetRawHashcode(0);
176 return slicedString;
177 }
178
CreateConstantString(const EcmaVM * vm,const uint8_t * utf8Data,size_t length,bool compressed,MemSpaceType type,uint32_t idOffset)179 inline EcmaString *EcmaString::CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
180 size_t length, bool compressed, MemSpaceType type, uint32_t idOffset)
181 {
182 auto string = ConstantString::Cast(vm->GetFactory()->AllocConstantStringObject(type));
183 auto thread = vm->GetJSThread();
184 string->SetLength(length, compressed);
185 string->SetRawHashcode(0);
186 string->SetConstantData(const_cast<uint8_t *>(utf8Data));
187 // The string might be serialized, the const data will be replaced by index in the panda file.
188 string->SetEntityId(idOffset);
189 string->SetRelocatedData(thread, JSTaggedValue::Undefined(), BarrierMode::SKIP_BARRIER);
190 return string;
191 }
192
CreateTreeString(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t length,bool compressed)193 inline EcmaString *EcmaString::CreateTreeString(const EcmaVM *vm,
194 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed)
195 {
196 ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length);
197 auto thread = vm->GetJSThread();
198 auto string = TreeEcmaString::Cast(vm->GetFactory()->AllocTreeStringObject());
199 string->SetLength(length, compressed);
200 string->SetRawHashcode(0);
201 string->SetFirst(thread, left.GetTaggedValue());
202 string->SetSecond(thread, right.GetTaggedValue());
203 return string;
204 }
205
206 /* static */
FastSubUtf8String(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)207 EcmaString *EcmaString::FastSubUtf8String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
208 uint32_t length)
209 {
210 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, true));
211 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
212 FlatStringInfo srcFlat = FlattenAllString(vm, src);
213 Span<uint8_t> dst(string->GetDataUtf8Writable(), length);
214 Span<const uint8_t> source(srcFlat.GetDataUtf8() + start, length);
215 EcmaString::MemCopyChars(dst, length, source, length);
216
217 ASSERT_PRINT(CanBeCompressed(*string), "canBeCompresse does not match the real value!");
218 return *string;
219 }
220
221 /* static */
FastSubUtf16String(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)222 EcmaString *EcmaString::FastSubUtf16String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
223 uint32_t length)
224 {
225 FlatStringInfo srcFlat = FlattenAllString(vm, src);
226 bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
227 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
228 // maybe happen GC,so get srcFlat again
229 srcFlat = FlattenAllString(vm, src);
230 if (canBeCompressed) {
231 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
232 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
233 } else {
234 uint32_t len = length * (sizeof(uint16_t) / sizeof(uint8_t));
235 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
236 Span<uint16_t> dst(string->GetDataUtf16Writable(), length);
237 Span<const uint16_t> source(srcFlat.GetDataUtf16() + start, length);
238 EcmaString::MemCopyChars(dst, len, source, len);
239 }
240 ASSERT_PRINT(canBeCompressed == CanBeCompressed(*string), "canBeCompresse does not match the real value!");
241 return *string;
242 }
243
GetData()244 inline uint16_t *EcmaString::GetData() const
245 {
246 ASSERT_PRINT(IsLineString(), "EcmaString: Read data from not LineString");
247 return LineEcmaString::Cast(this)->GetData();
248 }
249
GetDataUtf8()250 inline const uint8_t *EcmaString::GetDataUtf8() const
251 {
252 ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
253 if (IsLineString()) {
254 return reinterpret_cast<uint8_t *>(GetData());
255 }
256 return ConstantString::Cast(this)->GetConstantData();
257 }
258
GetDataUtf16()259 inline const uint16_t *EcmaString::GetDataUtf16() const
260 {
261 LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
262 return GetData();
263 }
264
GetDataUtf8Writable()265 inline uint8_t *EcmaString::GetDataUtf8Writable()
266 {
267 ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
268 if (IsConstantString()) {
269 return ConstantString::Cast(this)->GetConstantData();
270 }
271 return reinterpret_cast<uint8_t *>(GetData());
272 }
273
GetDataUtf16Writable()274 inline uint16_t *EcmaString::GetDataUtf16Writable()
275 {
276 LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
277 return GetData();
278 }
279
GetUtf8Length(bool modify)280 inline size_t EcmaString::GetUtf8Length(bool modify) const
281 {
282 if (!IsUtf16()) {
283 return GetLength() + 1; // add place for zero in the end
284 }
285 CVector<uint16_t> tmpBuf;
286 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
287 return base::utf_helper::Utf16ToUtf8Size(data, GetLength(), modify);
288 }
289
290 template<bool verify>
At(int32_t index)291 inline uint16_t EcmaString::At(int32_t index) const
292 {
293 int32_t length = static_cast<int32_t>(GetLength());
294 if (verify) {
295 if ((index < 0) || (index >= length)) {
296 return 0;
297 }
298 }
299 switch (GetStringType()) {
300 case JSType::LINE_STRING:
301 return LineEcmaString::Cast(this)->Get<verify>(index);
302 case JSType::CONSTANT_STRING:
303 return ConstantString::Cast(this)->Get<verify>(index);
304 case JSType::SLICED_STRING:
305 return SlicedString::Cast(this)->Get<verify>(index);
306 case JSType::TREE_STRING:
307 return TreeEcmaString::Cast(this)->Get<verify>(index);
308 default:
309 LOG_ECMA(FATAL) << "this branch is unreachable";
310 UNREACHABLE();
311 }
312 }
313
FastToUtf8Span()314 inline Span<const uint8_t> EcmaString::FastToUtf8Span() const
315 {
316 uint32_t strLen = GetLength();
317 ASSERT(IsUtf8());
318 const uint8_t *data = GetDataUtf8();
319 return Span<const uint8_t>(data, strLen);
320 }
321
WriteData(uint32_t index,uint16_t src)322 inline void EcmaString::WriteData(uint32_t index, uint16_t src)
323 {
324 ASSERT(index < GetLength());
325 ASSERT(IsLineString());
326 LineEcmaString::Cast(this)->Set(index, src);
327 }
328
IsFlat()329 inline bool EcmaString::IsFlat() const
330 {
331 if (!JSTaggedValue(this).IsTreeString()) {
332 return true;
333 }
334 return TreeEcmaString::Cast(this)->IsFlat();
335 }
336
337 template <typename Char>
WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)338 void EcmaString::WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
339 {
340 DISALLOW_GARBAGE_COLLECTION;
341 uint32_t length = src->GetLength();
342 if (length == 0) {
343 return;
344 }
345 while (true) {
346 ASSERT(length <= maxLength && length > 0);
347 ASSERT(length <= src->GetLength());
348 switch (src->GetStringType()) {
349 case JSType::LINE_STRING: {
350 if (src->IsUtf8()) {
351 CopyChars(buf, src->GetDataUtf8(), length);
352 } else {
353 CopyChars(buf, src->GetDataUtf16(), length);
354 }
355 return;
356 }
357 case JSType::CONSTANT_STRING: {
358 ASSERT(src->IsUtf8());
359 CopyChars(buf, src->GetDataUtf8(), length);
360 return;
361 }
362 case JSType::TREE_STRING: {
363 TreeEcmaString *treeSrc = TreeEcmaString::Cast(src);
364 EcmaString *first = EcmaString::Cast(treeSrc->GetFirst());
365 EcmaString *second = EcmaString::Cast(treeSrc->GetSecond());
366 uint32_t firstLength = first->GetLength();
367 uint32_t secondLength = second->GetLength();
368 if (secondLength >= firstLength) {
369 // second string is longer. So recurse over first.
370 WriteToFlat(first, buf, maxLength);
371 if (first == second) {
372 CopyChars(buf + firstLength, buf, firstLength);
373 return;
374 }
375 buf += firstLength;
376 maxLength -= firstLength;
377 src = second;
378 length -= firstLength;
379 } else {
380 // first string is longer. So recurse over second.
381 if (secondLength > 0) {
382 if (secondLength == 1) {
383 buf[firstLength] = static_cast<Char>(second->At<false>(0));
384 } else if ((second->IsLineOrConstantString()) && second->IsUtf8()) {
385 CopyChars(buf + firstLength, second->GetDataUtf8(), secondLength);
386 } else {
387 WriteToFlat(second, buf + firstLength, maxLength - firstLength);
388 }
389 }
390 maxLength = firstLength;
391 src = first;
392 length -= secondLength;
393 }
394 continue;
395 }
396 case JSType::SLICED_STRING: {
397 EcmaString *parent = EcmaString::Cast(SlicedString::Cast(src)->GetParent());
398 if (src->IsUtf8()) {
399 CopyChars(buf, parent->GetDataUtf8() + SlicedString::Cast(src)->GetStartIndex(), length);
400 } else {
401 CopyChars(buf, parent->GetDataUtf16() + SlicedString::Cast(src)->GetStartIndex(), length);
402 }
403 return;
404 }
405 default:
406 LOG_ECMA(FATAL) << "this branch is unreachable";
407 UNREACHABLE();
408 }
409 }
410 }
411
GetDataUtf8()412 inline const uint8_t *FlatStringInfo::GetDataUtf8() const
413 {
414 return string_->GetDataUtf8() + startIndex_;
415 }
416
GetDataUtf16()417 inline const uint16_t *FlatStringInfo::GetDataUtf16() const
418 {
419 return string_->GetDataUtf16() + startIndex_;
420 }
421
GetDataUtf8Writable()422 inline uint8_t *FlatStringInfo::GetDataUtf8Writable() const
423 {
424 return string_->GetDataUtf8Writable() + startIndex_;
425 }
426
GetDataUtf8()427 inline const uint8_t *EcmaStringAccessor::GetDataUtf8()
428 {
429 return string_->GetDataUtf8();
430 }
431
GetDataUtf16()432 inline const uint16_t *EcmaStringAccessor::GetDataUtf16()
433 {
434 return string_->GetDataUtf16();
435 }
436
GetUtf8Length()437 inline size_t EcmaStringAccessor::GetUtf8Length() const
438 {
439 return string_->GetUtf8Length();
440 }
441
ReadData(EcmaString * dst,EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)442 inline void EcmaStringAccessor::ReadData(EcmaString *dst, EcmaString *src,
443 uint32_t start, uint32_t destSize, uint32_t length)
444 {
445 dst->WriteData(src, start, destSize, length);
446 }
447
FastToUtf8Span()448 inline Span<const uint8_t> EcmaStringAccessor::FastToUtf8Span()
449 {
450 return string_->FastToUtf8Span();
451 }
452 } // namespace panda::ecmascript
453 #endif
454