• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_STRING_INL_H
17 #define ECMASCRIPT_STRING_INL_H
18 
19 #include "ecmascript/ecma_string.h"
20 #include "ecmascript/base/string_helper.h"
21 #include "ecmascript/ecma_vm.h"
22 #include "ecmascript/js_handle.h"
23 #include "ecmascript/js_tagged_value-inl.h"
24 #include "ecmascript/object_factory-inl.h"
25 
26 namespace panda::ecmascript {
27 /* static */
CreateEmptyString(const EcmaVM * vm)28 inline EcmaString *EcmaString::CreateEmptyString(const EcmaVM *vm)
29 {
30     auto string = vm->GetFactory()->AllocNonMovableLineStringObject(EcmaString::SIZE);
31     string->SetLength(0, true);
32     string->SetRawHashcode(0);
33     return string;
34 }
35 
36 /* static */
CreateFromUtf8(const EcmaVM * vm,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress,MemSpaceType type,bool isConstantString,uint32_t idOffset)37 inline EcmaString *EcmaString::CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
38                                               bool canBeCompress, MemSpaceType type, bool isConstantString,
39                                               uint32_t idOffset)
40 {
41     if (utf8Len == 0) {
42         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
43     }
44     EcmaString *string = nullptr;
45     if (canBeCompress) {
46         if (isConstantString) {
47             string = CreateConstantString(vm, utf8Data, utf8Len, canBeCompress, type, idOffset);
48         } else {
49             string = CreateLineStringWithSpaceType(vm, utf8Len, true, type);
50             ASSERT(string != nullptr);
51 
52             if (memcpy_s(string->GetDataUtf8Writable(), utf8Len, utf8Data, utf8Len) != EOK) {
53                 LOG_FULL(FATAL) << "memcpy_s failed";
54                 UNREACHABLE();
55             }
56         }
57     } else {
58         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
59         string = CreateLineStringWithSpaceType(vm, utf16Len, false, type);
60         ASSERT(string != nullptr);
61 
62         [[maybe_unused]] auto len =
63             base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len, 0);
64         ASSERT(len == utf16Len);
65     }
66 
67     ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
68     return string;
69 }
70 
CreateUtf16StringFromUtf8(const EcmaVM * vm,const uint8_t * utf8Data,uint32_t utf16Len,MemSpaceType type)71 inline EcmaString *EcmaString::CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf16Len,
72     MemSpaceType type)
73 {
74     if (utf16Len == 0) {
75         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
76     }
77     auto string = CreateLineStringWithSpaceType(vm, utf16Len, false, type);
78     ASSERT(string != nullptr);
79     auto len = utf::ConvertRegionMUtf8ToUtf16(
80         utf8Data, string->GetDataUtf16Writable(), utf::Mutf8Size(utf8Data), utf16Len, 0);
81     if (len < utf16Len) {
82         string->TrimLineString(vm->GetJSThread(), len);
83     }
84     ASSERT_PRINT(false == CanBeCompressed(string), "Bad input canBeCompress!");
85     return string;
86 }
87 
TrimLineString(const JSThread * thread,uint32_t newLength)88 inline void EcmaString::TrimLineString(const JSThread *thread, uint32_t newLength)
89 {
90     ASSERT(IsLineString());
91     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
92     uint32_t oldLength = GetLength();
93     ASSERT(oldLength > newLength);
94     size_t trimBytes = (oldLength - newLength) * (IsUtf8() ? sizeof(uint8_t) : sizeof(uint16_t));
95     size_t size = IsUtf8() ? LineEcmaString::ComputeSizeUtf8(newLength) : LineEcmaString::ComputeSizeUtf16(newLength);
96     factory->FillFreeObject(ToUintPtr(this) + size, trimBytes, RemoveSlots::YES, ToUintPtr(this));
97     SetLength(newLength, CanBeCompressed(this));
98 }
99 
CreateFromUtf16(const EcmaVM * vm,const uint16_t * utf16Data,uint32_t utf16Len,bool canBeCompress,MemSpaceType type)100 inline EcmaString *EcmaString::CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
101                                                bool canBeCompress, MemSpaceType type)
102 {
103     if (utf16Len == 0) {
104         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
105     }
106     auto string = CreateLineStringWithSpaceType(vm, utf16Len, canBeCompress, type);
107     ASSERT(string != nullptr);
108 
109     if (canBeCompress) {
110         CopyChars(string->GetDataUtf8Writable(), utf16Data, utf16Len);
111     } else {
112         uint32_t len = utf16Len * (sizeof(uint16_t) / sizeof(uint8_t));
113         if (memcpy_s(string->GetDataUtf16Writable(), len, utf16Data, len) != EOK) {
114             LOG_FULL(FATAL) << "memcpy_s failed";
115             UNREACHABLE();
116         }
117     }
118 
119     ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
120     return string;
121 }
122 
123 /* static */
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)124 inline EcmaString *EcmaString::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
125 {
126     size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
127     auto string = vm->GetFactory()->AllocLineStringObject(size);
128     string->SetLength(length, compressed);
129     string->SetRawHashcode(0);
130     return string;
131 }
132 
133 /* static */
CreateLineStringNoGC(const EcmaVM * vm,size_t length,bool compressed)134 inline EcmaString *EcmaString::CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed)
135 {
136     size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
137     size = AlignUp(size, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT));
138     auto object = reinterpret_cast<TaggedObject *>(vm->GetHeap()->GetOldSpace()->Allocate(size, false));
139     auto thread = vm->GetJSThread();
140     object->SetClass(thread, JSHClass::Cast(thread->GlobalConstants()->GetLineStringClass().GetTaggedObject()));
141     auto string = EcmaString::Cast(object);
142     string->SetLength(length, compressed);
143     string->SetRawHashcode(0);
144     return string;
145 }
146 
147 /* static */
CreateLineStringWithSpaceType(const EcmaVM * vm,size_t length,bool compressed,MemSpaceType type)148 inline EcmaString *EcmaString::CreateLineStringWithSpaceType(const EcmaVM *vm, size_t length, bool compressed,
149                                                              MemSpaceType type)
150 {
151     size_t size = compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
152     EcmaString *string = nullptr;
153     switch (type) {
154         case MemSpaceType::SEMI_SPACE:
155             string = vm->GetFactory()->AllocLineStringObject(size);
156             break;
157         case MemSpaceType::OLD_SPACE:
158             string = vm->GetFactory()->AllocOldSpaceLineStringObject(size);
159             break;
160         case MemSpaceType::NON_MOVABLE:
161             string = vm->GetFactory()->AllocNonMovableLineStringObject(size);
162             break;
163         default:
164             LOG_ECMA(FATAL) << "this branch is unreachable";
165             UNREACHABLE();
166     }
167     string->SetLength(length, compressed);
168     string->SetRawHashcode(0);
169     return string;
170 }
171 
CreateSlicedString(const EcmaVM * vm,MemSpaceType type)172 inline SlicedString *EcmaString::CreateSlicedString(const EcmaVM *vm, MemSpaceType type)
173 {
174     auto slicedString = SlicedString::Cast(vm->GetFactory()->AllocSlicedStringObject(type));
175     slicedString->SetRawHashcode(0);
176     return slicedString;
177 }
178 
CreateConstantString(const EcmaVM * vm,const uint8_t * utf8Data,size_t length,bool compressed,MemSpaceType type,uint32_t idOffset)179 inline EcmaString *EcmaString::CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
180     size_t length, bool compressed, MemSpaceType type, uint32_t idOffset)
181 {
182     auto string = ConstantString::Cast(vm->GetFactory()->AllocConstantStringObject(type));
183     auto thread = vm->GetJSThread();
184     string->SetLength(length, compressed);
185     string->SetRawHashcode(0);
186     string->SetConstantData(const_cast<uint8_t *>(utf8Data));
187     // The string might be serialized, the const data will be replaced by index in the panda file.
188     string->SetEntityId(idOffset);
189     string->SetRelocatedData(thread, JSTaggedValue::Undefined(), BarrierMode::SKIP_BARRIER);
190     return string;
191 }
192 
CreateTreeString(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t length,bool compressed)193 inline EcmaString *EcmaString::CreateTreeString(const EcmaVM *vm,
194     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed)
195 {
196     ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length);
197     auto thread = vm->GetJSThread();
198     auto string = TreeEcmaString::Cast(vm->GetFactory()->AllocTreeStringObject());
199     string->SetLength(length, compressed);
200     string->SetRawHashcode(0);
201     string->SetFirst(thread, left.GetTaggedValue());
202     string->SetSecond(thread, right.GetTaggedValue());
203     return string;
204 }
205 
206 /* static */
FastSubUtf8String(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)207 EcmaString *EcmaString::FastSubUtf8String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
208                                           uint32_t length)
209 {
210     JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, true));
211     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
212     FlatStringInfo srcFlat = FlattenAllString(vm, src);
213     Span<uint8_t> dst(string->GetDataUtf8Writable(), length);
214     Span<const uint8_t> source(srcFlat.GetDataUtf8() + start, length);
215     EcmaString::MemCopyChars(dst, length, source, length);
216 
217     ASSERT_PRINT(CanBeCompressed(*string), "canBeCompresse does not match the real value!");
218     return *string;
219 }
220 
221 /* static */
FastSubUtf16String(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)222 EcmaString *EcmaString::FastSubUtf16String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
223                                            uint32_t length)
224 {
225     FlatStringInfo srcFlat = FlattenAllString(vm, src);
226     bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
227     JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
228     // maybe happen GC,so get srcFlat again
229     srcFlat = FlattenAllString(vm, src);
230     if (canBeCompressed) {
231         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
232         CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
233     } else {
234         uint32_t len = length * (sizeof(uint16_t) / sizeof(uint8_t));
235         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
236         Span<uint16_t> dst(string->GetDataUtf16Writable(), length);
237         Span<const uint16_t> source(srcFlat.GetDataUtf16() + start, length);
238         EcmaString::MemCopyChars(dst, len, source, len);
239     }
240     ASSERT_PRINT(canBeCompressed == CanBeCompressed(*string), "canBeCompresse does not match the real value!");
241     return *string;
242 }
243 
GetData()244 inline uint16_t *EcmaString::GetData() const
245 {
246     ASSERT_PRINT(IsLineString(), "EcmaString: Read data from not LineString");
247     return LineEcmaString::Cast(this)->GetData();
248 }
249 
GetDataUtf8()250 inline const uint8_t *EcmaString::GetDataUtf8() const
251 {
252     ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
253     if (IsLineString()) {
254         return reinterpret_cast<uint8_t *>(GetData());
255     }
256     return ConstantString::Cast(this)->GetConstantData();
257 }
258 
GetDataUtf16()259 inline const uint16_t *EcmaString::GetDataUtf16() const
260 {
261     LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
262     return GetData();
263 }
264 
GetDataUtf8Writable()265 inline uint8_t *EcmaString::GetDataUtf8Writable()
266 {
267     ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
268     if (IsConstantString()) {
269         return ConstantString::Cast(this)->GetConstantData();
270     }
271     return reinterpret_cast<uint8_t *>(GetData());
272 }
273 
GetDataUtf16Writable()274 inline uint16_t *EcmaString::GetDataUtf16Writable()
275 {
276     LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
277     return GetData();
278 }
279 
GetUtf8Length(bool modify)280 inline size_t EcmaString::GetUtf8Length(bool modify) const
281 {
282     if (!IsUtf16()) {
283         return GetLength() + 1;  // add place for zero in the end
284     }
285     CVector<uint16_t> tmpBuf;
286     const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
287     return base::utf_helper::Utf16ToUtf8Size(data, GetLength(), modify);
288 }
289 
290 template<bool verify>
At(int32_t index)291 inline uint16_t EcmaString::At(int32_t index) const
292 {
293     int32_t length = static_cast<int32_t>(GetLength());
294     if (verify) {
295         if ((index < 0) || (index >= length)) {
296             return 0;
297         }
298     }
299     switch (GetStringType()) {
300         case JSType::LINE_STRING:
301             return LineEcmaString::Cast(this)->Get<verify>(index);
302         case JSType::CONSTANT_STRING:
303             return ConstantString::Cast(this)->Get<verify>(index);
304         case JSType::SLICED_STRING:
305             return SlicedString::Cast(this)->Get<verify>(index);
306         case JSType::TREE_STRING:
307             return TreeEcmaString::Cast(this)->Get<verify>(index);
308         default:
309             LOG_ECMA(FATAL) << "this branch is unreachable";
310             UNREACHABLE();
311     }
312 }
313 
FastToUtf8Span()314 inline Span<const uint8_t> EcmaString::FastToUtf8Span() const
315 {
316     uint32_t strLen = GetLength();
317     ASSERT(IsUtf8());
318     const uint8_t *data = GetDataUtf8();
319     return Span<const uint8_t>(data, strLen);
320 }
321 
WriteData(uint32_t index,uint16_t src)322 inline void EcmaString::WriteData(uint32_t index, uint16_t src)
323 {
324     ASSERT(index < GetLength());
325     ASSERT(IsLineString());
326     LineEcmaString::Cast(this)->Set(index, src);
327 }
328 
IsFlat()329 inline bool EcmaString::IsFlat() const
330 {
331     if (!JSTaggedValue(this).IsTreeString()) {
332         return true;
333     }
334     return TreeEcmaString::Cast(this)->IsFlat();
335 }
336 
337 template <typename Char>
WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)338 void EcmaString::WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
339 {
340     DISALLOW_GARBAGE_COLLECTION;
341     uint32_t length = src->GetLength();
342     if (length == 0) {
343         return;
344     }
345     while (true) {
346         ASSERT(length <= maxLength && length > 0);
347         ASSERT(length <= src->GetLength());
348         switch (src->GetStringType()) {
349             case JSType::LINE_STRING: {
350                 if (src->IsUtf8()) {
351                     CopyChars(buf, src->GetDataUtf8(), length);
352                 } else {
353                     CopyChars(buf, src->GetDataUtf16(), length);
354                 }
355                 return;
356             }
357             case JSType::CONSTANT_STRING: {
358                 ASSERT(src->IsUtf8());
359                 CopyChars(buf, src->GetDataUtf8(), length);
360                 return;
361             }
362             case JSType::TREE_STRING: {
363                 TreeEcmaString *treeSrc = TreeEcmaString::Cast(src);
364                 EcmaString *first = EcmaString::Cast(treeSrc->GetFirst());
365                 EcmaString *second = EcmaString::Cast(treeSrc->GetSecond());
366                 uint32_t firstLength = first->GetLength();
367                 uint32_t secondLength = second->GetLength();
368                 if (secondLength >= firstLength) {
369                     // second string is longer. So recurse over first.
370                     WriteToFlat(first, buf, maxLength);
371                     if (first == second) {
372                         CopyChars(buf + firstLength, buf, firstLength);
373                         return;
374                     }
375                     buf += firstLength;
376                     maxLength -= firstLength;
377                     src = second;
378                     length -= firstLength;
379                 } else {
380                     // first string is longer.  So recurse over second.
381                     if (secondLength > 0) {
382                         if (secondLength == 1) {
383                             buf[firstLength] = static_cast<Char>(second->At<false>(0));
384                         } else if ((second->IsLineOrConstantString()) && second->IsUtf8()) {
385                             CopyChars(buf + firstLength, second->GetDataUtf8(), secondLength);
386                         } else {
387                             WriteToFlat(second, buf + firstLength, maxLength - firstLength);
388                         }
389                     }
390                     maxLength = firstLength;
391                     src = first;
392                     length -= secondLength;
393                 }
394                 continue;
395             }
396             case JSType::SLICED_STRING: {
397                 EcmaString *parent = EcmaString::Cast(SlicedString::Cast(src)->GetParent());
398                 if (src->IsUtf8()) {
399                     CopyChars(buf, parent->GetDataUtf8() + SlicedString::Cast(src)->GetStartIndex(), length);
400                 } else {
401                     CopyChars(buf, parent->GetDataUtf16() + SlicedString::Cast(src)->GetStartIndex(), length);
402                 }
403                 return;
404             }
405             default:
406                 LOG_ECMA(FATAL) << "this branch is unreachable";
407                 UNREACHABLE();
408         }
409     }
410 }
411 
GetDataUtf8()412 inline const uint8_t *FlatStringInfo::GetDataUtf8() const
413 {
414     return string_->GetDataUtf8() + startIndex_;
415 }
416 
GetDataUtf16()417 inline const uint16_t *FlatStringInfo::GetDataUtf16() const
418 {
419     return string_->GetDataUtf16() + startIndex_;
420 }
421 
GetDataUtf8Writable()422 inline uint8_t *FlatStringInfo::GetDataUtf8Writable() const
423 {
424     return string_->GetDataUtf8Writable() + startIndex_;
425 }
426 
GetDataUtf8()427 inline const uint8_t *EcmaStringAccessor::GetDataUtf8()
428 {
429     return string_->GetDataUtf8();
430 }
431 
GetDataUtf16()432 inline const uint16_t *EcmaStringAccessor::GetDataUtf16()
433 {
434     return string_->GetDataUtf16();
435 }
436 
GetUtf8Length()437 inline size_t EcmaStringAccessor::GetUtf8Length() const
438 {
439     return string_->GetUtf8Length();
440 }
441 
ReadData(EcmaString * dst,EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)442 inline void EcmaStringAccessor::ReadData(EcmaString *dst, EcmaString *src,
443     uint32_t start, uint32_t destSize, uint32_t length)
444 {
445     dst->WriteData(src, start, destSize, length);
446 }
447 
FastToUtf8Span()448 inline Span<const uint8_t> EcmaStringAccessor::FastToUtf8Span()
449 {
450     return string_->FastToUtf8Span();
451 }
452 }  // namespace panda::ecmascript
453 #endif
454