1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "common_interfaces/objects/base_string.h"
19 #include "ecmascript/base/json_helper.h"
20
21 namespace panda::ecmascript {
22
// Bit masks that keep only the low 3, 4, 5 or 6 bits of a value.
constexpr size_t LOW_3BITS = 0x7;
constexpr size_t LOW_4BITS = 0xF;
constexpr size_t LOW_5BITS = 0x1F;
constexpr size_t LOW_6BITS = 0x3F;
// 0xDC00 / 0xD800: first code units of the UTF-16 low (trail) and high (lead) surrogate ranges.
constexpr size_t L_SURROGATE_START = 0xDC00;
constexpr size_t H_SURROGATE_START = 0xD800;
// 0x10000: first code point above the Basic Multilingual Plane.
// NOTE(review): "RAIR" looks like a misspelling of "PAIR"; the name is kept because its users are not visible here.
constexpr size_t SURROGATE_RAIR_START = 0x10000;
// Bit positions 18, 12, 10 and 6.
// NOTE(review): presumably shift amounts for UTF-8/UTF-16 conversion; no use is visible in this part of the file.
constexpr size_t OFFSET_18POS = 18;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_6POS = 6;

// Local shorthand for base::NumberHelper.
using NumberHelper = base::NumberHelper;
36
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)37 EcmaString *EcmaString::Concat(const EcmaVM *vm,
38 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
39 {
40 ASSERT(IsSMemSpace(type));
41 // allocator may trig gc and move src, need to hold it
42 EcmaString *strLeft = *left;
43 EcmaString *strRight = *right;
44 uint32_t leftLength = strLeft->GetLength();
45 uint32_t rightLength = strRight->GetLength();
46 uint32_t newLength = leftLength + rightLength;
47 if (newLength == 0) {
48 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
49 }
50
51 if (leftLength == 0) {
52 return strRight;
53 }
54 if (rightLength == 0) {
55 return strLeft;
56 }
57 // if the result string is small, make a LineString
58 bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
59 if (newLength < TreeString::MIN_TREE_STRING_LENGTH) {
60 ASSERT(strLeft->IsLineString());
61 ASSERT(strRight->IsLineString());
62 auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
63 // retrieve strings after gc
64 strLeft = *left;
65 strRight = *right;
66 if (compressed) {
67 // copy left part
68 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
69 Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
70 EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
71 // copy right part
72 sp = sp.SubSpan(leftLength);
73 Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
74 EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
75 } else {
76 // copy left part
77 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
78 if (strLeft->IsUtf8()) {
79 BaseString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
80 } else {
81 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
82 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
83 }
84 // copy right part
85 sp = sp.SubSpan(leftLength);
86 if (strRight->IsUtf8()) {
87 BaseString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
88 } else {
89 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
90 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
91 }
92 }
93 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
94 return newString;
95 }
96 return CreateTreeString(vm, left, right, newLength, compressed);
97 }
98
99 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)100 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
101 uint32_t length, bool compressed)
102 {
103 JSHandle<EcmaString> newString(vm->GetJSThread(),
104 CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105 auto strOrigin = FlattenAllString(vm, original);
106 if (compressed) {
107 // copy
108 Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109 Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110 EcmaString::MemCopyChars(sp, length, srcSp, length);
111 } else {
112 // copy left part
113 Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114 if (strOrigin.IsUtf8()) {
115 BaseString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116 } else {
117 Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118 EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119 }
120 }
121 ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122 return *newString;
123 }
124
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129 ASSERT((start + length) <= src->GetLength());
130 if (length == 0) {
131 return *vm->GetFactory()->GetEmptyString();
132 }
133 if (start == 0 && length == src->GetLength()) {
134 return *src;
135 }
136 if (src->IsUtf8()) {
137 return FastSubUtf8String(vm, src, start, length);
138 }
139 return FastSubUtf16String(vm, src, start, length);
140 }
141
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146 ASSERT((start + length) <= src->GetLength());
147 FlatStringInfo srcFlat = FlattenAllString(vm, src);
148 JSHandle<EcmaString> flatString(vm->GetJSThread(), srcFlat.GetString());
149 SlicedEcmaString *slicedString = CreateSlicedString(vm, flatString);
150 slicedString->InitLengthAndFlags(length, flatString->IsUtf8());
151 slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152 return slicedString;
153 }
154
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159 ASSERT((start + length) <= src->GetLength());
160 if (length == 1) {
161 JSThread *thread = vm->GetJSThread();
162 uint16_t res = EcmaStringAccessor(src).Get<false>(thread, start);
163 if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
164 JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
165 return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(thread, res).GetTaggedObject());
166 }
167 }
168 if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_STRING_LENGTH) {
169 if (start == 0 && length == src->GetLength()) {
170 return *src;
171 }
172 if (src->IsUtf16()) {
173 FlatStringInfo srcFlat = FlattenAllString(vm, src);
174 bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
175 if (canBeCompressed) {
176 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
177 srcFlat = FlattenAllString(vm, src);
178 BaseString::CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
179 return *string;
180 }
181 }
182 return GetSlicedString(vm, src, start, length);
183 }
184 return FastSubString(vm, src, start, length);
185 }
186
SubStringIsUtf8(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)187 bool EcmaString::SubStringIsUtf8(const EcmaVM *vm,
188 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
189 {
190 ASSERT((start + length) <= src->GetLength());
191 if (length == 0) {
192 return true;
193 }
194 if (src->IsUtf8()) {
195 return true;
196 }
197 FlatStringInfo srcFlat = FlattenAllString(vm, src);
198 return CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
199 }
200
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)201 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
202 {
203 if (*left == *right) {
204 return 0;
205 }
206 FlatStringInfo lhs = FlattenAllString(vm, left);
207 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
208 FlatStringInfo rhs = FlattenAllString(vm, right);
209 lhs.SetString(*string);
210 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
211 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
212 int32_t countDiff = lhsCount - rhsCount;
213 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
214 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
215 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
216 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
217 int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
218 if (charDiff != 0) {
219 return charDiff;
220 }
221 } else if (!lhs.IsUtf16()) {
222 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
223 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
224 int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
225 if (charDiff != 0) {
226 return charDiff;
227 }
228 } else if (!rhs.IsUtf16()) {
229 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
230 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
231 int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
232 if (charDiff != 0) {
233 return charDiff;
234 }
235 } else {
236 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
237 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
238 int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
239 if (charDiff != 0) {
240 return charDiff;
241 }
242 }
243 return countDiff;
244 }
245
246 /**
247 * left: text string
248 * right: pattern string
249 * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
250 * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
251 */
IsSubStringAt(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t offset)252 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
253 const JSHandle<EcmaString>& right, uint32_t offset)
254 {
255 FlatStringInfo lhs = FlattenAllString(vm, left);
256 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
257 FlatStringInfo rhs = FlattenAllString(vm, right);
258 lhs.SetString(*string);
259 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
260 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
261 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
262 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
263 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
264 return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
265 } else if (!lhs.IsUtf16()) {
266 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
267 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
268 return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
269 } else if (!rhs.IsUtf16()) {
270 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
271 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
272 return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
273 } else {
274 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
275 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
276 return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
277 }
278 return false;
279 }
280
281 /* static */
282 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)283 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
284 {
285 return BaseString::IndexOf(lhsSp, rhsSp, pos, max);
286 }
287
288 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)289 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
290 {
291 return BaseString::LastIndexOf(lhsSp, rhsSp, pos);
292 }
293
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)294 int32_t EcmaString::IndexOf(const EcmaVM *vm,
295 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
296 {
297 EcmaString *lhstring = *receiver;
298 EcmaString *rhstring = *search;
299 if (lhstring == nullptr || rhstring == nullptr) {
300 return -1;
301 }
302 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
303 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
304
305 if (pos > lhsCount) {
306 return -1;
307 }
308
309 if (rhsCount == 0) {
310 return pos;
311 }
312
313 if (pos < 0) {
314 pos = 0;
315 }
316
317 int32_t max = lhsCount - rhsCount;
318 if (max < 0) {
319 return -1;
320 }
321
322 if (pos + rhsCount > lhsCount) {
323 return -1;
324 }
325
326 FlatStringInfo lhs = FlattenAllString(vm, receiver);
327 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
328 FlatStringInfo rhs = FlattenAllString(vm, search);
329 lhs.SetString(*string);
330
331 if (rhs.IsUtf8() && lhs.IsUtf8()) {
332 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
333 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
334 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
335 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
336 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
337 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
338 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
339 } else if (rhs.IsUtf16()) {
340 return -1;
341 } else { // NOLINT(readability-else-after-return)
342 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
343 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
344 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
345 }
346 }
347
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)348 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
349 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
350 {
351 EcmaString *lhstring = *receiver;
352 EcmaString *rhstring = *search;
353 if (lhstring == nullptr || rhstring == nullptr) {
354 return -1;
355 }
356
357 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
358 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
359 if (lhsCount < rhsCount) {
360 return -1;
361 }
362
363 if (pos < 0) {
364 pos = 0;
365 }
366
367 if (pos > lhsCount) {
368 pos = lhsCount;
369 }
370
371 if (pos + rhsCount > lhsCount) {
372 pos = lhsCount - rhsCount;
373 }
374
375 if (rhsCount == 0) {
376 return pos;
377 }
378
379 FlatStringInfo lhs = FlattenAllString(vm, receiver);
380 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
381 FlatStringInfo rhs = FlattenAllString(vm, search);
382 lhs.SetString(*string);
383 if (rhs.IsUtf8() && lhs.IsUtf8()) {
384 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
385 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
386 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
387 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
388 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
389 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
390 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
391 } else if (rhs.IsUtf16()) {
392 return -1;
393 } else { // NOLINT(readability-else-after-return)
394 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
395 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
396 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
397 }
398 }
399
ToU16String(const JSThread * thread,uint32_t len)400 std::u16string EcmaString::ToU16String(const JSThread *thread, uint32_t len)
401 {
402 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
403 return Barriers::GetTaggedObject(thread, obj, offset);
404 };
405 return ToBaseString()->ToU16String(std::move(readBarrier), len);
406 }
407
408 // static
409 template<typename T1, typename T2>
CalculateDataConcatHashCode(const T1 * dataFirst,size_t sizeFirst,const T2 * dataSecond,size_t sizeSecond)410 uint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
411 const T2 *dataSecond, size_t sizeSecond)
412 {
413 return BaseString::CalculateDataConcatHashCode(dataFirst, sizeFirst, dataSecond, sizeSecond);
414 }
415
416 // static
CalculateConcatHashCode(const JSThread * thread,const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)417 uint32_t EcmaString::CalculateConcatHashCode(const JSThread *thread, const JSHandle<EcmaString> &firstString,
418 const JSHandle<EcmaString> &secondString)
419 {
420 uint32_t hashCode;
421 uint32_t firstLength = firstString->GetLength();
422 uint32_t secondLength = secondString->GetLength();
423 if ((firstLength + secondLength < BaseString::MAX_ELEMENT_INDEX_LEN) &&
424 firstString->IsUtf8() && secondString->IsUtf8() &&
425 firstString->IsInteger(thread) && secondString->IsInteger(thread)) {
426 firstString->HashIntegerString(firstLength, &hashCode, 0);
427 secondString->HashIntegerString(secondLength, &hashCode, hashCode);
428 return hashCode;
429 }
430 bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8();
431 bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8();
432 EcmaString *firstStr = *firstString;
433 EcmaString *secondStr = *secondString;
434 CVector<uint8_t> bufFirstUint8;
435 CVector<uint8_t> bufSecondUint8;
436 CVector<uint16_t> bufFirstUint16;
437 CVector<uint16_t> bufSecondUint16;
438 if (isFirstStringUtf8 && isSecondStringUtf8) {
439 const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(thread, firstStr, bufFirstUint8);
440 const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(thread, secondStr, bufSecondUint8);
441 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
442 dataSecond, secondStr->GetLength());
443 }
444 if (!isFirstStringUtf8 && isSecondStringUtf8) {
445 const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(thread, firstStr, bufFirstUint16);
446 const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(thread, secondStr, bufSecondUint8);
447 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
448 dataSecond, secondStr->GetLength());
449 }
450 if (isFirstStringUtf8 && !isSecondStringUtf8) {
451 const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(thread, firstStr, bufFirstUint8);
452 const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(thread, secondStr, bufSecondUint16);
453 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
454 dataSecond, secondStr->GetLength());
455 }
456 {
457 const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(thread, firstStr, bufFirstUint16);
458 const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(thread, secondStr, bufSecondUint16);
459 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
460 dataSecond, secondStr->GetLength());
461 }
462 }
463
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const464 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
465 {
466 ASSERT(length >= 0);
467 Span<const uint8_t> str = FastToUtf8Span();
468 return BaseString::HashIntegerString(str.data(), length, hash, hashSeed);
469 }
470
471 // static
CanBeCompressed(const EcmaString * string)472 bool EcmaString::CanBeCompressed(const EcmaString *string)
473 {
474 return BaseString::CanBeCompressed(string->ToBaseString());
475 }
476
477 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)478 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
479 {
480 return BaseString::CanBeCompressed(utf8Data, utf8Len);
481 }
482
483 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)484 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
485 {
486 return BaseString::CanBeCompressed(utf16Data, utf16Len);
487 }
488
EqualToSplicedString(const JSThread * thread,const EcmaString * str1,const EcmaString * str2)489 bool EcmaString::EqualToSplicedString(const JSThread *thread, const EcmaString *str1, const EcmaString *str2)
490 {
491 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
492 return Barriers::GetTaggedObject(thread, obj, offset);
493 };
494 return ToBaseString()->EqualToSplicedString(std::move(readBarrier), str1->ToBaseString(), str2->ToBaseString());
495 }
496
497 /* static */
StringsAreEqualDiffUtfEncoding(const JSThread * thread,EcmaString * left,EcmaString * right)498 bool EcmaString::StringsAreEqualDiffUtfEncoding(const JSThread *thread, EcmaString *left, EcmaString *right)
499 {
500 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
501 return Barriers::GetTaggedObject(thread, obj, offset);
502 };
503 return BaseString::StringsAreEqualDiffUtfEncoding(std::move(readBarrier), left->ToBaseString(),
504 right->ToBaseString());
505 }
506
507 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)508 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
509 {
510 int32_t lhsCount = static_cast<int32_t>(left.GetLength());
511 int32_t rhsCount = static_cast<int32_t>(right.GetLength());
512 if (!left.IsUtf16() && !right.IsUtf16()) {
513 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
514 Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
515 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
516 } else if (!left.IsUtf16()) {
517 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
518 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
519 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
520 } else if (!right.IsUtf16()) {
521 Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
522 Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
523 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
524 } else {
525 Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
526 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
527 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
528 }
529 }
530
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)531 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
532 {
533 if (str1 == str2) {
534 return true;
535 }
536 if (str1->IsInternString() && str2->IsInternString()) {
537 return false;
538 }
539 uint32_t str1Len = str1->GetLength();
540 if (str1Len != str2->GetLength()) {
541 return false;
542 }
543 if (str1Len == 0) {
544 return true;
545 }
546
547 uint32_t str1Hash;
548 uint32_t str2Hash;
549 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
550 if (str1Hash != str2Hash) {
551 return false;
552 }
553 }
554 FlatStringInfo str1Flat = FlattenAllString(vm, str1);
555 JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
556 FlatStringInfo str2Flat = FlattenAllString(vm, str2);
557 str1Flat.SetString(*string);
558 return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
559 }
560
561 /* static */
StringIsEqualUint8Data(const JSThread * thread,const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)562 bool EcmaString::StringIsEqualUint8Data(const JSThread *thread, const EcmaString *str1, const uint8_t *dataAddr,
563 uint32_t dataLen, bool canBeCompressToUtf8)
564 {
565 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
566 return Barriers::GetTaggedObject(thread, obj, offset);
567 };
568 return BaseString::StringIsEqualUint8Data(std::move(readBarrier), str1->ToBaseString(), dataAddr, dataLen,
569 canBeCompressToUtf8);
570 }
571
572 /* static */
StringsAreEqualUtf16(const JSThread * thread,const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)573 bool EcmaString::StringsAreEqualUtf16(const JSThread *thread, const EcmaString *str1, const uint16_t *utf16Data,
574 uint32_t utf16Len)
575 {
576 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
577 return Barriers::GetTaggedObject(thread, obj, offset);
578 };
579 return BaseString::StringsAreEqualUtf16(std::move(readBarrier), str1->ToBaseString(), utf16Data, utf16Len);
580 }
581
582 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)583 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
584 {
585 ASSERT(dstMax >= count);
586 ASSERT(dst.Size() >= src.Size());
587 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
588 LOG_FULL(FATAL) << "memcpy_s failed";
589 UNREACHABLE();
590 }
591 return true;
592 }
593
594 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)595 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
596 {
597 return BaseString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
598 }
599
600 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)601 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
602 {
603 return BaseString::ComputeHashcodeUtf16(utf16Data, length);
604 }
605
ToElementIndex(const JSThread * thread,uint32_t * index)606 bool EcmaString::ToElementIndex(const JSThread *thread, uint32_t *index)
607 {
608 uint32_t len = GetLength();
609 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
610 return false;
611 }
612 if (UNLIKELY(IsUtf16())) {
613 return false;
614 }
615
616 CVector<uint8_t> buf;
617 const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, this, buf);
618 constexpr uint64_t maxValue = std::numeric_limits<uint32_t>::max() - 1;
619 if (NumberHelper::StringToUint<uint32_t, uint8_t>(std::basic_string_view(data, GetLength()), *index, maxValue)) {
620 return true;
621 }
622 return false;
623 }
624
ToInt(const JSThread * thread,int32_t * index,bool * negative)625 bool EcmaString::ToInt(const JSThread *thread, int32_t *index, bool *negative)
626 {
627 uint32_t len = GetLength();
628 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
629 return false;
630 }
631 if (UNLIKELY(IsUtf16())) {
632 return false;
633 }
634 CVector<uint8_t> buf;
635 const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, this, buf);
636 uint32_t c = data[0];
637 uint32_t loopStart = 0;
638 uint64_t n = 0;
639 if (c == '0') {
640 *index = 0;
641 return len == 1;
642 }
643 if (c == '-' && len > 1) {
644 *negative = true;
645 loopStart = 1;
646 }
647
648 if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
649 *index = *negative ? -n : n;
650 return true;
651 }
652 return false;
653 }
654
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)655 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
656 {
657 uint64_t n = 0;
658 uint32_t len = GetLength();
659 if (UNLIKELY(loopStart >= len)) {
660 return false;
661 }
662 for (uint32_t i = loopStart; i < len; i++) {
663 uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
664 if (c < '0' || c > '9') {
665 return false;
666 }
667 // NOLINTNEXTLINE(readability-magic-numbers)
668 n = n * 10 + (c - '0'); // 10: decimal factor
669 }
670 *index = n;
671 return true;
672 }
673
ToTypedArrayIndex(const JSThread * thread,uint32_t * index)674 bool EcmaString::ToTypedArrayIndex(const JSThread *thread, uint32_t *index)
675 {
676 uint32_t len = GetLength();
677 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
678 return false;
679 }
680 if (UNLIKELY(IsUtf16())) {
681 return false;
682 }
683
684 CVector<uint8_t> buf;
685 const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, this, buf);
686 uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
687 uint64_t n = 0;
688 if (c == '0') {
689 *index = 0;
690 return len == 1;
691 }
692 if (c > '0' && c <= '9') {
693 n = c - '0';
694 for (uint32_t i = 1; i < len; i++) {
695 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
696 if (c >= '0' && c <= '9') {
697 // NOLINTNEXTLINE(readability-magic-numbers)
698 n = n * 10 + (c - '0'); // 10: decimal factor
699 } else if (c == '.') {
700 n = JSObject::MAX_ELEMENT_INDEX;
701 break;
702 } else {
703 return false;
704 }
705 }
706 if (n < JSObject::MAX_ELEMENT_INDEX) {
707 *index = n;
708 return true;
709 } else {
710 *index = JSObject::MAX_ELEMENT_INDEX;
711 return true;
712 }
713 } else if (c == '-') {
714 *index = JSObject::MAX_ELEMENT_INDEX;
715 return true;
716 }
717 return false;
718 }
719
720 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)721 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
722 {
723 uint32_t srcLen = src->GetLength();
724 int32_t start = 0;
725 int32_t end = static_cast<int32_t>(srcLen) - 1;
726
727 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
728 start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
729 }
730 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
731 end = base::StringHelper::GetEnd(data, start, srcLen);
732 }
733 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
734 return res;
735 }
736
737 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)738 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
739 {
740 auto srcFlat = FlattenAllString(vm, src);
741 uint32_t srcLength = srcFlat.GetLength();
742 auto factory = vm->GetFactory();
743 if (srcFlat.IsUtf16()) {
744 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
745 std::string res = base::StringHelper::ToLower(u16str);
746 return *(factory->NewFromStdString(res));
747 } else {
748 return ConvertUtf8ToLowerOrUpper(vm, src, true);
749 }
750 }
751
752 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)753 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
754 {
755 auto srcFlat = FlattenAllString(vm, src);
756 uint32_t srcLength = srcFlat.GetLength();
757 const char start = 'A';
758 const char end = 'Z';
759 uint32_t upperIndex = srcLength;
760 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
761 for (uint32_t index = 0; index < srcLength; ++index) {
762 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
763 upperIndex = index;
764 break;
765 }
766 }
767 if (upperIndex == srcLength) {
768 return *src;
769 }
770 return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
771 }
772
773 /* static */
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)774 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
775 {
776 auto srcFlat = FlattenAllString(vm, src);
777 uint32_t srcLength = srcFlat.GetLength();
778 const char start = 'a';
779 const char end = 'z';
780 uint32_t lowerIndex = srcLength;
781 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
782 for (uint32_t index = 0; index < srcLength; ++index) {
783 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
784 lowerIndex = index;
785 break;
786 }
787 }
788 if (lowerIndex == srcLength) {
789 return *src;
790 }
791 return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
792 }
793
794 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)795 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
796 bool toLower, uint32_t startIndex)
797 {
798 const char start = toLower ? 'A' : 'a';
799 const char end = toLower ? 'Z' : 'z';
800 uint32_t srcLength = src->GetLength();
801 JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
802 auto srcFlat = FlattenAllString(vm, src);
803 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
804 auto newStringPtr = newString->GetDataUtf8Writable();
805 if (startIndex > 0) {
806 if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
807 LOG_FULL(FATAL) << "memcpy_s failed";
808 UNREACHABLE();
809 }
810 }
811 for (uint32_t index = startIndex; index < srcLength; ++index) {
812 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
813 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
814 } else {
815 *(newStringPtr + index) = data[index];
816 }
817 }
818 return *newString;
819 }
820
821 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)822 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
823 {
824 FlatStringInfo srcFlat = FlattenAllString(vm, src);
825 uint32_t srcLength = srcFlat.GetLength();
826 auto factory = vm->GetFactory();
827 if (srcFlat.IsUtf16()) {
828 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
829 std::string res = base::StringHelper::ToUpper(u16str);
830 return *(factory->NewFromStdString(res));
831 } else {
832 return ConvertUtf8ToLowerOrUpper(vm, src, false);
833 }
834 }
835
836 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)837 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
838 {
839 auto factory = vm->GetFactory();
840 FlatStringInfo srcFlat = FlattenAllString(vm, src);
841 std::u16string utf16 = srcFlat.ToU16String();
842 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
843 return *(factory->NewFromStdString(res));
844 }
845
846 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)847 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
848 {
849 auto factory = vm->GetFactory();
850 FlatStringInfo srcFlat = FlattenAllString(vm, src);
851 std::u16string utf16 = srcFlat.ToU16String();
852 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
853 return *(factory->NewFromStdString(res));
854 }
855
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)856 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
857 {
858 FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
859 uint32_t srcLen = srcFlat.GetLength();
860 if (UNLIKELY(srcLen == 0)) {
861 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
862 }
863 if (srcFlat.IsUtf8()) {
864 Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
865 return TrimBody(thread, src, data, mode);
866 } else {
867 Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
868 return TrimBody(thread, src, data, mode);
869 }
870 }
871
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)872 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
873 {
874 ASSERT(string->IsTreeString() || string->IsSlicedString());
875 ASSERT(IsSMemSpace(type));
876 auto thread = vm->GetJSThread();
877 uint32_t length = string->GetLength();
878 EcmaString *result = nullptr;
879 if (string->IsUtf8()) {
880 result = CreateLineStringWithSpaceType(vm, length, true, type);
881 WriteToFlat<uint8_t>(thread, *string, result->GetDataUtf8Writable(), length);
882 } else {
883 result = CreateLineStringWithSpaceType(vm, length, false, type);
884 WriteToFlat<uint16_t>(thread, *string, result->GetDataUtf16Writable(), length);
885 }
886 if (string->IsTreeString()) {
887 JSHandle<TreeEcmaString> tree(string);
888 ASSERT(EcmaString::Cast(tree->GetSecond(thread))->GetLength() != 0);
889 tree->SetFirst(thread, JSTaggedValue(result));
890 tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
891 }
892 return result;
893 }
894
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)895 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
896 {
897 EcmaString *s = *string;
898 if (!s->IsTreeString()) {
899 return s;
900 }
901 JSThread *thread = vm->GetJSThread();
902 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
903 if (!tree->IsFlat(thread)) {
904 return SlowFlatten(vm, string, type);
905 }
906 return EcmaString::Cast(tree->GetFirst(thread));
907 }
908
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)909 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
910 {
911 ASSERT(IsSMemSpace(type));
912 EcmaString *s = *string;
913 uint32_t startIndex = 0;
914 if (s->IsLineString()) {
915 return FlatStringInfo(s, startIndex, s->GetLength());
916 }
917 JSThread *thread = vm->GetJSThread();
918 if (string->IsTreeString()) {
919 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
920 if (!tree->IsFlat(thread)) {
921 s = SlowFlatten(vm, string, type);
922 } else {
923 s = EcmaString::Cast(tree->GetFirst(thread));
924 }
925 } else if (string->IsSlicedString()) {
926 s = EcmaString::Cast(SlicedEcmaString::Cast(*string)->GetParent(thread));
927 startIndex = SlicedEcmaString::Cast(*string)->GetStartIndex();
928 }
929 return FlatStringInfo(s, startIndex, string->GetLength());
930 }
931
FlattenNoGCForSnapshot(const EcmaVM * vm,EcmaString * string)932 EcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
933 {
934 DISALLOW_GARBAGE_COLLECTION;
935 if (string->IsLineString()) {
936 return string;
937 }
938 if (string->IsTreeString()) {
939 TreeEcmaString *tree = TreeEcmaString::Cast(string);
940 JSThread *thread = vm->GetJSThread();
941 if (tree->IsFlat(thread)) {
942 string = EcmaString::Cast(tree->GetFirst(thread));
943 } else {
944 uint32_t length = tree->GetLength();
945 EcmaString *result = nullptr;
946 if (tree->IsUtf8()) {
947 result = CreateLineStringNoGC(vm, length, true);
948 WriteToFlat<uint8_t>(thread, tree, result->GetDataUtf8Writable(), length);
949 } else {
950 result = CreateLineStringNoGC(vm, length, false);
951 WriteToFlat<uint16_t>(thread, tree, result->GetDataUtf16Writable(), length);
952 }
953 tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
954 tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
955 return result;
956 }
957 } else if (string->IsSlicedString()) {
958 SlicedEcmaString *str = SlicedEcmaString::Cast(string);
959 uint32_t length = str->GetLength();
960 JSThread *thread = vm->GetJSThread();
961 EcmaString *result = nullptr;
962 if (str->IsUtf8()) {
963 result = CreateLineStringNoGC(vm, length, true);
964 WriteToFlat<uint8_t>(thread, str, result->GetDataUtf8Writable(), length);
965 } else {
966 result = CreateLineStringNoGC(vm, length, false);
967 WriteToFlat<uint16_t>(thread, str, result->GetDataUtf16Writable(), length);
968 }
969 return result;
970 }
971 return string;
972 }
973
GetUtf8DataFlat(const JSThread * thread,const EcmaString * src,CVector<uint8_t> & buf)974 const uint8_t *EcmaString::GetUtf8DataFlat(const JSThread *thread, const EcmaString *src, CVector<uint8_t> &buf)
975 {
976 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
977 return Barriers::GetTaggedObject(thread, obj, offset);
978 };
979 return BaseString::GetUtf8DataFlat(std::move(readBarrier), src->ToBaseString(), buf);
980 }
981
GetNonTreeUtf8Data(const JSThread * thread,const EcmaString * src)982 const uint8_t *EcmaString::GetNonTreeUtf8Data(const JSThread *thread, const EcmaString *src)
983 {
984 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
985 return Barriers::GetTaggedObject(thread, obj, offset);
986 };
987 return BaseString::GetNonTreeUtf8Data(std::move(readBarrier), src->ToBaseString());
988 }
989
GetUtf16DataFlat(const JSThread * thread,const EcmaString * src,CVector<uint16_t> & buf)990 const uint16_t *EcmaString::GetUtf16DataFlat(const JSThread *thread, const EcmaString *src, CVector<uint16_t> &buf)
991 {
992 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
993 return Barriers::GetTaggedObject(thread, obj, offset);
994 };
995 return BaseString::GetUtf16DataFlat(std::move(readBarrier), src->ToBaseString(), buf);
996 }
997
GetNonTreeUtf16Data(const JSThread * thread,const EcmaString * src)998 const uint16_t *EcmaString::GetNonTreeUtf16Data(const JSThread *thread, const EcmaString *src)
999 {
1000 auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
1001 return Barriers::GetTaggedObject(thread, obj, offset);
1002 };
1003 return BaseString::GetNonTreeUtf16Data(std::move(readBarrier), src->ToBaseString());
1004 }
1005
ToU16String(uint32_t len)1006 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1007 {
1008 uint32_t length = len > 0 ? len : GetLength();
1009 std::u16string result;
1010 if (IsUtf16()) {
1011 const uint16_t *data = this->GetDataUtf16();
1012 result = base::StringHelper::Utf16ToU16String(data, length);
1013 } else {
1014 const uint8_t *data = this->GetDataUtf8();
1015 result = base::StringHelper::Utf8ToU16String(data, length);
1016 }
1017 return result;
1018 }
1019
EcmaStringAccessor(TaggedObject * obj)1020 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1021 {
1022 ASSERT(obj != nullptr);
1023 string_ = EcmaString::Cast(obj);
1024 }
1025
EcmaStringAccessor(JSTaggedValue value)1026 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1027 {
1028 ASSERT(value.IsString());
1029 string_ = EcmaString::Cast(value.GetTaggedObject());
1030 }
1031
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1032 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1033 : string_(*strHandle)
1034 {
1035 }
1036
ToStdString(const JSThread * thread,StringConvertedUsage usage)1037 std::string EcmaStringAccessor::ToStdString(const JSThread *thread, StringConvertedUsage usage)
1038 {
1039 if (string_ == nullptr) {
1040 return "";
1041 }
1042 bool modify = (usage != StringConvertedUsage::PRINT);
1043 CVector<uint8_t> buf;
1044 Span<const uint8_t> sp = string_->ToUtf8Span(thread, buf, modify);
1045 #if ENABLE_NEXT_OPTIMIZATION
1046 return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1047 #else
1048 std::string res;
1049 res.reserve(sp.size());
1050 for (const auto &c : sp) {
1051 res.push_back(c);
1052 }
1053 return res;
1054 #endif
1055 }
1056
Utf8ConvertToString(const JSThread * thread)1057 CString EcmaStringAccessor::Utf8ConvertToString(const JSThread *thread)
1058 {
1059 if (string_ == nullptr) {
1060 return CString("");
1061 }
1062 if (IsUtf8()) {
1063 std::string stdStr;
1064 if (IsLineString()) {
1065 return base::StringHelper::Utf8ToCString(GetDataUtf8(), GetLength());
1066 }
1067 CVector<uint8_t> buf;
1068 const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, string_, buf);
1069 return base::StringHelper::Utf8ToCString(data, GetLength());
1070 } else {
1071 return ToCString(thread);
1072 }
1073 }
1074
DebuggerToStdString(const JSThread * thread,StringConvertedUsage usage)1075 std::string EcmaStringAccessor::DebuggerToStdString(const JSThread *thread, StringConvertedUsage usage)
1076 {
1077 if (string_ == nullptr) {
1078 return "";
1079 }
1080
1081 bool modify = (usage != StringConvertedUsage::PRINT);
1082 CVector<uint8_t> buf;
1083 Span<const uint8_t> sp = string_->DebuggerToUtf8Span(thread, buf, modify);
1084 #if ENABLE_NEXT_OPTIMIZATION
1085 return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1086 #else
1087 std::string res;
1088 res.reserve(sp.size());
1089 for (const auto &c : sp) {
1090 res.push_back(c);
1091 }
1092 return res;
1093 #endif
1094 }
1095
ToCString(const JSThread * thread,StringConvertedUsage usage,bool cesu8)1096 CString EcmaStringAccessor::ToCString(const JSThread *thread, StringConvertedUsage usage, bool cesu8)
1097 {
1098 if (string_ == nullptr) {
1099 return "";
1100 }
1101 bool modify = (usage != StringConvertedUsage::PRINT);
1102 CVector<uint8_t> buf;
1103 Span<const uint8_t> sp = string_->ToUtf8Span(thread, buf, modify, cesu8);
1104 #if ENABLE_NEXT_OPTIMIZATION
1105 return CString(reinterpret_cast<const char*>(sp.data()), sp.size());
1106 #else
1107 CString res;
1108 res.reserve(sp.size());
1109 for (const auto &c : sp) {
1110 res.push_back(c);
1111 }
1112 return res;
1113 #endif
1114 }
1115
1116 #if ENABLE_NEXT_OPTIMIZATION
AppendToCString(const JSThread * thread,CString & str)1117 void EcmaStringAccessor::AppendToCString(const JSThread *thread, CString &str)
1118 {
1119 if (string_ == nullptr) {
1120 return;
1121 }
1122
1123 size_t strLen = GetLength();
1124 CVector<uint8_t> buf;
1125 const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, string_, buf);
1126 str.append(reinterpret_cast<const char *>(data), strLen);
1127 }
1128
AppendToC16String(const JSThread * thread,C16String & str)1129 void EcmaStringAccessor::AppendToC16String(const JSThread *thread, C16String &str)
1130 {
1131 if (string_ == nullptr) {
1132 return;
1133 }
1134 // used to append utf8 space to utf16 gap by stringify
1135 // In real world, space is usually utf8.
1136 if LIKELY(string_->IsUtf8()) {
1137 CVector<uint8_t> buf;
1138 const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, string_, buf);
1139 // only ascii codes, no need to convert to UTF-16, just append.
1140 AppendString(str, reinterpret_cast<const char*>(data), GetLength());
1141 } else {
1142 CVector<uint16_t> buf;
1143 const uint16_t *data = EcmaString::GetUtf16DataFlat(thread, string_, buf);
1144 str.append(reinterpret_cast<const char16_t *>(data), GetLength());
1145 }
1146 }
1147 #endif
1148 // static
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1149 EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1150 {
1151 return EcmaString::CreateLineString(vm, length, compressed);
1152 }
1153 } // namespace panda::ecmascript
1154