1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20
21 namespace panda::ecmascript {
22
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)23 EcmaString *EcmaString::Concat(const EcmaVM *vm,
24 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
25 {
26 // allocator may trig gc and move src, need to hold it
27 EcmaString *strLeft = *left;
28 EcmaString *strRight = *right;
29 uint32_t leftLength = strLeft->GetLength();
30 uint32_t rightLength = strRight->GetLength();
31 uint32_t newLength = leftLength + rightLength;
32 if (newLength == 0) {
33 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
34 }
35
36 if (leftLength == 0) {
37 if (type == MemSpaceType::OLD_SPACE) {
38 Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*right));
39 if (objectRegion->InYoungSpace()) {
40 return CopyStringToOldSpace(vm, right, rightLength, strRight->IsUtf8());
41 }
42 }
43 return strRight;
44 }
45 if (rightLength == 0) {
46 if (type == MemSpaceType::OLD_SPACE) {
47 Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*left));
48 if (objectRegion->InYoungSpace()) {
49 return CopyStringToOldSpace(vm, left, leftLength, strLeft->IsUtf8());
50 }
51 }
52 return strLeft;
53 }
54 // if the result string is small, make a LineString
55 bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
56 if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
57 ASSERT(strLeft->IsLineOrConstantString());
58 ASSERT(strRight->IsLineOrConstantString());
59 auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
60 // retrieve strings after gc
61 strLeft = *left;
62 strRight = *right;
63 if (compressed) {
64 // copy left part
65 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
66 Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
67 EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
68 // copy right part
69 sp = sp.SubSpan(leftLength);
70 Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
71 EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
72 } else {
73 // copy left part
74 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
75 if (strLeft->IsUtf8()) {
76 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
77 } else {
78 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
79 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
80 }
81 // copy right part
82 sp = sp.SubSpan(leftLength);
83 if (strRight->IsUtf8()) {
84 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
85 } else {
86 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
87 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
88 }
89 }
90 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
91 return newString;
92 }
93 return CreateTreeString(vm, left, right, newLength, compressed);
94 }
95
96 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)97 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
98 uint32_t length, bool compressed)
99 {
100 if (original->IsConstantString()) {
101 return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
102 }
103 JSHandle<EcmaString> newString(vm->GetJSThread(),
104 CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105 auto strOrigin = FlattenAllString(vm, original);
106 if (compressed) {
107 // copy
108 Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109 Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110 EcmaString::MemCopyChars(sp, length, srcSp, length);
111 } else {
112 // copy left part
113 Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114 if (strOrigin.IsUtf8()) {
115 EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116 } else {
117 Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118 EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119 }
120 }
121 ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122 return *newString;
123 }
124
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129 ASSERT((start + length) <= src->GetLength());
130 if (length == 0) {
131 return *vm->GetFactory()->GetEmptyString();
132 }
133 if (start == 0 && length == src->GetLength()) {
134 return *src;
135 }
136 if (src->IsUtf8()) {
137 return FastSubUtf8String(vm, src, start, length);
138 }
139 return FastSubUtf16String(vm, src, start, length);
140 }
141
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146 ASSERT((start + length) <= src->GetLength());
147 JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
148 FlatStringInfo srcFlat = FlattenAllString(vm, src);
149 slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
150 slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
151 slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152 return *slicedString;
153 }
154
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159 ASSERT((start + length) <= src->GetLength());
160 if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
161 if (start == 0 && length == src->GetLength()) {
162 return *src;
163 }
164 if (src->IsUtf16()) {
165 FlatStringInfo srcFlat = FlattenAllString(vm, src);
166 bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
167 if (canBeCompressed) {
168 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
169 srcFlat = FlattenAllString(vm, src);
170 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
171 return *string;
172 }
173 }
174 return GetSlicedString(vm, src, start, length);
175 }
176 return FastSubString(vm, src, start, length);
177 }
178
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)179 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
180 {
181 ASSERT(IsLineString() && !IsConstantString());
182 if (IsUtf8()) {
183 ASSERT(src->IsUtf8());
184 CVector<uint8_t> buf;
185 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
186 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
187 if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
188 LOG_FULL(FATAL) << "memcpy_s failed";
189 UNREACHABLE();
190 }
191 } else if (src->IsUtf8()) {
192 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
193 CVector<uint8_t> buf;
194 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
195 Span<uint16_t> to(GetDataUtf16Writable() + start, length);
196 Span<const uint8_t> from(data, length);
197 for (uint32_t i = 0; i < length; i++) {
198 to[i] = from[i];
199 }
200 } else {
201 CVector<uint16_t> buf;
202 const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
203 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
204 if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
205 destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
206 LOG_FULL(FATAL) << "memcpy_s failed";
207 UNREACHABLE();
208 }
209 }
210 }
211
212 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)213 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
214 {
215 for (int32_t i = 0; i < count; ++i) {
216 auto left = static_cast<int32_t>(lhsSp[i]);
217 auto right = static_cast<int32_t>(rhsSp[i]);
218 if (left != right) {
219 return left - right;
220 }
221 }
222 return 0;
223 }
224
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)225 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
226 {
227 if (*left == *right) {
228 return 0;
229 }
230 FlatStringInfo lhs = FlattenAllString(vm, left);
231 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
232 FlatStringInfo rhs = FlattenAllString(vm, right);
233 lhs.SetString(*string);
234 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
235 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
236 int32_t countDiff = lhsCount - rhsCount;
237 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
238 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
239 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
240 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
241 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
242 if (charDiff != 0) {
243 return charDiff;
244 }
245 } else if (!lhs.IsUtf16()) {
246 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
247 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
248 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
249 if (charDiff != 0) {
250 return charDiff;
251 }
252 } else if (!rhs.IsUtf16()) {
253 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
254 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
255 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
256 if (charDiff != 0) {
257 return charDiff;
258 }
259 } else {
260 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
261 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
262 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
263 if (charDiff != 0) {
264 return charDiff;
265 }
266 }
267 return countDiff;
268 }
269
270 /* static */
271 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)272 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
273 {
274 ASSERT(rhsSp.size() > 0);
275 auto first = static_cast<int32_t>(rhsSp[0]);
276 for (int32_t i = pos; i <= max; i++) {
277 if (static_cast<int32_t>(lhsSp[i]) != first) {
278 i++;
279 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
280 i++;
281 }
282 }
283 /* Found first character, now look at the rest of rhsSp */
284 if (i <= max) {
285 int j = i + 1;
286 int end = j + static_cast<int>(rhsSp.size()) - 1;
287
288 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
289 }
290 if (j == end) {
291 /* Found whole string. */
292 return i;
293 }
294 }
295 }
296 return -1;
297 }
298
299 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)300 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
301 {
302 int rhsSize = static_cast<int>(rhsSp.size());
303 ASSERT(rhsSize > 0);
304 auto first = rhsSp[0];
305 for (int32_t i = pos; i >= 0; i--) {
306 if (lhsSp[i] != first) {
307 continue;
308 }
309 /* Found first character, now look at the rest of rhsSp */
310 int j = 1;
311 while (j < rhsSize) {
312 if (rhsSp[j] != lhsSp[i + j]) {
313 break;
314 }
315 j++;
316 }
317 if (j == rhsSize) {
318 return i;
319 }
320 }
321 return -1;
322 }
323
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)324 int32_t EcmaString::IndexOf(const EcmaVM *vm,
325 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
326 {
327 EcmaString *lhstring = *receiver;
328 EcmaString *rhstring = *search;
329 if (lhstring == nullptr || rhstring == nullptr) {
330 return -1;
331 }
332 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
333 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
334
335 if (pos > lhsCount) {
336 return -1;
337 }
338
339 if (rhsCount == 0) {
340 return pos;
341 }
342
343 if (pos < 0) {
344 pos = 0;
345 }
346
347 int32_t max = lhsCount - rhsCount;
348 if (max < 0) {
349 return -1;
350 }
351
352 if (pos + rhsCount > lhsCount) {
353 return -1;
354 }
355
356 FlatStringInfo lhs = FlattenAllString(vm, receiver);
357 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
358 FlatStringInfo rhs = FlattenAllString(vm, search);
359 lhs.SetString(*string);
360
361 if (rhs.IsUtf8() && lhs.IsUtf8()) {
362 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
363 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
364 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
365 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
366 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
367 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
368 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
369 } else if (rhs.IsUtf16()) {
370 return -1;
371 } else { // NOLINT(readability-else-after-return)
372 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
373 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
374 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
375 }
376 }
377
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)378 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
379 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
380 {
381 EcmaString *lhstring = *receiver;
382 EcmaString *rhstring = *search;
383 if (lhstring == nullptr || rhstring == nullptr) {
384 return -1;
385 }
386
387 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
388 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
389 if (lhsCount < rhsCount) {
390 return -1;
391 }
392
393 if (pos < 0) {
394 pos = 0;
395 }
396
397 if (pos > lhsCount) {
398 pos = lhsCount;
399 }
400
401 if (pos + rhsCount > lhsCount) {
402 pos = lhsCount - rhsCount;
403 }
404
405 if (rhsCount == 0) {
406 return pos;
407 }
408
409 FlatStringInfo lhs = FlattenAllString(vm, receiver);
410 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
411 FlatStringInfo rhs = FlattenAllString(vm, search);
412 lhs.SetString(*string);
413 if (rhs.IsUtf8() && lhs.IsUtf8()) {
414 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
415 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
416 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
417 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
418 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
419 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
420 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
421 } else if (rhs.IsUtf16()) {
422 return -1;
423 } else { // NOLINT(readability-else-after-return)
424 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
425 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
426 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
427 }
428 }
429
ToU16String(uint32_t len)430 std::u16string EcmaString::ToU16String(uint32_t len)
431 {
432 uint32_t length = len > 0 ? len : GetLength();
433 std::u16string result;
434 if (IsUtf16()) {
435 CVector<uint16_t> buf;
436 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
437 result = base::StringHelper::Utf16ToU16String(data, length);
438 } else {
439 CVector<uint8_t> buf;
440 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
441 result = base::StringHelper::Utf8ToU16String(data, length);
442 }
443 return result;
444 }
445
446 // static
CanBeCompressed(const EcmaString * string)447 bool EcmaString::CanBeCompressed(const EcmaString *string)
448 {
449 ASSERT(string->IsLineOrConstantString());
450 if (string->IsUtf8()) {
451 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
452 }
453 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
454 }
455
456 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)457 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
458 {
459 bool isCompressed = true;
460 uint32_t index = 0;
461 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
462 while (index < utf8Len) {
463 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
464 if (!IsASCIICharacter(utf8Data[index])) {
465 isCompressed = false;
466 break;
467 }
468 ++index;
469 }
470 return isCompressed;
471 }
472
473 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)474 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
475 {
476 bool isCompressed = true;
477 Span<const uint16_t> data(utf16Data, utf16Len);
478 for (uint32_t i = 0; i < utf16Len; i++) {
479 if (!IsASCIICharacter(data[i])) {
480 isCompressed = false;
481 break;
482 }
483 }
484 return isCompressed;
485 }
486
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)487 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
488 {
489 ASSERT(NotTreeString());
490 ASSERT(str1->NotTreeString() && str2->NotTreeString());
491 if (GetLength() != str1->GetLength() + str2->GetLength()) {
492 return false;
493 }
494 if (IsUtf16()) {
495 CVector<uint16_t> buf;
496 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
497 if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
498 return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
499 }
500 } else {
501 CVector<uint8_t> buf;
502 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
503 if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
504 return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
505 str2->GetLength(), this->IsUtf8());
506 }
507 }
508 return false;
509 }
510
511 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString * left,EcmaString * right)512 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
513 {
514 CVector<uint16_t> bufLeftUft16;
515 CVector<uint16_t> bufRightUft16;
516 CVector<uint8_t> bufLeftUft8;
517 CVector<uint8_t> bufRightUft8;
518 int32_t lhsCount = static_cast<int32_t>(left->GetLength());
519 int32_t rhsCount = static_cast<int32_t>(right->GetLength());
520 if (!left->IsUtf16() && !right->IsUtf16()) {
521 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
522 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
523 Span<const uint8_t> lhsSp(data1, lhsCount);
524 Span<const uint8_t> rhsSp(data2, rhsCount);
525 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
526 } else if (!left->IsUtf16()) {
527 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
528 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
529 Span<const uint8_t> lhsSp(data1, lhsCount);
530 Span<const uint16_t> rhsSp(data2, rhsCount);
531 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
532 } else if (!right->IsUtf16()) {
533 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
534 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
535 Span<const uint16_t> lhsSp(data1, lhsCount);
536 Span<const uint8_t> rhsSp(data2, rhsCount);
537 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
538 } else {
539 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
540 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
541 Span<const uint16_t> lhsSp(data1, lhsCount);
542 Span<const uint16_t> rhsSp(data2, rhsCount);
543 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
544 }
545 }
546
547 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)548 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
549 {
550 int32_t lhsCount = static_cast<int32_t>(left.GetLength());
551 int32_t rhsCount = static_cast<int32_t>(right.GetLength());
552 if (!left.IsUtf16() && !right.IsUtf16()) {
553 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
554 Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
555 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
556 } else if (!left.IsUtf16()) {
557 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
558 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
559 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
560 } else if (!right.IsUtf16()) {
561 Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
562 Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
563 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
564 } else {
565 Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
566 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
567 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
568 }
569 }
570
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)571 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
572 {
573 if (str1 == str2) {
574 return true;
575 }
576 if (str1->IsInternString() && str2->IsInternString()) {
577 return false;
578 }
579 uint32_t str1Len = str1->GetLength();
580 if (str1Len != str2->GetLength()) {
581 return false;
582 }
583 if (str1Len == 0) {
584 return true;
585 }
586
587 uint32_t str1Hash;
588 uint32_t str2Hash;
589 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
590 if (str1Hash != str2Hash) {
591 return false;
592 }
593 }
594 FlatStringInfo str1Flat = FlattenAllString(vm, str1);
595 JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
596 FlatStringInfo str2Flat = FlattenAllString(vm, str2);
597 str1Flat.SetString(*string);
598 return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
599 }
600
601 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)602 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
603 {
604 if (str1 == str2) {
605 return true;
606 }
607 uint32_t str1Len = str1->GetLength();
608 if (str1Len != str2->GetLength()) {
609 return false;
610 }
611 if (str1Len == 0) {
612 return true;
613 }
614
615 uint32_t str1Hash;
616 uint32_t str2Hash;
617 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
618 if (str1Hash != str2Hash) {
619 return false;
620 }
621 }
622 return StringsAreEqualDiffUtfEncoding(str1, str2);
623 }
624
625 /* static */
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)626 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
627 bool canBeCompressToUtf8)
628 {
629 if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
630 return false;
631 }
632 if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
633 return false;
634 }
635 if (str1->IsUtf8()) {
636 CVector<uint8_t> buf;
637 Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
638 Span<const uint8_t> data2(dataAddr, dataLen);
639 return EcmaString::StringsAreEquals(data1, data2);
640 }
641 CVector<uint16_t> buf;
642 uint32_t length = str1->GetLength();
643 const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
644 return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
645 }
646
647 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)648 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
649 {
650 uint32_t length = str1->GetLength();
651 if (length != utf16Len) {
652 return false;
653 }
654 if (str1->IsUtf8()) {
655 CVector<uint8_t> buf;
656 const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
657 return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
658 } else {
659 CVector<uint16_t> buf;
660 Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
661 Span<const uint16_t> data2(utf16Data, utf16Len);
662 return EcmaString::StringsAreEquals(data1, data2);
663 }
664 }
665
666 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)667 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
668 {
669 ASSERT(dstMax >= count);
670 ASSERT(dst.Size() >= src.Size());
671 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
672 LOG_FULL(FATAL) << "memcpy_s failed";
673 UNREACHABLE();
674 }
675 return true;
676 }
677
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const678 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
679 {
680 ASSERT(length >= 0);
681 Span<const uint8_t> str = FastToUtf8Span();
682 return HashIntegerString(str.data(), length, hash, hashSeed);
683 }
684
ComputeHashcode() const685 uint32_t EcmaString::ComputeHashcode() const
686 {
687 auto [hash, isInteger] = ComputeRawHashcode();
688 return MixHashcode(hash, isInteger);
689 }
690
691 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const692 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
693 {
694 uint32_t hash = 0;
695 uint32_t length = GetLength();
696 if (length == 0) {
697 return {hash, false};
698 }
699
700 if (IsUtf8()) {
701 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
702 if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
703 return {hash, true};
704 }
705 CVector<uint8_t> buf;
706 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
707 // String can not convert to integer number, using normal hashcode computing algorithm.
708 hash = this->ComputeHashForData(data, length, 0);
709 return {hash, false};
710 } else {
711 CVector<uint16_t> buf;
712 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
713 // If rawSeed has certain value, and second string uses UTF16 encoding,
714 // then merged string can not be small integer number.
715 hash = this->ComputeHashForData(data, length, 0);
716 return {hash, false};
717 }
718 }
719
720 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed,bool isInteger) const721 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
722 {
723 uint32_t hash;
724 uint32_t length = GetLength();
725 if (length == 0) {
726 return MixHashcode(rawHashSeed, isInteger);
727 }
728
729 if (IsUtf8()) {
730 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
731 if ((rawHashSeed == 0 || isInteger) &&
732 length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
733 return hash;
734 }
735 CVector<uint8_t> buf;
736 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
737 // String can not convert to integer number, using normal hashcode computing algorithm.
738 hash = this->ComputeHashForData(data, length, rawHashSeed);
739 return MixHashcode(hash, NOT_INTEGER);
740 } else {
741 CVector<uint16_t> buf;
742 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
743 // If rawSeed has certain value, and second string uses UTF16 encoding,
744 // then merged string can not be small integer number.
745 hash = this->ComputeHashForData(data, length, rawHashSeed);
746 return MixHashcode(hash, NOT_INTEGER);
747 }
748 }
749
750 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)751 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
752 {
753 uint32_t mixHash = 0;
754 if (canBeCompress) {
755 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
756 if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
757 return mixHash;
758 }
759 uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
760 return MixHashcode(hash, NOT_INTEGER);
761 } else {
762 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
763 CVector<uint16_t> tmpBuffer(utf16Len);
764 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
765 utf16Len, 0);
766 ASSERT(len == utf16Len);
767 uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
768 return MixHashcode(hash, NOT_INTEGER);
769 }
770 LOG_ECMA(FATAL) << "this branch is unreachable";
771 UNREACHABLE();
772 }
773
774 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)775 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
776 {
777 uint32_t mixHash = 0;
778 // String length smaller than 10, try to compute integer hash.
779 if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
780 return mixHash;
781 }
782 uint32_t hash = ComputeHashForData(utf16Data, length, 0);
783 return MixHashcode(hash, NOT_INTEGER);
784 }
785
786 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)787 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
788 uint32_t utf16Len)
789 {
790 size_t utf8Pos = 0;
791 size_t utf16Pos = 0;
792 while (utf8Pos < utf8Len) {
793 auto [pair, nbytes] = utf::ConvertMUtf8ToUtf16Pair(utf8Data, utf8Len - utf8Pos);
794 auto [pHigh, pLow] = utf::SplitUtf16Pair(pair);
795 utf8Data += nbytes;
796 utf8Pos += nbytes;
797 if (pHigh != 0) {
798 if (utf16Pos >= utf16Len - 1 || *utf16Data != pHigh) {
799 return false;
800 }
801 ++utf16Pos;
802 ++utf16Data;
803 }
804 if (utf16Pos >= utf16Len || *utf16Data != pLow) {
805 return false;
806 }
807 ++utf16Pos;
808 ++utf16Data;
809 }
810 return true;
811 }
812
ToElementIndex(uint32_t * index)813 bool EcmaString::ToElementIndex(uint32_t *index)
814 {
815 uint32_t len = GetLength();
816 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
817 return false;
818 }
819 if (UNLIKELY(IsUtf16())) {
820 return false;
821 }
822
823 // fast path: get integer from string's hash value
824 if (TryToGetInteger(index)) {
825 return true;
826 }
827
828 CVector<uint8_t> buf;
829 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
830 uint32_t c = data[0];
831 uint64_t n = 0;
832 if (c == '0') {
833 *index = 0;
834 return len == 1;
835 }
836 uint32_t loopStart = 0;
837 if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
838 *index = n;
839 return true;
840 }
841 return false;
842 }
843
ToInt(int32_t * index,bool * negative)844 bool EcmaString::ToInt(int32_t *index, bool *negative)
845 {
846 uint32_t len = GetLength();
847 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
848 return false;
849 }
850 if (UNLIKELY(IsUtf16())) {
851 return false;
852 }
853 CVector<uint8_t> buf;
854 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
855 uint32_t c = data[0];
856 uint32_t loopStart = 0;
857 uint64_t n = 0;
858 if (c == '0') {
859 *index = 0;
860 return len == 1;
861 }
862 if (c == '-' && len > 1) {
863 *negative = true;
864 loopStart = 1;
865 }
866
867 if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
868 *index = *negative ? -n : n;
869 return true;
870 }
871 return false;
872 }
873
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)874 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
875 {
876 uint64_t n = 0;
877 uint32_t len = GetLength();
878 if (UNLIKELY(loopStart >= len)) {
879 return false;
880 }
881 for (uint32_t i = loopStart; i < len; i++) {
882 uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
883 if (c < '0' || c > '9') {
884 return false;
885 }
886 // NOLINTNEXTLINE(readability-magic-numbers)
887 n = n * 10 + (c - '0'); // 10: decimal factor
888 }
889 *index = n;
890 return true;
891 }
892
ToTypedArrayIndex(uint32_t * index)893 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
894 {
895 uint32_t len = GetLength();
896 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
897 return false;
898 }
899 if (UNLIKELY(IsUtf16())) {
900 return false;
901 }
902
903 CVector<uint8_t> buf;
904 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
905 uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
906 uint64_t n = 0;
907 if (c == '0') {
908 *index = 0;
909 return len == 1;
910 }
911 if (c > '0' && c <= '9') {
912 n = c - '0';
913 for (uint32_t i = 1; i < len; i++) {
914 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
915 if (c >= '0' && c <= '9') {
916 // NOLINTNEXTLINE(readability-magic-numbers)
917 n = n * 10 + (c - '0'); // 10: decimal factor
918 } else if (c == '.') {
919 n = JSObject::MAX_ELEMENT_INDEX;
920 break;
921 } else {
922 return false;
923 }
924 }
925 if (n < JSObject::MAX_ELEMENT_INDEX) {
926 *index = n;
927 return true;
928 } else {
929 *index = JSObject::MAX_ELEMENT_INDEX;
930 return true;
931 }
932 } else if (c == '-') {
933 *index = JSObject::MAX_ELEMENT_INDEX;
934 return true;
935 }
936 return false;
937 }
938
939 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)940 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
941 {
942 uint32_t srcLen = src->GetLength();
943 int32_t start = 0;
944 int32_t end = static_cast<int32_t>(srcLen) - 1;
945
946 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
947 start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
948 }
949 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
950 end = base::StringHelper::GetEnd(data, start, srcLen);
951 }
952 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
953 return res;
954 }
955
956 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)957 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
958 {
959 auto srcFlat = FlattenAllString(vm, src);
960 uint32_t srcLength = srcFlat.GetLength();
961 auto factory = vm->GetFactory();
962 if (srcFlat.IsUtf16()) {
963 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
964 std::string res = base::StringHelper::ToLower(u16str);
965 return *(factory->NewFromStdString(res));
966 } else {
967 return ConvertUtf8ToLowerOrUpper(vm, src, true);
968 }
969 }
970
971 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)972 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
973 {
974 auto srcFlat = FlattenAllString(vm, src);
975 uint32_t srcLength = srcFlat.GetLength();
976 const char start = 'A';
977 const char end = 'Z';
978 uint32_t upperIndex = srcLength;
979 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
980 for (uint32_t index = 0; index < srcLength; ++index) {
981 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
982 upperIndex = index;
983 break;
984 }
985 }
986 if (upperIndex == srcLength) {
987 return *src;
988 }
989 return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
990 }
991
992 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)993 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
994 bool toLower, uint32_t startIndex)
995 {
996 const char start = toLower ? 'A' : 'a';
997 const char end = toLower ? 'Z' : 'z';
998 uint32_t srcLength = src->GetLength();
999 JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1000 auto srcFlat = FlattenAllString(vm, src);
1001 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1002 auto newStringPtr = newString->GetDataUtf8Writable();
1003 if (startIndex > 0) {
1004 if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1005 LOG_FULL(FATAL) << "memcpy_s failed";
1006 UNREACHABLE();
1007 }
1008 }
1009 for (uint32_t index = startIndex; index < srcLength; ++index) {
1010 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1011 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
1012 } else {
1013 *(newStringPtr + index) = data[index];
1014 }
1015 }
1016 return *newString;
1017 }
1018
1019 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1020 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1021 {
1022 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1023 uint32_t srcLength = srcFlat.GetLength();
1024 auto factory = vm->GetFactory();
1025 if (srcFlat.IsUtf16()) {
1026 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1027 std::string res = base::StringHelper::ToUpper(u16str);
1028 return *(factory->NewFromStdString(res));
1029 } else {
1030 return ConvertUtf8ToLowerOrUpper(vm, src, false);
1031 }
1032 }
1033
1034 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1035 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1036 {
1037 auto factory = vm->GetFactory();
1038 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1039 std::u16string utf16 = srcFlat.ToU16String();
1040 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1041 return *(factory->NewFromStdString(res));
1042 }
1043
1044 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1045 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1046 {
1047 auto factory = vm->GetFactory();
1048 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1049 std::u16string utf16 = srcFlat.ToU16String();
1050 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1051 return *(factory->NewFromStdString(res));
1052 }
1053
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)1054 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1055 {
1056 FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1057 uint32_t srcLen = srcFlat.GetLength();
1058 if (UNLIKELY(srcLen == 0)) {
1059 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1060 }
1061 if (srcFlat.IsUtf8()) {
1062 Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1063 return TrimBody(thread, src, data, mode);
1064 } else {
1065 Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1066 return TrimBody(thread, src, data, mode);
1067 }
1068 }
1069
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1070 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1071 {
1072 ASSERT(string->IsTreeString() || string->IsSlicedString());
1073 auto thread = vm->GetJSThread();
1074 uint32_t length = string->GetLength();
1075 EcmaString *result = nullptr;
1076 if (string->IsUtf8()) {
1077 result = CreateLineStringWithSpaceType(vm, length, true, type);
1078 WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1079 } else {
1080 result = CreateLineStringWithSpaceType(vm, length, false, type);
1081 WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1082 }
1083 if (string->IsTreeString()) {
1084 JSHandle<TreeEcmaString> tree(string);
1085 ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1086 tree->SetFirst(thread, JSTaggedValue(result));
1087 tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1088 }
1089 return result;
1090 }
1091
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1092 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1093 {
1094 EcmaString *s = *string;
1095 if (s->IsLineOrConstantString() || s->IsSlicedString()) {
1096 return s;
1097 }
1098 if (s->IsTreeString()) {
1099 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1100 if (!tree->IsFlat()) {
1101 return SlowFlatten(vm, string, type);
1102 }
1103 s = EcmaString::Cast(tree->GetFirst());
1104 }
1105 return s;
1106 }
1107
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1108 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1109 {
1110 EcmaString *s = *string;
1111 uint32_t startIndex = 0;
1112 if (s->IsLineOrConstantString()) {
1113 return FlatStringInfo(s, startIndex, s->GetLength());
1114 }
1115 if (string->IsTreeString()) {
1116 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1117 if (!tree->IsFlat()) {
1118 s = SlowFlatten(vm, string, type);
1119 } else {
1120 s = EcmaString::Cast(tree->GetFirst());
1121 }
1122 } else if (string->IsSlicedString()) {
1123 s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1124 startIndex = SlicedString::Cast(*string)->GetStartIndex();
1125 }
1126 return FlatStringInfo(s, startIndex, string->GetLength());
1127 }
1128
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1129 EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1130 {
1131 DISALLOW_GARBAGE_COLLECTION;
1132 if (string->IsLineOrConstantString()) {
1133 return string;
1134 }
1135 if (string->IsTreeString()) {
1136 TreeEcmaString *tree = TreeEcmaString::Cast(string);
1137 if (tree->IsFlat()) {
1138 string = EcmaString::Cast(tree->GetFirst());
1139 } else {
1140 uint32_t length = tree->GetLength();
1141 EcmaString *result = nullptr;
1142 if (tree->IsUtf8()) {
1143 result = CreateLineStringNoGC(vm, length, true);
1144 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1145 } else {
1146 result = CreateLineStringNoGC(vm, length, false);
1147 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1148 }
1149 tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1150 tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1151 return result;
1152 }
1153 } else if (string->IsSlicedString()) {
1154 SlicedString *str = SlicedString::Cast(string);
1155 uint32_t length = str->GetLength();
1156 EcmaString *result = nullptr;
1157 if (str->IsUtf8()) {
1158 result = CreateLineStringNoGC(vm, length, true);
1159 WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1160 } else {
1161 result = CreateLineStringNoGC(vm, length, false);
1162 WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1163 }
1164 return result;
1165 }
1166 return string;
1167 }
1168
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1169 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1170 {
1171 ASSERT(src->IsUtf8());
1172 uint32_t length = src->GetLength();
1173 EcmaString *string = const_cast<EcmaString *>(src);
1174 if (string->IsTreeString()) {
1175 if (string->IsFlat()) {
1176 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1177 } else {
1178 buf.reserve(length);
1179 WriteToFlat(string, buf.data(), length);
1180 return buf.data();
1181 }
1182 } else if (string->IsSlicedString()) {
1183 SlicedString *str = SlicedString::Cast(string);
1184 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1185 }
1186 return string->GetDataUtf8();
1187 }
1188
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1189 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1190 {
1191 ASSERT(src->IsUtf16());
1192 uint32_t length = src->GetLength();
1193 EcmaString *string = const_cast<EcmaString *>(src);
1194 if (string->IsTreeString()) {
1195 if (string->IsFlat()) {
1196 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1197 } else {
1198 buf.reserve(length);
1199 WriteToFlat(string, buf.data(), length);
1200 return buf.data();
1201 }
1202 } else if (string->IsSlicedString()) {
1203 SlicedString *str = SlicedString::Cast(string);
1204 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1205 }
1206 return string->GetDataUtf16();
1207 }
1208
ToU16String(uint32_t len)1209 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1210 {
1211 uint32_t length = len > 0 ? len : GetLength();
1212 std::u16string result;
1213 if (IsUtf16()) {
1214 const uint16_t *data = this->GetDataUtf16();
1215 result = base::StringHelper::Utf16ToU16String(data, length);
1216 } else {
1217 const uint8_t *data = this->GetDataUtf8();
1218 result = base::StringHelper::Utf8ToU16String(data, length);
1219 }
1220 return result;
1221 }
1222
EcmaStringAccessor(EcmaString * string)1223 EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
1224 {
1225 ASSERT(string != nullptr);
1226 string_ = string;
1227 }
1228
EcmaStringAccessor(TaggedObject * obj)1229 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1230 {
1231 ASSERT(obj != nullptr);
1232 string_ = EcmaString::Cast(obj);
1233 }
1234
EcmaStringAccessor(JSTaggedValue value)1235 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1236 {
1237 ASSERT(value.IsString());
1238 string_ = EcmaString::Cast(value.GetTaggedObject());
1239 }
1240
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1241 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1242 : string_(*strHandle)
1243 {
1244 }
1245
ToStdString(StringConvertedUsage usage)1246 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1247 {
1248 if (string_ == nullptr) {
1249 return "";
1250 }
1251 bool modify = (usage != StringConvertedUsage::PRINT);
1252 CVector<uint8_t> buf;
1253 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1254 std::string res;
1255 res.reserve(sp.size());
1256 for (const auto &c : sp) {
1257 res.push_back(c);
1258 }
1259 return res;
1260 }
1261
DebuggerToStdString(StringConvertedUsage usage)1262 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1263 {
1264 if (string_ == nullptr) {
1265 return "";
1266 }
1267
1268 bool modify = (usage != StringConvertedUsage::PRINT);
1269 CVector<uint8_t> buf;
1270 Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1271 std::string res;
1272 res.reserve(sp.size());
1273 for (const auto &c : sp) {
1274 res.push_back(c);
1275 }
1276 return res;
1277 }
1278
ToCString(StringConvertedUsage usage)1279 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
1280 {
1281 if (string_ == nullptr) {
1282 return "";
1283 }
1284 bool modify = (usage != StringConvertedUsage::PRINT);
1285 CVector<uint8_t> buf;
1286 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1287 CString res;
1288 res.reserve(sp.size());
1289 for (const auto &c : sp) {
1290 res.push_back(c);
1291 }
1292 return res;
1293 }
1294 } // namespace panda::ecmascript
1295