1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/base/json_helper.h"
19
20 namespace panda::ecmascript {
21
// Masks that keep only the lowest 3/4/5/6 bits of a value.
constexpr size_t LOW_3BITS = (size_t{1} << 3U) - 1;
constexpr size_t LOW_4BITS = (size_t{1} << 4U) - 1;
constexpr size_t LOW_5BITS = (size_t{1} << 5U) - 1;
constexpr size_t LOW_6BITS = (size_t{1} << 6U) - 1;

// UTF-16 surrogate boundaries: 0xDC00 / 0xD800 are the first low / high surrogate code units.
constexpr size_t L_SURROGATE_START = 0xDC00;
constexpr size_t H_SURROGATE_START = 0xD800;
// First code point beyond the BMP (0x10000).
// NOTE(review): "RAIR" looks like a typo for "PAIR"; the name is kept because other code may reference it.
constexpr size_t SURROGATE_RAIR_START = 0x10000;

// Bit offsets used when shifting code-point fragments.
constexpr size_t OFFSET_18POS = 18;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_6POS = 6;
33
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)34 EcmaString *EcmaString::Concat(const EcmaVM *vm,
35 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
36 {
37 ASSERT(IsSMemSpace(type));
38 // allocator may trig gc and move src, need to hold it
39 EcmaString *strLeft = *left;
40 EcmaString *strRight = *right;
41 uint32_t leftLength = strLeft->GetLength();
42 uint32_t rightLength = strRight->GetLength();
43 uint32_t newLength = leftLength + rightLength;
44 if (newLength == 0) {
45 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
46 }
47
48 if (leftLength == 0) {
49 return strRight;
50 }
51 if (rightLength == 0) {
52 return strLeft;
53 }
54 // if the result string is small, make a LineString
55 bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
56 if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
57 ASSERT(strLeft->IsLineOrConstantString());
58 ASSERT(strRight->IsLineOrConstantString());
59 auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
60 // retrieve strings after gc
61 strLeft = *left;
62 strRight = *right;
63 if (compressed) {
64 // copy left part
65 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
66 Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
67 EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
68 // copy right part
69 sp = sp.SubSpan(leftLength);
70 Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
71 EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
72 } else {
73 // copy left part
74 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
75 if (strLeft->IsUtf8()) {
76 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
77 } else {
78 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
79 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
80 }
81 // copy right part
82 sp = sp.SubSpan(leftLength);
83 if (strRight->IsUtf8()) {
84 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
85 } else {
86 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
87 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
88 }
89 }
90 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
91 return newString;
92 }
93 return CreateTreeString(vm, left, right, newLength, compressed);
94 }
95
96 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)97 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
98 uint32_t length, bool compressed)
99 {
100 if (original->IsConstantString()) {
101 return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
102 }
103 JSHandle<EcmaString> newString(vm->GetJSThread(),
104 CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105 auto strOrigin = FlattenAllString(vm, original);
106 if (compressed) {
107 // copy
108 Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109 Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110 EcmaString::MemCopyChars(sp, length, srcSp, length);
111 } else {
112 // copy left part
113 Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114 if (strOrigin.IsUtf8()) {
115 EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116 } else {
117 Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118 EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119 }
120 }
121 ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122 return *newString;
123 }
124
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129 ASSERT((start + length) <= src->GetLength());
130 if (length == 0) {
131 return *vm->GetFactory()->GetEmptyString();
132 }
133 if (start == 0 && length == src->GetLength()) {
134 return *src;
135 }
136 if (src->IsUtf8()) {
137 return FastSubUtf8String(vm, src, start, length);
138 }
139 return FastSubUtf16String(vm, src, start, length);
140 }
141
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146 ASSERT((start + length) <= src->GetLength());
147 JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
148 FlatStringInfo srcFlat = FlattenAllString(vm, src);
149 slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
150 slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
151 slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152 return *slicedString;
153 }
154
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159 ASSERT((start + length) <= src->GetLength());
160 if (length == 1) {
161 JSThread *thread = vm->GetJSThread();
162 uint16_t res = EcmaStringAccessor(src).Get<false>(start);
163 if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
164 JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
165 return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject());
166 }
167 }
168 if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
169 if (start == 0 && length == src->GetLength()) {
170 return *src;
171 }
172 if (src->IsUtf16()) {
173 FlatStringInfo srcFlat = FlattenAllString(vm, src);
174 bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
175 if (canBeCompressed) {
176 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
177 srcFlat = FlattenAllString(vm, src);
178 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
179 return *string;
180 }
181 }
182 return GetSlicedString(vm, src, start, length);
183 }
184 return FastSubString(vm, src, start, length);
185 }
186
SubStringIsUtf8(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)187 bool EcmaString::SubStringIsUtf8(const EcmaVM *vm,
188 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
189 {
190 ASSERT((start + length) <= src->GetLength());
191 if (length == 0) {
192 return true;
193 }
194 if (src->IsUtf8()) {
195 return true;
196 }
197 FlatStringInfo srcFlat = FlattenAllString(vm, src);
198 return CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
199 }
200
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)201 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
202 {
203 ASSERT(IsLineString() && !IsConstantString());
204 if (IsUtf8()) {
205 ASSERT(src->IsUtf8());
206 CVector<uint8_t> buf;
207 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
208 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
209 if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
210 LOG_FULL(FATAL) << "memcpy_s failed";
211 UNREACHABLE();
212 }
213 } else if (src->IsUtf8()) {
214 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
215 CVector<uint8_t> buf;
216 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
217 Span<uint16_t> to(GetDataUtf16Writable() + start, length);
218 Span<const uint8_t> from(data, length);
219 for (uint32_t i = 0; i < length; i++) {
220 to[i] = from[i];
221 }
222 } else {
223 CVector<uint16_t> buf;
224 const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
225 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
226 if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
227 destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
228 LOG_FULL(FATAL) << "memcpy_s failed";
229 UNREACHABLE();
230 }
231 }
232 }
233
234 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)235 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
236 {
237 for (int32_t i = 0; i < count; ++i) {
238 auto left = static_cast<int32_t>(lhsSp[i]);
239 auto right = static_cast<int32_t>(rhsSp[i]);
240 if (left != right) {
241 return left - right;
242 }
243 }
244 return 0;
245 }
246
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)247 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
248 {
249 if (*left == *right) {
250 return 0;
251 }
252 FlatStringInfo lhs = FlattenAllString(vm, left);
253 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
254 FlatStringInfo rhs = FlattenAllString(vm, right);
255 lhs.SetString(*string);
256 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
257 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
258 int32_t countDiff = lhsCount - rhsCount;
259 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
260 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
261 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
262 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
263 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
264 if (charDiff != 0) {
265 return charDiff;
266 }
267 } else if (!lhs.IsUtf16()) {
268 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
269 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
270 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
271 if (charDiff != 0) {
272 return charDiff;
273 }
274 } else if (!rhs.IsUtf16()) {
275 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
276 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
277 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
278 if (charDiff != 0) {
279 return charDiff;
280 }
281 } else {
282 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
283 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
284 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
285 if (charDiff != 0) {
286 return charDiff;
287 }
288 }
289 return countDiff;
290 }
291
292 template<typename T1, typename T2>
IsSubStringAtSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,uint32_t offset)293 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)
294 {
295 int rhsSize = static_cast<int>(rhsSp.size());
296 ASSERT(rhsSize + offset <= lhsSp.size());
297 for (int i = 0; i < rhsSize; ++i) {
298 auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
299 auto right = static_cast<int32_t>(rhsSp[i]);
300 if (left != right) {
301 return false;
302 }
303 }
304 return true;
305 }
306
307
308 /**
309 * left: text string
310 * right: pattern string
311 * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
312 * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
313 */
IsSubStringAt(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t offset)314 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
315 const JSHandle<EcmaString>& right, uint32_t offset)
316 {
317 FlatStringInfo lhs = FlattenAllString(vm, left);
318 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
319 FlatStringInfo rhs = FlattenAllString(vm, right);
320 lhs.SetString(*string);
321 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
322 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
323 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
324 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
325 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
326 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
327 } else if (!lhs.IsUtf16()) {
328 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
329 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
330 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
331 } else if (!rhs.IsUtf16()) {
332 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
333 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
334 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
335 } else {
336 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
337 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
338 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
339 }
340 return false;
341 }
342
343 /* static */
344 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)345 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
346 {
347 ASSERT(rhsSp.size() > 0);
348 auto first = static_cast<int32_t>(rhsSp[0]);
349 for (int32_t i = pos; i <= max; i++) {
350 if (static_cast<int32_t>(lhsSp[i]) != first) {
351 i++;
352 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
353 i++;
354 }
355 }
356 /* Found first character, now look at the rest of rhsSp */
357 if (i <= max) {
358 int j = i + 1;
359 int end = j + static_cast<int>(rhsSp.size()) - 1;
360
361 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
362 }
363 if (j == end) {
364 /* Found whole string. */
365 return i;
366 }
367 }
368 }
369 return -1;
370 }
371
372 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)373 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
374 {
375 int rhsSize = static_cast<int>(rhsSp.size());
376 ASSERT(rhsSize > 0);
377 auto first = rhsSp[0];
378 for (int32_t i = pos; i >= 0; i--) {
379 if (lhsSp[i] != first) {
380 continue;
381 }
382 /* Found first character, now look at the rest of rhsSp */
383 int j = 1;
384 while (j < rhsSize) {
385 if (rhsSp[j] != lhsSp[i + j]) {
386 break;
387 }
388 j++;
389 }
390 if (j == rhsSize) {
391 return i;
392 }
393 }
394 return -1;
395 }
396
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)397 int32_t EcmaString::IndexOf(const EcmaVM *vm,
398 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
399 {
400 EcmaString *lhstring = *receiver;
401 EcmaString *rhstring = *search;
402 if (lhstring == nullptr || rhstring == nullptr) {
403 return -1;
404 }
405 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
406 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
407
408 if (pos > lhsCount) {
409 return -1;
410 }
411
412 if (rhsCount == 0) {
413 return pos;
414 }
415
416 if (pos < 0) {
417 pos = 0;
418 }
419
420 int32_t max = lhsCount - rhsCount;
421 if (max < 0) {
422 return -1;
423 }
424
425 if (pos + rhsCount > lhsCount) {
426 return -1;
427 }
428
429 FlatStringInfo lhs = FlattenAllString(vm, receiver);
430 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
431 FlatStringInfo rhs = FlattenAllString(vm, search);
432 lhs.SetString(*string);
433
434 if (rhs.IsUtf8() && lhs.IsUtf8()) {
435 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
436 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
437 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
438 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
439 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
440 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
441 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
442 } else if (rhs.IsUtf16()) {
443 return -1;
444 } else { // NOLINT(readability-else-after-return)
445 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
446 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
447 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
448 }
449 }
450
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)451 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
452 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
453 {
454 EcmaString *lhstring = *receiver;
455 EcmaString *rhstring = *search;
456 if (lhstring == nullptr || rhstring == nullptr) {
457 return -1;
458 }
459
460 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
461 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
462 if (lhsCount < rhsCount) {
463 return -1;
464 }
465
466 if (pos < 0) {
467 pos = 0;
468 }
469
470 if (pos > lhsCount) {
471 pos = lhsCount;
472 }
473
474 if (pos + rhsCount > lhsCount) {
475 pos = lhsCount - rhsCount;
476 }
477
478 if (rhsCount == 0) {
479 return pos;
480 }
481
482 FlatStringInfo lhs = FlattenAllString(vm, receiver);
483 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
484 FlatStringInfo rhs = FlattenAllString(vm, search);
485 lhs.SetString(*string);
486 if (rhs.IsUtf8() && lhs.IsUtf8()) {
487 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
488 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
489 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
490 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
491 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
492 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
493 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
494 } else if (rhs.IsUtf16()) {
495 return -1;
496 } else { // NOLINT(readability-else-after-return)
497 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
498 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
499 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
500 }
501 }
502
ToU16String(uint32_t len)503 std::u16string EcmaString::ToU16String(uint32_t len)
504 {
505 uint32_t length = len > 0 ? len : GetLength();
506 std::u16string result;
507 if (IsUtf16()) {
508 CVector<uint16_t> buf;
509 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
510 result = base::StringHelper::Utf16ToU16String(data, length);
511 } else {
512 CVector<uint8_t> buf;
513 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
514 result = base::StringHelper::Utf8ToU16String(data, length);
515 }
516 return result;
517 }
518
519 //static
CalculateAllConcatHashCode(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)520 uint32_t EcmaString::CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
521 const JSHandle<EcmaString> &secondString)
522 {
523 uint32_t hashCode;
524 uint32_t firstLength = firstString->GetLength();
525 uint32_t secondLength = secondString->GetLength();
526 if ((firstLength + secondLength < MAX_ELEMENT_INDEX_LEN) &&
527 firstString->IsUtf8() && secondString->IsUtf8() &&
528 firstString->IsInteger() && secondString->IsInteger()) {
529 firstString->HashIntegerString(firstLength, &hashCode, 0);
530 secondString->HashIntegerString(secondLength, &hashCode, hashCode);
531 return hashCode;
532 }
533 hashCode = EcmaString::CalculateConcatHashCode(firstString, secondString);
534 hashCode = MixHashcode(hashCode, NOT_INTEGER);
535 return hashCode;
536 }
537
538 // static
539 template<typename T1, typename T2>
CalculateDataConcatHashCode(const T1 * dataFirst,size_t sizeFirst,const T2 * dataSecond,size_t sizeSecond)540 uint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
541 const T2 *dataSecond, size_t sizeSecond)
542 {
543 uint32_t totalHash = 0;
544 constexpr uint32_t hashShift = static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT);
545 constexpr uint32_t blockSize = static_cast<size_t>(EcmaStringHash::BLOCK_SIZE);
546 // The concatenated length of the two strings is less than MIN_SIZE_FOR_UNROLLING.
547 if (sizeFirst + sizeSecond <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) {
548 for (uint32_t i = 0; i < sizeFirst; i++) {
549 totalHash = (totalHash << hashShift) - totalHash + dataFirst[i];
550 }
551 for (uint32_t i = 0; i < sizeSecond; i++) {
552 totalHash = (totalHash << hashShift) - totalHash + dataSecond[i];
553 }
554 return totalHash;
555 }
556 // Process the entire block of the first string.
557 uint32_t hash[blockSize] = {0};
558 uint32_t index = 0;
559 for (; index + blockSize <= sizeFirst; index += blockSize) {
560 hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
561 hash[1] = (hash[1] << hashShift) - hash[1] + dataFirst[index + 1]; // 1: the second element
562 hash[2] = (hash[2] << hashShift) - hash[2] + dataFirst[index + 2]; // 2: the third element
563 hash[3] = (hash[3] << hashShift) - hash[3] + dataFirst[index + 3]; // 3: the fourth element
564 }
565 // The remaining total string length is less than a whole block.
566 if ((sizeFirst % blockSize) + sizeSecond < blockSize) {
567 for (; index < sizeFirst; ++index) {
568 hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
569 }
570 index = 0;
571 } else {
572 //Calculate the non-integral block portion at the end of the first string.
573 for (; index < sizeFirst; ++index) {
574 hash[index % blockSize] = (hash[index % blockSize] << hashShift) -
575 hash[index % blockSize] + dataFirst[index];
576 }
577 //Calculate the portion of the second string
578 //that starts and aligns with an integral block at the end of the first string.
579 uint32_t wholeBlockRemain = (blockSize - sizeFirst % blockSize) % blockSize;
580 index = 0;
581 for (; index < wholeBlockRemain && index < sizeSecond; ++index) {
582 uint32_t nowHashIndex = sizeFirst % blockSize + index;
583 hash[nowHashIndex] = (hash[nowHashIndex] << hashShift) - hash[nowHashIndex] + dataSecond[index];
584 }
585 // Process the entire block of the Second string.
586 for (; index + blockSize <= sizeSecond; index += blockSize) {
587 hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
588 hash[1] = (hash[1] << hashShift) - hash[1] + dataSecond[index + 1]; // 1: the second element
589 hash[2] = (hash[2] << hashShift) - hash[2] + dataSecond[index + 2]; // 2: the third element
590 hash[3] = (hash[3] << hashShift) - hash[3] + dataSecond[index + 3]; // 3: the fourth element
591 }
592 }
593 for (; index < sizeSecond; ++index) {
594 hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
595 }
596 for (uint32_t i = 0; i < blockSize; ++i) {
597 totalHash = (totalHash << hashShift) - totalHash + hash[i];
598 }
599 return totalHash;
600 }
601
602 // static
CalculateConcatHashCode(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)603 uint32_t EcmaString::CalculateConcatHashCode(const JSHandle<EcmaString> &firstString,
604 const JSHandle<EcmaString> &secondString)
605 {
606 bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8();
607 bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8();
608 EcmaString *firstStr = *firstString;
609 EcmaString *secondStr = *secondString;
610 CVector<uint8_t> bufFirstUint8;
611 CVector<uint8_t> bufSecondUint8;
612 CVector<uint16_t> bufFirstUint16;
613 CVector<uint16_t> bufSecondUint16;
614 if (isFirstStringUtf8 && isSecondStringUtf8) {
615 const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
616 const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
617 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
618 dataSecond, secondStr->GetLength());
619 }
620 if (!isFirstStringUtf8 && isSecondStringUtf8) {
621 const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
622 const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
623 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
624 dataSecond, secondStr->GetLength());
625 }
626 if (isFirstStringUtf8 && !isSecondStringUtf8) {
627 const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
628 const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
629 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
630 dataSecond, secondStr->GetLength());
631 }
632 {
633 const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
634 const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
635 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
636 dataSecond, secondStr->GetLength());
637 }
638 }
639
640 // static
CanBeCompressed(const EcmaString * string)641 bool EcmaString::CanBeCompressed(const EcmaString *string)
642 {
643 ASSERT(string->IsLineOrConstantString());
644 if (string->IsUtf8()) {
645 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
646 }
647 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
648 }
649
650 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)651 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
652 {
653 uint32_t index = 0;
654 for (; index + 4 <= utf8Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
655 // Check if all four characters in the current block are ASCII characters
656 if (!IsASCIICharacter(utf8Data[index]) ||
657 !IsASCIICharacter(utf8Data[index + 1]) || // 1: the second element of the block
658 !IsASCIICharacter(utf8Data[index + 2]) || // 2: the third element of the block
659 !IsASCIICharacter(utf8Data[index + 3])) { // 3: the fourth element of the block
660 return false;
661 }
662 }
663 // Check remaining characters if they are ASCII
664 for (; index < utf8Len; ++index) {
665 if (!IsASCIICharacter(utf8Data[index])) {
666 return false;
667 }
668 }
669 return true;
670 }
671
672 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)673 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
674 {
675 uint32_t index = 0;
676 for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
677 // Check if all four characters in the current block are ASCII characters
678 if (!IsASCIICharacter(utf16Data[index]) ||
679 !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
680 !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
681 !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block
682 return false;
683 }
684 }
685 // Check remaining characters if they are ASCII
686 for (; index < utf16Len; ++index) {
687 if (!IsASCIICharacter(utf16Data[index])) {
688 return false;
689 }
690 }
691 return true;
692 }
693
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)694 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
695 {
696 ASSERT(NotTreeString());
697 ASSERT(str1->NotTreeString() && str2->NotTreeString());
698 if (GetLength() != str1->GetLength() + str2->GetLength()) {
699 return false;
700 }
701 if (IsUtf16()) {
702 CVector<uint16_t> buf;
703 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
704 if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
705 return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
706 }
707 } else {
708 CVector<uint8_t> buf;
709 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
710 if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
711 return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
712 str2->GetLength(), this->IsUtf8());
713 }
714 }
715 return false;
716 }
717
718 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString * left,EcmaString * right)719 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
720 {
721 CVector<uint16_t> bufLeftUft16;
722 CVector<uint16_t> bufRightUft16;
723 CVector<uint8_t> bufLeftUft8;
724 CVector<uint8_t> bufRightUft8;
725 int32_t lhsCount = static_cast<int32_t>(left->GetLength());
726 int32_t rhsCount = static_cast<int32_t>(right->GetLength());
727 if (!left->IsUtf16() && !right->IsUtf16()) {
728 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
729 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
730 Span<const uint8_t> lhsSp(data1, lhsCount);
731 Span<const uint8_t> rhsSp(data2, rhsCount);
732 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
733 } else if (!left->IsUtf16()) {
734 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
735 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
736 Span<const uint8_t> lhsSp(data1, lhsCount);
737 Span<const uint16_t> rhsSp(data2, rhsCount);
738 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
739 } else if (!right->IsUtf16()) {
740 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
741 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
742 Span<const uint16_t> lhsSp(data1, lhsCount);
743 Span<const uint8_t> rhsSp(data2, rhsCount);
744 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
745 } else {
746 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
747 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
748 Span<const uint16_t> lhsSp(data1, lhsCount);
749 Span<const uint16_t> rhsSp(data2, rhsCount);
750 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
751 }
752 }
753
754 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)755 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
756 {
757 int32_t lhsCount = static_cast<int32_t>(left.GetLength());
758 int32_t rhsCount = static_cast<int32_t>(right.GetLength());
759 if (!left.IsUtf16() && !right.IsUtf16()) {
760 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
761 Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
762 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
763 } else if (!left.IsUtf16()) {
764 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
765 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
766 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
767 } else if (!right.IsUtf16()) {
768 Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
769 Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
770 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
771 } else {
772 Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
773 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
774 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
775 }
776 }
777
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)778 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
779 {
780 if (str1 == str2) {
781 return true;
782 }
783 if (str1->IsInternString() && str2->IsInternString()) {
784 return false;
785 }
786 uint32_t str1Len = str1->GetLength();
787 if (str1Len != str2->GetLength()) {
788 return false;
789 }
790 if (str1Len == 0) {
791 return true;
792 }
793
794 uint32_t str1Hash;
795 uint32_t str2Hash;
796 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
797 if (str1Hash != str2Hash) {
798 return false;
799 }
800 }
801 FlatStringInfo str1Flat = FlattenAllString(vm, str1);
802 JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
803 FlatStringInfo str2Flat = FlattenAllString(vm, str2);
804 str1Flat.SetString(*string);
805 return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
806 }
807
808 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)809 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
810 {
811 ASSERT(str1 != nullptr && str2 != nullptr);
812 if (str1 == str2) {
813 return true;
814 }
815 uint32_t str1Len = str1->GetLength();
816 if (str1Len != str2->GetLength()) {
817 return false;
818 }
819 if (str1Len == 0) {
820 return true;
821 }
822
823 uint32_t str1Hash;
824 uint32_t str2Hash;
825 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
826 if (str1Hash != str2Hash) {
827 return false;
828 }
829 }
830 return StringsAreEqualDiffUtfEncoding(str1, str2);
831 }
832
833 /* static */
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)834 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
835 bool canBeCompressToUtf8)
836 {
837 if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
838 return false;
839 }
840 if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
841 return false;
842 }
843 if (str1->IsUtf8()) {
844 CVector<uint8_t> buf;
845 Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
846 Span<const uint8_t> data2(dataAddr, dataLen);
847 return EcmaString::StringsAreEquals(data1, data2);
848 }
849 CVector<uint16_t> buf;
850 uint32_t length = str1->GetLength();
851 const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
852 return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
853 }
854
855 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)856 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
857 {
858 uint32_t length = str1->GetLength();
859 if (length != utf16Len) {
860 return false;
861 }
862 if (str1->IsUtf8()) {
863 CVector<uint8_t> buf;
864 const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
865 return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
866 } else {
867 CVector<uint16_t> buf;
868 Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
869 Span<const uint16_t> data2(utf16Data, utf16Len);
870 return EcmaString::StringsAreEquals(data1, data2);
871 }
872 }
873
874 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)875 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
876 {
877 ASSERT(dstMax >= count);
878 ASSERT(dst.Size() >= src.Size());
879 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
880 LOG_FULL(FATAL) << "memcpy_s failed";
881 UNREACHABLE();
882 }
883 return true;
884 }
885
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const886 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
887 {
888 ASSERT(length >= 0);
889 Span<const uint8_t> str = FastToUtf8Span();
890 return HashIntegerString(str.data(), length, hash, hashSeed);
891 }
892
ComputeHashcode() const893 uint32_t EcmaString::ComputeHashcode() const
894 {
895 auto [hash, isInteger] = ComputeRawHashcode();
896 return MixHashcode(hash, isInteger);
897 }
898
899 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const900 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
901 {
902 uint32_t hash = 0;
903 uint32_t length = GetLength();
904 if (length == 0) {
905 return {hash, false};
906 }
907
908 if (IsUtf8()) {
909 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
910 if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
911 return {hash, true};
912 }
913 CVector<uint8_t> buf;
914 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
915 // String can not convert to integer number, using normal hashcode computing algorithm.
916 hash = this->ComputeHashForData(data, length, 0);
917 return {hash, false};
918 } else {
919 CVector<uint16_t> buf;
920 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
921 // If rawSeed has certain value, and second string uses UTF16 encoding,
922 // then merged string can not be small integer number.
923 hash = this->ComputeHashForData(data, length, 0);
924 return {hash, false};
925 }
926 }
927
928 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed,bool isInteger) const929 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
930 {
931 uint32_t hash;
932 uint32_t length = GetLength();
933 if (length == 0) {
934 return MixHashcode(rawHashSeed, isInteger);
935 }
936
937 if (IsUtf8()) {
938 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
939 if ((rawHashSeed == 0 || isInteger) &&
940 length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
941 return hash;
942 }
943 CVector<uint8_t> buf;
944 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
945 // String can not convert to integer number, using normal hashcode computing algorithm.
946 hash = this->ComputeHashForData(data, length, rawHashSeed);
947 return MixHashcode(hash, NOT_INTEGER);
948 } else {
949 CVector<uint16_t> buf;
950 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
951 // If rawSeed has certain value, and second string uses UTF16 encoding,
952 // then merged string can not be small integer number.
953 hash = this->ComputeHashForData(data, length, rawHashSeed);
954 return MixHashcode(hash, NOT_INTEGER);
955 }
956 }
957
958 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)959 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
960 {
961 uint32_t mixHash = 0;
962 if (canBeCompress) {
963 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
964 if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
965 return mixHash;
966 }
967 uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
968 return MixHashcode(hash, NOT_INTEGER);
969 } else {
970 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
971 CVector<uint16_t> tmpBuffer(utf16Len);
972 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
973 utf16Len);
974 ASSERT(len == utf16Len);
975 uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
976 return MixHashcode(hash, NOT_INTEGER);
977 }
978 LOG_ECMA(FATAL) << "this branch is unreachable";
979 UNREACHABLE();
980 }
981
982 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)983 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
984 {
985 uint32_t mixHash = 0;
986 // String length smaller than 10, try to compute integer hash.
987 if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
988 return mixHash;
989 }
990 uint32_t hash = ComputeHashForData(utf16Data, length, 0);
991 return MixHashcode(hash, NOT_INTEGER);
992 }
993
// Returns the length of the prefix of `utf8` that does not end inside a multi-byte sequence.
// A byte that sits k positions from the end (k = 1, 2, 3) and is at least 0xC0, 0xE0 or 0xF0
// respectively announces more following bytes than are left. The largest such k is cut off
// the end; if there is none, utf8Len is returned unchanged.
static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
{
    constexpr size_t MAX_TAIL = 3;
    // LEAD_MIN[k - 1]: smallest byte value that needs at least k bytes after it.
    constexpr uint8_t LEAD_MIN[MAX_TAIL] = {0xC0, 0xE0, 0xF0};
    // Scan from the longest tail down so that the largest applicable cut wins.
    for (size_t back = MAX_TAIL; back >= 1; --back) {
        if (utf8Len >= back && utf8[utf8Len - back] >= LEAD_MIN[back - 1]) {
            return utf8Len - back;
        }
    }
    return utf8Len;
}
1014
1015
1016 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)1017 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len,
1018 const uint16_t *utf16Data, uint32_t utf16Len)
1019 {
1020 size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
1021 const uint8_t *utf8End = utf8Data + utf8Len;
1022 const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len;
1023 const uint16_t *utf16End = utf16Data + utf16Len;
1024 while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
1025 uint8_t src = *utf8Data;
1026 switch (src & 0xF0) {
1027 case 0xF0: {
1028 const uint8_t c2 = *(++utf8Data);
1029 const uint8_t c3 = *(++utf8Data);
1030 const uint8_t c4 = *(++utf8Data);
1031 uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
1032 ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
1033 if (codePoint >= SURROGATE_RAIR_START) {
1034 if (utf16Data >= utf16End - 1) {
1035 return false;
1036 }
1037 codePoint -= SURROGATE_RAIR_START;
1038 if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) {
1039 return false;
1040 } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
1041 return false;
1042 }
1043 } else {
1044 if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
1045 return false;
1046 }
1047 }
1048 utf8Data++;
1049 break;
1050 }
1051 case 0xE0: {
1052 const uint8_t c2 = *(++utf8Data);
1053 const uint8_t c3 = *(++utf8Data);
1054 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
1055 ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
1056 return false;
1057 }
1058 utf8Data++;
1059 break;
1060 }
1061 case 0xD0:
1062 case 0xC0: {
1063 const uint8_t c2 = *(++utf8Data);
1064 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) {
1065 return false;
1066 }
1067 utf8Data++;
1068 break;
1069 }
1070 default:
1071 do {
1072 if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
1073 return false;
1074 }
1075 } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
1076 break;
1077 }
1078 }
1079 // The remain chars should be treated as single byte char.
1080 while (utf8Data < utf8End && utf16Data < utf16End) {
1081 if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
1082 return false;
1083 }
1084 }
1085 return utf8Data == utf8End && utf16Data == utf16End;
1086 }
1087
ToElementIndex(uint32_t * index)1088 bool EcmaString::ToElementIndex(uint32_t *index)
1089 {
1090 uint32_t len = GetLength();
1091 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
1092 return false;
1093 }
1094 if (UNLIKELY(IsUtf16())) {
1095 return false;
1096 }
1097
1098 // fast path: get integer from string's hash value
1099 if (TryToGetInteger(index)) {
1100 return true;
1101 }
1102
1103 CVector<uint8_t> buf;
1104 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1105 uint32_t c = data[0];
1106 uint64_t n = 0;
1107 if (c == '0') {
1108 *index = 0;
1109 return len == 1;
1110 }
1111 uint32_t loopStart = 0;
1112 if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
1113 *index = n;
1114 return true;
1115 }
1116 return false;
1117 }
1118
ToInt(int32_t * index,bool * negative)1119 bool EcmaString::ToInt(int32_t *index, bool *negative)
1120 {
1121 uint32_t len = GetLength();
1122 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
1123 return false;
1124 }
1125 if (UNLIKELY(IsUtf16())) {
1126 return false;
1127 }
1128 CVector<uint8_t> buf;
1129 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1130 uint32_t c = data[0];
1131 uint32_t loopStart = 0;
1132 uint64_t n = 0;
1133 if (c == '0') {
1134 *index = 0;
1135 return len == 1;
1136 }
1137 if (c == '-' && len > 1) {
1138 *negative = true;
1139 loopStart = 1;
1140 }
1141
1142 if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
1143 *index = *negative ? -n : n;
1144 return true;
1145 }
1146 return false;
1147 }
1148
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)1149 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
1150 {
1151 uint64_t n = 0;
1152 uint32_t len = GetLength();
1153 if (UNLIKELY(loopStart >= len)) {
1154 return false;
1155 }
1156 for (uint32_t i = loopStart; i < len; i++) {
1157 uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1158 if (c < '0' || c > '9') {
1159 return false;
1160 }
1161 // NOLINTNEXTLINE(readability-magic-numbers)
1162 n = n * 10 + (c - '0'); // 10: decimal factor
1163 }
1164 *index = n;
1165 return true;
1166 }
1167
ToTypedArrayIndex(uint32_t * index)1168 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
1169 {
1170 uint32_t len = GetLength();
1171 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
1172 return false;
1173 }
1174 if (UNLIKELY(IsUtf16())) {
1175 return false;
1176 }
1177
1178 CVector<uint8_t> buf;
1179 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1180 uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1181 uint64_t n = 0;
1182 if (c == '0') {
1183 *index = 0;
1184 return len == 1;
1185 }
1186 if (c > '0' && c <= '9') {
1187 n = c - '0';
1188 for (uint32_t i = 1; i < len; i++) {
1189 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1190 if (c >= '0' && c <= '9') {
1191 // NOLINTNEXTLINE(readability-magic-numbers)
1192 n = n * 10 + (c - '0'); // 10: decimal factor
1193 } else if (c == '.') {
1194 n = JSObject::MAX_ELEMENT_INDEX;
1195 break;
1196 } else {
1197 return false;
1198 }
1199 }
1200 if (n < JSObject::MAX_ELEMENT_INDEX) {
1201 *index = n;
1202 return true;
1203 } else {
1204 *index = JSObject::MAX_ELEMENT_INDEX;
1205 return true;
1206 }
1207 } else if (c == '-') {
1208 *index = JSObject::MAX_ELEMENT_INDEX;
1209 return true;
1210 }
1211 return false;
1212 }
1213
1214 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)1215 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
1216 {
1217 uint32_t srcLen = src->GetLength();
1218 int32_t start = 0;
1219 int32_t end = static_cast<int32_t>(srcLen) - 1;
1220
1221 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
1222 start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
1223 }
1224 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
1225 end = base::StringHelper::GetEnd(data, start, srcLen);
1226 }
1227 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
1228 return res;
1229 }
1230
1231 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1232 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1233 {
1234 auto srcFlat = FlattenAllString(vm, src);
1235 uint32_t srcLength = srcFlat.GetLength();
1236 auto factory = vm->GetFactory();
1237 if (srcFlat.IsUtf16()) {
1238 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1239 std::string res = base::StringHelper::ToLower(u16str);
1240 return *(factory->NewFromStdString(res));
1241 } else {
1242 return ConvertUtf8ToLowerOrUpper(vm, src, true);
1243 }
1244 }
1245
1246 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1247 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1248 {
1249 auto srcFlat = FlattenAllString(vm, src);
1250 uint32_t srcLength = srcFlat.GetLength();
1251 const char start = 'A';
1252 const char end = 'Z';
1253 uint32_t upperIndex = srcLength;
1254 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1255 for (uint32_t index = 0; index < srcLength; ++index) {
1256 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1257 upperIndex = index;
1258 break;
1259 }
1260 }
1261 if (upperIndex == srcLength) {
1262 return *src;
1263 }
1264 return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
1265 }
1266
1267 /* static */
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1268 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1269 {
1270 auto srcFlat = FlattenAllString(vm, src);
1271 uint32_t srcLength = srcFlat.GetLength();
1272 const char start = 'a';
1273 const char end = 'z';
1274 uint32_t lowerIndex = srcLength;
1275 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1276 for (uint32_t index = 0; index < srcLength; ++index) {
1277 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1278 lowerIndex = index;
1279 break;
1280 }
1281 }
1282 if (lowerIndex == srcLength) {
1283 return *src;
1284 }
1285 return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
1286 }
1287
1288 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)1289 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
1290 bool toLower, uint32_t startIndex)
1291 {
1292 const char start = toLower ? 'A' : 'a';
1293 const char end = toLower ? 'Z' : 'z';
1294 uint32_t srcLength = src->GetLength();
1295 JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1296 auto srcFlat = FlattenAllString(vm, src);
1297 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1298 auto newStringPtr = newString->GetDataUtf8Writable();
1299 if (startIndex > 0) {
1300 if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1301 LOG_FULL(FATAL) << "memcpy_s failed";
1302 UNREACHABLE();
1303 }
1304 }
1305 for (uint32_t index = startIndex; index < srcLength; ++index) {
1306 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1307 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
1308 } else {
1309 *(newStringPtr + index) = data[index];
1310 }
1311 }
1312 return *newString;
1313 }
1314
1315 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1316 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1317 {
1318 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1319 uint32_t srcLength = srcFlat.GetLength();
1320 auto factory = vm->GetFactory();
1321 if (srcFlat.IsUtf16()) {
1322 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1323 std::string res = base::StringHelper::ToUpper(u16str);
1324 return *(factory->NewFromStdString(res));
1325 } else {
1326 return ConvertUtf8ToLowerOrUpper(vm, src, false);
1327 }
1328 }
1329
1330 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1331 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1332 {
1333 auto factory = vm->GetFactory();
1334 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1335 std::u16string utf16 = srcFlat.ToU16String();
1336 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1337 return *(factory->NewFromStdString(res));
1338 }
1339
1340 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1341 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1342 {
1343 auto factory = vm->GetFactory();
1344 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1345 std::u16string utf16 = srcFlat.ToU16String();
1346 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1347 return *(factory->NewFromStdString(res));
1348 }
1349
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)1350 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1351 {
1352 FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1353 uint32_t srcLen = srcFlat.GetLength();
1354 if (UNLIKELY(srcLen == 0)) {
1355 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1356 }
1357 if (srcFlat.IsUtf8()) {
1358 Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1359 return TrimBody(thread, src, data, mode);
1360 } else {
1361 Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1362 return TrimBody(thread, src, data, mode);
1363 }
1364 }
1365
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1366 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1367 {
1368 ASSERT(string->IsTreeString() || string->IsSlicedString());
1369 ASSERT(IsSMemSpace(type));
1370 auto thread = vm->GetJSThread();
1371 uint32_t length = string->GetLength();
1372 EcmaString *result = nullptr;
1373 if (string->IsUtf8()) {
1374 result = CreateLineStringWithSpaceType(vm, length, true, type);
1375 WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1376 } else {
1377 result = CreateLineStringWithSpaceType(vm, length, false, type);
1378 WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1379 }
1380 if (string->IsTreeString()) {
1381 JSHandle<TreeEcmaString> tree(string);
1382 ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1383 tree->SetFirst(thread, JSTaggedValue(result));
1384 tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1385 }
1386 return result;
1387 }
1388
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1389 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1390 {
1391 EcmaString *s = *string;
1392 if (!s->IsTreeString()) {
1393 return s;
1394 }
1395 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1396 if (!tree->IsFlat()) {
1397 return SlowFlatten(vm, string, type);
1398 }
1399 return EcmaString::Cast(tree->GetFirst());
1400 }
1401
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1402 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1403 {
1404 ASSERT(IsSMemSpace(type));
1405 EcmaString *s = *string;
1406 uint32_t startIndex = 0;
1407 if (s->IsLineOrConstantString()) {
1408 return FlatStringInfo(s, startIndex, s->GetLength());
1409 }
1410 if (string->IsTreeString()) {
1411 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1412 if (!tree->IsFlat()) {
1413 s = SlowFlatten(vm, string, type);
1414 } else {
1415 s = EcmaString::Cast(tree->GetFirst());
1416 }
1417 } else if (string->IsSlicedString()) {
1418 s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1419 startIndex = SlicedString::Cast(*string)->GetStartIndex();
1420 }
1421 return FlatStringInfo(s, startIndex, string->GetLength());
1422 }
1423
FlattenNoGCForSnapshot(const EcmaVM * vm,EcmaString * string)1424 EcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
1425 {
1426 DISALLOW_GARBAGE_COLLECTION;
1427 if (string->IsLineOrConstantString()) {
1428 return string;
1429 }
1430 if (string->IsTreeString()) {
1431 TreeEcmaString *tree = TreeEcmaString::Cast(string);
1432 if (tree->IsFlat()) {
1433 string = EcmaString::Cast(tree->GetFirst());
1434 } else {
1435 uint32_t length = tree->GetLength();
1436 EcmaString *result = nullptr;
1437 if (tree->IsUtf8()) {
1438 result = CreateLineStringNoGC(vm, length, true);
1439 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1440 } else {
1441 result = CreateLineStringNoGC(vm, length, false);
1442 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1443 }
1444 tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1445 tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1446 return result;
1447 }
1448 } else if (string->IsSlicedString()) {
1449 SlicedString *str = SlicedString::Cast(string);
1450 uint32_t length = str->GetLength();
1451 EcmaString *result = nullptr;
1452 if (str->IsUtf8()) {
1453 result = CreateLineStringNoGC(vm, length, true);
1454 WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1455 } else {
1456 result = CreateLineStringNoGC(vm, length, false);
1457 WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1458 }
1459 return result;
1460 }
1461 return string;
1462 }
1463
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1464 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1465 {
1466 ASSERT(src->IsUtf8());
1467 uint32_t length = src->GetLength();
1468 EcmaString *string = const_cast<EcmaString *>(src);
1469 if (string->IsTreeString()) {
1470 if (string->IsFlat()) {
1471 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1472 } else {
1473 buf.reserve(length);
1474 WriteToFlat(string, buf.data(), length);
1475 return buf.data();
1476 }
1477 } else if (string->IsSlicedString()) {
1478 SlicedString *str = SlicedString::Cast(string);
1479 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1480 }
1481 return string->GetDataUtf8();
1482 }
1483
GetNonTreeUtf8Data(const EcmaString * src)1484 const uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src)
1485 {
1486 ASSERT(src->IsUtf8());
1487 ASSERT(!src->IsTreeString());
1488 EcmaString *string = const_cast<EcmaString *>(src);
1489 if (string->IsSlicedString()) {
1490 SlicedString *str = SlicedString::Cast(string);
1491 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1492 }
1493 ASSERT(src->IsLineOrConstantString());
1494 return string->GetDataUtf8();
1495 }
1496
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1497 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1498 {
1499 ASSERT(src->IsUtf16());
1500 uint32_t length = src->GetLength();
1501 EcmaString *string = const_cast<EcmaString *>(src);
1502 if (string->IsTreeString()) {
1503 if (string->IsFlat()) {
1504 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1505 } else {
1506 buf.reserve(length);
1507 WriteToFlat(string, buf.data(), length);
1508 return buf.data();
1509 }
1510 } else if (string->IsSlicedString()) {
1511 SlicedString *str = SlicedString::Cast(string);
1512 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1513 }
1514 return string->GetDataUtf16();
1515 }
1516
GetNonTreeUtf16Data(const EcmaString * src)1517 const uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src)
1518 {
1519 ASSERT(src->IsUtf16());
1520 ASSERT(!src->IsTreeString());
1521 EcmaString *string = const_cast<EcmaString *>(src);
1522 if (string->IsSlicedString()) {
1523 SlicedString *str = SlicedString::Cast(string);
1524 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1525 }
1526 ASSERT(src->IsLineOrConstantString());
1527 return string->GetDataUtf16();
1528 }
1529
ToU16String(uint32_t len)1530 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1531 {
1532 uint32_t length = len > 0 ? len : GetLength();
1533 std::u16string result;
1534 if (IsUtf16()) {
1535 const uint16_t *data = this->GetDataUtf16();
1536 result = base::StringHelper::Utf16ToU16String(data, length);
1537 } else {
1538 const uint8_t *data = this->GetDataUtf8();
1539 result = base::StringHelper::Utf8ToU16String(data, length);
1540 }
1541 return result;
1542 }
1543
EcmaStringAccessor(TaggedObject * obj)1544 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1545 {
1546 ASSERT(obj != nullptr);
1547 string_ = EcmaString::Cast(obj);
1548 }
1549
EcmaStringAccessor(JSTaggedValue value)1550 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1551 {
1552 ASSERT(value.IsString());
1553 string_ = EcmaString::Cast(value.GetTaggedObject());
1554 }
1555
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1556 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1557 : string_(*strHandle)
1558 {
1559 }
1560
ToStdString(StringConvertedUsage usage)1561 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1562 {
1563 if (string_ == nullptr) {
1564 return "";
1565 }
1566 bool modify = (usage != StringConvertedUsage::PRINT);
1567 CVector<uint8_t> buf;
1568 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1569 #if ENABLE_NEXT_OPTIMIZATION
1570 return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1571 #else
1572 std::string res;
1573 res.reserve(sp.size());
1574 for (const auto &c : sp) {
1575 res.push_back(c);
1576 }
1577 return res;
1578 #endif
1579 }
1580
Utf8ConvertToString()1581 CString EcmaStringAccessor::Utf8ConvertToString()
1582 {
1583 if (string_ == nullptr) {
1584 return CString("");
1585 }
1586 if (IsUtf8()) {
1587 std::string stdStr;
1588 if (IsLineString()) {
1589 return base::StringHelper::Utf8ToCString(GetDataUtf8(), GetLength());
1590 }
1591 CVector<uint8_t> buf;
1592 const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf);
1593 return base::StringHelper::Utf8ToCString(data, GetLength());
1594 } else {
1595 return ToCString();
1596 }
1597 }
1598
DebuggerToStdString(StringConvertedUsage usage)1599 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1600 {
1601 if (string_ == nullptr) {
1602 return "";
1603 }
1604
1605 bool modify = (usage != StringConvertedUsage::PRINT);
1606 CVector<uint8_t> buf;
1607 Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1608 #if ENABLE_NEXT_OPTIMIZATION
1609 return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1610 #else
1611 std::string res;
1612 res.reserve(sp.size());
1613 for (const auto &c : sp) {
1614 res.push_back(c);
1615 }
1616 return res;
1617 #endif
1618 }
1619
ToCString(StringConvertedUsage usage,bool cesu8)1620 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
1621 {
1622 if (string_ == nullptr) {
1623 return "";
1624 }
1625 bool modify = (usage != StringConvertedUsage::PRINT);
1626 CVector<uint8_t> buf;
1627 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1628 #if ENABLE_NEXT_OPTIMIZATION
1629 return CString(reinterpret_cast<const char*>(sp.data()), sp.size());
1630 #else
1631 CString res;
1632 res.reserve(sp.size());
1633 for (const auto &c : sp) {
1634 res.push_back(c);
1635 }
1636 return res;
1637 #endif
1638 }
1639
AppendToCString(CString & str,StringConvertedUsage usage,bool cesu8)1640 void EcmaStringAccessor::AppendToCString(CString &str, StringConvertedUsage usage, bool cesu8)
1641 {
1642 if (string_ == nullptr) {
1643 return;
1644 }
1645 bool modify = (usage != StringConvertedUsage::PRINT);
1646 CVector<uint8_t> buf;
1647 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1648 str.append(reinterpret_cast<const char*>(sp.data()), sp.size());
1649 }
1650
AppendQuotedStringToCString(CString & str,StringConvertedUsage usage,bool cesu8)1651 void EcmaStringAccessor::AppendQuotedStringToCString(CString &str, StringConvertedUsage usage, bool cesu8)
1652 {
1653 if (string_ == nullptr) {
1654 return;
1655 }
1656 bool modify = (usage != StringConvertedUsage::PRINT);
1657 CVector<uint8_t> buf;
1658 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1659 base::JsonHelper::AppendValueToQuotedString(sp, str);
1660 }
1661
1662 // static
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1663 EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1664 {
1665 return EcmaString::CreateLineString(vm, length, compressed);
1666 }
1667 } // namespace panda::ecmascript
1668