1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/ecma_string_table.h"
19
20 namespace panda::ecmascript {
21
// Masks that keep only the lowest 3, 4, 5 or 6 bits of a value.
constexpr size_t LOW_3BITS = 0x7;
constexpr size_t LOW_4BITS = 0xF;
constexpr size_t LOW_5BITS = 0x1F;
constexpr size_t LOW_6BITS = 0x3F;

// UTF-16 surrogate boundaries: first low surrogate, first high surrogate, and the first code point
// that has to be written as a surrogate pair.
constexpr size_t L_SURROGATE_START = 0xDC00;
constexpr size_t H_SURROGATE_START = 0xD800;
constexpr size_t SURROGATE_RAIR_START = 0x10000;

// Bit offsets. NOTE(review): presumably shift amounts for UTF-8 <-> code point conversion; their use
// sites are outside this part of the file.
constexpr size_t OFFSET_18POS = 18;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_6POS = 6;
33
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)34 EcmaString *EcmaString::Concat(const EcmaVM *vm,
35 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
36 {
37 ASSERT(IsSMemSpace(type));
38 // allocator may trig gc and move src, need to hold it
39 EcmaString *strLeft = *left;
40 EcmaString *strRight = *right;
41 uint32_t leftLength = strLeft->GetLength();
42 uint32_t rightLength = strRight->GetLength();
43 uint32_t newLength = leftLength + rightLength;
44 if (newLength == 0) {
45 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
46 }
47
48 if (leftLength == 0) {
49 return strRight;
50 }
51 if (rightLength == 0) {
52 return strLeft;
53 }
54 // if the result string is small, make a LineString
55 bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
56 if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
57 ASSERT(strLeft->IsLineOrConstantString());
58 ASSERT(strRight->IsLineOrConstantString());
59 auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
60 // retrieve strings after gc
61 strLeft = *left;
62 strRight = *right;
63 if (compressed) {
64 // copy left part
65 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
66 Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
67 EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
68 // copy right part
69 sp = sp.SubSpan(leftLength);
70 Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
71 EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
72 } else {
73 // copy left part
74 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
75 if (strLeft->IsUtf8()) {
76 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
77 } else {
78 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
79 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
80 }
81 // copy right part
82 sp = sp.SubSpan(leftLength);
83 if (strRight->IsUtf8()) {
84 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
85 } else {
86 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
87 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
88 }
89 }
90 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
91 return newString;
92 }
93 return CreateTreeString(vm, left, right, newLength, compressed);
94 }
95
96 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)97 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
98 uint32_t length, bool compressed)
99 {
100 if (original->IsConstantString()) {
101 return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
102 }
103 JSHandle<EcmaString> newString(vm->GetJSThread(),
104 CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105 auto strOrigin = FlattenAllString(vm, original);
106 if (compressed) {
107 // copy
108 Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109 Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110 EcmaString::MemCopyChars(sp, length, srcSp, length);
111 } else {
112 // copy left part
113 Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114 if (strOrigin.IsUtf8()) {
115 EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116 } else {
117 Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118 EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119 }
120 }
121 ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122 return *newString;
123 }
124
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129 ASSERT((start + length) <= src->GetLength());
130 if (length == 0) {
131 return *vm->GetFactory()->GetEmptyString();
132 }
133 if (start == 0 && length == src->GetLength()) {
134 return *src;
135 }
136 if (src->IsUtf8()) {
137 return FastSubUtf8String(vm, src, start, length);
138 }
139 return FastSubUtf16String(vm, src, start, length);
140 }
141
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146 ASSERT((start + length) <= src->GetLength());
147 JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
148 FlatStringInfo srcFlat = FlattenAllString(vm, src);
149 slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
150 slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
151 slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152 return *slicedString;
153 }
154
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159 ASSERT((start + length) <= src->GetLength());
160 if (length == 1) {
161 JSThread *thread = vm->GetJSThread();
162 uint16_t res = EcmaStringAccessor(src).Get<false>(start);
163 if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
164 JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
165 return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject());
166 }
167 }
168 if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
169 if (start == 0 && length == src->GetLength()) {
170 return *src;
171 }
172 if (src->IsUtf16()) {
173 FlatStringInfo srcFlat = FlattenAllString(vm, src);
174 bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
175 if (canBeCompressed) {
176 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
177 srcFlat = FlattenAllString(vm, src);
178 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
179 return *string;
180 }
181 }
182 return GetSlicedString(vm, src, start, length);
183 }
184 return FastSubString(vm, src, start, length);
185 }
186
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)187 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
188 {
189 ASSERT(IsLineString() && !IsConstantString());
190 if (IsUtf8()) {
191 ASSERT(src->IsUtf8());
192 CVector<uint8_t> buf;
193 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
194 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
195 if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
196 LOG_FULL(FATAL) << "memcpy_s failed";
197 UNREACHABLE();
198 }
199 } else if (src->IsUtf8()) {
200 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
201 CVector<uint8_t> buf;
202 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
203 Span<uint16_t> to(GetDataUtf16Writable() + start, length);
204 Span<const uint8_t> from(data, length);
205 for (uint32_t i = 0; i < length; i++) {
206 to[i] = from[i];
207 }
208 } else {
209 CVector<uint16_t> buf;
210 const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
211 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
212 if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
213 destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
214 LOG_FULL(FATAL) << "memcpy_s failed";
215 UNREACHABLE();
216 }
217 }
218 }
219
220 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)221 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
222 {
223 for (int32_t i = 0; i < count; ++i) {
224 auto left = static_cast<int32_t>(lhsSp[i]);
225 auto right = static_cast<int32_t>(rhsSp[i]);
226 if (left != right) {
227 return left - right;
228 }
229 }
230 return 0;
231 }
232
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)233 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
234 {
235 if (*left == *right) {
236 return 0;
237 }
238 FlatStringInfo lhs = FlattenAllString(vm, left);
239 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
240 FlatStringInfo rhs = FlattenAllString(vm, right);
241 lhs.SetString(*string);
242 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
243 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
244 int32_t countDiff = lhsCount - rhsCount;
245 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
246 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
247 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
248 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
249 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
250 if (charDiff != 0) {
251 return charDiff;
252 }
253 } else if (!lhs.IsUtf16()) {
254 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
255 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
256 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
257 if (charDiff != 0) {
258 return charDiff;
259 }
260 } else if (!rhs.IsUtf16()) {
261 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
262 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
263 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
264 if (charDiff != 0) {
265 return charDiff;
266 }
267 } else {
268 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
269 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
270 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
271 if (charDiff != 0) {
272 return charDiff;
273 }
274 }
275 return countDiff;
276 }
277
278 template<typename T1, typename T2>
IsSubStringAtSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,uint32_t offset)279 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)
280 {
281 int rhsSize = static_cast<int>(rhsSp.size());
282 ASSERT(rhsSize + offset <= lhsSp.size());
283 for (int i = 0; i < rhsSize; ++i) {
284 auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
285 auto right = static_cast<int32_t>(rhsSp[i]);
286 if (left != right) {
287 return false;
288 }
289 }
290 return true;
291 }
292
293
294 /**
295 * left: text string
296 * right: pattern string
297 * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
298 * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
299 */
IsSubStringAt(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t offset)300 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
301 const JSHandle<EcmaString>& right, uint32_t offset)
302 {
303 FlatStringInfo lhs = FlattenAllString(vm, left);
304 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
305 FlatStringInfo rhs = FlattenAllString(vm, right);
306 lhs.SetString(*string);
307 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
308 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
309 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
310 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
311 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
312 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
313 } else if (!lhs.IsUtf16()) {
314 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
315 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
316 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
317 } else if (!rhs.IsUtf16()) {
318 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
319 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
320 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
321 } else {
322 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
323 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
324 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
325 }
326 return false;
327 }
328
329 /* static */
330 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)331 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
332 {
333 ASSERT(rhsSp.size() > 0);
334 auto first = static_cast<int32_t>(rhsSp[0]);
335 for (int32_t i = pos; i <= max; i++) {
336 if (static_cast<int32_t>(lhsSp[i]) != first) {
337 i++;
338 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
339 i++;
340 }
341 }
342 /* Found first character, now look at the rest of rhsSp */
343 if (i <= max) {
344 int j = i + 1;
345 int end = j + static_cast<int>(rhsSp.size()) - 1;
346
347 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
348 }
349 if (j == end) {
350 /* Found whole string. */
351 return i;
352 }
353 }
354 }
355 return -1;
356 }
357
358 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)359 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
360 {
361 int rhsSize = static_cast<int>(rhsSp.size());
362 ASSERT(rhsSize > 0);
363 auto first = rhsSp[0];
364 for (int32_t i = pos; i >= 0; i--) {
365 if (lhsSp[i] != first) {
366 continue;
367 }
368 /* Found first character, now look at the rest of rhsSp */
369 int j = 1;
370 while (j < rhsSize) {
371 if (rhsSp[j] != lhsSp[i + j]) {
372 break;
373 }
374 j++;
375 }
376 if (j == rhsSize) {
377 return i;
378 }
379 }
380 return -1;
381 }
382
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)383 int32_t EcmaString::IndexOf(const EcmaVM *vm,
384 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
385 {
386 EcmaString *lhstring = *receiver;
387 EcmaString *rhstring = *search;
388 if (lhstring == nullptr || rhstring == nullptr) {
389 return -1;
390 }
391 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
392 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
393
394 if (pos > lhsCount) {
395 return -1;
396 }
397
398 if (rhsCount == 0) {
399 return pos;
400 }
401
402 if (pos < 0) {
403 pos = 0;
404 }
405
406 int32_t max = lhsCount - rhsCount;
407 if (max < 0) {
408 return -1;
409 }
410
411 if (pos + rhsCount > lhsCount) {
412 return -1;
413 }
414
415 FlatStringInfo lhs = FlattenAllString(vm, receiver);
416 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
417 FlatStringInfo rhs = FlattenAllString(vm, search);
418 lhs.SetString(*string);
419
420 if (rhs.IsUtf8() && lhs.IsUtf8()) {
421 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
422 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
423 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
424 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
425 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
426 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
427 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
428 } else if (rhs.IsUtf16()) {
429 return -1;
430 } else { // NOLINT(readability-else-after-return)
431 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
432 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
433 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
434 }
435 }
436
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)437 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
438 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
439 {
440 EcmaString *lhstring = *receiver;
441 EcmaString *rhstring = *search;
442 if (lhstring == nullptr || rhstring == nullptr) {
443 return -1;
444 }
445
446 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
447 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
448 if (lhsCount < rhsCount) {
449 return -1;
450 }
451
452 if (pos < 0) {
453 pos = 0;
454 }
455
456 if (pos > lhsCount) {
457 pos = lhsCount;
458 }
459
460 if (pos + rhsCount > lhsCount) {
461 pos = lhsCount - rhsCount;
462 }
463
464 if (rhsCount == 0) {
465 return pos;
466 }
467
468 FlatStringInfo lhs = FlattenAllString(vm, receiver);
469 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
470 FlatStringInfo rhs = FlattenAllString(vm, search);
471 lhs.SetString(*string);
472 if (rhs.IsUtf8() && lhs.IsUtf8()) {
473 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
474 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
475 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
476 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
477 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
478 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
479 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
480 } else if (rhs.IsUtf16()) {
481 return -1;
482 } else { // NOLINT(readability-else-after-return)
483 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
484 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
485 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
486 }
487 }
488
ToU16String(uint32_t len)489 std::u16string EcmaString::ToU16String(uint32_t len)
490 {
491 uint32_t length = len > 0 ? len : GetLength();
492 std::u16string result;
493 if (IsUtf16()) {
494 CVector<uint16_t> buf;
495 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
496 result = base::StringHelper::Utf16ToU16String(data, length);
497 } else {
498 CVector<uint8_t> buf;
499 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
500 result = base::StringHelper::Utf8ToU16String(data, length);
501 }
502 return result;
503 }
504
505 // static
CanBeCompressed(const EcmaString * string)506 bool EcmaString::CanBeCompressed(const EcmaString *string)
507 {
508 ASSERT(string->IsLineOrConstantString());
509 if (string->IsUtf8()) {
510 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
511 }
512 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
513 }
514
515 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)516 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
517 {
518 bool isCompressed = true;
519 uint32_t index = 0;
520 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
521 while (index < utf8Len) {
522 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
523 if (!IsASCIICharacter(utf8Data[index])) {
524 isCompressed = false;
525 break;
526 }
527 ++index;
528 }
529 return isCompressed;
530 }
531
532 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)533 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
534 {
535 bool isCompressed = true;
536 Span<const uint16_t> data(utf16Data, utf16Len);
537 for (uint32_t i = 0; i < utf16Len; i++) {
538 if (!IsASCIICharacter(data[i])) {
539 isCompressed = false;
540 break;
541 }
542 }
543 return isCompressed;
544 }
545
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)546 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
547 {
548 ASSERT(NotTreeString());
549 ASSERT(str1->NotTreeString() && str2->NotTreeString());
550 if (GetLength() != str1->GetLength() + str2->GetLength()) {
551 return false;
552 }
553 if (IsUtf16()) {
554 CVector<uint16_t> buf;
555 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
556 if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
557 return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
558 }
559 } else {
560 CVector<uint8_t> buf;
561 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
562 if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
563 return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
564 str2->GetLength(), this->IsUtf8());
565 }
566 }
567 return false;
568 }
569
570 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString * left,EcmaString * right)571 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
572 {
573 CVector<uint16_t> bufLeftUft16;
574 CVector<uint16_t> bufRightUft16;
575 CVector<uint8_t> bufLeftUft8;
576 CVector<uint8_t> bufRightUft8;
577 int32_t lhsCount = static_cast<int32_t>(left->GetLength());
578 int32_t rhsCount = static_cast<int32_t>(right->GetLength());
579 if (!left->IsUtf16() && !right->IsUtf16()) {
580 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
581 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
582 Span<const uint8_t> lhsSp(data1, lhsCount);
583 Span<const uint8_t> rhsSp(data2, rhsCount);
584 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
585 } else if (!left->IsUtf16()) {
586 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
587 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
588 Span<const uint8_t> lhsSp(data1, lhsCount);
589 Span<const uint16_t> rhsSp(data2, rhsCount);
590 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
591 } else if (!right->IsUtf16()) {
592 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
593 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
594 Span<const uint16_t> lhsSp(data1, lhsCount);
595 Span<const uint8_t> rhsSp(data2, rhsCount);
596 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
597 } else {
598 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
599 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
600 Span<const uint16_t> lhsSp(data1, lhsCount);
601 Span<const uint16_t> rhsSp(data2, rhsCount);
602 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
603 }
604 }
605
606 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)607 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
608 {
609 int32_t lhsCount = static_cast<int32_t>(left.GetLength());
610 int32_t rhsCount = static_cast<int32_t>(right.GetLength());
611 if (!left.IsUtf16() && !right.IsUtf16()) {
612 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
613 Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
614 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
615 } else if (!left.IsUtf16()) {
616 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
617 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
618 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
619 } else if (!right.IsUtf16()) {
620 Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
621 Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
622 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
623 } else {
624 Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
625 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
626 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
627 }
628 }
629
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)630 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
631 {
632 if (str1 == str2) {
633 return true;
634 }
635 if (str1->IsInternString() && str2->IsInternString()) {
636 return false;
637 }
638 uint32_t str1Len = str1->GetLength();
639 if (str1Len != str2->GetLength()) {
640 return false;
641 }
642 if (str1Len == 0) {
643 return true;
644 }
645
646 uint32_t str1Hash;
647 uint32_t str2Hash;
648 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
649 if (str1Hash != str2Hash) {
650 return false;
651 }
652 }
653 FlatStringInfo str1Flat = FlattenAllString(vm, str1);
654 JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
655 FlatStringInfo str2Flat = FlattenAllString(vm, str2);
656 str1Flat.SetString(*string);
657 return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
658 }
659
660 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)661 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
662 {
663 if (str1 == str2) {
664 return true;
665 }
666 uint32_t str1Len = str1->GetLength();
667 if (str1Len != str2->GetLength()) {
668 return false;
669 }
670 if (str1Len == 0) {
671 return true;
672 }
673
674 uint32_t str1Hash;
675 uint32_t str2Hash;
676 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
677 if (str1Hash != str2Hash) {
678 return false;
679 }
680 }
681 return StringsAreEqualDiffUtfEncoding(str1, str2);
682 }
683
684 /* static */
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)685 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
686 bool canBeCompressToUtf8)
687 {
688 if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
689 return false;
690 }
691 if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
692 return false;
693 }
694 if (str1->IsUtf8()) {
695 CVector<uint8_t> buf;
696 Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
697 Span<const uint8_t> data2(dataAddr, dataLen);
698 return EcmaString::StringsAreEquals(data1, data2);
699 }
700 CVector<uint16_t> buf;
701 uint32_t length = str1->GetLength();
702 const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
703 return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
704 }
705
706 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)707 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
708 {
709 uint32_t length = str1->GetLength();
710 if (length != utf16Len) {
711 return false;
712 }
713 if (str1->IsUtf8()) {
714 CVector<uint8_t> buf;
715 const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
716 return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
717 } else {
718 CVector<uint16_t> buf;
719 Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
720 Span<const uint16_t> data2(utf16Data, utf16Len);
721 return EcmaString::StringsAreEquals(data1, data2);
722 }
723 }
724
725 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)726 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
727 {
728 ASSERT(dstMax >= count);
729 ASSERT(dst.Size() >= src.Size());
730 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
731 LOG_FULL(FATAL) << "memcpy_s failed";
732 UNREACHABLE();
733 }
734 return true;
735 }
736
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const737 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
738 {
739 ASSERT(length >= 0);
740 Span<const uint8_t> str = FastToUtf8Span();
741 return HashIntegerString(str.data(), length, hash, hashSeed);
742 }
743
ComputeHashcode() const744 uint32_t EcmaString::ComputeHashcode() const
745 {
746 auto [hash, isInteger] = ComputeRawHashcode();
747 return MixHashcode(hash, isInteger);
748 }
749
750 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const751 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
752 {
753 uint32_t hash = 0;
754 uint32_t length = GetLength();
755 if (length == 0) {
756 return {hash, false};
757 }
758
759 if (IsUtf8()) {
760 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
761 if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
762 return {hash, true};
763 }
764 CVector<uint8_t> buf;
765 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
766 // String can not convert to integer number, using normal hashcode computing algorithm.
767 hash = this->ComputeHashForData(data, length, 0);
768 return {hash, false};
769 } else {
770 CVector<uint16_t> buf;
771 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
772 // If rawSeed has certain value, and second string uses UTF16 encoding,
773 // then merged string can not be small integer number.
774 hash = this->ComputeHashForData(data, length, 0);
775 return {hash, false};
776 }
777 }
778
779 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed,bool isInteger) const780 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
781 {
782 uint32_t hash;
783 uint32_t length = GetLength();
784 if (length == 0) {
785 return MixHashcode(rawHashSeed, isInteger);
786 }
787
788 if (IsUtf8()) {
789 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
790 if ((rawHashSeed == 0 || isInteger) &&
791 length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
792 return hash;
793 }
794 CVector<uint8_t> buf;
795 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
796 // String can not convert to integer number, using normal hashcode computing algorithm.
797 hash = this->ComputeHashForData(data, length, rawHashSeed);
798 return MixHashcode(hash, NOT_INTEGER);
799 } else {
800 CVector<uint16_t> buf;
801 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
802 // If rawSeed has certain value, and second string uses UTF16 encoding,
803 // then merged string can not be small integer number.
804 hash = this->ComputeHashForData(data, length, rawHashSeed);
805 return MixHashcode(hash, NOT_INTEGER);
806 }
807 }
808
809 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)810 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
811 {
812 uint32_t mixHash = 0;
813 if (canBeCompress) {
814 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
815 if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
816 return mixHash;
817 }
818 uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
819 return MixHashcode(hash, NOT_INTEGER);
820 } else {
821 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
822 CVector<uint16_t> tmpBuffer(utf16Len);
823 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
824 utf16Len);
825 ASSERT(len == utf16Len);
826 uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
827 return MixHashcode(hash, NOT_INTEGER);
828 }
829 LOG_ECMA(FATAL) << "this branch is unreachable";
830 UNREACHABLE();
831 }
832
833 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)834 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
835 {
836 uint32_t mixHash = 0;
837 // String length smaller than 10, try to compute integer hash.
838 if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
839 return mixHash;
840 }
841 uint32_t hash = ComputeHashForData(utf16Data, length, 0);
842 return MixHashcode(hash, NOT_INTEGER);
843 }
844
// Returns utf8Len with a truncated trailing multi-byte sequence removed.
// Only the last three bytes are inspected: a byte that announces more continuation bytes than
// actually remain in the buffer causes it and everything after it to be dropped.
// When several of the tail bytes qualify, the longest trim wins.
static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
{
    constexpr size_t ONE_BYTE_LENGTH = 1;
    constexpr size_t TWO_BYTES_LENGTH = 2;
    constexpr size_t THREE_BYTES_LENGTH = 3;

    // Third to last byte starts a 4-byte sequence, but only 3 bytes are left.
    if (utf8Len >= THREE_BYTES_LENGTH && utf8[utf8Len - THREE_BYTES_LENGTH] >= 0xF0) {
        return utf8Len - THREE_BYTES_LENGTH;
    }
    // Second to last byte starts a sequence of at least 3 bytes, but only 2 bytes are left.
    if (utf8Len >= TWO_BYTES_LENGTH && utf8[utf8Len - TWO_BYTES_LENGTH] >= 0xE0) {
        return utf8Len - TWO_BYTES_LENGTH;
    }
    // Last byte starts a multi-byte sequence, but nothing follows it.
    if (utf8Len >= ONE_BYTE_LENGTH && utf8[utf8Len - ONE_BYTE_LENGTH] >= 0xC0) {
        return utf8Len - ONE_BYTE_LENGTH;
    }
    return utf8Len;
}
865
866
867 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)868 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len,
869 const uint16_t *utf16Data, uint32_t utf16Len)
870 {
871 size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
872 const uint8_t *utf8End = utf8Data + utf8Len;
873 const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len;
874 const uint16_t *utf16End = utf16Data + utf16Len;
875 while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
876 uint8_t src = *utf8Data;
877 switch (src & 0xF0) {
878 case 0xF0: {
879 const uint8_t c2 = *(++utf8Data);
880 const uint8_t c3 = *(++utf8Data);
881 const uint8_t c4 = *(++utf8Data);
882 uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
883 ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
884 if (codePoint >= SURROGATE_RAIR_START) {
885 if (utf16Data >= utf16End - 1) {
886 return false;
887 }
888 codePoint -= SURROGATE_RAIR_START;
889 if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) {
890 return false;
891 } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
892 return false;
893 }
894 } else {
895 if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
896 return false;
897 }
898 }
899 utf8Data++;
900 break;
901 }
902 case 0xE0: {
903 const uint8_t c2 = *(++utf8Data);
904 const uint8_t c3 = *(++utf8Data);
905 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
906 ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
907 return false;
908 }
909 utf8Data++;
910 break;
911 }
912 case 0xD0:
913 case 0xC0: {
914 const uint8_t c2 = *(++utf8Data);
915 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) {
916 return false;
917 }
918 utf8Data++;
919 break;
920 }
921 default:
922 do {
923 if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
924 return false;
925 }
926 } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
927 break;
928 }
929 }
930 // The remain chars should be treated as single byte char.
931 while (utf8Data < utf8End && utf16Data < utf16End) {
932 if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
933 return false;
934 }
935 }
936 return utf8Data == utf8End && utf16Data == utf16End;
937 }
938
ToElementIndex(uint32_t * index)939 bool EcmaString::ToElementIndex(uint32_t *index)
940 {
941 uint32_t len = GetLength();
942 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
943 return false;
944 }
945 if (UNLIKELY(IsUtf16())) {
946 return false;
947 }
948
949 // fast path: get integer from string's hash value
950 if (TryToGetInteger(index)) {
951 return true;
952 }
953
954 CVector<uint8_t> buf;
955 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
956 uint32_t c = data[0];
957 uint64_t n = 0;
958 if (c == '0') {
959 *index = 0;
960 return len == 1;
961 }
962 uint32_t loopStart = 0;
963 if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
964 *index = n;
965 return true;
966 }
967 return false;
968 }
969
ToInt(int32_t * index,bool * negative)970 bool EcmaString::ToInt(int32_t *index, bool *negative)
971 {
972 uint32_t len = GetLength();
973 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
974 return false;
975 }
976 if (UNLIKELY(IsUtf16())) {
977 return false;
978 }
979 CVector<uint8_t> buf;
980 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
981 uint32_t c = data[0];
982 uint32_t loopStart = 0;
983 uint64_t n = 0;
984 if (c == '0') {
985 *index = 0;
986 return len == 1;
987 }
988 if (c == '-' && len > 1) {
989 *negative = true;
990 loopStart = 1;
991 }
992
993 if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
994 *index = *negative ? -n : n;
995 return true;
996 }
997 return false;
998 }
999
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)1000 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
1001 {
1002 uint64_t n = 0;
1003 uint32_t len = GetLength();
1004 if (UNLIKELY(loopStart >= len)) {
1005 return false;
1006 }
1007 for (uint32_t i = loopStart; i < len; i++) {
1008 uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1009 if (c < '0' || c > '9') {
1010 return false;
1011 }
1012 // NOLINTNEXTLINE(readability-magic-numbers)
1013 n = n * 10 + (c - '0'); // 10: decimal factor
1014 }
1015 *index = n;
1016 return true;
1017 }
1018
ToTypedArrayIndex(uint32_t * index)1019 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
1020 {
1021 uint32_t len = GetLength();
1022 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
1023 return false;
1024 }
1025 if (UNLIKELY(IsUtf16())) {
1026 return false;
1027 }
1028
1029 CVector<uint8_t> buf;
1030 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1031 uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1032 uint64_t n = 0;
1033 if (c == '0') {
1034 *index = 0;
1035 return len == 1;
1036 }
1037 if (c > '0' && c <= '9') {
1038 n = c - '0';
1039 for (uint32_t i = 1; i < len; i++) {
1040 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1041 if (c >= '0' && c <= '9') {
1042 // NOLINTNEXTLINE(readability-magic-numbers)
1043 n = n * 10 + (c - '0'); // 10: decimal factor
1044 } else if (c == '.') {
1045 n = JSObject::MAX_ELEMENT_INDEX;
1046 break;
1047 } else {
1048 return false;
1049 }
1050 }
1051 if (n < JSObject::MAX_ELEMENT_INDEX) {
1052 *index = n;
1053 return true;
1054 } else {
1055 *index = JSObject::MAX_ELEMENT_INDEX;
1056 return true;
1057 }
1058 } else if (c == '-') {
1059 *index = JSObject::MAX_ELEMENT_INDEX;
1060 return true;
1061 }
1062 return false;
1063 }
1064
1065 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)1066 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
1067 {
1068 uint32_t srcLen = src->GetLength();
1069 int32_t start = 0;
1070 int32_t end = static_cast<int32_t>(srcLen) - 1;
1071
1072 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
1073 start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
1074 }
1075 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
1076 end = base::StringHelper::GetEnd(data, start, srcLen);
1077 }
1078 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
1079 return res;
1080 }
1081
1082 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1083 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1084 {
1085 auto srcFlat = FlattenAllString(vm, src);
1086 uint32_t srcLength = srcFlat.GetLength();
1087 auto factory = vm->GetFactory();
1088 if (srcFlat.IsUtf16()) {
1089 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1090 std::string res = base::StringHelper::ToLower(u16str);
1091 return *(factory->NewFromStdString(res));
1092 } else {
1093 return ConvertUtf8ToLowerOrUpper(vm, src, true);
1094 }
1095 }
1096
1097 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1098 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1099 {
1100 auto srcFlat = FlattenAllString(vm, src);
1101 uint32_t srcLength = srcFlat.GetLength();
1102 const char start = 'A';
1103 const char end = 'Z';
1104 uint32_t upperIndex = srcLength;
1105 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1106 for (uint32_t index = 0; index < srcLength; ++index) {
1107 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1108 upperIndex = index;
1109 break;
1110 }
1111 }
1112 if (upperIndex == srcLength) {
1113 return *src;
1114 }
1115 return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
1116 }
1117
1118 /* static */
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1119 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1120 {
1121 auto srcFlat = FlattenAllString(vm, src);
1122 uint32_t srcLength = srcFlat.GetLength();
1123 const char start = 'a';
1124 const char end = 'z';
1125 uint32_t lowerIndex = srcLength;
1126 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1127 for (uint32_t index = 0; index < srcLength; ++index) {
1128 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1129 lowerIndex = index;
1130 break;
1131 }
1132 }
1133 if (lowerIndex == srcLength) {
1134 return *src;
1135 }
1136 return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
1137 }
1138
1139 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)1140 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
1141 bool toLower, uint32_t startIndex)
1142 {
1143 const char start = toLower ? 'A' : 'a';
1144 const char end = toLower ? 'Z' : 'z';
1145 uint32_t srcLength = src->GetLength();
1146 JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1147 auto srcFlat = FlattenAllString(vm, src);
1148 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1149 auto newStringPtr = newString->GetDataUtf8Writable();
1150 if (startIndex > 0) {
1151 if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1152 LOG_FULL(FATAL) << "memcpy_s failed";
1153 UNREACHABLE();
1154 }
1155 }
1156 for (uint32_t index = startIndex; index < srcLength; ++index) {
1157 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1158 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
1159 } else {
1160 *(newStringPtr + index) = data[index];
1161 }
1162 }
1163 return *newString;
1164 }
1165
1166 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1167 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1168 {
1169 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1170 uint32_t srcLength = srcFlat.GetLength();
1171 auto factory = vm->GetFactory();
1172 if (srcFlat.IsUtf16()) {
1173 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1174 std::string res = base::StringHelper::ToUpper(u16str);
1175 return *(factory->NewFromStdString(res));
1176 } else {
1177 return ConvertUtf8ToLowerOrUpper(vm, src, false);
1178 }
1179 }
1180
1181 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1182 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1183 {
1184 auto factory = vm->GetFactory();
1185 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1186 std::u16string utf16 = srcFlat.ToU16String();
1187 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1188 return *(factory->NewFromStdString(res));
1189 }
1190
1191 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1192 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1193 {
1194 auto factory = vm->GetFactory();
1195 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1196 std::u16string utf16 = srcFlat.ToU16String();
1197 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1198 return *(factory->NewFromStdString(res));
1199 }
1200
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)1201 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1202 {
1203 FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1204 uint32_t srcLen = srcFlat.GetLength();
1205 if (UNLIKELY(srcLen == 0)) {
1206 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1207 }
1208 if (srcFlat.IsUtf8()) {
1209 Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1210 return TrimBody(thread, src, data, mode);
1211 } else {
1212 Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1213 return TrimBody(thread, src, data, mode);
1214 }
1215 }
1216
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1217 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1218 {
1219 ASSERT(string->IsTreeString() || string->IsSlicedString());
1220 ASSERT(IsSMemSpace(type));
1221 auto thread = vm->GetJSThread();
1222 uint32_t length = string->GetLength();
1223 EcmaString *result = nullptr;
1224 if (string->IsUtf8()) {
1225 result = CreateLineStringWithSpaceType(vm, length, true, type);
1226 WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1227 } else {
1228 result = CreateLineStringWithSpaceType(vm, length, false, type);
1229 WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1230 }
1231 if (string->IsTreeString()) {
1232 JSHandle<TreeEcmaString> tree(string);
1233 ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1234 tree->SetFirst(thread, JSTaggedValue(result));
1235 tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1236 }
1237 return result;
1238 }
1239
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1240 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1241 {
1242 EcmaString *s = *string;
1243 if (!s->IsTreeString()) {
1244 return s;
1245 }
1246 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1247 if (!tree->IsFlat()) {
1248 return SlowFlatten(vm, string, type);
1249 }
1250 return EcmaString::Cast(tree->GetFirst());
1251 }
1252
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1253 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1254 {
1255 ASSERT(IsSMemSpace(type));
1256 EcmaString *s = *string;
1257 uint32_t startIndex = 0;
1258 if (s->IsLineOrConstantString()) {
1259 return FlatStringInfo(s, startIndex, s->GetLength());
1260 }
1261 if (string->IsTreeString()) {
1262 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1263 if (!tree->IsFlat()) {
1264 s = SlowFlatten(vm, string, type);
1265 } else {
1266 s = EcmaString::Cast(tree->GetFirst());
1267 }
1268 } else if (string->IsSlicedString()) {
1269 s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1270 startIndex = SlicedString::Cast(*string)->GetStartIndex();
1271 }
1272 return FlatStringInfo(s, startIndex, string->GetLength());
1273 }
1274
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1275 EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1276 {
1277 DISALLOW_GARBAGE_COLLECTION;
1278 if (string->IsLineOrConstantString()) {
1279 return string;
1280 }
1281 if (string->IsTreeString()) {
1282 TreeEcmaString *tree = TreeEcmaString::Cast(string);
1283 if (tree->IsFlat()) {
1284 string = EcmaString::Cast(tree->GetFirst());
1285 } else {
1286 uint32_t length = tree->GetLength();
1287 EcmaString *result = nullptr;
1288 if (tree->IsUtf8()) {
1289 result = CreateLineStringNoGC(vm, length, true);
1290 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1291 } else {
1292 result = CreateLineStringNoGC(vm, length, false);
1293 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1294 }
1295 tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1296 tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1297 return result;
1298 }
1299 } else if (string->IsSlicedString()) {
1300 SlicedString *str = SlicedString::Cast(string);
1301 uint32_t length = str->GetLength();
1302 EcmaString *result = nullptr;
1303 if (str->IsUtf8()) {
1304 result = CreateLineStringNoGC(vm, length, true);
1305 WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1306 } else {
1307 result = CreateLineStringNoGC(vm, length, false);
1308 WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1309 }
1310 return result;
1311 }
1312 return string;
1313 }
1314
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1315 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1316 {
1317 ASSERT(src->IsUtf8());
1318 uint32_t length = src->GetLength();
1319 EcmaString *string = const_cast<EcmaString *>(src);
1320 if (string->IsTreeString()) {
1321 if (string->IsFlat()) {
1322 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1323 } else {
1324 buf.reserve(length);
1325 WriteToFlat(string, buf.data(), length);
1326 return buf.data();
1327 }
1328 } else if (string->IsSlicedString()) {
1329 SlicedString *str = SlicedString::Cast(string);
1330 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1331 }
1332 return string->GetDataUtf8();
1333 }
1334
GetNonTreeUtf8Data(const EcmaString * src)1335 const uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src)
1336 {
1337 ASSERT(src->IsUtf8());
1338 ASSERT(!src->IsTreeString());
1339 EcmaString *string = const_cast<EcmaString *>(src);
1340 if (string->IsSlicedString()) {
1341 SlicedString *str = SlicedString::Cast(string);
1342 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1343 }
1344 ASSERT(src->IsLineOrConstantString());
1345 return string->GetDataUtf8();
1346 }
1347
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1348 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1349 {
1350 ASSERT(src->IsUtf16());
1351 uint32_t length = src->GetLength();
1352 EcmaString *string = const_cast<EcmaString *>(src);
1353 if (string->IsTreeString()) {
1354 if (string->IsFlat()) {
1355 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1356 } else {
1357 buf.reserve(length);
1358 WriteToFlat(string, buf.data(), length);
1359 return buf.data();
1360 }
1361 } else if (string->IsSlicedString()) {
1362 SlicedString *str = SlicedString::Cast(string);
1363 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1364 }
1365 return string->GetDataUtf16();
1366 }
1367
GetNonTreeUtf16Data(const EcmaString * src)1368 const uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src)
1369 {
1370 ASSERT(src->IsUtf16());
1371 ASSERT(!src->IsTreeString());
1372 EcmaString *string = const_cast<EcmaString *>(src);
1373 if (string->IsSlicedString()) {
1374 SlicedString *str = SlicedString::Cast(string);
1375 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1376 }
1377 ASSERT(src->IsLineOrConstantString());
1378 return string->GetDataUtf16();
1379 }
1380
ToU16String(uint32_t len)1381 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1382 {
1383 uint32_t length = len > 0 ? len : GetLength();
1384 std::u16string result;
1385 if (IsUtf16()) {
1386 const uint16_t *data = this->GetDataUtf16();
1387 result = base::StringHelper::Utf16ToU16String(data, length);
1388 } else {
1389 const uint8_t *data = this->GetDataUtf8();
1390 result = base::StringHelper::Utf8ToU16String(data, length);
1391 }
1392 return result;
1393 }
1394
EcmaStringAccessor(TaggedObject * obj)1395 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1396 {
1397 ASSERT(obj != nullptr);
1398 string_ = EcmaString::Cast(obj);
1399 }
1400
EcmaStringAccessor(JSTaggedValue value)1401 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1402 {
1403 ASSERT(value.IsString());
1404 string_ = EcmaString::Cast(value.GetTaggedObject());
1405 }
1406
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1407 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1408 : string_(*strHandle)
1409 {
1410 }
1411
ToStdString(StringConvertedUsage usage)1412 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1413 {
1414 if (string_ == nullptr) {
1415 return "";
1416 }
1417 bool modify = (usage != StringConvertedUsage::PRINT);
1418 CVector<uint8_t> buf;
1419 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1420 std::string res;
1421 res.reserve(sp.size());
1422 for (const auto &c : sp) {
1423 res.push_back(c);
1424 }
1425 return res;
1426 }
1427
Utf8ConvertToString()1428 CString EcmaStringAccessor::Utf8ConvertToString()
1429 {
1430 if (string_ == nullptr) {
1431 return CString("");
1432 }
1433 if (IsUtf8()) {
1434 std::string stdStr;
1435 if (IsLineString()) {
1436 return base::StringHelper::Utf8ToString(GetDataUtf8(), GetLength()).c_str();
1437 }
1438 CVector<uint8_t> buf;
1439 const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf);
1440 return base::StringHelper::Utf8ToString(data, GetLength()).c_str();
1441 } else {
1442 return ToCString();
1443 }
1444 }
1445
DebuggerToStdString(StringConvertedUsage usage)1446 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1447 {
1448 if (string_ == nullptr) {
1449 return "";
1450 }
1451
1452 bool modify = (usage != StringConvertedUsage::PRINT);
1453 CVector<uint8_t> buf;
1454 Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1455 std::string res;
1456 res.reserve(sp.size());
1457 for (const auto &c : sp) {
1458 res.push_back(c);
1459 }
1460 return res;
1461 }
1462
ToCString(StringConvertedUsage usage,bool cesu8)1463 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
1464 {
1465 if (string_ == nullptr) {
1466 return "";
1467 }
1468 bool modify = (usage != StringConvertedUsage::PRINT);
1469 CVector<uint8_t> buf;
1470 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1471 CString res;
1472 res.reserve(sp.size());
1473 for (const auto &c : sp) {
1474 res.push_back(c);
1475 }
1476 return res;
1477 }
1478
1479 // static
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1480 EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1481 {
1482 return EcmaString::CreateLineString(vm, length, compressed);
1483 }
1484 } // namespace panda::ecmascript
1485