1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20
21 namespace panda::ecmascript {
22
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)23 EcmaString *EcmaString::Concat(const EcmaVM *vm,
24 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
25 {
26 // allocator may trig gc and move src, need to hold it
27 EcmaString *strLeft = *left;
28 EcmaString *strRight = *right;
29 uint32_t leftLength = strLeft->GetLength();
30 bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
31 uint32_t rightLength = strRight->GetLength();
32 uint32_t newLength = leftLength + rightLength;
33 if (newLength == 0) {
34 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
35 }
36
37 if (leftLength == 0) {
38 if (type == MemSpaceType::OLD_SPACE) {
39 Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*right));
40 if (objectRegion->InYoungSpace()) {
41 return CopyStringToOldSpace(vm, right, rightLength, compressed);
42 }
43 }
44 return strRight;
45 }
46 if (rightLength == 0) {
47 if (type == MemSpaceType::OLD_SPACE) {
48 Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*left));
49 if (objectRegion->InYoungSpace()) {
50 return CopyStringToOldSpace(vm, left, leftLength, compressed);
51 }
52 }
53 return strLeft;
54 }
55
56 // if the result string is small, make a LineString
57 if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
58 ASSERT(strLeft->IsLineOrConstantString());
59 ASSERT(strRight->IsLineOrConstantString());
60 auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
61 // retrieve strings after gc
62 strLeft = *left;
63 strRight = *right;
64 if (compressed) {
65 // copy left part
66 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
67 Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
68 EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
69 // copy right part
70 sp = sp.SubSpan(leftLength);
71 Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
72 EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
73 } else {
74 // copy left part
75 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
76 if (strLeft->IsUtf8()) {
77 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
78 } else {
79 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
80 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
81 }
82 // copy right part
83 sp = sp.SubSpan(leftLength);
84 if (strRight->IsUtf8()) {
85 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
86 } else {
87 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
88 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
89 }
90 }
91 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
92 return newString;
93 }
94 return CreateTreeString(vm, left, right, newLength, compressed);
95 }
96
97 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)98 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
99 uint32_t length, bool compressed)
100 {
101 EcmaString *strOrigin = *original;
102 ASSERT(strOrigin->IsLineOrConstantString());
103 EcmaString *newString = nullptr;
104 if (strOrigin->IsLineString()) {
105 newString = CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE);
106 } else if (strOrigin->IsConstantString()) {
107 return CreateConstantString(vm, strOrigin->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
108 }
109 strOrigin = *original;
110 if (compressed) {
111 // copy
112 Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
113 Span<const uint8_t> srcSp(strOrigin->GetDataUtf8(), length);
114 EcmaString::MemCopyChars(sp, length, srcSp, length);
115 } else {
116 // copy left part
117 Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
118 if (strOrigin->IsUtf8()) {
119 EcmaString::CopyChars(sp.data(), strOrigin->GetDataUtf8(), length);
120 } else {
121 Span<const uint16_t> srcSp(strOrigin->GetDataUtf16(), length);
122 EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
123 }
124 }
125 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
126 return newString;
127 }
128
129 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)130 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
131 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
132 {
133 ASSERT((start + length) <= src->GetLength());
134 if (length == 0) {
135 return *vm->GetFactory()->GetEmptyString();
136 }
137 if (start == 0 && length == src->GetLength()) {
138 return *src;
139 }
140 auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
141 if (srcFlat->IsUtf8()) {
142 return FastSubUtf8String(vm, srcFlat, start, length);
143 }
144 return FastSubUtf16String(vm, srcFlat, start, length);
145 }
146
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)147 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
148 {
149 ASSERT(IsLineString() && !IsConstantString());
150 if (IsUtf8()) {
151 ASSERT(src->IsUtf8());
152 CVector<uint8_t> buf;
153 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
154 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
155 if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
156 LOG_FULL(FATAL) << "memcpy_s failed";
157 UNREACHABLE();
158 }
159 } else if (src->IsUtf8()) {
160 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
161 CVector<uint8_t> buf;
162 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
163 Span<uint16_t> to(GetDataUtf16Writable() + start, length);
164 Span<const uint8_t> from(data, length);
165 for (uint32_t i = 0; i < length; i++) {
166 to[i] = from[i];
167 }
168 } else {
169 CVector<uint16_t> buf;
170 const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
171 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
172 if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
173 destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
174 LOG_FULL(FATAL) << "memcpy_s failed";
175 UNREACHABLE();
176 }
177 }
178 }
179
180 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)181 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
182 {
183 for (int32_t i = 0; i < count; ++i) {
184 auto left = static_cast<int32_t>(lhsSp[i]);
185 auto right = static_cast<int32_t>(rhsSp[i]);
186 if (left != right) {
187 return left - right;
188 }
189 }
190 return 0;
191 }
192
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)193 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
194 {
195 if (*left == *right) {
196 return 0;
197 }
198 auto leftFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, left));
199 auto rightFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, right));
200 EcmaString *lhs = *leftFlat;
201 EcmaString *rhs = *rightFlat;
202 int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
203 int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
204 int32_t countDiff = lhsCount - rhsCount;
205 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
206 if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
207 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
208 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
209 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
210 if (charDiff != 0) {
211 return charDiff;
212 }
213 } else if (!lhs->IsUtf16()) {
214 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
215 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
216 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
217 if (charDiff != 0) {
218 return charDiff;
219 }
220 } else if (!rhs->IsUtf16()) {
221 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), rhsCount);
222 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), lhsCount);
223 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
224 if (charDiff != 0) {
225 return charDiff;
226 }
227 } else {
228 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
229 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
230 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
231 if (charDiff != 0) {
232 return charDiff;
233 }
234 }
235 return countDiff;
236 }
237
238 /* static */
239 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)240 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
241 {
242 ASSERT(rhsSp.size() > 0);
243 auto first = static_cast<int32_t>(rhsSp[0]);
244 for (int32_t i = pos; i <= max; i++) {
245 if (static_cast<int32_t>(lhsSp[i]) != first) {
246 i++;
247 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
248 i++;
249 }
250 }
251 /* Found first character, now look at the rest of rhsSp */
252 if (i <= max) {
253 int j = i + 1;
254 int end = j + static_cast<int>(rhsSp.size()) - 1;
255
256 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
257 }
258 if (j == end) {
259 /* Found whole string. */
260 return i;
261 }
262 }
263 }
264 return -1;
265 }
266
267 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)268 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
269 {
270 int rhsSize = static_cast<int>(rhsSp.size());
271 ASSERT(rhsSize > 0);
272 auto first = rhsSp[0];
273 for (int32_t i = pos; i >= 0; i--) {
274 if (lhsSp[i] != first) {
275 continue;
276 }
277 /* Found first character, now look at the rest of rhsSp */
278 int j = 1;
279 while (j < rhsSize) {
280 if (rhsSp[j] != lhsSp[i + j]) {
281 break;
282 }
283 j++;
284 }
285 if (j == rhsSize) {
286 return i;
287 }
288 }
289 return -1;
290 }
291
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)292 int32_t EcmaString::IndexOf(const EcmaVM *vm,
293 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
294 {
295 EcmaString *lhs = *receiver;
296 EcmaString *rhs = *search;
297 if (lhs == nullptr || rhs == nullptr) {
298 return -1;
299 }
300 int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
301 int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
302
303 if (pos > lhsCount) {
304 return -1;
305 }
306
307 if (rhsCount == 0) {
308 return pos;
309 }
310
311 if (pos < 0) {
312 pos = 0;
313 }
314
315 int32_t max = lhsCount - rhsCount;
316 if (max < 0) {
317 return -1;
318 }
319
320 if (pos + rhsCount > lhsCount) {
321 return -1;
322 }
323
324 auto receiverFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, receiver));
325 auto searchFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, search));
326 lhs = *receiverFlat;
327 rhs = *searchFlat;
328
329 if (rhs->IsUtf8() && lhs->IsUtf8()) {
330 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
331 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
332 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
333 } else if (rhs->IsUtf16() && lhs->IsUtf16()) { // NOLINT(readability-else-after-return)
334 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
335 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
336 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
337 } else if (rhs->IsUtf16()) {
338 return -1;
339 } else { // NOLINT(readability-else-after-return)
340 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
341 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
342 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
343 }
344 }
345
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)346 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
347 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
348 {
349 EcmaString *lhs = *receiver;
350 EcmaString *rhs = *search;
351 if (lhs == nullptr || rhs == nullptr) {
352 return -1;
353 }
354
355 int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
356 int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
357 if (lhsCount < rhsCount) {
358 return -1;
359 }
360
361 if (pos < 0) {
362 pos = 0;
363 }
364
365 if (pos > lhsCount) {
366 pos = lhsCount;
367 }
368
369 if (pos + rhsCount > lhsCount) {
370 pos = lhsCount - rhsCount;
371 }
372
373 if (rhsCount == 0) {
374 return pos;
375 }
376
377 auto receiverFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, receiver));
378 auto searchFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, search));
379 lhs = *receiverFlat;
380 rhs = *searchFlat;
381
382 if (rhs->IsUtf8() && lhs->IsUtf8()) {
383 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
384 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
385 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
386 } else if (rhs->IsUtf16() && lhs->IsUtf16()) { // NOLINT(readability-else-after-return)
387 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
388 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
389 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
390 } else if (rhs->IsUtf16()) {
391 return -1;
392 } else { // NOLINT(readability-else-after-return)
393 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
394 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
395 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
396 }
397 }
398
ToU16String(uint32_t len)399 std::u16string EcmaString::ToU16String(uint32_t len)
400 {
401 uint32_t length = len > 0 ? len : GetLength();
402 std::u16string result;
403 if (IsUtf16()) {
404 CVector<uint16_t> buf;
405 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
406 result = base::StringHelper::Utf16ToU16String(data, length);
407 } else {
408 CVector<uint8_t> buf;
409 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
410 result = base::StringHelper::Utf8ToU16String(data, length);
411 }
412 return result;
413 }
414
415 // static
CanBeCompressed(const EcmaString * string)416 bool EcmaString::CanBeCompressed(const EcmaString *string)
417 {
418 ASSERT(string->IsLineOrConstantString());
419 if (string->IsUtf8()) {
420 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
421 }
422 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
423 }
424
425 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)426 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
427 {
428 bool isCompressed = true;
429 uint32_t index = 0;
430 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
431 while (index < utf8Len) {
432 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
433 if (!IsASCIICharacter(utf8Data[index])) {
434 isCompressed = false;
435 break;
436 }
437 ++index;
438 }
439 return isCompressed;
440 }
441
442 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)443 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
444 {
445 bool isCompressed = true;
446 Span<const uint16_t> data(utf16Data, utf16Len);
447 for (uint32_t i = 0; i < utf16Len; i++) {
448 if (!IsASCIICharacter(data[i])) {
449 isCompressed = false;
450 break;
451 }
452 }
453 return isCompressed;
454 }
455
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)456 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
457 {
458 ASSERT(IsLineOrConstantString());
459 ASSERT(str1->IsLineOrConstantString() && str2->IsLineOrConstantString());
460 if (GetLength() != str1->GetLength() + str2->GetLength()) {
461 return false;
462 }
463 if (IsUtf16()) {
464 if (str1->IsUtf8() && str2->IsUtf8()) {
465 return false;
466 }
467 if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
468 return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
469 }
470 } else {
471 if (str1->IsUtf16() || str2->IsUtf16()) {
472 return false;
473 }
474 Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
475 Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
476 if (EcmaString::StringsAreEquals(concatData, data1)) {
477 concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
478 Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
479 return EcmaString::StringsAreEquals(concatData, data2);
480 }
481 }
482 return false;
483 }
484
485 /* static */
StringsAreEqualSameUtfEncoding(EcmaString * str1,EcmaString * str2)486 bool EcmaString::StringsAreEqualSameUtfEncoding(EcmaString *str1, EcmaString *str2)
487 {
488 if (str1->IsUtf16()) {
489 CVector<uint16_t> buf1;
490 CVector<uint16_t> buf2;
491 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(str1, buf1);
492 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(str2, buf2);
493 Span<const uint16_t> sp1(data1, str1->GetLength());
494 Span<const uint16_t> sp2(data2, str2->GetLength());
495 return EcmaString::StringsAreEquals(sp1, sp2);
496 } else { // NOLINT(readability-else-after-return)
497 CVector<uint8_t> buf1;
498 CVector<uint8_t> buf2;
499 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(str1, buf1);
500 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(str2, buf2);
501 Span<const uint8_t> sp1(data1, str1->GetLength());
502 Span<const uint8_t> sp2(data2, str2->GetLength());
503 return EcmaString::StringsAreEquals(sp1, sp2);
504 }
505 }
506
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)507 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
508 {
509 if (str1 == str2) {
510 return true;
511 }
512 if (str1->IsUtf16() != str2->IsUtf16()) {
513 return false;
514 }
515 uint32_t str1Len = str1->GetLength();
516 if (str1Len != str2->GetLength()) {
517 return false;
518 }
519 if (str1Len == 0) {
520 return true;
521 }
522
523 uint32_t str1Hash;
524 uint32_t str2Hash;
525 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
526 if (str1Hash != str2Hash) {
527 return false;
528 }
529 }
530
531 auto str1Flat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, str1));
532 auto str2Flat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, str2));
533 return StringsAreEqualSameUtfEncoding(*str1Flat, *str2Flat);
534 }
535
536 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)537 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
538 {
539 if (str1 == str2) {
540 return true;
541 }
542 if (str1->IsUtf16() != str2->IsUtf16()) {
543 return false;
544 }
545 uint32_t str1Len = str1->GetLength();
546 if (str1Len != str2->GetLength()) {
547 return false;
548 }
549 if (str1Len == 0) {
550 return true;
551 }
552
553 uint32_t str1Hash;
554 uint32_t str2Hash;
555 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
556 if (str1Hash != str2Hash) {
557 return false;
558 }
559 }
560 return StringsAreEqualSameUtfEncoding(str1, str2);
561 }
562
563 /* static */
StringsAreEqualUtf8(const EcmaString * str1,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)564 bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
565 bool canBeCompress)
566 {
567 if (canBeCompress != str1->IsUtf8()) {
568 return false;
569 }
570 if (canBeCompress && str1->GetLength() != utf8Len) {
571 return false;
572 }
573 if (canBeCompress) {
574 CVector<uint8_t> buf;
575 Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), utf8Len);
576 Span<const uint8_t> data2(utf8Data, utf8Len);
577 return EcmaString::StringsAreEquals(data1, data2);
578 }
579 CVector<uint16_t> buf;
580 uint32_t length = str1->GetLength();
581 const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
582 return IsUtf8EqualsUtf16(utf8Data, utf8Len, data, length);
583 }
584
585 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)586 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
587 {
588 uint32_t length = str1->GetLength();
589 if (length != utf16Len) {
590 return false;
591 }
592 if (str1->IsUtf8()) {
593 CVector<uint8_t> buf;
594 const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
595 return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
596 } else {
597 CVector<uint16_t> buf;
598 Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
599 Span<const uint16_t> data2(utf16Data, utf16Len);
600 return EcmaString::StringsAreEquals(data1, data2);
601 }
602 }
603
604 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)605 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
606 {
607 ASSERT(dstMax >= count);
608 ASSERT(dst.Size() >= src.Size());
609 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
610 LOG_FULL(FATAL) << "memcpy_s failed";
611 UNREACHABLE();
612 }
613 return true;
614 }
615
ComputeHashcode(uint32_t hashSeed) const616 uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
617 {
618 uint32_t hash;
619 uint32_t length = GetLength();
620 if (IsUtf8()) {
621 CVector<uint8_t> buf;
622 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
623 hash = ComputeHashForData(data, length, hashSeed);
624 } else {
625 CVector<uint16_t> buf;
626 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
627 hash = ComputeHashForData(data, length, hashSeed);
628 }
629 return hash;
630 }
631
632 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)633 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
634 {
635 uint32_t hash = 0;
636 if (canBeCompress) {
637 hash = ComputeHashForData(utf8Data, utf8Len, 0);
638 } else {
639 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
640 CVector<uint16_t> tmpBuffer(utf16Len);
641 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
642 utf16Len, 0);
643 ASSERT(len == utf16Len);
644 hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
645 }
646 return hash;
647 }
648
649 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)650 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
651 {
652 return ComputeHashForData(utf16Data, length, 0);
653 }
654
655 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)656 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
657 uint32_t utf16Len)
658 {
659 // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
660 uint32_t utf8ConvertLength = utf16Len + 1;
661 CVector<uint16_t> tmpBuffer(utf8ConvertLength);
662 auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
663 if (len != utf16Len) {
664 return false;
665 }
666
667 Span<const uint16_t> data1(tmpBuffer.data(), len);
668 Span<const uint16_t> data2(utf16Data, utf16Len);
669 return EcmaString::StringsAreEquals(data1, data2);
670 }
671
ToElementIndex(uint32_t * index)672 bool EcmaString::ToElementIndex(uint32_t *index)
673 {
674 uint32_t len = GetLength();
675 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
676 return false;
677 }
678 if (UNLIKELY(IsUtf16())) {
679 return false;
680 }
681
682 CVector<uint8_t> buf;
683 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
684 uint32_t c = data[0];
685 uint64_t n = 0;
686 if (c == '0') {
687 *index = 0;
688 return len == 1;
689 }
690 if (c > '0' && c <= '9') {
691 n = c - '0';
692 for (uint32_t i = 1; i < len; i++) {
693 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
694 if (c < '0' || c > '9') {
695 return false;
696 }
697 // NOLINTNEXTLINE(readability-magic-numbers)
698 n = n * 10 + (c - '0'); // 10: decimal factor
699 }
700 if (n < JSObject::MAX_ELEMENT_INDEX) {
701 *index = n;
702 return true;
703 }
704 }
705 return false;
706 }
707
ToTypedArrayIndex(uint32_t * index)708 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
709 {
710 uint32_t len = GetLength();
711 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
712 return false;
713 }
714 if (UNLIKELY(IsUtf16())) {
715 return false;
716 }
717
718 CVector<uint8_t> buf;
719 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
720 uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
721 uint64_t n = 0;
722 if (c == '0') {
723 *index = 0;
724 return len == 1;
725 }
726 if (c > '0' && c <= '9') {
727 n = c - '0';
728 for (uint32_t i = 1; i < len; i++) {
729 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
730 if (c >= '0' && c <= '9') {
731 // NOLINTNEXTLINE(readability-magic-numbers)
732 n = n * 10 + (c - '0'); // 10: decimal factor
733 } else if (c == '.') {
734 n = JSObject::MAX_ELEMENT_INDEX;
735 break;
736 } else {
737 return false;
738 }
739 }
740 if (n < JSObject::MAX_ELEMENT_INDEX) {
741 *index = n;
742 return true;
743 } else {
744 *index = JSObject::MAX_ELEMENT_INDEX;
745 return true;
746 }
747 } else if (c == '-') {
748 *index = JSObject::MAX_ELEMENT_INDEX;
749 return true;
750 }
751 return false;
752 }
753
754 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)755 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
756 {
757 uint32_t srcLen = src->GetLength();
758 int32_t start = 0;
759 int32_t end = static_cast<int32_t>(srcLen) - 1;
760
761 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
762 start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
763 }
764 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
765 end = base::StringHelper::GetEnd(data, start, srcLen);
766 }
767 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
768 return res;
769 }
770
771 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)772 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
773 {
774 auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
775 uint32_t srcLength = srcFlat->GetLength();
776 auto factory = vm->GetFactory();
777 if (srcFlat->IsUtf16()) {
778 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat->GetDataUtf16(), srcLength);
779 std::string res = base::StringHelper::ToLower(u16str);
780 return *(factory->NewFromStdString(res));
781 } else {
782 return ConvertUtf8ToLowerOrUpper(vm, srcFlat, true);
783 }
784 }
785
786 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)787 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
788 {
789 auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
790 uint32_t srcLength = srcFlat->GetLength();
791 const char start = 'A';
792 const char end = 'Z';
793 uint32_t upperIndex = srcLength;
794 Span<uint8_t> data(srcFlat->GetDataUtf8Writable(), srcLength);
795 for (uint32_t index = 0; index < srcLength; ++index) {
796 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
797 upperIndex = index;
798 break;
799 }
800 }
801 if (upperIndex == srcLength) {
802 return *src;
803 }
804 return ConvertUtf8ToLowerOrUpper(vm, srcFlat, true, upperIndex);
805 }
806
807 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & srcFlat,bool toLower,uint32_t startIndex)808 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &srcFlat,
809 bool toLower, uint32_t startIndex)
810 {
811 const char start = toLower ? 'A' : 'a';
812 const char end = toLower ? 'Z' : 'z';
813 uint32_t srcLength = srcFlat->GetLength();
814 auto newString = CreateLineString(vm, srcLength, true);
815 Span<uint8_t> data(srcFlat->GetDataUtf8Writable(), srcLength);
816 auto newStringPtr = newString->GetDataUtf8Writable();
817 if (startIndex > 0) {
818 if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
819 LOG_FULL(FATAL) << "memcpy_s failed";
820 UNREACHABLE();
821 }
822 }
823 for (uint32_t index = startIndex; index < srcLength; ++index) {
824 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
825 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
826 } else {
827 *(newStringPtr + index) = data[index];
828 }
829 }
830 return newString;
831 }
832
833 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)834 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
835 {
836 auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
837 uint32_t srcLength = srcFlat->GetLength();
838 auto factory = vm->GetFactory();
839 if (srcFlat->IsUtf16()) {
840 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat->GetDataUtf16(), srcLength);
841 std::string res = base::StringHelper::ToUpper(u16str);
842 return *(factory->NewFromStdString(res));
843 } else {
844 return ConvertUtf8ToLowerOrUpper(vm, srcFlat, false);
845 }
846 }
847
848 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)849 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
850 {
851 auto factory = vm->GetFactory();
852 auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
853 std::u16string utf16 = srcFlat->ToU16String();
854 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
855 return *(factory->NewFromStdString(res));
856 }
857
858 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)859 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
860 {
861 auto factory = vm->GetFactory();
862 auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
863 std::u16string utf16 = srcFlat->ToU16String();
864 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
865 return *(factory->NewFromStdString(res));
866 }
867
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)868 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
869 {
870 auto srcFlat = JSHandle<EcmaString>(thread, Flatten(thread->GetEcmaVM(), src));
871 uint32_t srcLen = srcFlat->GetLength();
872 if (UNLIKELY(srcLen == 0)) {
873 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
874 }
875 if (srcFlat->IsUtf8()) {
876 Span<const uint8_t> data(srcFlat->GetDataUtf8(), srcLen);
877 return TrimBody(thread, srcFlat, data, mode);
878 } else {
879 Span<const uint16_t> data(srcFlat->GetDataUtf16(), srcLen);
880 return TrimBody(thread, srcFlat, data, mode);
881 }
882 }
883
SlowFlatten(const EcmaVM * vm,const JSHandle<TreeEcmaString> & string,MemSpaceType type)884 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<TreeEcmaString> &string, MemSpaceType type)
885 {
886 auto thread = vm->GetJSThread();
887 ASSERT(EcmaString::Cast(string->GetSecond())->GetLength() != 0);
888
889 uint32_t length = string->GetLength();
890 EcmaString *result = nullptr;
891 if (string->IsUtf8()) {
892 result = CreateLineStringWithSpaceType(vm, length, true, type);
893 WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
894 } else {
895 result = CreateLineStringWithSpaceType(vm, length, false, type);
896 WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
897 }
898 string->SetFirst(thread, JSTaggedValue(result));
899 string->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
900 return result;
901 }
902
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)903 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
904 {
905 EcmaString *s = *string;
906 if (s->IsLineOrConstantString()) {
907 return s;
908 }
909 if (s->IsTreeString()) {
910 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
911 if (!tree->IsFlat()) {
912 return SlowFlatten(vm, tree, type);
913 }
914 s = EcmaString::Cast(tree->GetFirst());
915 }
916 return s;
917 }
918
FlattenNoGC(const EcmaVM * vm,EcmaString * string)919 EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
920 {
921 DISALLOW_GARBAGE_COLLECTION;
922 if (string->IsLineOrConstantString()) {
923 return string;
924 }
925 if (string->IsTreeString()) {
926 TreeEcmaString *tree = TreeEcmaString::Cast(string);
927 if (tree->IsFlat()) {
928 string = EcmaString::Cast(tree->GetFirst());
929 } else {
930 uint32_t length = tree->GetLength();
931 EcmaString *result = nullptr;
932 if (tree->IsUtf8()) {
933 result = CreateLineStringNoGC(vm, length, true);
934 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
935 } else {
936 result = CreateLineStringNoGC(vm, length, false);
937 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
938 }
939 tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
940 tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
941 return result;
942 }
943 }
944 return string;
945 }
946
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)947 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
948 {
949 ASSERT(src->IsUtf8());
950 uint32_t length = src->GetLength();
951 EcmaString *string = const_cast<EcmaString *>(src);
952 if (string->IsTreeString()) {
953 if (string->IsFlat()) {
954 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
955 } else {
956 buf.reserve(length);
957 WriteToFlat(string, buf.data(), length);
958 return buf.data();
959 }
960 }
961 return string->GetDataUtf8();
962 }
963
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)964 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
965 {
966 ASSERT(src->IsUtf16());
967 uint32_t length = src->GetLength();
968 EcmaString *string = const_cast<EcmaString *>(src);
969 if (string->IsTreeString()) {
970 if (string->IsFlat()) {
971 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
972 } else {
973 buf.reserve(length);
974 WriteToFlat(string, buf.data(), length);
975 return buf.data();
976 }
977 }
978 return string->GetDataUtf16();
979 }
980
EcmaStringAccessor(EcmaString * string)981 EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
982 {
983 ASSERT(string != nullptr);
984 string_ = string;
985 }
986
EcmaStringAccessor(TaggedObject * obj)987 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
988 {
989 ASSERT(obj != nullptr);
990 string_ = EcmaString::Cast(obj);
991 }
992
EcmaStringAccessor(JSTaggedValue value)993 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
994 {
995 ASSERT(value.IsString());
996 string_ = EcmaString::Cast(value.GetTaggedObject());
997 }
998
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)999 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1000 : string_(*strHandle)
1001 {
1002 }
1003
ToStdString(StringConvertedUsage usage)1004 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1005 {
1006 if (string_ == nullptr) {
1007 return "";
1008 }
1009 bool modify = (usage != StringConvertedUsage::PRINT);
1010 CVector<uint8_t> buf;
1011 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1012 std::string res;
1013 res.reserve(sp.size());
1014 for (const auto &c : sp) {
1015 res.push_back(c);
1016 }
1017 return res;
1018 }
1019
ToCString(StringConvertedUsage usage)1020 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
1021 {
1022 if (string_ == nullptr) {
1023 return "";
1024 }
1025 bool modify = (usage != StringConvertedUsage::PRINT);
1026 CVector<uint8_t> buf;
1027 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1028 CString res;
1029 res.reserve(sp.size());
1030 for (const auto &c : sp) {
1031 res.push_back(c);
1032 }
1033 return res;
1034 }
1035 } // namespace panda::ecmascript
1036