1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_STRING_H 17 #define ECMASCRIPT_STRING_H 18 19 #include <cstddef> 20 #include <cstdint> 21 #include <cstring> 22 23 #include "ecmascript/base/utf_helper.h" 24 #include "ecmascript/common.h" 25 #include "ecmascript/ecma_macros.h" 26 #include "ecmascript/js_hclass.h" 27 #include "ecmascript/js_tagged_value.h" 28 #include "ecmascript/mem/barriers.h" 29 #include "ecmascript/mem/space.h" 30 #include "ecmascript/mem/tagged_object.h" 31 #include "ecmascript/platform/ecma_string_hash_helper.h" 32 33 #include "libpandabase/macros.h" 34 #include "securec.h" 35 #include "unicode/locid.h" 36 37 namespace panda { 38 namespace test { 39 class EcmaStringEqualsTest; 40 } 41 namespace ecmascript { 42 template<typename T> 43 class JSHandle; 44 class JSPandaFile; 45 class EcmaVM; 46 class LineEcmaString; 47 class ConstantString; 48 class TreeEcmaString; 49 class SlicedString; 50 class FlatStringInfo; 51 52 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 53 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length) \ 54 if ((length) >= MAX_STRING_LENGTH) { \ 55 THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr); \ 56 } 57 58 class EcmaString : public TaggedObject { 59 /* Mix Hash Code: -- { 0 | [31 bits raw hash code] } computed through string 60 \ { 1 | [31 bits integer numbers] } fastpath for string to number 61 */ 62 public: 63 CAST_CHECK(EcmaString, IsString); 64 65 static constexpr uint32_t IS_INTEGER_MASK = 1U << 31; 66 static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1; 67 static constexpr uint32_t STRING_INTERN_BIT = 0x2; 68 static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning 69 static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U; 70 static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF; 71 static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9; 72 73 static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize(); 74 // In last bit of mix_length we store if this string is compressed or not. 75 ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET) 76 // In last bit of mix_hash we store if this string is small-integer number or not. 77 ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE) 78 79 enum CompressedStatus { 80 STRING_COMPRESSED, 81 STRING_UNCOMPRESSED, 82 }; 83 84 enum IsIntegerStatus { 85 NOT_INTEGER = 0, 86 IS_INTEGER, 87 }; 88 89 enum TrimMode : uint8_t { 90 TRIM, 91 TRIM_START, 92 TRIM_END, 93 }; 94 95 enum ConcatOptStatus { 96 BEGIN_STRING_ADD = 1, 97 IN_STRING_ADD, 98 CONFIRMED_IN_STRING_ADD, 99 END_STRING_ADD, 100 INVALID_STRING_ADD, 101 HAS_BACKING_STORE, 102 }; 103 104 private: 105 friend class EcmaStringAccessor; 106 friend class LineEcmaString; 107 friend class ConstantString; 108 friend class TreeEcmaString; 109 friend class SlicedString; 110 friend class FlatStringInfo; 111 friend class NameDictionary; 112 friend class panda::test::EcmaStringEqualsTest; 113 114 static EcmaString *CreateEmptyString(const EcmaVM *vm); 115 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 116 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 117 uint32_t idOffset = 0); 118 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 119 uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 120 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 121 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 122 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 123 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 124 static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 125 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 126 static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed); 127 static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm, 128 size_t length, bool compressed, MemSpaceType type); 129 static EcmaString *CreateTreeString(const EcmaVM *vm, 130 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed); 131 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, 132 size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0); 133 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, 134 const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 135 template<typename T1, typename T2> 136 static uint32_t CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst, 137 const T2 *dataSecond, size_t sizeSecond); 138 static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, 139 const JSHandle<EcmaString> &secondString); 140 static uint32_t CalculateConcatHashCode(const JSHandle<EcmaString> &firstString, 141 const JSHandle<EcmaString> &secondString); 142 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 143 uint32_t length, bool compressed); 144 static EcmaString *FastSubString(const EcmaVM *vm, 145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 146 static bool SubStringIsUtf8(const EcmaVM *vm, 147 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 148 static EcmaString *GetSlicedString(const EcmaVM *vm, 149 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 150 static EcmaString *GetSubString(const EcmaVM *vm, 151 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 152 // require src is LineString 153 // not change src data structure 154 static inline EcmaString *FastSubUtf8String(const EcmaVM *vm, 155 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 156 // require src is LineString 157 // not change src data structure 158 static inline EcmaString *FastSubUtf16String(const EcmaVM *vm, 159 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 160 inline void TrimLineString(const JSThread *thread, uint32_t newLength); IsUtf8()161 inline bool IsUtf8() const 162 { 163 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED; 164 } 165 IsUtf16()166 inline bool IsUtf16() const 167 { 168 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED; 169 } 170 IsInteger()171 inline bool IsInteger() 172 { 173 return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK; 174 } 175 176 // require is LineString 177 inline uint16_t *GetData() const; 178 inline const uint8_t *GetDataUtf8() const; 179 inline const uint16_t *GetDataUtf16() const; 180 181 // require is LineString 182 inline uint8_t *GetDataUtf8Writable(); 183 inline uint16_t *GetDataUtf16Writable(); 184 GetLength()185 inline uint32_t GetLength() const 186 { 187 return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT; 188 } 189 190 inline void SetLength(uint32_t length, bool compressed = false) 191 { 192 ASSERT(length < MAX_STRING_LENGTH); 193 // Use 0u for compressed/utf8 expression 194 SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED)); 195 } 196 GetRawHashcode()197 inline uint32_t GetRawHashcode() const 198 { 199 return GetMixHashcode() & (~IS_INTEGER_MASK); 200 } 201 MixHashcode(uint32_t hashcode,bool isInteger)202 static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger) 203 { 204 return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK)); 205 } 206 207 inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false) 208 { 209 // Use 0u for not integer string's expression 210 SetMixHashcode(MixHashcode(hashcode, isInteger)); 211 } 212 213 inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const; 214 SetIsInternString()215 inline void SetIsInternString() 216 { 217 SetMixLength(GetMixLength() | STRING_INTERN_BIT); 218 } 219 IsInternString()220 inline bool IsInternString() const 221 { 222 return (GetMixLength() & STRING_INTERN_BIT) != 0; 223 } 224 ClearInternStringFlag()225 inline void ClearInternStringFlag() 226 { 227 SetMixLength(GetMixLength() & ~STRING_INTERN_BIT); 228 } 229 TryGetHashCode(uint32_t * hash)230 inline bool TryGetHashCode(uint32_t *hash) 231 { 232 uint32_t hashcode = GetMixHashcode(); 233 if (hashcode == 0 && GetLength() != 0) { 234 return false; 235 } 236 *hash = hashcode; 237 return true; 238 } 239 GetIntegerCode()240 inline uint32_t GetIntegerCode() 241 { 242 ASSERT(GetMixHashcode() & IS_INTEGER_MASK); 243 return GetRawHashcode(); 244 } 245 246 // not change this data structure. 247 // if string is not flat, this func has low efficiency. GetHashcode()248 uint32_t PUBLIC_API GetHashcode() 249 { 250 uint32_t hashcode = GetMixHashcode(); 251 // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0. 252 if (hashcode == 0 && GetLength() != 0) { 253 hashcode = ComputeHashcode(); 254 SetMixHashcode(hashcode); 255 } 256 return hashcode; 257 } 258 259 template<typename T> IsDecimalDigitChar(const T c)260 inline static bool IsDecimalDigitChar(const T c) 261 { 262 return (c >= '0' && c <= '9'); 263 } 264 ComputeIntegerHash(uint32_t * num,uint8_t c)265 static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c) 266 { 267 if (!IsDecimalDigitChar(c)) { 268 return false; 269 } 270 int charDate = c - '0'; 271 *num = (*num) * 10 + charDate; // 10: decimal factor 272 return true; 273 } 274 275 bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const; 276 277 template<typename T> HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)278 static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed) 279 { 280 ASSERT(size >= 0); 281 if (hashSeed == 0) { 282 if (IsDecimalDigitChar(data[0]) && data[0] != '0') { 283 uint32_t num = data[0] - '0'; 284 uint32_t i = 1; 285 do { 286 if (i == size) { 287 // compute mix hash 288 if (num <= MAX_INTEGER_HASH_NUMBER) { 289 *hash = MixHashcode(num, IS_INTEGER); 290 return true; 291 } 292 return false; 293 } 294 } while (ComputeIntegerHash(&num, data[i++])); 295 } 296 if (size == 1 && (data[0] == '0')) { 297 *hash = MixHashcode(0, IS_INTEGER); 298 return true; 299 } 300 } else { 301 if (IsDecimalDigitChar(data[0])) { 302 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor 303 uint32_t i = 1; 304 do { 305 if (i == size) { 306 // compute mix hash 307 if (num <= MAX_INTEGER_HASH_NUMBER) { 308 *hash = MixHashcode(num, IS_INTEGER); 309 return true; 310 } 311 return false; 312 } 313 } while (ComputeIntegerHash(&num, data[i++])); 314 } 315 } 316 return false; 317 } 318 319 // not change this data structure. 320 // if string is not flat, this func has low efficiency. 321 uint32_t PUBLIC_API ComputeHashcode() const; 322 std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const; 323 uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const; 324 325 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress); 326 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length); 327 328 template<bool verify = true> 329 uint16_t At(int32_t index) const; 330 331 // require is LineString 332 void WriteData(uint32_t index, uint16_t src); 333 334 // can change left and right data structure 335 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right); 336 337 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 338 const JSHandle<EcmaString>& right, uint32_t offset); 339 340 // Check that two spans are equal. Should have the same length. 341 /* static */ 342 template<typename T, typename T1> StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)343 static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2) 344 { 345 ASSERT(str1.Size() <= str2.Size()); 346 size_t size = str1.Size(); 347 if constexpr (std::is_same_v<T, T1>) { 348 return !memcmp(str1.data(), str2.data(), size * sizeof(T)); 349 } else { 350 for (size_t i = 0; i < size; i++) { 351 auto left = static_cast<uint16_t>(str1[i]); 352 auto right = static_cast<uint16_t>(str2[i]); 353 if (left != right) { 354 return false; 355 } 356 } 357 return true; 358 } 359 } 360 361 // Converts utf8Data to utf16 and compare it with given utf16_data. 362 static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, 363 uint32_t utf16Len); 364 // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence. 365 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2); 366 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 367 static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, 368 const JSHandle<EcmaString> &str2); 369 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 370 static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2); 371 // Two strings have the same type of utf encoding format. 372 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2); 373 static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2); 374 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 375 // not change str1 data structure. 376 // if str1 is not flat, this func has low efficiency. 377 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 378 bool canBeCompress); 379 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 380 // not change str1 data structure. 381 // if str1 is not flat, this func has low efficiency. 382 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len); 383 384 // can change receiver and search data structure 385 static int32_t IndexOf(const EcmaVM *vm, 386 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 387 388 // can change receiver and search data structure 389 static int32_t LastIndexOf(const EcmaVM *vm, 390 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 391 392 inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const 393 { 394 if (maxLength == 0) { 395 return 1; // maxLength was -1 at napi 396 } 397 size_t length = GetLength(); 398 if (length > maxLength) { 399 return 0; 400 } 401 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 402 buf[maxLength - 1] = '\0'; 403 // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8 404 return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1; // add place for zero in the end 405 } 406 407 // It allows user to copy into buffer even if maxLength < length 408 inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const 409 { 410 if (maxLength == 0) { 411 return 1; // maxLength was -1 at napi 412 } 413 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 414 buf[maxLength - 1] = '\0'; 415 return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1; 416 } 417 CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)418 size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const 419 { 420 if (IsUtf16()) { 421 CVector<uint16_t> tmpBuf; 422 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 423 if (length > bufLength) { 424 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) { 425 LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength"; 426 UNREACHABLE(); 427 } 428 return bufLength; 429 } 430 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 431 LOG_FULL(FATAL) << "memcpy_s failed"; 432 UNREACHABLE(); 433 } 434 return length; 435 } 436 CVector<uint8_t> tmpBuf; 437 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf); 438 if (length > bufLength) { 439 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength); 440 } 441 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength); 442 } 443 444 // It allows user to copy into buffer even if maxLength < length WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)445 inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const 446 { 447 if (bufLength == 0) { 448 return 0; 449 } 450 // Returns a number representing a valid backrest length. 451 return CopyDataToUtf16(buf, targetLength, bufLength); 452 } 453 WriteOneByte(uint8_t * buf,size_t maxLength)454 size_t WriteOneByte(uint8_t *buf, size_t maxLength) const 455 { 456 if (maxLength == 0) { 457 return 0; 458 } 459 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 460 buf[maxLength - 1] = '\0'; 461 uint32_t length = GetLength(); 462 if (!IsUtf16()) { 463 CVector<uint8_t> tmpBuf; 464 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 465 if (length > maxLength) { 466 length = maxLength; 467 } 468 if (memcpy_s(buf, maxLength, data, length) != EOK) { 469 LOG_FULL(FATAL) << "memcpy_s failed when write one byte"; 470 UNREACHABLE(); 471 } 472 return length; 473 } 474 475 CVector<uint16_t> tmpBuf; 476 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 477 if (length > maxLength) { 478 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength); 479 } 480 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength); 481 } 482 483 size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength, 484 bool modify = true, bool isWriteBuffer = false) const 485 { 486 uint32_t len = GetLength(); 487 if (start + length > len) { 488 return 0; 489 } 490 if (!IsUtf16()) { 491 if (length > std::numeric_limits<size_t>::max() / 2 - 1) { // 2: half 492 LOG_FULL(FATAL) << " length is higher than half of size_t::max"; 493 UNREACHABLE(); 494 } 495 CVector<uint8_t> tmpBuf; 496 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start; 497 // Only copy maxLength number of chars into buffer if length > maxLength 498 auto dataLen = std::min(length, maxLength); 499 std::copy(data, data + dataLen, buf); 500 return dataLen; 501 } 502 CVector<uint16_t> tmpBuf; 503 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 504 if (length > maxLength) { 505 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, 506 modify, isWriteBuffer); 507 } 508 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, 509 modify, isWriteBuffer); 510 } 511 CopyDataUtf16(uint16_t * buf,uint32_t maxLength)512 inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const 513 { 514 uint32_t length = GetLength(); 515 if (length > maxLength) { 516 return 0; 517 } 518 if (IsUtf16()) { 519 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 520 CVector<uint16_t> tmpBuf; 521 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 522 if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 523 LOG_FULL(FATAL) << "memcpy_s failed"; 524 UNREACHABLE(); 525 } 526 return length; 527 } 528 CVector<uint8_t> tmpBuf; 529 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 530 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength); 531 } 532 533 std::u16string ToU16String(uint32_t len = 0); 534 ToOneByteDataForced()535 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 536 { 537 uint8_t *buf = nullptr; 538 auto length = GetLength(); 539 if (IsUtf16()) { 540 auto size = length * sizeof(uint16_t); 541 buf = new uint8_t[size](); 542 CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length); 543 } else { 544 buf = new uint8_t[length + 1](); 545 CopyDataUtf8(buf, length + 1); 546 } 547 return std::unique_ptr<uint8_t[]>(buf); 548 } 549 550 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false) 551 { 552 Span<const uint8_t> str; 553 uint32_t strLen = GetLength(); 554 if (UNLIKELY(IsUtf16())) { 555 CVector<uint16_t> tmpBuf; 556 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 557 ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0); 558 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1; 559 buf.reserve(len); 560 len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8); 561 str = Span<const uint8_t>(buf.data(), len); 562 } else { 563 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 564 str = Span<const uint8_t>(data, strLen); 565 } 566 return str; 567 } 568 569 Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true) 570 { 571 Span<const uint8_t> str; 572 uint32_t strLen = GetLength(); 573 if (UNLIKELY(IsUtf16())) { 574 CVector<uint16_t> tmpBuf; 575 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 576 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1; 577 buf.reserve(len); 578 len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify); 579 str = Span<const uint8_t>(buf.data(), len); 580 } else { 581 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 582 str = Span<const uint8_t>(data, strLen); 583 } 584 return str; 585 } 586 587 inline Span<const uint8_t> FastToUtf8Span() const; 588 TryToGetInteger(uint32_t * result)589 bool TryToGetInteger(uint32_t *result) 590 { 591 if (!IsInteger()) { 592 return false; 593 } 594 ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE); 595 *result = GetIntegerCode(); 596 return true; 597 } 598 599 // using integer number set into hash TryToSetIntegerHash(int32_t num)600 inline bool TryToSetIntegerHash(int32_t num) 601 { 602 uint32_t hashcode = GetMixHashcode(); 603 if (hashcode == 0 && GetLength() != 0) { 604 SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER); 605 return true; 606 } 607 return false; 608 } 609 610 void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 611 612 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); 613 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); 614 static bool CanBeCompressed(const EcmaString *string); 615 616 bool PUBLIC_API ToElementIndex(uint32_t *index); 617 618 bool ToInt(int32_t *index, bool *negative); 619 620 bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data); 621 622 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index); 623 624 template<bool isLower> 625 static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src); 626 627 template<bool isLower> 628 static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 629 630 template<typename T> 631 static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode); 632 633 static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM); 634 635 // single char copy for loop 636 template<typename DstType, typename SrcType> CopyChars(DstType * dst,SrcType * src,uint32_t count)637 static void CopyChars(DstType *dst, SrcType *src, uint32_t count) 638 { 639 Span<SrcType> srcSp(src, count); 640 Span<DstType> dstSp(dst, count); 641 for (uint32_t i = 0; i < count; i++) { 642 dstSp[i] = srcSp[i]; 643 } 644 } 645 646 // memory block copy 647 template<typename T> 648 static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count); 649 650 // To change the hash algorithm of EcmaString, please modify EcmaString::CalculateConcatHashCode 651 // and EcmaStringHashHelper::ComputeHashForDataPlatform simultaneously!! 652 template <typename T> ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)653 static uint32_t ComputeHashForData(const T *data, size_t size, 654 uint32_t hashSeed) 655 { 656 if (size <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) { 657 uint32_t hash = hashSeed; 658 for (uint32_t i = 0; i < size ; i++) { 659 hash = (hash << static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT)) - hash + data[i]; 660 } 661 return hash; 662 } 663 return EcmaStringHashHelper::ComputeHashForDataPlatform(data, size, hashSeed); 664 } 665 IsASCIICharacter(uint16_t data)666 static bool IsASCIICharacter(uint16_t data) 667 { 668 if (data == 0) { 669 return false; 670 } 671 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 672 return data <= base::utf_helper::UTF8_1B_MAX; 673 } 674 675 template<typename T1, typename T2> 676 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max); 677 678 template<typename T1, typename T2> 679 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos); 680 681 bool IsFlat() const; 682 IsLineString()683 bool IsLineString() const 684 { 685 return GetClass()->IsLineString(); 686 } IsConstantString()687 bool IsConstantString() const 688 { 689 return GetClass()->IsConstantString(); 690 } IsSlicedString()691 bool IsSlicedString() const 692 { 693 return GetClass()->IsSlicedString(); 694 } IsTreeString()695 bool IsTreeString() const 696 { 697 return GetClass()->IsTreeString(); 698 } NotTreeString()699 bool NotTreeString() const 700 { 701 return !IsTreeString(); 702 } IsLineOrConstantString()703 bool IsLineOrConstantString() const 704 { 705 auto hclass = GetClass(); 706 return hclass->IsLineString() || hclass->IsConstantString(); 707 } 708 GetStringType()709 JSType GetStringType() const 710 { 711 JSType type = GetClass()->GetObjectType(); 712 ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST); 713 return type; 714 } 715 716 template <typename Char> 717 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength); 718 719 template <typename Char> 720 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos); 721 722 static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf); 723 724 static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src); 725 726 static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf); 727 728 static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src); 729 730 // string must be not flat 731 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type); 732 733 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 734 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 735 736 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 737 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 738 739 static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string); 740 741 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 742 743 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 744 745 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 746 747 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 748 749 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 750 751 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 752 753 static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, 754 bool toLower, uint32_t startIndex = 0); 755 }; 756 757 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data 758 class LineEcmaString : public EcmaString { 759 public: 760 static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; 761 static constexpr uint32_t INIT_LENGTH_TIMES = 4; 762 // DATA_OFFSET: the string data stored after the string header. 763 // Data can be stored in utf8 or utf16 form according to compressed bit. 764 static constexpr size_t DATA_OFFSET = EcmaString::SIZE; // DATA_OFFSET equal to Empty String size 765 766 CAST_CHECK(LineEcmaString, IsLineString); 767 768 DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength()); 769 Cast(EcmaString * str)770 static LineEcmaString *Cast(EcmaString *str) 771 { 772 return static_cast<LineEcmaString *>(str); 773 } 774 Cast(const EcmaString * str)775 static LineEcmaString *Cast(const EcmaString *str) 776 { 777 return LineEcmaString::Cast(const_cast<EcmaString *>(str)); 778 } 779 ComputeSizeUtf8(uint32_t utf8Len)780 static size_t ComputeSizeUtf8(uint32_t utf8Len) 781 { 782 return DATA_OFFSET + utf8Len; 783 } 784 ComputeSizeUtf16(uint32_t utf16Len)785 static size_t ComputeSizeUtf16(uint32_t utf16Len) 786 { 787 return DATA_OFFSET + utf16Len * sizeof(uint16_t); 788 } 789 ObjectSize(EcmaString * str)790 static size_t ObjectSize(EcmaString *str) 791 { 792 uint32_t length = str->GetLength(); 793 return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length); 794 } 795 DataSize(EcmaString * str)796 static size_t DataSize(EcmaString *str) 797 { 798 uint32_t length = str->GetLength(); 799 return str->IsUtf16() ? length * sizeof(uint16_t) : length; 800 } 801 GetPointerLength()802 size_t GetPointerLength() 803 { 804 size_t byteSize = DataSize(this); 805 return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType); 806 } 807 GetData()808 uint16_t *GetData() const 809 { 810 return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET); 811 } 812 813 template<bool verify = true> Get(int32_t index)814 uint16_t Get(int32_t index) const 815 { 816 int32_t length = static_cast<int32_t>(GetLength()); 817 if (verify) { 818 if ((index < 0) || (index >= length)) { 819 return 0; 820 } 821 } 822 if (!IsUtf16()) { 823 Span<const uint8_t> sp(GetDataUtf8(), length); 824 return sp[index]; 825 } 826 Span<const uint16_t> sp(GetDataUtf16(), length); 827 return sp[index]; 828 } 829 Set(uint32_t index,uint16_t src)830 void Set(uint32_t index, uint16_t src) 831 { 832 ASSERT(index < GetLength()); 833 if (IsUtf8()) { 834 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 835 *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src); 836 } else { 837 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 838 *(GetData() + index) = src; 839 } 840 } 841 }; 842 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0); 843 844 class ConstantString : public EcmaString { 845 public: 846 static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE; 847 // ConstantData is the pointer of const string in the pandafile. 848 // String in pandafile is encoded by the utf8 format. 849 // EntityId is normally the uint32_t index in the pandafile. 850 // When the pandafile is to be removed, EntityId will become -1. 851 // The real string data will be reloacted into bytearray and stored in RelocatedData. 852 // ConstantData will also point at data of bytearray data. 853 ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 854 ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET); 855 ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET); 856 DEFINE_ALIGN_SIZE(LAST_OFFSET); 857 858 CAST_CHECK(ConstantString, IsConstantString); 859 DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 860 Cast(EcmaString * str)861 static ConstantString *Cast(EcmaString *str) 862 { 863 return static_cast<ConstantString *>(str); 864 } 865 Cast(const EcmaString * str)866 static ConstantString *Cast(const EcmaString *str) 867 { 868 return ConstantString::Cast(const_cast<EcmaString *>(str)); 869 } 870 ObjectSize()871 static size_t ObjectSize() 872 { 873 return ConstantString::SIZE; 874 } 875 GetEntityIdU32()876 uint32_t GetEntityIdU32() const 877 { 878 ASSERT(GetEntityId() >= 0); 879 return static_cast<uint32_t>(GetEntityId()); 880 } 881 882 template<bool verify = true> Get(int32_t index)883 uint16_t Get(int32_t index) const 884 { 885 int32_t length = static_cast<int32_t>(GetLength()); 886 if (verify) { 887 if ((index < 0) || (index >= length)) { 888 return 0; 889 } 890 } 891 ASSERT(IsUtf8()); 892 Span<const uint8_t> sp(GetConstantData(), length); 893 return sp[index]; 894 } 895 }; 896 897 // The substrings of another string use SlicedString to describe. 898 class SlicedString : public EcmaString { 899 public: 900 static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13; 901 static constexpr size_t PARENT_OFFSET = EcmaString::SIZE; 902 ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET); 903 ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG); 904 ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE); 905 906 DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET); 907 908 CAST_CHECK(SlicedString, IsSlicedString); 909 private: 910 friend class EcmaString; Cast(EcmaString * str)911 static SlicedString *Cast(EcmaString *str) 912 { 913 return static_cast<SlicedString *>(str); 914 } 915 Cast(const EcmaString * str)916 static SlicedString *Cast(const EcmaString *str) 917 { 918 return SlicedString::Cast(const_cast<EcmaString *>(str)); 919 } 920 ObjectSize()921 static size_t ObjectSize() 922 { 923 return SlicedString::SIZE; 924 } 925 926 // Minimum length for a sliced string 927 template<bool verify = true> Get(int32_t index)928 uint16_t Get(int32_t index) const 929 { 930 int32_t length = static_cast<int32_t>(GetLength()); 931 if (verify) { 932 if ((index < 0) || (index >= length)) { 933 return 0; 934 } 935 } 936 EcmaString *parent = EcmaString::Cast(GetParent()); 937 if (parent->IsLineString()) { 938 if (parent->IsUtf8()) { 939 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length); 940 return sp[index]; 941 } 942 Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length); 943 return sp[index]; 944 } 945 Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length); 946 return sp[index]; 947 } 948 }; 949 950 class TreeEcmaString : public EcmaString { 951 public: 952 // Minimum length for a tree string 953 static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13; 954 955 static constexpr size_t FIRST_OFFSET = EcmaString::SIZE; 956 ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET); 957 ACCESSORS(Second, SECOND_OFFSET, SIZE); 958 959 DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE); 960 961 CAST_CHECK(TreeEcmaString, IsTreeString); 962 Cast(EcmaString * str)963 static TreeEcmaString *Cast(EcmaString *str) 964 { 965 return static_cast<TreeEcmaString *>(str); 966 } 967 Cast(const EcmaString * str)968 static TreeEcmaString *Cast(const EcmaString *str) 969 { 970 return TreeEcmaString::Cast(const_cast<EcmaString *>(str)); 971 } 972 IsFlat()973 bool IsFlat() const 974 { 975 auto strSecond = EcmaString::Cast(GetSecond()); 976 return strSecond->GetLength() == 0; 977 } 978 979 template<bool verify = true> Get(int32_t index)980 uint16_t Get(int32_t index) const 981 { 982 int32_t length = static_cast<int32_t>(GetLength()); 983 if (verify) { 984 if ((index < 0) || (index >= length)) { 985 return 0; 986 } 987 } 988 989 if (IsFlat()) { 990 EcmaString *first = EcmaString::Cast(GetFirst()); 991 return first->At<verify>(index); 992 } 993 EcmaString *string = const_cast<TreeEcmaString *>(this); 994 while (true) { 995 if (string->IsTreeString()) { 996 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 997 if (static_cast<int32_t>(first->GetLength()) > index) { 998 string = first; 999 } else { 1000 index -= static_cast<int32_t>(first->GetLength()); 1001 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond()); 1002 } 1003 } else { 1004 return string->At<verify>(index); 1005 } 1006 } 1007 UNREACHABLE(); 1008 } 1009 }; 1010 1011 // FlatStringInfo holds an EcmaString* instead of a JSHandle. If a GC occurs during its usage period, 1012 // it may cause the pointer to become invalid, necessitating the pointer to be reset. 1013 class FlatStringInfo { 1014 public: FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)1015 FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string), 1016 startIndex_(startIndex), 1017 length_(length) {} IsUtf8()1018 bool IsUtf8() const 1019 { 1020 return string_->IsUtf8(); 1021 } 1022 IsUtf16()1023 bool IsUtf16() const 1024 { 1025 return string_->IsUtf16(); 1026 } 1027 GetString()1028 EcmaString *GetString() const 1029 { 1030 return string_; 1031 } 1032 SetString(EcmaString * string)1033 void SetString(EcmaString *string) 1034 { 1035 string_ = string; 1036 } 1037 GetStartIndex()1038 uint32_t GetStartIndex() const 1039 { 1040 return startIndex_; 1041 } 1042 SetStartIndex(uint32_t index)1043 void SetStartIndex(uint32_t index) 1044 { 1045 startIndex_ = index; 1046 } 1047 GetLength()1048 uint32_t GetLength() const 1049 { 1050 return length_; 1051 } 1052 1053 const uint8_t *GetDataUtf8() const; 1054 const uint16_t *GetDataUtf16() const; 1055 uint8_t *GetDataUtf8Writable() const; 1056 uint16_t *GetDataUtf16Writable() const; 1057 std::u16string ToU16String(uint32_t len = 0); 1058 private: 1059 EcmaString *string_ {nullptr}; 1060 uint32_t startIndex_ {0}; 1061 uint32_t length_ {0}; 1062 }; 1063 1064 // if you want to use functions of EcmaString, please not use directly, 1065 // and use functions of EcmaStringAccessor alternatively. 1066 // eg: EcmaString *str = ***; str->GetLength() -----> EcmaStringAccessor(str).GetLength() 1067 class PUBLIC_API EcmaStringAccessor { 1068 public: EcmaStringAccessor(EcmaString * string)1069 explicit inline EcmaStringAccessor(EcmaString *string) 1070 { 1071 ASSERT(string != nullptr); 1072 string_ = string; 1073 } 1074 1075 explicit EcmaStringAccessor(TaggedObject *obj); 1076 1077 explicit EcmaStringAccessor(JSTaggedValue value); 1078 1079 explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle); 1080 CalculateAllConcatHashCode(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)1081 static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, 1082 const JSHandle<EcmaString> &secondString) 1083 { 1084 return EcmaString::CalculateAllConcatHashCode(firstString, secondString); 1085 } 1086 1087 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 1088 CreateEmptyString(const EcmaVM * vm)1089 static EcmaString *CreateEmptyString(const EcmaVM *vm) 1090 { 1091 return EcmaString::CreateEmptyString(vm); 1092 } 1093 1094 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress, 1095 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 1096 uint32_t idOffset = 0) 1097 { 1098 return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset); 1099 } 1100 1101 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1102 uint32_t offset, uint32_t utf8Len, 1103 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1104 { 1105 return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type); 1106 } 1107 1108 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length, 1109 bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0) 1110 { 1111 return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset); 1112 } 1113 1114 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 1115 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1116 { 1117 return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type); 1118 } 1119 1120 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 1121 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1122 { 1123 return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type); 1124 } 1125 1126 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle, 1127 const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1128 { 1129 return EcmaString::Concat(vm, str1Handle, str2Handle, type); 1130 } 1131 CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1132 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 1133 uint32_t length, bool compressed) 1134 { 1135 return EcmaString::CopyStringToOldSpace(vm, original, length, compressed); 1136 } 1137 1138 // can change src data structure FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1139 static EcmaString *FastSubString(const EcmaVM *vm, 1140 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1141 { 1142 return EcmaString::FastSubString(vm, src, start, length); 1143 } SubStringIsUtf8(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1144 static bool SubStringIsUtf8(const EcmaVM *vm, 1145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1146 { 1147 return EcmaString::SubStringIsUtf8(vm, src, start, length); 1148 } 1149 // get GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1150 static EcmaString *GetSubString(const EcmaVM *vm, 1151 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1152 { 1153 return EcmaString::GetSubString(vm, src, start, length); 1154 } 1155 IsUtf8()1156 bool IsUtf8() const 1157 { 1158 return string_->IsUtf8(); 1159 } 1160 IsUtf16()1161 bool IsUtf16() const 1162 { 1163 return string_->IsUtf16(); 1164 } 1165 GetLength()1166 uint32_t GetLength() const 1167 { 1168 return string_->GetLength(); 1169 } 1170 1171 // require is LineString 1172 inline size_t GetUtf8Length(bool isGetBufferSize = false) const; 1173 ObjectSize()1174 size_t ObjectSize() const 1175 { 1176 if (string_->IsLineString()) { 1177 return LineEcmaString::ObjectSize(string_); 1178 } if (string_->IsConstantString()) { 1179 return ConstantString::ObjectSize(); 1180 } else { 1181 return TreeEcmaString::SIZE; 1182 } 1183 } 1184 1185 // For TreeString, the calculation result is size of LineString correspondingly. GetFlatStringSize()1186 size_t GetFlatStringSize() const 1187 { 1188 if (string_->IsConstantString()) { 1189 return ConstantString::ObjectSize(); 1190 } 1191 return LineEcmaString::ObjectSize(string_); 1192 } 1193 IsInternString()1194 bool IsInternString() const 1195 { 1196 return string_->IsInternString(); 1197 } 1198 SetInternString()1199 void SetInternString() 1200 { 1201 string_->SetIsInternString(); 1202 } 1203 ClearInternString()1204 void ClearInternString() 1205 { 1206 string_->ClearInternStringFlag(); 1207 } 1208 1209 // require is LineString 1210 // It's Utf8 format, but without 0 in the end. 1211 inline const uint8_t *GetDataUtf8(); 1212 1213 // require is LineString 1214 inline const uint16_t *GetDataUtf16(); 1215 1216 // not change string data structure. 1217 // if string is not flat, this func has low efficiency. 1218 std::u16string ToU16String(uint32_t len = 0) 1219 { 1220 return string_->ToU16String(len); 1221 } 1222 1223 // not change string data structure. 1224 // if string is not flat, this func has low efficiency. ToOneByteDataForced()1225 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 1226 { 1227 return string_->ToOneByteDataForced(); 1228 } 1229 1230 // not change string data structure. 1231 // if string is not flat, this func has low efficiency. ToUtf8Span(CVector<uint8_t> & buf)1232 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf) 1233 { 1234 return string_->ToUtf8Span(buf); 1235 } 1236 1237 // only for string is flat and using UTF8 encoding 1238 inline Span<const uint8_t> FastToUtf8Span(); 1239 1240 // Using string's hash to figure out whether the string can be converted to integer TryToGetInteger(uint32_t * result)1241 inline bool TryToGetInteger(uint32_t *result) 1242 { 1243 return string_->TryToGetInteger(result); 1244 } 1245 TryToSetIntegerHash(int32_t num)1246 inline bool TryToSetIntegerHash(int32_t num) 1247 { 1248 return string_->TryToSetIntegerHash(num); 1249 } 1250 1251 // not change string data structure. 1252 // if string is not flat, this func has low efficiency. 1253 std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1254 1255 // this function convert for Utf8 1256 CString Utf8ConvertToString(); 1257 1258 std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1259 // not change string data structure. 1260 // if string is not flat, this func has low efficiency. 1261 CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false); 1262 1263 void AppendToCString(CString &str, StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, 1264 bool cesu8 = false); 1265 1266 void AppendQuotedStringToCString(CString &str, StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, 1267 bool cesu8 = false); 1268 1269 // not change string data structure. 1270 // if string is not flat, this func has low efficiency. 1271 uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false) 1272 { 1273 return string_->WriteUtf8(buf, maxLength, isWriteBuffer); 1274 } 1275 WriteToUtf16(uint16_t * buf,uint32_t bufLength)1276 uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength) 1277 { 1278 return string_->WriteUtf16(buf, GetLength(), bufLength); 1279 } 1280 WriteToOneByte(uint8_t * buf,uint32_t maxLength)1281 uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength) 1282 { 1283 return string_->WriteOneByte(buf, maxLength); 1284 } 1285 1286 // not change string data structure. 1287 // if string is not flat, this func has low efficiency. WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1288 uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const 1289 { 1290 return string_->CopyDataUtf16(buf, maxLength); 1291 } 1292 1293 template <typename Char> WriteToFlatWithPos(EcmaString * src,Char * buf,uint32_t length,uint32_t pos)1294 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos) 1295 { 1296 src->WriteToFlatWithPos(src, buf, length, pos); 1297 } 1298 1299 template <typename Char> WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)1300 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength) 1301 { 1302 src->WriteToFlat(src, buf, maxLength); 1303 } 1304 1305 // require dst is LineString 1306 // not change src data structure. 1307 // if src is not flat, this func has low efficiency. 1308 inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 1309 1310 // not change src data structure. 1311 // if src is not flat, this func has low efficiency. 1312 template<bool verify = true> Get(uint32_t index)1313 uint16_t Get(uint32_t index) const 1314 { 1315 return string_->At<verify>(index); 1316 } 1317 1318 // require string is LineString. Set(uint32_t index,uint16_t src)1319 void Set(uint32_t index, uint16_t src) 1320 { 1321 return string_->WriteData(index, src); 1322 } 1323 1324 // not change src data structure. 1325 // if src is not flat, this func has low efficiency. GetHashcode()1326 uint32_t GetHashcode() 1327 { 1328 return string_->GetHashcode(); 1329 } 1330 GetRawHashcode()1331 uint32_t GetRawHashcode() 1332 { 1333 return string_->GetRawHashcode(); 1334 } 1335 1336 // not change src data structure. 1337 // if src is not flat, this func has low efficiency. ComputeRawHashcode()1338 std::pair<uint32_t, bool> ComputeRawHashcode() 1339 { 1340 return string_->ComputeRawHashcode(); 1341 } 1342 ComputeHashcode()1343 uint32_t ComputeHashcode() 1344 { 1345 return string_->ComputeHashcode(); 1346 } 1347 ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1348 uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger) 1349 { 1350 return string_->ComputeHashcode(rawHashSeed, isInteger); 1351 } 1352 ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1353 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) 1354 { 1355 return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress); 1356 } 1357 ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1358 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) 1359 { 1360 return EcmaString::ComputeHashcodeUtf16(utf16Data, length); 1361 } 1362 1363 // can change receiver and search data structure 1364 static int32_t IndexOf(const EcmaVM *vm, 1365 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1366 { 1367 return EcmaString::IndexOf(vm, receiver, search, pos); 1368 } 1369 1370 // can change receiver and search data structure 1371 static int32_t LastIndexOf(const EcmaVM *vm, 1372 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1373 { 1374 return EcmaString::LastIndexOf(vm, receiver, search, pos); 1375 } 1376 1377 // can change receiver and search data structure Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1378 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right) 1379 { 1380 return EcmaString::Compare(vm, left, right); 1381 } 1382 1383 1384 // can change receiver and search data structure 1385 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 1386 const JSHandle<EcmaString>& right, uint32_t offset = 0) 1387 { 1388 return EcmaString::IsSubStringAt(vm, left, right, offset); 1389 } 1390 1391 // can change str1 and str2 data structure StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1392 static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2) 1393 { 1394 return EcmaString::StringsAreEqual(vm, str1, str2); 1395 } 1396 1397 // not change str1 and str2 data structure. 1398 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqual(EcmaString * str1,EcmaString * str2)1399 static bool StringsAreEqual(EcmaString *str1, EcmaString *str2) 1400 { 1401 return EcmaString::StringsAreEqual(str1, str2); 1402 } 1403 1404 // not change str1 and str2 data structure. 1405 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1406 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2) 1407 { 1408 return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2); 1409 } 1410 1411 // not change str1 data structure. 1412 // if str1 is not flat, this func has low efficiency. StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1413 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 1414 bool canBeCompress) 1415 { 1416 return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress); 1417 } 1418 1419 // not change str1 data structure. 1420 // if str1 is not flat, this func has low efficiency. StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1421 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) 1422 { 1423 return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len); 1424 } 1425 1426 // require str1 and str2 are LineString. 1427 // not change string data structure. 1428 // if string is not flat, this func has low efficiency. EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1429 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) 1430 { 1431 return string_->EqualToSplicedString(str1, str2); 1432 } 1433 CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1434 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) 1435 { 1436 return EcmaString::CanBeCompressed(utf8Data, utf8Len); 1437 } 1438 CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1439 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 1440 { 1441 return EcmaString::CanBeCompressed(utf16Data, utf16Len); 1442 } 1443 1444 // require string is LineString CanBeCompressed(const EcmaString * string)1445 static bool CanBeCompressed(const EcmaString *string) 1446 { 1447 return EcmaString::CanBeCompressed(string); 1448 } 1449 1450 // not change string data structure. 1451 // if string is not flat, this func has low efficiency. ToElementIndex(uint32_t * index)1452 bool ToElementIndex(uint32_t *index) 1453 { 1454 return string_->ToElementIndex(index); 1455 } 1456 1457 // not change string data structure. 1458 // if string is not flat, this func has low efficiency. ToInt(int32_t * index,bool * negative)1459 bool ToInt(int32_t *index, bool *negative) 1460 { 1461 return string_->ToInt(index, negative); 1462 } 1463 1464 // not change string data structure. 1465 // if string is not flat, this func has low efficiency. ToTypedArrayIndex(uint32_t * index)1466 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index) 1467 { 1468 return string_->ToTypedArrayIndex(index); 1469 } 1470 ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1471 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1472 { 1473 return EcmaString::ToLower(vm, src); 1474 } 1475 TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1476 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1477 { 1478 return EcmaString::TryToLower(vm, src); 1479 } 1480 TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1481 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1482 { 1483 return EcmaString::TryToUpper(vm, src); 1484 } 1485 ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1486 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1487 { 1488 return EcmaString::ToUpper(vm, src); 1489 } 1490 ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1491 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1492 { 1493 return EcmaString::ToLocaleLower(vm, src, locale); 1494 } 1495 ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1496 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1497 { 1498 return EcmaString::ToLocaleUpper(vm, src, locale); 1499 } 1500 1501 static EcmaString *Trim(const JSThread *thread, 1502 const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM) 1503 { 1504 return EcmaString::Trim(thread, src, mode); 1505 } 1506 IsASCIICharacter(uint16_t data)1507 static bool IsASCIICharacter(uint16_t data) 1508 { 1509 if (data == 0) { 1510 return false; 1511 } 1512 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 1513 return data <= base::utf_helper::UTF8_1B_MAX; 1514 } 1515 IsFlat()1516 bool IsFlat() const 1517 { 1518 return string_->IsFlat(); 1519 } 1520 IsLineString()1521 bool IsLineString() const 1522 { 1523 return string_->IsLineString(); 1524 } 1525 IsConstantString()1526 bool IsConstantString() const 1527 { 1528 return string_->IsConstantString(); 1529 } 1530 IsSlicedString()1531 bool IsSlicedString() const 1532 { 1533 return string_->IsSlicedString(); 1534 } 1535 IsLineOrConstantString()1536 bool IsLineOrConstantString() const 1537 { 1538 return string_->IsLineOrConstantString(); 1539 } 1540 IsInteger()1541 bool IsInteger() const 1542 { 1543 return string_->IsInteger(); 1544 } 1545 GetIntegerCode()1546 uint32_t GetIntegerCode() const 1547 { 1548 return string_->GetIntegerCode(); 1549 } 1550 GetStringType()1551 JSType GetStringType() const 1552 { 1553 return string_->GetStringType(); 1554 } 1555 IsTreeString()1556 bool IsTreeString() const 1557 { 1558 return string_->IsTreeString(); 1559 } 1560 NotTreeString()1561 bool NotTreeString() const 1562 { 1563 return string_->NotTreeString(); 1564 } 1565 1566 // the returned string may be a linestring, constantstring, or slicestring!! 1567 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1568 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1569 { 1570 return EcmaString::Flatten(vm, string, type); 1571 } 1572 1573 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1574 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1575 { 1576 return EcmaString::FlattenAllString(vm, string, type); 1577 } 1578 1579 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1580 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1581 { 1582 return EcmaString::SlowFlatten(vm, string, type); 1583 } 1584 FlattenNoGCForSnapshot(const EcmaVM * vm,EcmaString * string)1585 static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string) 1586 { 1587 return EcmaString::FlattenNoGCForSnapshot(vm, string); 1588 } 1589 GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1590 static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf) 1591 { 1592 return EcmaString::GetUtf8DataFlat(src, buf); 1593 } 1594 GetNonTreeUtf8Data(const EcmaString * src)1595 static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src) 1596 { 1597 return EcmaString::GetNonTreeUtf8Data(src); 1598 } 1599 GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1600 static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf) 1601 { 1602 return EcmaString::GetUtf16DataFlat(src, buf); 1603 } 1604 GetNonTreeUtf16Data(const EcmaString * src)1605 static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src) 1606 { 1607 return EcmaString::GetNonTreeUtf16Data(src); 1608 } 1609 1610 static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str); 1611 1612 private: 1613 EcmaString *string_ {nullptr}; 1614 }; 1615 } // namespace ecmascript 1616 } // namespace panda 1617 #endif // ECMASCRIPT_STRING_H 1618