1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_STRING_H 17 #define ECMASCRIPT_STRING_H 18 19 #include <cstddef> 20 #include <cstdint> 21 #include <cstring> 22 23 #include "ecmascript/base/utf_helper.h" 24 #include "ecmascript/common.h" 25 #include "ecmascript/ecma_macros.h" 26 #include "ecmascript/js_hclass.h" 27 #include "ecmascript/js_tagged_value.h" 28 #include "ecmascript/mem/barriers.h" 29 #include "ecmascript/mem/space.h" 30 #include "ecmascript/mem/tagged_object.h" 31 32 #include "libpandabase/macros.h" 33 #include "securec.h" 34 #include "unicode/locid.h" 35 36 namespace panda { 37 namespace ecmascript { 38 template<typename T> 39 class JSHandle; 40 class JSPandaFile; 41 class EcmaVM; 42 class LineEcmaString; 43 class ConstantString; 44 class TreeEcmaString; 45 class SlicedString; 46 class FlatStringInfo; 47 48 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 49 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length) \ 50 if ((length) >= MAX_STRING_LENGTH) { \ 51 THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr); \ 52 } 53 54 class EcmaString : public TaggedObject { 55 /* Mix Hash Code: -- { 0 | [31 bits raw hash code] } computed through string 56 \ { 1 | [31 bits integer numbers] } fastpath for string to number 57 */ 58 public: 59 CAST_CHECK(EcmaString, IsString); 60 61 static constexpr uint32_t IS_INTEGER_MASK = 1U << 31; 62 static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1; 63 static constexpr uint32_t STRING_INTERN_BIT = 0x2; 64 static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning 65 static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U; 66 static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF; 67 static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9; 68 69 static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize(); 70 // In last bit of mix_length we store if this string is compressed or not. 71 ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET) 72 // In last bit of mix_hash we store if this string is small-integer number or not. 73 ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE) 74 75 enum CompressedStatus { 76 STRING_COMPRESSED, 77 STRING_UNCOMPRESSED, 78 }; 79 80 enum IsIntegerStatus { 81 NOT_INTEGER = 0, 82 IS_INTEGER, 83 }; 84 85 enum TrimMode : uint8_t { 86 TRIM, 87 TRIM_START, 88 TRIM_END, 89 }; 90 91 enum ConcatOptStatus { 92 BEGIN_STRING_ADD = 1, 93 IN_STRING_ADD, 94 CONFIRMED_IN_STRING_ADD, 95 END_STRING_ADD, 96 INVALID_STRING_ADD, 97 HAS_BACKING_STORE, 98 }; 99 100 private: 101 friend class EcmaStringAccessor; 102 friend class LineEcmaString; 103 friend class ConstantString; 104 friend class TreeEcmaString; 105 friend class SlicedString; 106 friend class FlatStringInfo; 107 friend class NameDictionary; 108 109 static constexpr int SMALL_STRING_SIZE = 128; 110 111 static EcmaString *CreateEmptyString(const EcmaVM *vm); 112 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 113 bool canBeCompress, MemSpaceType type = MemSpaceType::SEMI_SPACE, bool isConstantString = false, 114 uint32_t idOffset = 0); 115 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 116 MemSpaceType type = MemSpaceType::SEMI_SPACE); 117 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 118 bool canBeCompress, MemSpaceType type = MemSpaceType::SEMI_SPACE); 119 static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SEMI_SPACE); 120 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 121 static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed); 122 static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm, 123 size_t length, bool compressed, MemSpaceType type); 124 static EcmaString *CreateTreeString(const EcmaVM *vm, 125 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed); 126 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, 127 size_t length, bool compressed, MemSpaceType type = MemSpaceType::SEMI_SPACE, uint32_t idOffset = 0); 128 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, 129 const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SEMI_SPACE); 130 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 131 uint32_t length, bool compressed); 132 static EcmaString *FastSubString(const EcmaVM *vm, 133 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 134 static EcmaString *GetSlicedString(const EcmaVM *vm, 135 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 136 static EcmaString *GetSubString(const EcmaVM *vm, 137 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 138 // require src is LineString 139 // not change src data structure 140 static inline EcmaString *FastSubUtf8String(const EcmaVM *vm, 141 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 142 // require src is LineString 143 // not change src data structure 144 static inline EcmaString *FastSubUtf16String(const EcmaVM *vm, 145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 146 inline void TrimLineString(const JSThread *thread, uint32_t newLength); IsUtf8()147 inline bool IsUtf8() const 148 { 149 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED; 150 } 151 IsUtf16()152 inline bool IsUtf16() const 153 { 154 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED; 155 } 156 IsInteger()157 inline bool IsInteger() 158 { 159 return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK; 160 } 161 162 // require is LineString 163 inline uint16_t *GetData() const; 164 inline const uint8_t *GetDataUtf8() const; 165 inline const uint16_t *GetDataUtf16() const; 166 167 // require is LineString 168 inline uint8_t *GetDataUtf8Writable(); 169 inline uint16_t *GetDataUtf16Writable(); 170 GetLength()171 inline uint32_t GetLength() const 172 { 173 return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT; 174 } 175 176 inline void SetLength(uint32_t length, bool compressed = false) 177 { 178 ASSERT(length < MAX_STRING_LENGTH); 179 // Use 0u for compressed/utf8 expression 180 SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED)); 181 } 182 GetRawHashcode()183 inline uint32_t GetRawHashcode() const 184 { 185 return GetMixHashcode() & (~IS_INTEGER_MASK); 186 } 187 MixHashcode(uint32_t hashcode,bool isInteger)188 static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger) 189 { 190 return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK)); 191 } 192 193 inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false) 194 { 195 // Use 0u for not integer string's expression 196 SetMixHashcode(MixHashcode(hashcode, isInteger)); 197 } 198 199 inline size_t GetUtf8Length(bool modify = true) const; 200 SetIsInternString()201 inline void SetIsInternString() 202 { 203 SetMixLength(GetMixLength() | STRING_INTERN_BIT); 204 } 205 IsInternString()206 inline bool IsInternString() const 207 { 208 return (GetMixLength() & STRING_INTERN_BIT) != 0; 209 } 210 ClearInternStringFlag()211 inline void ClearInternStringFlag() 212 { 213 SetMixLength(GetMixLength() & ~STRING_INTERN_BIT); 214 } 215 TryGetHashCode(uint32_t * hash)216 inline bool TryGetHashCode(uint32_t *hash) 217 { 218 uint32_t hashcode = GetMixHashcode(); 219 if (hashcode == 0 && GetLength() != 0) { 220 return false; 221 } 222 *hash = hashcode; 223 return true; 224 } 225 GetIntegerCode()226 inline uint32_t GetIntegerCode() 227 { 228 ASSERT(GetMixHashcode() & IS_INTEGER_MASK); 229 return GetRawHashcode(); 230 } 231 232 // not change this data structure. 233 // if string is not flat, this func has low efficiency. GetHashcode()234 uint32_t PUBLIC_API GetHashcode() 235 { 236 uint32_t hashcode = GetMixHashcode(); 237 // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0. 238 if (hashcode == 0 && GetLength() != 0) { 239 hashcode = ComputeHashcode(); 240 SetMixHashcode(hashcode); 241 } 242 return hashcode; 243 } 244 245 template<typename T> IsDecimalDigitChar(const T c)246 inline static bool IsDecimalDigitChar(const T c) 247 { 248 return (c >= '0' && c <= '9'); 249 } 250 ComputeIntegerHash(uint32_t * num,uint8_t c)251 static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c) 252 { 253 if (!IsDecimalDigitChar(c)) { 254 return false; 255 } 256 int charDate = c - '0'; 257 *num = (*num) * 10 + charDate; // 10: decimal factor 258 return true; 259 } 260 261 bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const; 262 263 template<typename T> HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)264 static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed) 265 { 266 ASSERT(size >= 0); 267 if (hashSeed == 0) { 268 if (IsDecimalDigitChar(data[0]) && data[0] != '0') { 269 uint32_t num = data[0] - '0'; 270 uint32_t i = 1; 271 do { 272 if (i == size) { 273 // compute mix hash 274 if (num <= MAX_INTEGER_HASH_NUMBER) { 275 *hash = MixHashcode(num, IS_INTEGER); 276 return true; 277 } 278 return false; 279 } 280 } while (ComputeIntegerHash(&num, data[i++])); 281 } 282 if (size == 1 && (data[0] == '0')) { 283 *hash = MixHashcode(0, IS_INTEGER); 284 return true; 285 } 286 } else { 287 if (IsDecimalDigitChar(data[0])) { 288 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor 289 uint32_t i = 1; 290 do { 291 if (i == size) { 292 // compute mix hash 293 if (num <= MAX_INTEGER_HASH_NUMBER) { 294 *hash = MixHashcode(num, IS_INTEGER); 295 return true; 296 } 297 return false; 298 } 299 } while (ComputeIntegerHash(&num, data[i++])); 300 } 301 } 302 return false; 303 } 304 305 // not change this data structure. 306 // if string is not flat, this func has low efficiency. 307 uint32_t PUBLIC_API ComputeHashcode() const; 308 std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const; 309 uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const; 310 311 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress); 312 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length); 313 314 template<bool verify = true> 315 uint16_t At(int32_t index) const; 316 317 // require is LineString 318 void WriteData(uint32_t index, uint16_t src); 319 320 // can change left and right data structure 321 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right); 322 323 // Check that two spans are equal. Should have the same length. 324 /* static */ 325 template<typename T, typename T1> StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)326 static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2) 327 { 328 ASSERT(str1.Size() <= str2.Size()); 329 size_t size = str1.Size(); 330 if (!std::is_same_v<T, T1>) { 331 for (size_t i = 0; i < size; i++) { 332 auto left = static_cast<uint16_t>(str1[i]); 333 auto right = static_cast<uint16_t>(str2[i]); 334 if (left != right) { 335 return false; 336 } 337 } 338 return true; 339 } 340 if (size < SMALL_STRING_SIZE) { 341 for (size_t i = 0; i < size; i++) { 342 if (str1[i] != str2[i]) { 343 return false; 344 } 345 } 346 return true; 347 } 348 return memcmp(str1.data(), str2.data(), size * sizeof(T)) == 0; 349 } 350 351 // Converts utf8Data to utf16 and compare it with given utf16_data. 352 static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, 353 uint32_t utf16Len); 354 // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence. 355 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2); 356 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 357 static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2); 358 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 359 static bool StringsAreEqual(EcmaString *str1, EcmaString *str2); 360 // Two strings have the same type of utf encoding format. 361 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2); 362 static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2); 363 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 364 // not change str1 data structure. 365 // if str1 is not flat, this func has low efficiency. 366 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 367 bool canBeCompress); 368 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 369 // not change str1 data structure. 370 // if str1 is not flat, this func has low efficiency. 371 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len); 372 373 // can change receiver and search data structure 374 static int32_t IndexOf(const EcmaVM *vm, 375 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 376 377 // can change receiver and search data structure 378 static int32_t LastIndexOf(const EcmaVM *vm, 379 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 380 381 inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const 382 { 383 if (maxLength == 0) { 384 return 1; // maxLength was -1 at napi 385 } 386 size_t length = GetLength(); 387 if (length > maxLength) { 388 return 0; 389 } 390 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 391 buf[maxLength - 1] = '\0'; 392 // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8 393 return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1; // add place for zero in the end 394 } 395 396 // It allows user to copy into buffer even if maxLength < length 397 inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const 398 { 399 if (maxLength == 0) { 400 return 1; // maxLength was -1 at napi 401 } 402 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 403 buf[maxLength - 1] = '\0'; 404 return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1; 405 } 406 CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)407 size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const 408 { 409 if (IsUtf16()) { 410 CVector<uint16_t> tmpBuf; 411 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 412 if (length > bufLength) { 413 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) { 414 LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength"; 415 UNREACHABLE(); 416 } 417 return bufLength; 418 } 419 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 420 LOG_FULL(FATAL) << "memcpy_s failed"; 421 UNREACHABLE(); 422 } 423 return length; 424 } 425 CVector<uint8_t> tmpBuf; 426 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf); 427 if (length > bufLength) { 428 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength, 0); 429 } 430 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength, 0); 431 } 432 433 // It allows user to copy into buffer even if maxLength < length WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)434 inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const 435 { 436 if (bufLength == 0) { 437 return 0; 438 } 439 // Returns a number representing a valid backrest length. 440 return CopyDataToUtf16(buf, targetLength, bufLength); 441 } 442 WriteOneByte(uint8_t * buf,size_t maxLength)443 size_t WriteOneByte(uint8_t *buf, size_t maxLength) const 444 { 445 if (maxLength == 0) { 446 return 0; 447 } 448 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 449 buf[maxLength - 1] = '\0'; 450 uint32_t length = GetLength(); 451 if (!IsUtf16()) { 452 CVector<uint8_t> tmpBuf; 453 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 454 if (length > maxLength) { 455 length = maxLength; 456 } 457 if (memcpy_s(buf, maxLength, data, length) != EOK) { 458 LOG_FULL(FATAL) << "memcpy_s failed when write one byte"; 459 UNREACHABLE(); 460 } 461 return length; 462 } 463 464 CVector<uint16_t> tmpBuf; 465 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 466 if (length > maxLength) { 467 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength); 468 } 469 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength); 470 } 471 472 size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength, 473 bool modify = true, bool isWriteBuffer = false) const 474 { 475 uint32_t len = GetLength(); 476 if (start + length > len) { 477 return 0; 478 } 479 if (!IsUtf16()) { 480 if (length > std::numeric_limits<size_t>::max() / 2 - 1) { // 2: half 481 LOG_FULL(FATAL) << " length is higher than half of size_t::max"; 482 UNREACHABLE(); 483 } 484 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 485 // Only memcpy_s maxLength number of chars into buffer if length > maxLength 486 CVector<uint8_t> tmpBuf; 487 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 488 if (length > maxLength) { 489 if (memcpy_s(buf, maxLength, data + start, maxLength) != EOK) { 490 LOG_FULL(FATAL) << "memcpy_s failed when length > maxlength"; 491 UNREACHABLE(); 492 } 493 return maxLength; 494 } 495 if (memcpy_s(buf, maxLength, data + start, length) != EOK) { 496 LOG_FULL(FATAL) << "memcpy_s failed when length <= maxlength"; 497 UNREACHABLE(); 498 } 499 return length; 500 } 501 CVector<uint16_t> tmpBuf; 502 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 503 if (length > maxLength) { 504 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, 505 modify, isWriteBuffer); 506 } 507 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, 508 modify, isWriteBuffer); 509 } 510 CopyDataUtf16(uint16_t * buf,uint32_t maxLength)511 inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const 512 { 513 return CopyDataRegionUtf16(buf, 0, GetLength(), maxLength); 514 } 515 CopyDataRegionUtf16(uint16_t * buf,uint32_t start,uint32_t length,uint32_t maxLength)516 uint32_t CopyDataRegionUtf16(uint16_t *buf, uint32_t start, uint32_t length, uint32_t maxLength) const 517 { 518 if (length > maxLength) { 519 return 0; 520 } 521 uint32_t len = GetLength(); 522 if (start + length > len) { 523 return 0; 524 } 525 if (IsUtf16()) { 526 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 527 CVector<uint16_t> tmpBuf; 528 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 529 if (memcpy_s(buf, maxLength * sizeof(uint16_t), data + start, length * sizeof(uint16_t)) != EOK) { 530 LOG_FULL(FATAL) << "memcpy_s failed"; 531 UNREACHABLE(); 532 } 533 return length; 534 } 535 CVector<uint8_t> tmpBuf; 536 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf); 537 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, len, maxLength, start); 538 } 539 540 std::u16string ToU16String(uint32_t len = 0); 541 ToOneByteDataForced()542 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 543 { 544 uint8_t *buf = nullptr; 545 auto length = GetLength(); 546 if (IsUtf16()) { 547 auto size = length * sizeof(uint16_t); 548 buf = new uint8_t[size](); 549 CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length); 550 } else { 551 buf = new uint8_t[length + 1](); 552 CopyDataUtf8(buf, length + 1); 553 } 554 return std::unique_ptr<uint8_t[]>(buf); 555 } 556 557 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true) 558 { 559 Span<const uint8_t> str; 560 uint32_t strLen = GetLength(); 561 if (UNLIKELY(IsUtf16())) { 562 CVector<uint16_t> tmpBuf; 563 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 564 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1; 565 buf.reserve(len); 566 len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify); 567 str = Span<const uint8_t>(buf.data(), len); 568 } else { 569 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 570 str = Span<const uint8_t>(data, strLen); 571 } 572 return str; 573 } 574 575 Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true) 576 { 577 Span<const uint8_t> str; 578 uint32_t strLen = GetLength(); 579 if (UNLIKELY(IsUtf16())) { 580 CVector<uint16_t> tmpBuf; 581 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 582 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1; 583 buf.reserve(len); 584 len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify); 585 str = Span<const uint8_t>(buf.data(), len); 586 } else { 587 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 588 str = Span<const uint8_t>(data, strLen); 589 } 590 return str; 591 } 592 593 inline Span<const uint8_t> FastToUtf8Span() const; 594 TryToGetInteger(uint32_t * result)595 bool TryToGetInteger(uint32_t *result) 596 { 597 if (!IsInteger()) { 598 return false; 599 } 600 ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE); 601 *result = GetIntegerCode(); 602 return true; 603 } 604 605 // using integer number set into hash TryToSetIntegerHash(int32_t num)606 inline bool TryToSetIntegerHash(int32_t num) 607 { 608 uint32_t hashcode = GetMixHashcode(); 609 if (hashcode == 0 && GetLength() != 0) { 610 SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER); 611 return true; 612 } 613 return false; 614 } 615 616 void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 617 618 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); 619 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); 620 static bool CanBeCompressed(const EcmaString *string); 621 622 bool ToElementIndex(uint32_t *index); 623 624 bool ToInt(int32_t *index, bool *negative); 625 626 bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data); 627 628 bool ToTypedArrayIndex(uint32_t *index); 629 630 template<bool isLower> 631 static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src); 632 633 template<bool isLower> 634 static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 635 636 template<typename T> 637 static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode); 638 639 static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM); 640 641 // single char copy for loop 642 template<typename DstType, typename SrcType> CopyChars(DstType * dst,SrcType * src,uint32_t count)643 static void CopyChars(DstType *dst, SrcType *src, uint32_t count) 644 { 645 Span<SrcType> srcSp(src, count); 646 Span<DstType> dstSp(dst, count); 647 for (uint32_t i = 0; i < count; i++) { 648 dstSp[i] = srcSp[i]; 649 } 650 } 651 652 // memory block copy 653 template<typename T> 654 static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count); 655 656 template<typename T> ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)657 static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed) 658 { 659 uint32_t hash = hashSeed; 660 Span<const T> sp(data, size); 661 for (auto c : sp) { 662 constexpr size_t SHIFT = 5; 663 hash = (hash << SHIFT) - hash + c; 664 } 665 return hash; 666 } 667 IsASCIICharacter(uint16_t data)668 static bool IsASCIICharacter(uint16_t data) 669 { 670 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 671 return data - 1U < base::utf_helper::UTF8_1B_MAX; 672 } 673 674 template<typename T1, typename T2> 675 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max); 676 677 template<typename T1, typename T2> 678 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos); 679 680 bool IsFlat() const; 681 IsLineString()682 bool IsLineString() const 683 { 684 return GetClass()->IsLineString(); 685 } IsConstantString()686 bool IsConstantString() const 687 { 688 return GetClass()->IsConstantString(); 689 } IsSlicedString()690 bool IsSlicedString() const 691 { 692 return GetClass()->IsSlicedString(); 693 } IsTreeString()694 bool IsTreeString() const 695 { 696 return GetClass()->IsTreeString(); 697 } NotTreeString()698 bool NotTreeString() const 699 { 700 return !IsTreeString(); 701 } IsLineOrConstantString()702 bool IsLineOrConstantString() const 703 { 704 auto hclass = GetClass(); 705 return hclass->IsLineString() || hclass->IsConstantString(); 706 } 707 GetStringType()708 JSType GetStringType() const 709 { 710 JSType type = GetClass()->GetObjectType(); 711 ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST); 712 return type; 713 } 714 715 template <typename Char> 716 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength); 717 718 static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf); 719 720 static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf); 721 722 // string must be not flat 723 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type); 724 725 static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 726 MemSpaceType type = MemSpaceType::SEMI_SPACE); 727 728 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 729 MemSpaceType type = MemSpaceType::SEMI_SPACE); 730 731 static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string); 732 733 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 734 735 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 736 737 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 738 739 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 740 741 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 742 743 static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &srcFlat, 744 bool toLower, uint32_t startIndex = 0); 745 }; 746 747 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data 748 class LineEcmaString : public EcmaString { 749 public: 750 static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; 751 static constexpr uint32_t INIT_LENGTH_TIMES = 4; 752 // DATA_OFFSET: the string data stored after the string header. 753 // Data can be stored in utf8 or utf16 form according to compressed bit. 754 static constexpr size_t DATA_OFFSET = EcmaString::SIZE; // DATA_OFFSET equal to Empty String size 755 756 CAST_CHECK(LineEcmaString, IsLineString); 757 758 DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength()); 759 Cast(EcmaString * str)760 static LineEcmaString *Cast(EcmaString *str) 761 { 762 return static_cast<LineEcmaString *>(str); 763 } 764 Cast(const EcmaString * str)765 static LineEcmaString *Cast(const EcmaString *str) 766 { 767 return LineEcmaString::Cast(const_cast<EcmaString *>(str)); 768 } 769 ComputeSizeUtf8(uint32_t utf8Len)770 static size_t ComputeSizeUtf8(uint32_t utf8Len) 771 { 772 return DATA_OFFSET + utf8Len; 773 } 774 ComputeSizeUtf16(uint32_t utf16Len)775 static size_t ComputeSizeUtf16(uint32_t utf16Len) 776 { 777 return DATA_OFFSET + utf16Len * sizeof(uint16_t); 778 } 779 ObjectSize(EcmaString * str)780 static size_t ObjectSize(EcmaString *str) 781 { 782 uint32_t length = str->GetLength(); 783 return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length); 784 } 785 DataSize(EcmaString * str)786 static size_t DataSize(EcmaString *str) 787 { 788 uint32_t length = str->GetLength(); 789 return str->IsUtf16() ? length * sizeof(uint16_t) : length; 790 } 791 GetPointerLength()792 size_t GetPointerLength() 793 { 794 size_t byteSize = DataSize(this); 795 return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType); 796 } 797 GetData()798 uint16_t *GetData() const 799 { 800 return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET); 801 } 802 803 template<bool verify = true> Get(int32_t index)804 uint16_t Get(int32_t index) const 805 { 806 int32_t length = static_cast<int32_t>(GetLength()); 807 if (verify) { 808 if ((index < 0) || (index >= length)) { 809 return 0; 810 } 811 } 812 if (!IsUtf16()) { 813 Span<const uint8_t> sp(GetDataUtf8(), length); 814 return sp[index]; 815 } 816 Span<const uint16_t> sp(GetDataUtf16(), length); 817 return sp[index]; 818 } 819 Set(uint32_t index,uint16_t src)820 void Set(uint32_t index, uint16_t src) 821 { 822 ASSERT(index < GetLength()); 823 if (IsUtf8()) { 824 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 825 *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src); 826 } else { 827 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 828 *(GetData() + index) = src; 829 } 830 } 831 }; 832 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0); 833 834 class ConstantString : public EcmaString { 835 public: 836 static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE; 837 // ConstantData is the pointer of const string in the pandafile. 838 // String in pandafile is encoded by the utf8 format. 839 // EntityId is normally the uint32_t index in the pandafile. 840 // When the pandafile is to be removed, EntityId will become -1. 841 // The real string data will be reloacted into bytearray and stored in RelocatedData. 842 // ConstantData will also point at data of bytearray data. 843 ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 844 ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET); 845 ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET); 846 DEFINE_ALIGN_SIZE(LAST_OFFSET); 847 848 CAST_CHECK(ConstantString, IsConstantString); 849 DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 850 Cast(EcmaString * str)851 static ConstantString *Cast(EcmaString *str) 852 { 853 return static_cast<ConstantString *>(str); 854 } 855 Cast(const EcmaString * str)856 static ConstantString *Cast(const EcmaString *str) 857 { 858 return ConstantString::Cast(const_cast<EcmaString *>(str)); 859 } 860 ObjectSize()861 static size_t ObjectSize() 862 { 863 return ConstantString::SIZE; 864 } 865 GetEntityIdU32()866 uint32_t GetEntityIdU32() const 867 { 868 ASSERT(GetEntityId() >= 0); 869 return static_cast<uint32_t>(GetEntityId()); 870 } 871 872 template<bool verify = true> Get(int32_t index)873 uint16_t Get(int32_t index) const 874 { 875 int32_t length = static_cast<int32_t>(GetLength()); 876 if (verify) { 877 if ((index < 0) || (index >= length)) { 878 return 0; 879 } 880 } 881 ASSERT(IsUtf8()); 882 Span<const uint8_t> sp(GetConstantData(), length); 883 return sp[index]; 884 } 885 }; 886 887 // The substrings of another string use SlicedString to describe. 888 class SlicedString : public EcmaString { 889 public: 890 static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13; 891 static constexpr size_t PARENT_OFFSET = EcmaString::SIZE; 892 ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET); 893 ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG); 894 ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE); 895 896 DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET); 897 898 CAST_CHECK(SlicedString, IsSlicedString); 899 private: 900 friend class EcmaString; Cast(EcmaString * str)901 static SlicedString *Cast(EcmaString *str) 902 { 903 return static_cast<SlicedString *>(str); 904 } 905 Cast(const EcmaString * str)906 static SlicedString *Cast(const EcmaString *str) 907 { 908 return SlicedString::Cast(const_cast<EcmaString *>(str)); 909 } 910 ObjectSize()911 static size_t ObjectSize() 912 { 913 return SlicedString::SIZE; 914 } 915 916 // Minimum length for a sliced string 917 template<bool verify = true> Get(int32_t index)918 uint16_t Get(int32_t index) const 919 { 920 int32_t length = static_cast<int32_t>(GetLength()); 921 if (verify) { 922 if ((index < 0) || (index >= length)) { 923 return 0; 924 } 925 } 926 EcmaString *parent = EcmaString::Cast(GetParent()); 927 if (parent->IsLineString()) { 928 if (parent->IsUtf8()) { 929 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length); 930 return sp[index]; 931 } 932 Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length); 933 return sp[index]; 934 } 935 Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length); 936 return sp[index]; 937 } 938 }; 939 940 class TreeEcmaString : public EcmaString { 941 public: 942 // Minimum length for a tree string 943 static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13; 944 945 static constexpr size_t FIRST_OFFSET = EcmaString::SIZE; 946 ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET); 947 ACCESSORS(Second, SECOND_OFFSET, SIZE); 948 949 DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE); 950 951 CAST_CHECK(TreeEcmaString, IsTreeString); 952 Cast(EcmaString * str)953 static TreeEcmaString *Cast(EcmaString *str) 954 { 955 return static_cast<TreeEcmaString *>(str); 956 } 957 Cast(const EcmaString * str)958 static TreeEcmaString *Cast(const EcmaString *str) 959 { 960 return TreeEcmaString::Cast(const_cast<EcmaString *>(str)); 961 } 962 IsFlat()963 bool IsFlat() const 964 { 965 auto strSecond = EcmaString::Cast(GetSecond()); 966 return strSecond->GetLength() == 0; 967 } 968 969 template<bool verify = true> Get(int32_t index)970 uint16_t Get(int32_t index) const 971 { 972 int32_t length = static_cast<int32_t>(GetLength()); 973 if (verify) { 974 if ((index < 0) || (index >= length)) { 975 return 0; 976 } 977 } 978 979 if (IsFlat()) { 980 EcmaString *first = EcmaString::Cast(GetFirst()); 981 return first->At<verify>(index); 982 } 983 EcmaString *string = const_cast<TreeEcmaString *>(this); 984 while (true) { 985 if (string->IsTreeString()) { 986 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 987 if (static_cast<int32_t>(first->GetLength()) > index) { 988 string = first; 989 } else { 990 index -= static_cast<int32_t>(first->GetLength()); 991 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond()); 992 } 993 } else { 994 return string->At<verify>(index); 995 } 996 } 997 UNREACHABLE(); 998 } 999 }; 1000 1001 class FlatStringInfo { 1002 public: FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)1003 FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string), 1004 startIndex_(startIndex), 1005 length_(length) {} IsUtf8()1006 bool IsUtf8() const 1007 { 1008 return string_->IsUtf8(); 1009 } 1010 IsUtf16()1011 bool IsUtf16() const 1012 { 1013 return string_->IsUtf16(); 1014 } 1015 GetString()1016 EcmaString *GetString() const 1017 { 1018 return string_; 1019 } 1020 SetString(EcmaString * string)1021 void SetString(EcmaString *string) 1022 { 1023 string_ = string; 1024 } 1025 GetStartIndex()1026 uint32_t GetStartIndex() const 1027 { 1028 return startIndex_; 1029 } 1030 GetLength()1031 uint32_t GetLength() const 1032 { 1033 return length_; 1034 } 1035 1036 const uint8_t *GetDataUtf8() const; 1037 const uint16_t *GetDataUtf16() const; 1038 uint8_t *GetDataUtf8Writable() const; 1039 std::u16string ToU16String(uint32_t len = 0); 1040 private: 1041 EcmaString *string_ {nullptr}; 1042 uint32_t startIndex_ {0}; 1043 uint32_t length_ {0}; 1044 }; 1045 1046 // if you want to use functions of EcmaString, please not use directly, 1047 // and use functions of EcmaStringAccessor alternatively. 1048 // eg: EcmaString *str = ***; str->GetLength() -----> EcmaStringAccessor(str).GetLength() 1049 class PUBLIC_API EcmaStringAccessor { 1050 public: 1051 explicit EcmaStringAccessor(EcmaString *string); 1052 1053 explicit EcmaStringAccessor(TaggedObject *obj); 1054 1055 explicit EcmaStringAccessor(JSTaggedValue value); 1056 1057 explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle); 1058 CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1059 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed) 1060 { 1061 return EcmaString::CreateLineString(vm, length, compressed); 1062 } 1063 CreateEmptyString(const EcmaVM * vm)1064 static EcmaString *CreateEmptyString(const EcmaVM *vm) 1065 { 1066 return EcmaString::CreateEmptyString(vm); 1067 } 1068 1069 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress, 1070 MemSpaceType type = MemSpaceType::SEMI_SPACE, bool isConstantString = false, 1071 uint32_t idOffset = 0) 1072 { 1073 return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset); 1074 } 1075 1076 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length, 1077 bool compressed, MemSpaceType type = MemSpaceType::SEMI_SPACE, uint32_t idOffset = 0) 1078 { 1079 return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset); 1080 } 1081 1082 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 1083 MemSpaceType type = MemSpaceType::SEMI_SPACE) 1084 { 1085 return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type); 1086 } 1087 1088 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 1089 bool canBeCompress, MemSpaceType type = MemSpaceType::SEMI_SPACE) 1090 { 1091 return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type); 1092 } 1093 1094 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle, 1095 const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SEMI_SPACE) 1096 { 1097 return EcmaString::Concat(vm, str1Handle, str2Handle, type); 1098 } 1099 CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1100 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 1101 uint32_t length, bool compressed) 1102 { 1103 return EcmaString::CopyStringToOldSpace(vm, original, length, compressed); 1104 } 1105 1106 // can change src data structure FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1107 static EcmaString *FastSubString(const EcmaVM *vm, 1108 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1109 { 1110 return EcmaString::FastSubString(vm, src, start, length); 1111 } 1112 1113 // get GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1114 static EcmaString *GetSubString(const EcmaVM *vm, 1115 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1116 { 1117 return EcmaString::GetSubString(vm, src, start, length); 1118 } 1119 IsUtf8()1120 bool IsUtf8() const 1121 { 1122 return string_->IsUtf8(); 1123 } 1124 IsUtf16()1125 bool IsUtf16() const 1126 { 1127 return string_->IsUtf16(); 1128 } 1129 GetLength()1130 uint32_t GetLength() const 1131 { 1132 return string_->GetLength(); 1133 } 1134 1135 // require is LineString 1136 inline size_t GetUtf8Length() const; 1137 ObjectSize()1138 size_t ObjectSize() const 1139 { 1140 if (string_->IsLineString()) { 1141 return LineEcmaString::ObjectSize(string_); 1142 } if (string_->IsConstantString()) { 1143 return ConstantString::ObjectSize(); 1144 } else { 1145 return TreeEcmaString::SIZE; 1146 } 1147 } 1148 1149 // For TreeString, the calculation result is size of LineString correspondingly. GetFlatStringSize()1150 size_t GetFlatStringSize() const 1151 { 1152 if (string_->IsConstantString()) { 1153 return ConstantString::ObjectSize(); 1154 } 1155 return LineEcmaString::ObjectSize(string_); 1156 } 1157 IsInternString()1158 bool IsInternString() const 1159 { 1160 return string_->IsInternString(); 1161 } 1162 SetInternString()1163 void SetInternString() 1164 { 1165 string_->SetIsInternString(); 1166 } 1167 ClearInternString()1168 void ClearInternString() 1169 { 1170 string_->ClearInternStringFlag(); 1171 } 1172 1173 // require is LineString 1174 // It's Utf8 format, but without 0 in the end. 1175 inline const uint8_t *GetDataUtf8(); 1176 1177 // require is LineString 1178 inline const uint16_t *GetDataUtf16(); 1179 1180 // not change string data structure. 1181 // if string is not flat, this func has low efficiency. 1182 std::u16string ToU16String(uint32_t len = 0) 1183 { 1184 return string_->ToU16String(len); 1185 } 1186 1187 // not change string data structure. 1188 // if string is not flat, this func has low efficiency. ToOneByteDataForced()1189 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 1190 { 1191 return string_->ToOneByteDataForced(); 1192 } 1193 1194 // not change string data structure. 1195 // if string is not flat, this func has low efficiency. ToUtf8Span(CVector<uint8_t> & buf)1196 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf) 1197 { 1198 return string_->ToUtf8Span(buf); 1199 } 1200 1201 // only for string is flat and using UTF8 encoding 1202 inline Span<const uint8_t> FastToUtf8Span(); 1203 1204 // Using string's hash to figure out whether the string can be converted to integer TryToGetInteger(uint32_t * result)1205 inline bool TryToGetInteger(uint32_t *result) 1206 { 1207 return string_->TryToGetInteger(result); 1208 } 1209 TryToSetIntegerHash(int32_t num)1210 inline bool TryToSetIntegerHash(int32_t num) 1211 { 1212 return string_->TryToSetIntegerHash(num); 1213 } 1214 1215 // not change string data structure. 1216 // if string is not flat, this func has low efficiency. 1217 std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1218 1219 std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1220 // not change string data structure. 1221 // if string is not flat, this func has low efficiency. 1222 CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION); 1223 1224 // not change string data structure. 1225 // if string is not flat, this func has low efficiency. 1226 uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false) 1227 { 1228 return string_->WriteUtf8(buf, maxLength, isWriteBuffer); 1229 } 1230 WriteToUtf16(uint16_t * buf,uint32_t bufLength)1231 uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength) 1232 { 1233 return string_->WriteUtf16(buf, GetLength(), bufLength); 1234 } 1235 WriteToOneByte(uint8_t * buf,uint32_t maxLength)1236 uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength) 1237 { 1238 return string_->WriteOneByte(buf, maxLength); 1239 } 1240 1241 // not change string data structure. 1242 // if string is not flat, this func has low efficiency. WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1243 uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const 1244 { 1245 return string_->CopyDataUtf16(buf, maxLength); 1246 } 1247 1248 // require dst is LineString 1249 // not change src data structure. 1250 // if src is not flat, this func has low efficiency. 1251 inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 1252 1253 // not change src data structure. 1254 // if src is not flat, this func has low efficiency. 1255 template<bool verify = true> Get(uint32_t index)1256 uint16_t Get(uint32_t index) const 1257 { 1258 return string_->At<verify>(index); 1259 } 1260 1261 // require string is LineString. Set(uint32_t index,uint16_t src)1262 void Set(uint32_t index, uint16_t src) 1263 { 1264 return string_->WriteData(index, src); 1265 } 1266 1267 // not change src data structure. 1268 // if src is not flat, this func has low efficiency. GetHashcode()1269 uint32_t GetHashcode() 1270 { 1271 return string_->GetHashcode(); 1272 } 1273 GetRawHashcode()1274 uint32_t GetRawHashcode() 1275 { 1276 return string_->GetRawHashcode(); 1277 } 1278 1279 // not change src data structure. 1280 // if src is not flat, this func has low efficiency. ComputeRawHashcode()1281 std::pair<uint32_t, bool> ComputeRawHashcode() 1282 { 1283 return string_->ComputeRawHashcode(); 1284 } 1285 ComputeHashcode()1286 uint32_t ComputeHashcode() 1287 { 1288 return string_->ComputeHashcode(); 1289 } 1290 ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1291 uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger) 1292 { 1293 return string_->ComputeHashcode(rawHashSeed, isInteger); 1294 } 1295 ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1296 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) 1297 { 1298 return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress); 1299 } 1300 ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1301 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) 1302 { 1303 return EcmaString::ComputeHashcodeUtf16(utf16Data, length); 1304 } 1305 1306 // can change receiver and search data structure 1307 static int32_t IndexOf(const EcmaVM *vm, 1308 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1309 { 1310 return EcmaString::IndexOf(vm, receiver, search, pos); 1311 } 1312 1313 // can change receiver and search data structure 1314 static int32_t LastIndexOf(const EcmaVM *vm, 1315 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1316 { 1317 return EcmaString::LastIndexOf(vm, receiver, search, pos); 1318 } 1319 1320 // can change receiver and search data structure Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1321 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right) 1322 { 1323 return EcmaString::Compare(vm, left, right); 1324 } 1325 1326 // can change str1 and str2 data structure StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1327 static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2) 1328 { 1329 return EcmaString::StringsAreEqual(vm, str1, str2); 1330 } 1331 1332 // not change str1 and str2 data structure. 1333 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqual(EcmaString * str1,EcmaString * str2)1334 static bool StringsAreEqual(EcmaString *str1, EcmaString *str2) 1335 { 1336 return EcmaString::StringsAreEqual(str1, str2); 1337 } 1338 1339 // not change str1 and str2 data structure. 1340 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1341 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2) 1342 { 1343 return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2); 1344 } 1345 1346 // not change str1 data structure. 1347 // if str1 is not flat, this func has low efficiency. StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1348 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 1349 bool canBeCompress) 1350 { 1351 return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress); 1352 } 1353 1354 // not change str1 data structure. 1355 // if str1 is not flat, this func has low efficiency. StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1356 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) 1357 { 1358 return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len); 1359 } 1360 1361 // require str1 and str2 are LineString. 1362 // not change string data structure. 1363 // if string is not flat, this func has low efficiency. EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1364 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) 1365 { 1366 return string_->EqualToSplicedString(str1, str2); 1367 } 1368 CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1369 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) 1370 { 1371 return EcmaString::CanBeCompressed(utf8Data, utf8Len); 1372 } 1373 CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1374 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 1375 { 1376 return EcmaString::CanBeCompressed(utf16Data, utf16Len); 1377 } 1378 1379 // require string is LineString CanBeCompressed(const EcmaString * string)1380 static bool CanBeCompressed(const EcmaString *string) 1381 { 1382 return EcmaString::CanBeCompressed(string); 1383 } 1384 1385 // not change string data structure. 1386 // if string is not flat, this func has low efficiency. ToElementIndex(uint32_t * index)1387 bool ToElementIndex(uint32_t *index) 1388 { 1389 return string_->ToElementIndex(index); 1390 } 1391 1392 // not change string data structure. 1393 // if string is not flat, this func has low efficiency. ToInt(int32_t * index,bool * negative)1394 bool ToInt(int32_t *index, bool *negative) 1395 { 1396 return string_->ToInt(index, negative); 1397 } 1398 1399 // not change string data structure. 1400 // if string is not flat, this func has low efficiency. ToTypedArrayIndex(uint32_t * index)1401 bool ToTypedArrayIndex(uint32_t *index) 1402 { 1403 return string_->ToTypedArrayIndex(index); 1404 } 1405 ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1406 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1407 { 1408 return EcmaString::ToLower(vm, src); 1409 } 1410 TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1411 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1412 { 1413 return EcmaString::TryToLower(vm, src); 1414 } 1415 ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1416 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1417 { 1418 return EcmaString::ToUpper(vm, src); 1419 } 1420 ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1421 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1422 { 1423 return EcmaString::ToLocaleLower(vm, src, locale); 1424 } 1425 ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1426 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1427 { 1428 return EcmaString::ToLocaleUpper(vm, src, locale); 1429 } 1430 1431 static EcmaString *Trim(const JSThread *thread, 1432 const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM) 1433 { 1434 return EcmaString::Trim(thread, src, mode); 1435 } 1436 IsFlat()1437 bool IsFlat() const 1438 { 1439 return string_->IsFlat(); 1440 } 1441 IsLineString()1442 bool IsLineString() const 1443 { 1444 return string_->IsLineString(); 1445 } 1446 IsConstantString()1447 bool IsConstantString() const 1448 { 1449 return string_->IsConstantString(); 1450 } 1451 IsLineOrConstantString()1452 bool IsLineOrConstantString() const 1453 { 1454 return string_->IsLineOrConstantString(); 1455 } 1456 IsTreeString()1457 bool IsTreeString() const 1458 { 1459 return string_->IsTreeString(); 1460 } 1461 NotTreeString()1462 bool NotTreeString() const 1463 { 1464 return string_->NotTreeString(); 1465 } 1466 1467 static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1468 MemSpaceType type = MemSpaceType::SEMI_SPACE) 1469 { 1470 return EcmaString::Flatten(vm, string, type); 1471 } 1472 1473 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1474 MemSpaceType type = MemSpaceType::SEMI_SPACE) 1475 { 1476 return EcmaString::FlattenAllString(vm, string, type); 1477 } 1478 1479 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1480 MemSpaceType type = MemSpaceType::SEMI_SPACE) 1481 { 1482 return EcmaString::SlowFlatten(vm, string, type); 1483 } 1484 FlattenNoGC(const EcmaVM * vm,EcmaString * string)1485 static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string) 1486 { 1487 return EcmaString::FlattenNoGC(vm, string); 1488 } 1489 GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1490 static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf) 1491 { 1492 return EcmaString::GetUtf8DataFlat(src, buf); 1493 } 1494 GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1495 static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf) 1496 { 1497 return EcmaString::GetUtf16DataFlat(src, buf); 1498 } 1499 1500 static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str); 1501 1502 private: 1503 EcmaString *string_ {nullptr}; 1504 }; 1505 } // namespace ecmascript 1506 } // namespace panda 1507 #endif // ECMASCRIPT_STRING_H