1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_STRING_H 17 #define ECMASCRIPT_STRING_H 18 19 #include <cstddef> 20 #include <cstdint> 21 #include <cstring> 22 23 #include "ecmascript/base/utf_helper.h" 24 #include "ecmascript/common.h" 25 #include "ecmascript/ecma_macros.h" 26 #include "ecmascript/js_hclass.h" 27 #include "ecmascript/js_tagged_value.h" 28 #include "ecmascript/mem/barriers.h" 29 #include "ecmascript/mem/space.h" 30 #include "ecmascript/mem/tagged_object.h" 31 32 #include "libpandabase/macros.h" 33 #include "securec.h" 34 #include "unicode/locid.h" 35 36 namespace panda { 37 namespace test { 38 class EcmaStringEqualsTest; 39 } 40 namespace ecmascript { 41 template<typename T> 42 class JSHandle; 43 class JSPandaFile; 44 class EcmaVM; 45 class LineEcmaString; 46 class ConstantString; 47 class TreeEcmaString; 48 class SlicedString; 49 class FlatStringInfo; 50 51 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 52 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length) \ 53 if ((length) >= MAX_STRING_LENGTH) { \ 54 THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr); \ 55 } 56 57 class EcmaString : public TaggedObject { 58 /* Mix Hash Code: -- { 0 | [31 bits raw hash code] } computed through string 59 \ { 1 | [31 bits integer numbers] } fastpath for string to number 60 */ 61 public: 62 CAST_CHECK(EcmaString, IsString); 63 64 static constexpr uint32_t IS_INTEGER_MASK = 1U << 31; 65 static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1; 66 static constexpr uint32_t STRING_INTERN_BIT = 0x2; 67 static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning 68 static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U; 69 static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF; 70 static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9; 71 72 static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize(); 73 // In last bit of mix_length we store if this string is compressed or not. 74 ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET) 75 // In last bit of mix_hash we store if this string is small-integer number or not. 76 ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE) 77 78 enum CompressedStatus { 79 STRING_COMPRESSED, 80 STRING_UNCOMPRESSED, 81 }; 82 83 enum IsIntegerStatus { 84 NOT_INTEGER = 0, 85 IS_INTEGER, 86 }; 87 88 enum TrimMode : uint8_t { 89 TRIM, 90 TRIM_START, 91 TRIM_END, 92 }; 93 94 enum ConcatOptStatus { 95 BEGIN_STRING_ADD = 1, 96 IN_STRING_ADD, 97 CONFIRMED_IN_STRING_ADD, 98 END_STRING_ADD, 99 INVALID_STRING_ADD, 100 HAS_BACKING_STORE, 101 }; 102 103 private: 104 friend class EcmaStringAccessor; 105 friend class LineEcmaString; 106 friend class ConstantString; 107 friend class TreeEcmaString; 108 friend class SlicedString; 109 friend class FlatStringInfo; 110 friend class NameDictionary; 111 friend class panda::test::EcmaStringEqualsTest; 112 113 static EcmaString *CreateEmptyString(const EcmaVM *vm); 114 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 115 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 116 uint32_t idOffset = 0); 117 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 118 uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 119 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 120 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 121 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 122 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 123 static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 124 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 125 static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed); 126 static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm, 127 size_t length, bool compressed, MemSpaceType type); 128 static EcmaString *CreateTreeString(const EcmaVM *vm, 129 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed); 130 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, 131 size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0); 132 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, 133 const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 134 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 135 uint32_t length, bool compressed); 136 static EcmaString *FastSubString(const EcmaVM *vm, 137 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 138 static EcmaString *GetSlicedString(const EcmaVM *vm, 139 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 140 static EcmaString *GetSubString(const EcmaVM *vm, 141 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 142 // require src is LineString 143 // not change src data structure 144 static inline EcmaString *FastSubUtf8String(const EcmaVM *vm, 145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 146 // require src is LineString 147 // not change src data structure 148 static inline EcmaString *FastSubUtf16String(const EcmaVM *vm, 149 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 150 inline void TrimLineString(const JSThread *thread, uint32_t newLength); IsUtf8()151 inline bool IsUtf8() const 152 { 153 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED; 154 } 155 IsUtf16()156 inline bool IsUtf16() const 157 { 158 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED; 159 } 160 IsInteger()161 inline bool IsInteger() 162 { 163 return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK; 164 } 165 166 // require is LineString 167 inline uint16_t *GetData() const; 168 inline const uint8_t *GetDataUtf8() const; 169 inline const uint16_t *GetDataUtf16() const; 170 171 // require is LineString 172 inline uint8_t *GetDataUtf8Writable(); 173 inline uint16_t *GetDataUtf16Writable(); 174 GetLength()175 inline uint32_t GetLength() const 176 { 177 return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT; 178 } 179 180 inline void SetLength(uint32_t length, bool compressed = false) 181 { 182 ASSERT(length < MAX_STRING_LENGTH); 183 // Use 0u for compressed/utf8 expression 184 SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED)); 185 } 186 GetRawHashcode()187 inline uint32_t GetRawHashcode() const 188 { 189 return GetMixHashcode() & (~IS_INTEGER_MASK); 190 } 191 MixHashcode(uint32_t hashcode,bool isInteger)192 static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger) 193 { 194 return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK)); 195 } 196 197 inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false) 198 { 199 // Use 0u for not integer string's expression 200 SetMixHashcode(MixHashcode(hashcode, isInteger)); 201 } 202 203 inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const; 204 SetIsInternString()205 inline void SetIsInternString() 206 { 207 SetMixLength(GetMixLength() | STRING_INTERN_BIT); 208 } 209 IsInternString()210 inline bool IsInternString() const 211 { 212 return (GetMixLength() & STRING_INTERN_BIT) != 0; 213 } 214 ClearInternStringFlag()215 inline void ClearInternStringFlag() 216 { 217 SetMixLength(GetMixLength() & ~STRING_INTERN_BIT); 218 } 219 TryGetHashCode(uint32_t * hash)220 inline bool TryGetHashCode(uint32_t *hash) 221 { 222 uint32_t hashcode = GetMixHashcode(); 223 if (hashcode == 0 && GetLength() != 0) { 224 return false; 225 } 226 *hash = hashcode; 227 return true; 228 } 229 GetIntegerCode()230 inline uint32_t GetIntegerCode() 231 { 232 ASSERT(GetMixHashcode() & IS_INTEGER_MASK); 233 return GetRawHashcode(); 234 } 235 236 // not change this data structure. 237 // if string is not flat, this func has low efficiency. GetHashcode()238 uint32_t PUBLIC_API GetHashcode() 239 { 240 uint32_t hashcode = GetMixHashcode(); 241 // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0. 242 if (hashcode == 0 && GetLength() != 0) { 243 hashcode = ComputeHashcode(); 244 SetMixHashcode(hashcode); 245 } 246 return hashcode; 247 } 248 249 template<typename T> IsDecimalDigitChar(const T c)250 inline static bool IsDecimalDigitChar(const T c) 251 { 252 return (c >= '0' && c <= '9'); 253 } 254 ComputeIntegerHash(uint32_t * num,uint8_t c)255 static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c) 256 { 257 if (!IsDecimalDigitChar(c)) { 258 return false; 259 } 260 int charDate = c - '0'; 261 *num = (*num) * 10 + charDate; // 10: decimal factor 262 return true; 263 } 264 265 bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const; 266 267 template<typename T> HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)268 static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed) 269 { 270 ASSERT(size >= 0); 271 if (hashSeed == 0) { 272 if (IsDecimalDigitChar(data[0]) && data[0] != '0') { 273 uint32_t num = data[0] - '0'; 274 uint32_t i = 1; 275 do { 276 if (i == size) { 277 // compute mix hash 278 if (num <= MAX_INTEGER_HASH_NUMBER) { 279 *hash = MixHashcode(num, IS_INTEGER); 280 return true; 281 } 282 return false; 283 } 284 } while (ComputeIntegerHash(&num, data[i++])); 285 } 286 if (size == 1 && (data[0] == '0')) { 287 *hash = MixHashcode(0, IS_INTEGER); 288 return true; 289 } 290 } else { 291 if (IsDecimalDigitChar(data[0])) { 292 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor 293 uint32_t i = 1; 294 do { 295 if (i == size) { 296 // compute mix hash 297 if (num <= MAX_INTEGER_HASH_NUMBER) { 298 *hash = MixHashcode(num, IS_INTEGER); 299 return true; 300 } 301 return false; 302 } 303 } while (ComputeIntegerHash(&num, data[i++])); 304 } 305 } 306 return false; 307 } 308 309 // not change this data structure. 310 // if string is not flat, this func has low efficiency. 311 uint32_t PUBLIC_API ComputeHashcode() const; 312 std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const; 313 uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const; 314 315 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress); 316 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length); 317 318 template<bool verify = true> 319 uint16_t At(int32_t index) const; 320 321 // require is LineString 322 void WriteData(uint32_t index, uint16_t src); 323 324 // can change left and right data structure 325 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right); 326 327 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 328 const JSHandle<EcmaString>& right, uint32_t offset); 329 330 // Check that two spans are equal. Should have the same length. 331 /* static */ 332 template<typename T, typename T1> StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)333 static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2) 334 { 335 ASSERT(str1.Size() <= str2.Size()); 336 size_t size = str1.Size(); 337 if (!std::is_same_v<T, T1>) { 338 for (size_t i = 0; i < size; i++) { 339 auto left = static_cast<uint16_t>(str1[i]); 340 auto right = static_cast<uint16_t>(str2[i]); 341 if (left != right) { 342 return false; 343 } 344 } 345 return true; 346 } 347 348 return !memcmp(str1.data(), str2.data(), size * sizeof(T)); 349 } 350 351 // Converts utf8Data to utf16 and compare it with given utf16_data. 352 static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, 353 uint32_t utf16Len); 354 // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence. 355 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2); 356 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 357 static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, 358 const JSHandle<EcmaString> &str2); 359 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 360 static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2); 361 // Two strings have the same type of utf encoding format. 362 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2); 363 static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2); 364 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 365 // not change str1 data structure. 366 // if str1 is not flat, this func has low efficiency. 367 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 368 bool canBeCompress); 369 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 370 // not change str1 data structure. 371 // if str1 is not flat, this func has low efficiency. 372 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len); 373 374 // can change receiver and search data structure 375 static int32_t IndexOf(const EcmaVM *vm, 376 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 377 378 // can change receiver and search data structure 379 static int32_t LastIndexOf(const EcmaVM *vm, 380 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 381 382 inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const 383 { 384 if (maxLength == 0) { 385 return 1; // maxLength was -1 at napi 386 } 387 size_t length = GetLength(); 388 if (length > maxLength) { 389 return 0; 390 } 391 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 392 buf[maxLength - 1] = '\0'; 393 // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8 394 return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1; // add place for zero in the end 395 } 396 397 // It allows user to copy into buffer even if maxLength < length 398 inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const 399 { 400 if (maxLength == 0) { 401 return 1; // maxLength was -1 at napi 402 } 403 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 404 buf[maxLength - 1] = '\0'; 405 return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1; 406 } 407 CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)408 size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const 409 { 410 if (IsUtf16()) { 411 CVector<uint16_t> tmpBuf; 412 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 413 if (length > bufLength) { 414 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) { 415 LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength"; 416 UNREACHABLE(); 417 } 418 return bufLength; 419 } 420 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 421 LOG_FULL(FATAL) << "memcpy_s failed"; 422 UNREACHABLE(); 423 } 424 return length; 425 } 426 CVector<uint8_t> tmpBuf; 427 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf); 428 if (length > bufLength) { 429 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength); 430 } 431 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength); 432 } 433 434 // It allows user to copy into buffer even if maxLength < length WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)435 inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const 436 { 437 if (bufLength == 0) { 438 return 0; 439 } 440 // Returns a number representing a valid backrest length. 441 return CopyDataToUtf16(buf, targetLength, bufLength); 442 } 443 WriteOneByte(uint8_t * buf,size_t maxLength)444 size_t WriteOneByte(uint8_t *buf, size_t maxLength) const 445 { 446 if (maxLength == 0) { 447 return 0; 448 } 449 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 450 buf[maxLength - 1] = '\0'; 451 uint32_t length = GetLength(); 452 if (!IsUtf16()) { 453 CVector<uint8_t> tmpBuf; 454 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 455 if (length > maxLength) { 456 length = maxLength; 457 } 458 if (memcpy_s(buf, maxLength, data, length) != EOK) { 459 LOG_FULL(FATAL) << "memcpy_s failed when write one byte"; 460 UNREACHABLE(); 461 } 462 return length; 463 } 464 465 CVector<uint16_t> tmpBuf; 466 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 467 if (length > maxLength) { 468 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength); 469 } 470 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength); 471 } 472 473 size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength, 474 bool modify = true, bool isWriteBuffer = false) const 475 { 476 uint32_t len = GetLength(); 477 if (start + length > len) { 478 return 0; 479 } 480 if (!IsUtf16()) { 481 if (length > std::numeric_limits<size_t>::max() / 2 - 1) { // 2: half 482 LOG_FULL(FATAL) << " length is higher than half of size_t::max"; 483 UNREACHABLE(); 484 } 485 CVector<uint8_t> tmpBuf; 486 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start; 487 // Only copy maxLength number of chars into buffer if length > maxLength 488 auto dataLen = std::min(length, maxLength); 489 std::copy(data, data + dataLen, buf); 490 return dataLen; 491 } 492 CVector<uint16_t> tmpBuf; 493 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 494 if (length > maxLength) { 495 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, 496 modify, isWriteBuffer); 497 } 498 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, 499 modify, isWriteBuffer); 500 } 501 CopyDataUtf16(uint16_t * buf,uint32_t maxLength)502 inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const 503 { 504 uint32_t length = GetLength(); 505 if (length > maxLength) { 506 return 0; 507 } 508 if (IsUtf16()) { 509 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 510 CVector<uint16_t> tmpBuf; 511 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 512 if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 513 LOG_FULL(FATAL) << "memcpy_s failed"; 514 UNREACHABLE(); 515 } 516 return length; 517 } 518 CVector<uint8_t> tmpBuf; 519 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 520 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength); 521 } 522 523 std::u16string ToU16String(uint32_t len = 0); 524 ToOneByteDataForced()525 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 526 { 527 uint8_t *buf = nullptr; 528 auto length = GetLength(); 529 if (IsUtf16()) { 530 auto size = length * sizeof(uint16_t); 531 buf = new uint8_t[size](); 532 CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length); 533 } else { 534 buf = new uint8_t[length + 1](); 535 CopyDataUtf8(buf, length + 1); 536 } 537 return std::unique_ptr<uint8_t[]>(buf); 538 } 539 540 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false) 541 { 542 Span<const uint8_t> str; 543 uint32_t strLen = GetLength(); 544 if (UNLIKELY(IsUtf16())) { 545 CVector<uint16_t> tmpBuf; 546 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 547 ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0); 548 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1; 549 buf.reserve(len); 550 len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8); 551 str = Span<const uint8_t>(buf.data(), len); 552 } else { 553 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 554 str = Span<const uint8_t>(data, strLen); 555 } 556 return str; 557 } 558 559 Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true) 560 { 561 Span<const uint8_t> str; 562 uint32_t strLen = GetLength(); 563 if (UNLIKELY(IsUtf16())) { 564 CVector<uint16_t> tmpBuf; 565 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 566 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1; 567 buf.reserve(len); 568 len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify); 569 str = Span<const uint8_t>(buf.data(), len); 570 } else { 571 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 572 str = Span<const uint8_t>(data, strLen); 573 } 574 return str; 575 } 576 577 inline Span<const uint8_t> FastToUtf8Span() const; 578 TryToGetInteger(uint32_t * result)579 bool TryToGetInteger(uint32_t *result) 580 { 581 if (!IsInteger()) { 582 return false; 583 } 584 ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE); 585 *result = GetIntegerCode(); 586 return true; 587 } 588 589 // using integer number set into hash TryToSetIntegerHash(int32_t num)590 inline bool TryToSetIntegerHash(int32_t num) 591 { 592 uint32_t hashcode = GetMixHashcode(); 593 if (hashcode == 0 && GetLength() != 0) { 594 SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER); 595 return true; 596 } 597 return false; 598 } 599 600 void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 601 602 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); 603 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); 604 static bool CanBeCompressed(const EcmaString *string); 605 606 bool PUBLIC_API ToElementIndex(uint32_t *index); 607 608 bool ToInt(int32_t *index, bool *negative); 609 610 bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data); 611 612 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index); 613 614 template<bool isLower> 615 static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src); 616 617 template<bool isLower> 618 static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 619 620 template<typename T> 621 static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode); 622 623 static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM); 624 625 // single char copy for loop 626 template<typename DstType, typename SrcType> CopyChars(DstType * dst,SrcType * src,uint32_t count)627 static void CopyChars(DstType *dst, SrcType *src, uint32_t count) 628 { 629 Span<SrcType> srcSp(src, count); 630 Span<DstType> dstSp(dst, count); 631 for (uint32_t i = 0; i < count; i++) { 632 dstSp[i] = srcSp[i]; 633 } 634 } 635 636 // memory block copy 637 template<typename T> 638 static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count); 639 640 template<typename T> ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)641 static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed) 642 { 643 uint32_t hash = hashSeed; 644 Span<const T> sp(data, size); 645 for (auto c : sp) { 646 constexpr size_t SHIFT = 5; 647 hash = (hash << SHIFT) - hash + c; 648 } 649 return hash; 650 } 651 IsASCIICharacter(uint16_t data)652 static bool IsASCIICharacter(uint16_t data) 653 { 654 if (data == 0) { 655 return false; 656 } 657 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 658 return data <= base::utf_helper::UTF8_1B_MAX; 659 } 660 661 template<typename T1, typename T2> 662 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max); 663 664 template<typename T1, typename T2> 665 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos); 666 667 bool IsFlat() const; 668 IsLineString()669 bool IsLineString() const 670 { 671 return GetClass()->IsLineString(); 672 } IsConstantString()673 bool IsConstantString() const 674 { 675 return GetClass()->IsConstantString(); 676 } IsSlicedString()677 bool IsSlicedString() const 678 { 679 return GetClass()->IsSlicedString(); 680 } IsTreeString()681 bool IsTreeString() const 682 { 683 return GetClass()->IsTreeString(); 684 } NotTreeString()685 bool NotTreeString() const 686 { 687 return !IsTreeString(); 688 } IsLineOrConstantString()689 bool IsLineOrConstantString() const 690 { 691 auto hclass = GetClass(); 692 return hclass->IsLineString() || hclass->IsConstantString(); 693 } 694 GetStringType()695 JSType GetStringType() const 696 { 697 JSType type = GetClass()->GetObjectType(); 698 ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST); 699 return type; 700 } 701 702 template <typename Char> 703 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength); 704 705 template <typename Char> 706 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos); 707 708 static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf); 709 710 static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src); 711 712 static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf); 713 714 static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src); 715 716 // string must be not flat 717 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type); 718 719 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 720 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 721 722 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 723 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 724 725 static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string); 726 727 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 728 729 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 730 731 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 732 733 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 734 735 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 736 737 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 738 739 static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, 740 bool toLower, uint32_t startIndex = 0); 741 }; 742 743 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data 744 class LineEcmaString : public EcmaString { 745 public: 746 static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; 747 static constexpr uint32_t INIT_LENGTH_TIMES = 4; 748 // DATA_OFFSET: the string data stored after the string header. 749 // Data can be stored in utf8 or utf16 form according to compressed bit. 750 static constexpr size_t DATA_OFFSET = EcmaString::SIZE; // DATA_OFFSET equal to Empty String size 751 752 CAST_CHECK(LineEcmaString, IsLineString); 753 754 DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength()); 755 Cast(EcmaString * str)756 static LineEcmaString *Cast(EcmaString *str) 757 { 758 return static_cast<LineEcmaString *>(str); 759 } 760 Cast(const EcmaString * str)761 static LineEcmaString *Cast(const EcmaString *str) 762 { 763 return LineEcmaString::Cast(const_cast<EcmaString *>(str)); 764 } 765 ComputeSizeUtf8(uint32_t utf8Len)766 static size_t ComputeSizeUtf8(uint32_t utf8Len) 767 { 768 return DATA_OFFSET + utf8Len; 769 } 770 ComputeSizeUtf16(uint32_t utf16Len)771 static size_t ComputeSizeUtf16(uint32_t utf16Len) 772 { 773 return DATA_OFFSET + utf16Len * sizeof(uint16_t); 774 } 775 ObjectSize(EcmaString * str)776 static size_t ObjectSize(EcmaString *str) 777 { 778 uint32_t length = str->GetLength(); 779 return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length); 780 } 781 DataSize(EcmaString * str)782 static size_t DataSize(EcmaString *str) 783 { 784 uint32_t length = str->GetLength(); 785 return str->IsUtf16() ? length * sizeof(uint16_t) : length; 786 } 787 GetPointerLength()788 size_t GetPointerLength() 789 { 790 size_t byteSize = DataSize(this); 791 return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType); 792 } 793 GetData()794 uint16_t *GetData() const 795 { 796 return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET); 797 } 798 799 template<bool verify = true> Get(int32_t index)800 uint16_t Get(int32_t index) const 801 { 802 int32_t length = static_cast<int32_t>(GetLength()); 803 if (verify) { 804 if ((index < 0) || (index >= length)) { 805 return 0; 806 } 807 } 808 if (!IsUtf16()) { 809 Span<const uint8_t> sp(GetDataUtf8(), length); 810 return sp[index]; 811 } 812 Span<const uint16_t> sp(GetDataUtf16(), length); 813 return sp[index]; 814 } 815 Set(uint32_t index,uint16_t src)816 void Set(uint32_t index, uint16_t src) 817 { 818 ASSERT(index < GetLength()); 819 if (IsUtf8()) { 820 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 821 *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src); 822 } else { 823 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 824 *(GetData() + index) = src; 825 } 826 } 827 }; 828 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0); 829 830 class ConstantString : public EcmaString { 831 public: 832 static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE; 833 // ConstantData is the pointer of const string in the pandafile. 834 // String in pandafile is encoded by the utf8 format. 835 // EntityId is normally the uint32_t index in the pandafile. 836 // When the pandafile is to be removed, EntityId will become -1. 837 // The real string data will be reloacted into bytearray and stored in RelocatedData. 838 // ConstantData will also point at data of bytearray data. 839 ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 840 ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET); 841 ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET); 842 DEFINE_ALIGN_SIZE(LAST_OFFSET); 843 844 CAST_CHECK(ConstantString, IsConstantString); 845 DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 846 Cast(EcmaString * str)847 static ConstantString *Cast(EcmaString *str) 848 { 849 return static_cast<ConstantString *>(str); 850 } 851 Cast(const EcmaString * str)852 static ConstantString *Cast(const EcmaString *str) 853 { 854 return ConstantString::Cast(const_cast<EcmaString *>(str)); 855 } 856 ObjectSize()857 static size_t ObjectSize() 858 { 859 return ConstantString::SIZE; 860 } 861 GetEntityIdU32()862 uint32_t GetEntityIdU32() const 863 { 864 ASSERT(GetEntityId() >= 0); 865 return static_cast<uint32_t>(GetEntityId()); 866 } 867 868 template<bool verify = true> Get(int32_t index)869 uint16_t Get(int32_t index) const 870 { 871 int32_t length = static_cast<int32_t>(GetLength()); 872 if (verify) { 873 if ((index < 0) || (index >= length)) { 874 return 0; 875 } 876 } 877 ASSERT(IsUtf8()); 878 Span<const uint8_t> sp(GetConstantData(), length); 879 return sp[index]; 880 } 881 }; 882 883 // The substrings of another string use SlicedString to describe. 884 class SlicedString : public EcmaString { 885 public: 886 static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13; 887 static constexpr size_t PARENT_OFFSET = EcmaString::SIZE; 888 ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET); 889 ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG); 890 ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE); 891 892 DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET); 893 894 CAST_CHECK(SlicedString, IsSlicedString); 895 private: 896 friend class EcmaString; Cast(EcmaString * str)897 static SlicedString *Cast(EcmaString *str) 898 { 899 return static_cast<SlicedString *>(str); 900 } 901 Cast(const EcmaString * str)902 static SlicedString *Cast(const EcmaString *str) 903 { 904 return SlicedString::Cast(const_cast<EcmaString *>(str)); 905 } 906 ObjectSize()907 static size_t ObjectSize() 908 { 909 return SlicedString::SIZE; 910 } 911 912 // Minimum length for a sliced string 913 template<bool verify = true> Get(int32_t index)914 uint16_t Get(int32_t index) const 915 { 916 int32_t length = static_cast<int32_t>(GetLength()); 917 if (verify) { 918 if ((index < 0) || (index >= length)) { 919 return 0; 920 } 921 } 922 EcmaString *parent = EcmaString::Cast(GetParent()); 923 if (parent->IsLineString()) { 924 if (parent->IsUtf8()) { 925 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length); 926 return sp[index]; 927 } 928 Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length); 929 return sp[index]; 930 } 931 Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length); 932 return sp[index]; 933 } 934 }; 935 936 class TreeEcmaString : public EcmaString { 937 public: 938 // Minimum length for a tree string 939 static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13; 940 941 static constexpr size_t FIRST_OFFSET = EcmaString::SIZE; 942 ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET); 943 ACCESSORS(Second, SECOND_OFFSET, SIZE); 944 945 DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE); 946 947 CAST_CHECK(TreeEcmaString, IsTreeString); 948 Cast(EcmaString * str)949 static TreeEcmaString *Cast(EcmaString *str) 950 { 951 return static_cast<TreeEcmaString *>(str); 952 } 953 Cast(const EcmaString * str)954 static TreeEcmaString *Cast(const EcmaString *str) 955 { 956 return TreeEcmaString::Cast(const_cast<EcmaString *>(str)); 957 } 958 IsFlat()959 bool IsFlat() const 960 { 961 auto strSecond = EcmaString::Cast(GetSecond()); 962 return strSecond->GetLength() == 0; 963 } 964 965 template<bool verify = true> Get(int32_t index)966 uint16_t Get(int32_t index) const 967 { 968 int32_t length = static_cast<int32_t>(GetLength()); 969 if (verify) { 970 if ((index < 0) || (index >= length)) { 971 return 0; 972 } 973 } 974 975 if (IsFlat()) { 976 EcmaString *first = EcmaString::Cast(GetFirst()); 977 return first->At<verify>(index); 978 } 979 EcmaString *string = const_cast<TreeEcmaString *>(this); 980 while (true) { 981 if (string->IsTreeString()) { 982 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 983 if (static_cast<int32_t>(first->GetLength()) > index) { 984 string = first; 985 } else { 986 index -= static_cast<int32_t>(first->GetLength()); 987 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond()); 988 } 989 } else { 990 return string->At<verify>(index); 991 } 992 } 993 UNREACHABLE(); 994 } 995 }; 996 997 class FlatStringInfo { 998 public: FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)999 FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string), 1000 startIndex_(startIndex), 1001 length_(length) {} IsUtf8()1002 bool IsUtf8() const 1003 { 1004 return string_->IsUtf8(); 1005 } 1006 IsUtf16()1007 bool IsUtf16() const 1008 { 1009 return string_->IsUtf16(); 1010 } 1011 GetString()1012 EcmaString *GetString() const 1013 { 1014 return string_; 1015 } 1016 SetString(EcmaString * string)1017 void SetString(EcmaString *string) 1018 { 1019 string_ = string; 1020 } 1021 GetStartIndex()1022 uint32_t GetStartIndex() const 1023 { 1024 return startIndex_; 1025 } 1026 SetStartIndex(uint32_t index)1027 void SetStartIndex(uint32_t index) 1028 { 1029 startIndex_ = index; 1030 } 1031 GetLength()1032 uint32_t GetLength() const 1033 { 1034 return length_; 1035 } 1036 1037 const uint8_t *GetDataUtf8() const; 1038 const uint16_t *GetDataUtf16() const; 1039 uint8_t *GetDataUtf8Writable() const; 1040 uint16_t *GetDataUtf16Writable() const; 1041 std::u16string ToU16String(uint32_t len = 0); 1042 private: 1043 EcmaString *string_ {nullptr}; 1044 uint32_t startIndex_ {0}; 1045 uint32_t length_ {0}; 1046 }; 1047 1048 // if you want to use functions of EcmaString, please not use directly, 1049 // and use functions of EcmaStringAccessor alternatively. 1050 // eg: EcmaString *str = ***; str->GetLength() -----> EcmaStringAccessor(str).GetLength() 1051 class PUBLIC_API EcmaStringAccessor { 1052 public: EcmaStringAccessor(EcmaString * string)1053 explicit inline EcmaStringAccessor(EcmaString *string) 1054 { 1055 ASSERT(string != nullptr); 1056 string_ = string; 1057 } 1058 1059 explicit EcmaStringAccessor(TaggedObject *obj); 1060 1061 explicit EcmaStringAccessor(JSTaggedValue value); 1062 1063 explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle); 1064 1065 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 1066 CreateEmptyString(const EcmaVM * vm)1067 static EcmaString *CreateEmptyString(const EcmaVM *vm) 1068 { 1069 return EcmaString::CreateEmptyString(vm); 1070 } 1071 1072 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress, 1073 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 1074 uint32_t idOffset = 0) 1075 { 1076 return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset); 1077 } 1078 1079 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1080 uint32_t offset, uint32_t utf8Len, 1081 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1082 { 1083 return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type); 1084 } 1085 1086 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length, 1087 bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0) 1088 { 1089 return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset); 1090 } 1091 1092 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 1093 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1094 { 1095 return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type); 1096 } 1097 1098 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 1099 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1100 { 1101 return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type); 1102 } 1103 1104 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle, 1105 const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1106 { 1107 return EcmaString::Concat(vm, str1Handle, str2Handle, type); 1108 } 1109 CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1110 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 1111 uint32_t length, bool compressed) 1112 { 1113 return EcmaString::CopyStringToOldSpace(vm, original, length, compressed); 1114 } 1115 1116 // can change src data structure FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1117 static EcmaString *FastSubString(const EcmaVM *vm, 1118 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1119 { 1120 return EcmaString::FastSubString(vm, src, start, length); 1121 } 1122 1123 // get GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1124 static EcmaString *GetSubString(const EcmaVM *vm, 1125 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1126 { 1127 return EcmaString::GetSubString(vm, src, start, length); 1128 } 1129 IsUtf8()1130 bool IsUtf8() const 1131 { 1132 return string_->IsUtf8(); 1133 } 1134 IsUtf16()1135 bool IsUtf16() const 1136 { 1137 return string_->IsUtf16(); 1138 } 1139 GetLength()1140 uint32_t GetLength() const 1141 { 1142 return string_->GetLength(); 1143 } 1144 1145 // require is LineString 1146 inline size_t GetUtf8Length(bool isGetBufferSize = false) const; 1147 ObjectSize()1148 size_t ObjectSize() const 1149 { 1150 if (string_->IsLineString()) { 1151 return LineEcmaString::ObjectSize(string_); 1152 } if (string_->IsConstantString()) { 1153 return ConstantString::ObjectSize(); 1154 } else { 1155 return TreeEcmaString::SIZE; 1156 } 1157 } 1158 1159 // For TreeString, the calculation result is size of LineString correspondingly. GetFlatStringSize()1160 size_t GetFlatStringSize() const 1161 { 1162 if (string_->IsConstantString()) { 1163 return ConstantString::ObjectSize(); 1164 } 1165 return LineEcmaString::ObjectSize(string_); 1166 } 1167 IsInternString()1168 bool IsInternString() const 1169 { 1170 return string_->IsInternString(); 1171 } 1172 SetInternString()1173 void SetInternString() 1174 { 1175 string_->SetIsInternString(); 1176 } 1177 ClearInternString()1178 void ClearInternString() 1179 { 1180 string_->ClearInternStringFlag(); 1181 } 1182 1183 // require is LineString 1184 // It's Utf8 format, but without 0 in the end. 1185 inline const uint8_t *GetDataUtf8(); 1186 1187 // require is LineString 1188 inline const uint16_t *GetDataUtf16(); 1189 1190 // not change string data structure. 1191 // if string is not flat, this func has low efficiency. 1192 std::u16string ToU16String(uint32_t len = 0) 1193 { 1194 return string_->ToU16String(len); 1195 } 1196 1197 // not change string data structure. 1198 // if string is not flat, this func has low efficiency. ToOneByteDataForced()1199 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 1200 { 1201 return string_->ToOneByteDataForced(); 1202 } 1203 1204 // not change string data structure. 1205 // if string is not flat, this func has low efficiency. ToUtf8Span(CVector<uint8_t> & buf)1206 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf) 1207 { 1208 return string_->ToUtf8Span(buf); 1209 } 1210 1211 // only for string is flat and using UTF8 encoding 1212 inline Span<const uint8_t> FastToUtf8Span(); 1213 1214 // Using string's hash to figure out whether the string can be converted to integer TryToGetInteger(uint32_t * result)1215 inline bool TryToGetInteger(uint32_t *result) 1216 { 1217 return string_->TryToGetInteger(result); 1218 } 1219 TryToSetIntegerHash(int32_t num)1220 inline bool TryToSetIntegerHash(int32_t num) 1221 { 1222 return string_->TryToSetIntegerHash(num); 1223 } 1224 1225 // not change string data structure. 1226 // if string is not flat, this func has low efficiency. 1227 std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1228 1229 // this function convert for Utf8 1230 CString Utf8ConvertToString(); 1231 1232 std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1233 // not change string data structure. 1234 // if string is not flat, this func has low efficiency. 1235 CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false); 1236 1237 // not change string data structure. 1238 // if string is not flat, this func has low efficiency. 1239 uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false) 1240 { 1241 return string_->WriteUtf8(buf, maxLength, isWriteBuffer); 1242 } 1243 WriteToUtf16(uint16_t * buf,uint32_t bufLength)1244 uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength) 1245 { 1246 return string_->WriteUtf16(buf, GetLength(), bufLength); 1247 } 1248 WriteToOneByte(uint8_t * buf,uint32_t maxLength)1249 uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength) 1250 { 1251 return string_->WriteOneByte(buf, maxLength); 1252 } 1253 1254 // not change string data structure. 1255 // if string is not flat, this func has low efficiency. WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1256 uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const 1257 { 1258 return string_->CopyDataUtf16(buf, maxLength); 1259 } 1260 1261 template <typename Char> WriteToFlatWithPos(EcmaString * src,Char * buf,uint32_t length,uint32_t pos)1262 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos) 1263 { 1264 src->WriteToFlatWithPos(src, buf, length, pos); 1265 } 1266 1267 template <typename Char> WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)1268 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength) 1269 { 1270 src->WriteToFlat(src, buf, maxLength); 1271 } 1272 1273 // require dst is LineString 1274 // not change src data structure. 1275 // if src is not flat, this func has low efficiency. 1276 inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 1277 1278 // not change src data structure. 1279 // if src is not flat, this func has low efficiency. 1280 template<bool verify = true> Get(uint32_t index)1281 uint16_t Get(uint32_t index) const 1282 { 1283 return string_->At<verify>(index); 1284 } 1285 1286 // require string is LineString. Set(uint32_t index,uint16_t src)1287 void Set(uint32_t index, uint16_t src) 1288 { 1289 return string_->WriteData(index, src); 1290 } 1291 1292 // not change src data structure. 1293 // if src is not flat, this func has low efficiency. GetHashcode()1294 uint32_t GetHashcode() 1295 { 1296 return string_->GetHashcode(); 1297 } 1298 GetRawHashcode()1299 uint32_t GetRawHashcode() 1300 { 1301 return string_->GetRawHashcode(); 1302 } 1303 1304 // not change src data structure. 1305 // if src is not flat, this func has low efficiency. ComputeRawHashcode()1306 std::pair<uint32_t, bool> ComputeRawHashcode() 1307 { 1308 return string_->ComputeRawHashcode(); 1309 } 1310 ComputeHashcode()1311 uint32_t ComputeHashcode() 1312 { 1313 return string_->ComputeHashcode(); 1314 } 1315 ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1316 uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger) 1317 { 1318 return string_->ComputeHashcode(rawHashSeed, isInteger); 1319 } 1320 ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1321 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) 1322 { 1323 return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress); 1324 } 1325 ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1326 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) 1327 { 1328 return EcmaString::ComputeHashcodeUtf16(utf16Data, length); 1329 } 1330 1331 // can change receiver and search data structure 1332 static int32_t IndexOf(const EcmaVM *vm, 1333 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1334 { 1335 return EcmaString::IndexOf(vm, receiver, search, pos); 1336 } 1337 1338 // can change receiver and search data structure 1339 static int32_t LastIndexOf(const EcmaVM *vm, 1340 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1341 { 1342 return EcmaString::LastIndexOf(vm, receiver, search, pos); 1343 } 1344 1345 // can change receiver and search data structure Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1346 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right) 1347 { 1348 return EcmaString::Compare(vm, left, right); 1349 } 1350 1351 1352 // can change receiver and search data structure 1353 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 1354 const JSHandle<EcmaString>& right, uint32_t offset = 0) 1355 { 1356 return EcmaString::IsSubStringAt(vm, left, right, offset); 1357 } 1358 1359 // can change str1 and str2 data structure StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1360 static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2) 1361 { 1362 return EcmaString::StringsAreEqual(vm, str1, str2); 1363 } 1364 1365 // not change str1 and str2 data structure. 1366 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqual(EcmaString * str1,EcmaString * str2)1367 static bool StringsAreEqual(EcmaString *str1, EcmaString *str2) 1368 { 1369 return EcmaString::StringsAreEqual(str1, str2); 1370 } 1371 1372 // not change str1 and str2 data structure. 1373 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1374 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2) 1375 { 1376 return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2); 1377 } 1378 1379 // not change str1 data structure. 1380 // if str1 is not flat, this func has low efficiency. StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1381 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 1382 bool canBeCompress) 1383 { 1384 return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress); 1385 } 1386 1387 // not change str1 data structure. 1388 // if str1 is not flat, this func has low efficiency. StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1389 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) 1390 { 1391 return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len); 1392 } 1393 1394 // require str1 and str2 are LineString. 1395 // not change string data structure. 1396 // if string is not flat, this func has low efficiency. EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1397 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) 1398 { 1399 return string_->EqualToSplicedString(str1, str2); 1400 } 1401 CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1402 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) 1403 { 1404 return EcmaString::CanBeCompressed(utf8Data, utf8Len); 1405 } 1406 CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1407 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 1408 { 1409 return EcmaString::CanBeCompressed(utf16Data, utf16Len); 1410 } 1411 1412 // require string is LineString CanBeCompressed(const EcmaString * string)1413 static bool CanBeCompressed(const EcmaString *string) 1414 { 1415 return EcmaString::CanBeCompressed(string); 1416 } 1417 1418 // not change string data structure. 1419 // if string is not flat, this func has low efficiency. ToElementIndex(uint32_t * index)1420 bool ToElementIndex(uint32_t *index) 1421 { 1422 return string_->ToElementIndex(index); 1423 } 1424 1425 // not change string data structure. 1426 // if string is not flat, this func has low efficiency. ToInt(int32_t * index,bool * negative)1427 bool ToInt(int32_t *index, bool *negative) 1428 { 1429 return string_->ToInt(index, negative); 1430 } 1431 1432 // not change string data structure. 1433 // if string is not flat, this func has low efficiency. ToTypedArrayIndex(uint32_t * index)1434 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index) 1435 { 1436 return string_->ToTypedArrayIndex(index); 1437 } 1438 ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1439 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1440 { 1441 return EcmaString::ToLower(vm, src); 1442 } 1443 TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1444 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1445 { 1446 return EcmaString::TryToLower(vm, src); 1447 } 1448 TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1449 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1450 { 1451 return EcmaString::TryToUpper(vm, src); 1452 } 1453 ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1454 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1455 { 1456 return EcmaString::ToUpper(vm, src); 1457 } 1458 ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1459 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1460 { 1461 return EcmaString::ToLocaleLower(vm, src, locale); 1462 } 1463 ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1464 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1465 { 1466 return EcmaString::ToLocaleUpper(vm, src, locale); 1467 } 1468 1469 static EcmaString *Trim(const JSThread *thread, 1470 const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM) 1471 { 1472 return EcmaString::Trim(thread, src, mode); 1473 } 1474 IsASCIICharacter(uint16_t data)1475 static bool IsASCIICharacter(uint16_t data) 1476 { 1477 if (data == 0) { 1478 return false; 1479 } 1480 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 1481 return data <= base::utf_helper::UTF8_1B_MAX; 1482 } 1483 IsFlat()1484 bool IsFlat() const 1485 { 1486 return string_->IsFlat(); 1487 } 1488 IsLineString()1489 bool IsLineString() const 1490 { 1491 return string_->IsLineString(); 1492 } 1493 IsConstantString()1494 bool IsConstantString() const 1495 { 1496 return string_->IsConstantString(); 1497 } 1498 IsSlicedString()1499 bool IsSlicedString() const 1500 { 1501 return string_->IsSlicedString(); 1502 } 1503 IsLineOrConstantString()1504 bool IsLineOrConstantString() const 1505 { 1506 return string_->IsLineOrConstantString(); 1507 } 1508 GetStringType()1509 JSType GetStringType() const 1510 { 1511 return string_->GetStringType(); 1512 } 1513 IsTreeString()1514 bool IsTreeString() const 1515 { 1516 return string_->IsTreeString(); 1517 } 1518 NotTreeString()1519 bool NotTreeString() const 1520 { 1521 return string_->NotTreeString(); 1522 } 1523 1524 // the returned string may be a linestring, constantstring, or slicestring!! 1525 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1526 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1527 { 1528 return EcmaString::Flatten(vm, string, type); 1529 } 1530 1531 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1532 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1533 { 1534 return EcmaString::FlattenAllString(vm, string, type); 1535 } 1536 1537 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1538 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1539 { 1540 return EcmaString::SlowFlatten(vm, string, type); 1541 } 1542 FlattenNoGC(const EcmaVM * vm,EcmaString * string)1543 static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string) 1544 { 1545 return EcmaString::FlattenNoGC(vm, string); 1546 } 1547 GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1548 static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf) 1549 { 1550 return EcmaString::GetUtf8DataFlat(src, buf); 1551 } 1552 GetNonTreeUtf8Data(const EcmaString * src)1553 static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src) 1554 { 1555 return EcmaString::GetNonTreeUtf8Data(src); 1556 } 1557 GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1558 static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf) 1559 { 1560 return EcmaString::GetUtf16DataFlat(src, buf); 1561 } 1562 GetNonTreeUtf16Data(const EcmaString * src)1563 static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src) 1564 { 1565 return EcmaString::GetNonTreeUtf16Data(src); 1566 } 1567 1568 static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str); 1569 1570 private: 1571 EcmaString *string_ {nullptr}; 1572 }; 1573 } // namespace ecmascript 1574 } // namespace panda 1575 #endif // ECMASCRIPT_STRING_H 1576