1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_STRING_H 17 #define ECMASCRIPT_STRING_H 18 19 #include <cstddef> 20 #include <cstdint> 21 #include <cstring> 22 23 #include "ecmascript/base/utf_helper.h" 24 #include "ecmascript/common.h" 25 #include "ecmascript/ecma_macros.h" 26 #include "ecmascript/js_hclass.h" 27 #include "ecmascript/js_tagged_value.h" 28 #include "ecmascript/mem/barriers.h" 29 #include "ecmascript/mem/space.h" 30 #include "ecmascript/mem/tagged_object.h" 31 32 #include "libpandabase/macros.h" 33 #include "securec.h" 34 #include "unicode/locid.h" 35 36 namespace panda { 37 namespace test { 38 class EcmaStringEqualsTest; 39 } 40 namespace ecmascript { 41 template<typename T> 42 class JSHandle; 43 class JSPandaFile; 44 class EcmaVM; 45 class LineEcmaString; 46 class ConstantString; 47 class TreeEcmaString; 48 class SlicedString; 49 class FlatStringInfo; 50 51 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 52 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length) \ 53 if ((length) >= MAX_STRING_LENGTH) { \ 54 THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr); \ 55 } 56 57 class EcmaString : public TaggedObject { 58 /* Mix Hash Code: -- { 0 | [31 bits raw hash code] } computed through string 59 \ { 1 | [31 bits integer numbers] } fastpath for string to number 60 */ 61 public: 62 CAST_CHECK(EcmaString, IsString); 63 64 static 
constexpr uint32_t IS_INTEGER_MASK = 1U << 31; 65 static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1; 66 static constexpr uint32_t STRING_INTERN_BIT = 0x2; 67 static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning 68 static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U; 69 static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF; 70 static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9; 71 72 static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize(); 73 // In last bit of mix_length we store if this string is compressed or not. 74 ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET) 75 // In last bit of mix_hash we store if this string is small-integer number or not. 76 ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE) 77 78 enum CompressedStatus { 79 STRING_COMPRESSED, 80 STRING_UNCOMPRESSED, 81 }; 82 83 enum IsIntegerStatus { 84 NOT_INTEGER = 0, 85 IS_INTEGER, 86 }; 87 88 enum TrimMode : uint8_t { 89 TRIM, 90 TRIM_START, 91 TRIM_END, 92 }; 93 94 enum ConcatOptStatus { 95 BEGIN_STRING_ADD = 1, 96 IN_STRING_ADD, 97 CONFIRMED_IN_STRING_ADD, 98 END_STRING_ADD, 99 INVALID_STRING_ADD, 100 HAS_BACKING_STORE, 101 }; 102 103 private: 104 friend class EcmaStringAccessor; 105 friend class LineEcmaString; 106 friend class ConstantString; 107 friend class TreeEcmaString; 108 friend class SlicedString; 109 friend class FlatStringInfo; 110 friend class NameDictionary; 111 friend class panda::test::EcmaStringEqualsTest; 112 113 static EcmaString *CreateEmptyString(const EcmaVM *vm); 114 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 115 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 116 uint32_t idOffset = 0); 117 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 118 uint32_t offset, 
uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 119 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 120 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 121 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 122 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 123 static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 124 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 125 static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed); 126 static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm, 127 size_t length, bool compressed, MemSpaceType type); 128 static EcmaString *CreateTreeString(const EcmaVM *vm, 129 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed); 130 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, 131 size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0); 132 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, 133 const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 134 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 135 uint32_t length, bool compressed); 136 static EcmaString *FastSubString(const EcmaVM *vm, 137 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 138 static EcmaString *GetSlicedString(const EcmaVM *vm, 139 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 140 static EcmaString *GetSubString(const EcmaVM *vm, 141 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 142 // require src is LineString 143 // not change src data structure 144 static inline 
EcmaString *FastSubUtf8String(const EcmaVM *vm, 145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 146 // require src is LineString 147 // not change src data structure 148 static inline EcmaString *FastSubUtf16String(const EcmaVM *vm, 149 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 150 inline void TrimLineString(const JSThread *thread, uint32_t newLength); IsUtf8()151 inline bool IsUtf8() const 152 { 153 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED; 154 } 155 IsUtf16()156 inline bool IsUtf16() const 157 { 158 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED; 159 } 160 IsInteger()161 inline bool IsInteger() 162 { 163 return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK; 164 } 165 166 // require is LineString 167 inline uint16_t *GetData() const; 168 inline const uint8_t *GetDataUtf8() const; 169 inline const uint16_t *GetDataUtf16() const; 170 171 // require is LineString 172 inline uint8_t *GetDataUtf8Writable(); 173 inline uint16_t *GetDataUtf16Writable(); 174 GetLength()175 inline uint32_t GetLength() const 176 { 177 return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT; 178 } 179 180 inline void SetLength(uint32_t length, bool compressed = false) 181 { 182 ASSERT(length < MAX_STRING_LENGTH); 183 // Use 0u for compressed/utf8 expression 184 SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED)); 185 } 186 GetRawHashcode()187 inline uint32_t GetRawHashcode() const 188 { 189 return GetMixHashcode() & (~IS_INTEGER_MASK); 190 } 191 MixHashcode(uint32_t hashcode,bool isInteger)192 static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger) 193 { 194 return isInteger ? 
(hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK)); 195 } 196 197 inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false) 198 { 199 // Use 0u for not integer string's expression 200 SetMixHashcode(MixHashcode(hashcode, isInteger)); 201 } 202 203 inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const; 204 SetIsInternString()205 inline void SetIsInternString() 206 { 207 SetMixLength(GetMixLength() | STRING_INTERN_BIT); 208 } 209 IsInternString()210 inline bool IsInternString() const 211 { 212 return (GetMixLength() & STRING_INTERN_BIT) != 0; 213 } 214 ClearInternStringFlag()215 inline void ClearInternStringFlag() 216 { 217 SetMixLength(GetMixLength() & ~STRING_INTERN_BIT); 218 } 219 TryGetHashCode(uint32_t * hash)220 inline bool TryGetHashCode(uint32_t *hash) 221 { 222 uint32_t hashcode = GetMixHashcode(); 223 if (hashcode == 0 && GetLength() != 0) { 224 return false; 225 } 226 *hash = hashcode; 227 return true; 228 } 229 GetIntegerCode()230 inline uint32_t GetIntegerCode() 231 { 232 ASSERT(GetMixHashcode() & IS_INTEGER_MASK); 233 return GetRawHashcode(); 234 } 235 236 // not change this data structure. 237 // if string is not flat, this func has low efficiency. GetHashcode()238 uint32_t PUBLIC_API GetHashcode() 239 { 240 uint32_t hashcode = GetMixHashcode(); 241 // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0. 
242 if (hashcode == 0 && GetLength() != 0) { 243 hashcode = ComputeHashcode(); 244 SetMixHashcode(hashcode); 245 } 246 return hashcode; 247 } 248 249 template<typename T> IsDecimalDigitChar(const T c)250 inline static bool IsDecimalDigitChar(const T c) 251 { 252 return (c >= '0' && c <= '9'); 253 } 254 ComputeIntegerHash(uint32_t * num,uint8_t c)255 static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c) 256 { 257 if (!IsDecimalDigitChar(c)) { 258 return false; 259 } 260 int charDate = c - '0'; 261 *num = (*num) * 10 + charDate; // 10: decimal factor 262 return true; 263 } 264 265 bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const; 266 267 template<typename T> HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)268 static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed) 269 { 270 ASSERT(size >= 0); 271 if (hashSeed == 0) { 272 if (IsDecimalDigitChar(data[0]) && data[0] != '0') { 273 uint32_t num = data[0] - '0'; 274 uint32_t i = 1; 275 do { 276 if (i == size) { 277 // compute mix hash 278 if (num <= MAX_INTEGER_HASH_NUMBER) { 279 *hash = MixHashcode(num, IS_INTEGER); 280 return true; 281 } 282 return false; 283 } 284 } while (ComputeIntegerHash(&num, data[i++])); 285 } 286 if (size == 1 && (data[0] == '0')) { 287 *hash = MixHashcode(0, IS_INTEGER); 288 return true; 289 } 290 } else { 291 if (IsDecimalDigitChar(data[0])) { 292 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor 293 uint32_t i = 1; 294 do { 295 if (i == size) { 296 // compute mix hash 297 if (num <= MAX_INTEGER_HASH_NUMBER) { 298 *hash = MixHashcode(num, IS_INTEGER); 299 return true; 300 } 301 return false; 302 } 303 } while (ComputeIntegerHash(&num, data[i++])); 304 } 305 } 306 return false; 307 } 308 309 // not change this data structure. 310 // if string is not flat, this func has low efficiency. 
311 uint32_t PUBLIC_API ComputeHashcode() const; 312 std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const; 313 uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const; 314 315 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress); 316 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length); 317 318 template<bool verify = true> 319 uint16_t At(int32_t index) const; 320 321 // require is LineString 322 void WriteData(uint32_t index, uint16_t src); 323 324 // can change left and right data structure 325 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right); 326 327 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 328 const JSHandle<EcmaString>& right, uint32_t offset); 329 330 // Check that two spans are equal. Should have the same length. 331 /* static */ 332 template<typename T, typename T1> StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)333 static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2) 334 { 335 ASSERT(str1.Size() <= str2.Size()); 336 size_t size = str1.Size(); 337 if (!std::is_same_v<T, T1>) { 338 for (size_t i = 0; i < size; i++) { 339 auto left = static_cast<uint16_t>(str1[i]); 340 auto right = static_cast<uint16_t>(str2[i]); 341 if (left != right) { 342 return false; 343 } 344 } 345 return true; 346 } 347 348 return !memcmp(str1.data(), str2.data(), size * sizeof(T)); 349 } 350 351 // Converts utf8Data to utf16 and compare it with given utf16_data. 352 static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, 353 uint32_t utf16Len); 354 // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence. 355 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2); 356 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 
static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1,
        const JSHandle<EcmaString> &str2);
    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
    static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
    // NOTE(review): the name suggests these overloads compare two strings stored in different
    // utf encodings, while the previous comment here said "the same type of utf encoding
    // format" - confirm against the implementation.
    static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2);
    static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2);
    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
    // not change str1 data structure.
    // if str1 is not flat, this func has low efficiency.
    static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
                                       bool canBeCompress);
    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
    // not change str1 data structure.
    // if str1 is not flat, this func has low efficiency.
372 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len); 373 374 // can change receiver and search data structure 375 static int32_t IndexOf(const EcmaVM *vm, 376 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 377 378 // can change receiver and search data structure 379 static int32_t LastIndexOf(const EcmaVM *vm, 380 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 381 382 inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const 383 { 384 if (maxLength == 0) { 385 return 1; // maxLength was -1 at napi 386 } 387 size_t length = GetLength(); 388 if (length > maxLength) { 389 return 0; 390 } 391 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 392 buf[maxLength - 1] = '\0'; 393 // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8 394 return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1; // add place for zero in the end 395 } 396 397 // It allows user to copy into buffer even if maxLength < length 398 inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const 399 { 400 if (maxLength == 0) { 401 return 1; // maxLength was -1 at napi 402 } 403 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 404 buf[maxLength - 1] = '\0'; 405 return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1; 406 } 407 CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)408 size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const 409 { 410 if (IsUtf16()) { 411 CVector<uint16_t> tmpBuf; 412 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 413 if (length > bufLength) { 414 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) { 415 LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength"; 416 
UNREACHABLE(); 417 } 418 return bufLength; 419 } 420 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 421 LOG_FULL(FATAL) << "memcpy_s failed"; 422 UNREACHABLE(); 423 } 424 return length; 425 } 426 CVector<uint8_t> tmpBuf; 427 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf); 428 if (length > bufLength) { 429 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength); 430 } 431 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength); 432 } 433 434 // It allows user to copy into buffer even if maxLength < length WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)435 inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const 436 { 437 if (bufLength == 0) { 438 return 0; 439 } 440 // Returns a number representing a valid backrest length. 441 return CopyDataToUtf16(buf, targetLength, bufLength); 442 } 443 WriteOneByte(uint8_t * buf,size_t maxLength)444 size_t WriteOneByte(uint8_t *buf, size_t maxLength) const 445 { 446 if (maxLength == 0) { 447 return 0; 448 } 449 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 450 buf[maxLength - 1] = '\0'; 451 uint32_t length = GetLength(); 452 if (!IsUtf16()) { 453 CVector<uint8_t> tmpBuf; 454 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 455 if (length > maxLength) { 456 length = maxLength; 457 } 458 if (memcpy_s(buf, maxLength, data, length) != EOK) { 459 LOG_FULL(FATAL) << "memcpy_s failed when write one byte"; 460 UNREACHABLE(); 461 } 462 return length; 463 } 464 465 CVector<uint16_t> tmpBuf; 466 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 467 if (length > maxLength) { 468 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength); 469 } 470 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength); 471 } 472 473 size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t 
maxLength, 474 bool modify = true, bool isWriteBuffer = false) const 475 { 476 uint32_t len = GetLength(); 477 if (start + length > len) { 478 return 0; 479 } 480 if (!IsUtf16()) { 481 if (length > std::numeric_limits<size_t>::max() / 2 - 1) { // 2: half 482 LOG_FULL(FATAL) << " length is higher than half of size_t::max"; 483 UNREACHABLE(); 484 } 485 CVector<uint8_t> tmpBuf; 486 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start; 487 // Only copy maxLength number of chars into buffer if length > maxLength 488 auto dataLen = std::min(length, maxLength); 489 std::copy(data, data + dataLen, buf); 490 return dataLen; 491 } 492 CVector<uint16_t> tmpBuf; 493 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 494 if (length > maxLength) { 495 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, 496 modify, isWriteBuffer); 497 } 498 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, 499 modify, isWriteBuffer); 500 } 501 CopyDataUtf16(uint16_t * buf,uint32_t maxLength)502 inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const 503 { 504 uint32_t length = GetLength(); 505 if (length > maxLength) { 506 return 0; 507 } 508 if (IsUtf16()) { 509 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 510 CVector<uint16_t> tmpBuf; 511 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 512 if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 513 LOG_FULL(FATAL) << "memcpy_s failed"; 514 UNREACHABLE(); 515 } 516 return length; 517 } 518 CVector<uint8_t> tmpBuf; 519 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 520 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength); 521 } 522 523 std::u16string ToU16String(uint32_t len = 0); 524 ToOneByteDataForced()525 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 526 { 527 uint8_t *buf = nullptr; 528 auto length = GetLength(); 529 if (IsUtf16()) { 530 auto 
size = length * sizeof(uint16_t); 531 buf = new uint8_t[size](); 532 CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length); 533 } else { 534 buf = new uint8_t[length + 1](); 535 CopyDataUtf8(buf, length + 1); 536 } 537 return std::unique_ptr<uint8_t[]>(buf); 538 } 539 540 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false) 541 { 542 Span<const uint8_t> str; 543 uint32_t strLen = GetLength(); 544 if (UNLIKELY(IsUtf16())) { 545 CVector<uint16_t> tmpBuf; 546 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 547 ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0); 548 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1; 549 buf.reserve(len); 550 len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8); 551 str = Span<const uint8_t>(buf.data(), len); 552 } else { 553 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 554 str = Span<const uint8_t>(data, strLen); 555 } 556 return str; 557 } 558 559 Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true) 560 { 561 Span<const uint8_t> str; 562 uint32_t strLen = GetLength(); 563 if (UNLIKELY(IsUtf16())) { 564 CVector<uint16_t> tmpBuf; 565 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 566 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1; 567 buf.reserve(len); 568 len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify); 569 str = Span<const uint8_t>(buf.data(), len); 570 } else { 571 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 572 str = Span<const uint8_t>(data, strLen); 573 } 574 return str; 575 } 576 577 inline Span<const uint8_t> FastToUtf8Span() const; 578 TryToGetInteger(uint32_t * result)579 bool TryToGetInteger(uint32_t *result) 580 { 581 if (!IsInteger()) { 582 return false; 583 } 584 ASSERT(GetLength() <= 
MAX_CACHED_INTEGER_SIZE); 585 *result = GetIntegerCode(); 586 return true; 587 } 588 589 // using integer number set into hash TryToSetIntegerHash(int32_t num)590 inline bool TryToSetIntegerHash(int32_t num) 591 { 592 uint32_t hashcode = GetMixHashcode(); 593 if (hashcode == 0 && GetLength() != 0) { 594 SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER); 595 return true; 596 } 597 return false; 598 } 599 600 void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 601 602 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); 603 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); 604 static bool CanBeCompressed(const EcmaString *string); 605 606 bool PUBLIC_API ToElementIndex(uint32_t *index); 607 608 bool ToInt(int32_t *index, bool *negative); 609 610 bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data); 611 612 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index); 613 614 template<bool isLower> 615 static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src); 616 617 template<bool isLower> 618 static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 619 620 template<typename T> 621 static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode); 622 623 static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM); 624 625 // single char copy for loop 626 template<typename DstType, typename SrcType> CopyChars(DstType * dst,SrcType * src,uint32_t count)627 static void CopyChars(DstType *dst, SrcType *src, uint32_t count) 628 { 629 Span<SrcType> srcSp(src, count); 630 Span<DstType> dstSp(dst, count); 631 for (uint32_t i = 0; i < count; i++) { 632 dstSp[i] = srcSp[i]; 633 } 634 } 635 636 // memory block copy 637 template<typename T> 638 static bool MemCopyChars(Span<T> &dst, size_t 
dstMax, Span<const T> &src, size_t count); 639 640 template<typename T> ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)641 static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed) 642 { 643 uint32_t hash = hashSeed; 644 Span<const T> sp(data, size); 645 for (auto c : sp) { 646 constexpr size_t SHIFT = 5; 647 hash = (hash << SHIFT) - hash + c; 648 } 649 return hash; 650 } 651 IsASCIICharacter(uint16_t data)652 static bool IsASCIICharacter(uint16_t data) 653 { 654 if (data == 0) { 655 return false; 656 } 657 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 658 return data <= base::utf_helper::UTF8_1B_MAX; 659 } 660 661 template<typename T1, typename T2> 662 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max); 663 664 template<typename T1, typename T2> 665 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos); 666 667 bool IsFlat() const; 668 IsLineString()669 bool IsLineString() const 670 { 671 return GetClass()->IsLineString(); 672 } IsConstantString()673 bool IsConstantString() const 674 { 675 return GetClass()->IsConstantString(); 676 } IsSlicedString()677 bool IsSlicedString() const 678 { 679 return GetClass()->IsSlicedString(); 680 } IsTreeString()681 bool IsTreeString() const 682 { 683 return GetClass()->IsTreeString(); 684 } NotTreeString()685 bool NotTreeString() const 686 { 687 return !IsTreeString(); 688 } IsLineOrConstantString()689 bool IsLineOrConstantString() const 690 { 691 auto hclass = GetClass(); 692 return hclass->IsLineString() || hclass->IsConstantString(); 693 } 694 GetStringType()695 JSType GetStringType() const 696 { 697 JSType type = GetClass()->GetObjectType(); 698 ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST); 699 return type; 700 } 701 702 template <typename Char> 703 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength); 704 705 template <typename Char> 706 
static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos); 707 708 static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf); 709 710 static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src); 711 712 static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf); 713 714 static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src); 715 716 // string must be not flat 717 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type); 718 719 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 720 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 721 722 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 723 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 724 725 static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string); 726 727 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 728 729 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 730 731 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 732 733 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 734 735 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 736 737 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 738 739 static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, 740 bool toLower, uint32_t startIndex = 0); 741 }; 742 743 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data 744 class LineEcmaString : public EcmaString { 745 public: 746 static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; 
747 static constexpr uint32_t INIT_LENGTH_TIMES = 4; 748 // DATA_OFFSET: the string data stored after the string header. 749 // Data can be stored in utf8 or utf16 form according to compressed bit. 750 static constexpr size_t DATA_OFFSET = EcmaString::SIZE; // DATA_OFFSET equal to Empty String size 751 752 CAST_CHECK(LineEcmaString, IsLineString); 753 754 DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength()); 755 Cast(EcmaString * str)756 static LineEcmaString *Cast(EcmaString *str) 757 { 758 return static_cast<LineEcmaString *>(str); 759 } 760 Cast(const EcmaString * str)761 static LineEcmaString *Cast(const EcmaString *str) 762 { 763 return LineEcmaString::Cast(const_cast<EcmaString *>(str)); 764 } 765 ComputeSizeUtf8(uint32_t utf8Len)766 static size_t ComputeSizeUtf8(uint32_t utf8Len) 767 { 768 return DATA_OFFSET + utf8Len; 769 } 770 ComputeSizeUtf16(uint32_t utf16Len)771 static size_t ComputeSizeUtf16(uint32_t utf16Len) 772 { 773 return DATA_OFFSET + utf16Len * sizeof(uint16_t); 774 } 775 ObjectSize(EcmaString * str)776 static size_t ObjectSize(EcmaString *str) 777 { 778 uint32_t length = str->GetLength(); 779 return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length); 780 } 781 DataSize(EcmaString * str)782 static size_t DataSize(EcmaString *str) 783 { 784 uint32_t length = str->GetLength(); 785 return str->IsUtf16() ? 
length * sizeof(uint16_t) : length; 786 } 787 GetPointerLength()788 size_t GetPointerLength() 789 { 790 size_t byteSize = DataSize(this); 791 return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType); 792 } 793 GetData()794 uint16_t *GetData() const 795 { 796 return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET); 797 } 798 799 template<bool verify = true> Get(int32_t index)800 uint16_t Get(int32_t index) const 801 { 802 int32_t length = static_cast<int32_t>(GetLength()); 803 if (verify) { 804 if ((index < 0) || (index >= length)) { 805 return 0; 806 } 807 } 808 if (!IsUtf16()) { 809 Span<const uint8_t> sp(GetDataUtf8(), length); 810 return sp[index]; 811 } 812 Span<const uint16_t> sp(GetDataUtf16(), length); 813 return sp[index]; 814 } 815 Set(uint32_t index,uint16_t src)816 void Set(uint32_t index, uint16_t src) 817 { 818 ASSERT(index < GetLength()); 819 if (IsUtf8()) { 820 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 821 *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src); 822 } else { 823 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 824 *(GetData() + index) = src; 825 } 826 } 827 }; 828 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0); 829 830 class ConstantString : public EcmaString { 831 public: 832 static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE; 833 // ConstantData is the pointer of const string in the pandafile. 834 // String in pandafile is encoded by the utf8 format. 835 // EntityId is normally the uint32_t index in the pandafile. 836 // When the pandafile is to be removed, EntityId will become -1. 837 // The real string data will be reloacted into bytearray and stored in RelocatedData. 838 // ConstantData will also point at data of bytearray data. 
839 ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 840 ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET); 841 ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET); 842 DEFINE_ALIGN_SIZE(LAST_OFFSET); 843 844 CAST_CHECK(ConstantString, IsConstantString); 845 DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 846 Cast(EcmaString * str)847 static ConstantString *Cast(EcmaString *str) 848 { 849 return static_cast<ConstantString *>(str); 850 } 851 Cast(const EcmaString * str)852 static ConstantString *Cast(const EcmaString *str) 853 { 854 return ConstantString::Cast(const_cast<EcmaString *>(str)); 855 } 856 ObjectSize()857 static size_t ObjectSize() 858 { 859 return ConstantString::SIZE; 860 } 861 GetEntityIdU32()862 uint32_t GetEntityIdU32() const 863 { 864 ASSERT(GetEntityId() >= 0); 865 return static_cast<uint32_t>(GetEntityId()); 866 } 867 868 template<bool verify = true> Get(int32_t index)869 uint16_t Get(int32_t index) const 870 { 871 int32_t length = static_cast<int32_t>(GetLength()); 872 if (verify) { 873 if ((index < 0) || (index >= length)) { 874 return 0; 875 } 876 } 877 ASSERT(IsUtf8()); 878 Span<const uint8_t> sp(GetConstantData(), length); 879 return sp[index]; 880 } 881 }; 882 883 // The substrings of another string use SlicedString to describe. 
884 class SlicedString : public EcmaString { 885 public: 886 static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13; 887 static constexpr size_t PARENT_OFFSET = EcmaString::SIZE; 888 ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET); 889 ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG); 890 ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE); 891 892 DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET); 893 894 CAST_CHECK(SlicedString, IsSlicedString); 895 private: 896 friend class EcmaString; Cast(EcmaString * str)897 static SlicedString *Cast(EcmaString *str) 898 { 899 return static_cast<SlicedString *>(str); 900 } 901 Cast(const EcmaString * str)902 static SlicedString *Cast(const EcmaString *str) 903 { 904 return SlicedString::Cast(const_cast<EcmaString *>(str)); 905 } 906 ObjectSize()907 static size_t ObjectSize() 908 { 909 return SlicedString::SIZE; 910 } 911 912 // Minimum length for a sliced string 913 template<bool verify = true> Get(int32_t index)914 uint16_t Get(int32_t index) const 915 { 916 int32_t length = static_cast<int32_t>(GetLength()); 917 if (verify) { 918 if ((index < 0) || (index >= length)) { 919 return 0; 920 } 921 } 922 EcmaString *parent = EcmaString::Cast(GetParent()); 923 if (parent->IsLineString()) { 924 if (parent->IsUtf8()) { 925 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length); 926 return sp[index]; 927 } 928 Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length); 929 return sp[index]; 930 } 931 Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length); 932 return sp[index]; 933 } 934 }; 935 936 class TreeEcmaString : public EcmaString { 937 public: 938 // Minimum length for a tree string 939 static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13; 940 941 static constexpr size_t FIRST_OFFSET = EcmaString::SIZE; 942 ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET); 943 
ACCESSORS(Second, SECOND_OFFSET, SIZE); 944 945 DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE); 946 947 CAST_CHECK(TreeEcmaString, IsTreeString); 948 Cast(EcmaString * str)949 static TreeEcmaString *Cast(EcmaString *str) 950 { 951 return static_cast<TreeEcmaString *>(str); 952 } 953 Cast(const EcmaString * str)954 static TreeEcmaString *Cast(const EcmaString *str) 955 { 956 return TreeEcmaString::Cast(const_cast<EcmaString *>(str)); 957 } 958 IsFlat()959 bool IsFlat() const 960 { 961 auto strSecond = EcmaString::Cast(GetSecond()); 962 return strSecond->GetLength() == 0; 963 } 964 965 template<bool verify = true> Get(int32_t index)966 uint16_t Get(int32_t index) const 967 { 968 int32_t length = static_cast<int32_t>(GetLength()); 969 if (verify) { 970 if ((index < 0) || (index >= length)) { 971 return 0; 972 } 973 } 974 975 if (IsFlat()) { 976 EcmaString *first = EcmaString::Cast(GetFirst()); 977 return first->At<verify>(index); 978 } 979 EcmaString *string = const_cast<TreeEcmaString *>(this); 980 while (true) { 981 if (string->IsTreeString()) { 982 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 983 if (static_cast<int32_t>(first->GetLength()) > index) { 984 string = first; 985 } else { 986 index -= static_cast<int32_t>(first->GetLength()); 987 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond()); 988 } 989 } else { 990 return string->At<verify>(index); 991 } 992 } 993 UNREACHABLE(); 994 } 995 }; 996 997 // FlatStringInfo holds an EcmaString* instead of a JSHandle. If a GC occurs during its usage period, 998 // it may cause the pointer to become invalid, necessitating the pointer to be reset. 
999 class FlatStringInfo { 1000 public: FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)1001 FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string), 1002 startIndex_(startIndex), 1003 length_(length) {} IsUtf8()1004 bool IsUtf8() const 1005 { 1006 return string_->IsUtf8(); 1007 } 1008 IsUtf16()1009 bool IsUtf16() const 1010 { 1011 return string_->IsUtf16(); 1012 } 1013 GetString()1014 EcmaString *GetString() const 1015 { 1016 return string_; 1017 } 1018 SetString(EcmaString * string)1019 void SetString(EcmaString *string) 1020 { 1021 string_ = string; 1022 } 1023 GetStartIndex()1024 uint32_t GetStartIndex() const 1025 { 1026 return startIndex_; 1027 } 1028 SetStartIndex(uint32_t index)1029 void SetStartIndex(uint32_t index) 1030 { 1031 startIndex_ = index; 1032 } 1033 GetLength()1034 uint32_t GetLength() const 1035 { 1036 return length_; 1037 } 1038 1039 const uint8_t *GetDataUtf8() const; 1040 const uint16_t *GetDataUtf16() const; 1041 uint8_t *GetDataUtf8Writable() const; 1042 uint16_t *GetDataUtf16Writable() const; 1043 std::u16string ToU16String(uint32_t len = 0); 1044 private: 1045 EcmaString *string_ {nullptr}; 1046 uint32_t startIndex_ {0}; 1047 uint32_t length_ {0}; 1048 }; 1049 1050 // if you want to use functions of EcmaString, please not use directly, 1051 // and use functions of EcmaStringAccessor alternatively. 
1052 // eg: EcmaString *str = ***; str->GetLength() -----> EcmaStringAccessor(str).GetLength() 1053 class PUBLIC_API EcmaStringAccessor { 1054 public: EcmaStringAccessor(EcmaString * string)1055 explicit inline EcmaStringAccessor(EcmaString *string) 1056 { 1057 ASSERT(string != nullptr); 1058 string_ = string; 1059 } 1060 1061 explicit EcmaStringAccessor(TaggedObject *obj); 1062 1063 explicit EcmaStringAccessor(JSTaggedValue value); 1064 1065 explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle); 1066 1067 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 1068 CreateEmptyString(const EcmaVM * vm)1069 static EcmaString *CreateEmptyString(const EcmaVM *vm) 1070 { 1071 return EcmaString::CreateEmptyString(vm); 1072 } 1073 1074 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress, 1075 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 1076 uint32_t idOffset = 0) 1077 { 1078 return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset); 1079 } 1080 1081 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1082 uint32_t offset, uint32_t utf8Len, 1083 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1084 { 1085 return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type); 1086 } 1087 1088 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length, 1089 bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0) 1090 { 1091 return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset); 1092 } 1093 1094 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 1095 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1096 { 1097 return 
EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type); 1098 } 1099 1100 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 1101 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1102 { 1103 return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type); 1104 } 1105 1106 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle, 1107 const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1108 { 1109 return EcmaString::Concat(vm, str1Handle, str2Handle, type); 1110 } 1111 CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1112 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 1113 uint32_t length, bool compressed) 1114 { 1115 return EcmaString::CopyStringToOldSpace(vm, original, length, compressed); 1116 } 1117 1118 // can change src data structure FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1119 static EcmaString *FastSubString(const EcmaVM *vm, 1120 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1121 { 1122 return EcmaString::FastSubString(vm, src, start, length); 1123 } 1124 1125 // get GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1126 static EcmaString *GetSubString(const EcmaVM *vm, 1127 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1128 { 1129 return EcmaString::GetSubString(vm, src, start, length); 1130 } 1131 IsUtf8()1132 bool IsUtf8() const 1133 { 1134 return string_->IsUtf8(); 1135 } 1136 IsUtf16()1137 bool IsUtf16() const 1138 { 1139 return string_->IsUtf16(); 1140 } 1141 GetLength()1142 uint32_t GetLength() const 1143 { 1144 return string_->GetLength(); 1145 } 1146 1147 // require is LineString 1148 inline size_t GetUtf8Length(bool isGetBufferSize = 
false) const; 1149 ObjectSize()1150 size_t ObjectSize() const 1151 { 1152 if (string_->IsLineString()) { 1153 return LineEcmaString::ObjectSize(string_); 1154 } if (string_->IsConstantString()) { 1155 return ConstantString::ObjectSize(); 1156 } else { 1157 return TreeEcmaString::SIZE; 1158 } 1159 } 1160 1161 // For TreeString, the calculation result is size of LineString correspondingly. GetFlatStringSize()1162 size_t GetFlatStringSize() const 1163 { 1164 if (string_->IsConstantString()) { 1165 return ConstantString::ObjectSize(); 1166 } 1167 return LineEcmaString::ObjectSize(string_); 1168 } 1169 IsInternString()1170 bool IsInternString() const 1171 { 1172 return string_->IsInternString(); 1173 } 1174 SetInternString()1175 void SetInternString() 1176 { 1177 string_->SetIsInternString(); 1178 } 1179 ClearInternString()1180 void ClearInternString() 1181 { 1182 string_->ClearInternStringFlag(); 1183 } 1184 1185 // require is LineString 1186 // It's Utf8 format, but without 0 in the end. 1187 inline const uint8_t *GetDataUtf8(); 1188 1189 // require is LineString 1190 inline const uint16_t *GetDataUtf16(); 1191 1192 // not change string data structure. 1193 // if string is not flat, this func has low efficiency. 1194 std::u16string ToU16String(uint32_t len = 0) 1195 { 1196 return string_->ToU16String(len); 1197 } 1198 1199 // not change string data structure. 1200 // if string is not flat, this func has low efficiency. ToOneByteDataForced()1201 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 1202 { 1203 return string_->ToOneByteDataForced(); 1204 } 1205 1206 // not change string data structure. 1207 // if string is not flat, this func has low efficiency. 
ToUtf8Span(CVector<uint8_t> & buf)1208 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf) 1209 { 1210 return string_->ToUtf8Span(buf); 1211 } 1212 1213 // only for string is flat and using UTF8 encoding 1214 inline Span<const uint8_t> FastToUtf8Span(); 1215 1216 // Using string's hash to figure out whether the string can be converted to integer TryToGetInteger(uint32_t * result)1217 inline bool TryToGetInteger(uint32_t *result) 1218 { 1219 return string_->TryToGetInteger(result); 1220 } 1221 TryToSetIntegerHash(int32_t num)1222 inline bool TryToSetIntegerHash(int32_t num) 1223 { 1224 return string_->TryToSetIntegerHash(num); 1225 } 1226 1227 // not change string data structure. 1228 // if string is not flat, this func has low efficiency. 1229 std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1230 1231 // this function convert for Utf8 1232 CString Utf8ConvertToString(); 1233 1234 std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1235 // not change string data structure. 1236 // if string is not flat, this func has low efficiency. 1237 CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false); 1238 1239 // not change string data structure. 1240 // if string is not flat, this func has low efficiency. 1241 uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false) 1242 { 1243 return string_->WriteUtf8(buf, maxLength, isWriteBuffer); 1244 } 1245 WriteToUtf16(uint16_t * buf,uint32_t bufLength)1246 uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength) 1247 { 1248 return string_->WriteUtf16(buf, GetLength(), bufLength); 1249 } 1250 WriteToOneByte(uint8_t * buf,uint32_t maxLength)1251 uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength) 1252 { 1253 return string_->WriteOneByte(buf, maxLength); 1254 } 1255 1256 // not change string data structure. 1257 // if string is not flat, this func has low efficiency. 
WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1258 uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const 1259 { 1260 return string_->CopyDataUtf16(buf, maxLength); 1261 } 1262 1263 template <typename Char> WriteToFlatWithPos(EcmaString * src,Char * buf,uint32_t length,uint32_t pos)1264 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos) 1265 { 1266 src->WriteToFlatWithPos(src, buf, length, pos); 1267 } 1268 1269 template <typename Char> WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)1270 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength) 1271 { 1272 src->WriteToFlat(src, buf, maxLength); 1273 } 1274 1275 // require dst is LineString 1276 // not change src data structure. 1277 // if src is not flat, this func has low efficiency. 1278 inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 1279 1280 // not change src data structure. 1281 // if src is not flat, this func has low efficiency. 1282 template<bool verify = true> Get(uint32_t index)1283 uint16_t Get(uint32_t index) const 1284 { 1285 return string_->At<verify>(index); 1286 } 1287 1288 // require string is LineString. Set(uint32_t index,uint16_t src)1289 void Set(uint32_t index, uint16_t src) 1290 { 1291 return string_->WriteData(index, src); 1292 } 1293 1294 // not change src data structure. 1295 // if src is not flat, this func has low efficiency. GetHashcode()1296 uint32_t GetHashcode() 1297 { 1298 return string_->GetHashcode(); 1299 } 1300 GetRawHashcode()1301 uint32_t GetRawHashcode() 1302 { 1303 return string_->GetRawHashcode(); 1304 } 1305 1306 // not change src data structure. 1307 // if src is not flat, this func has low efficiency. 
ComputeRawHashcode()1308 std::pair<uint32_t, bool> ComputeRawHashcode() 1309 { 1310 return string_->ComputeRawHashcode(); 1311 } 1312 ComputeHashcode()1313 uint32_t ComputeHashcode() 1314 { 1315 return string_->ComputeHashcode(); 1316 } 1317 ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1318 uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger) 1319 { 1320 return string_->ComputeHashcode(rawHashSeed, isInteger); 1321 } 1322 ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1323 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) 1324 { 1325 return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress); 1326 } 1327 ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1328 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) 1329 { 1330 return EcmaString::ComputeHashcodeUtf16(utf16Data, length); 1331 } 1332 1333 // can change receiver and search data structure 1334 static int32_t IndexOf(const EcmaVM *vm, 1335 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1336 { 1337 return EcmaString::IndexOf(vm, receiver, search, pos); 1338 } 1339 1340 // can change receiver and search data structure 1341 static int32_t LastIndexOf(const EcmaVM *vm, 1342 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1343 { 1344 return EcmaString::LastIndexOf(vm, receiver, search, pos); 1345 } 1346 1347 // can change receiver and search data structure Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1348 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right) 1349 { 1350 return EcmaString::Compare(vm, left, right); 1351 } 1352 1353 1354 // can change receiver and search data structure 1355 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 1356 const 
JSHandle<EcmaString>& right, uint32_t offset = 0) 1357 { 1358 return EcmaString::IsSubStringAt(vm, left, right, offset); 1359 } 1360 1361 // can change str1 and str2 data structure StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1362 static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2) 1363 { 1364 return EcmaString::StringsAreEqual(vm, str1, str2); 1365 } 1366 1367 // not change str1 and str2 data structure. 1368 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqual(EcmaString * str1,EcmaString * str2)1369 static bool StringsAreEqual(EcmaString *str1, EcmaString *str2) 1370 { 1371 return EcmaString::StringsAreEqual(str1, str2); 1372 } 1373 1374 // not change str1 and str2 data structure. 1375 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1376 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2) 1377 { 1378 return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2); 1379 } 1380 1381 // not change str1 data structure. 1382 // if str1 is not flat, this func has low efficiency. StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1383 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 1384 bool canBeCompress) 1385 { 1386 return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress); 1387 } 1388 1389 // not change str1 data structure. 1390 // if str1 is not flat, this func has low efficiency. 
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1391 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) 1392 { 1393 return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len); 1394 } 1395 1396 // require str1 and str2 are LineString. 1397 // not change string data structure. 1398 // if string is not flat, this func has low efficiency. EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1399 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) 1400 { 1401 return string_->EqualToSplicedString(str1, str2); 1402 } 1403 CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1404 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) 1405 { 1406 return EcmaString::CanBeCompressed(utf8Data, utf8Len); 1407 } 1408 CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1409 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 1410 { 1411 return EcmaString::CanBeCompressed(utf16Data, utf16Len); 1412 } 1413 1414 // require string is LineString CanBeCompressed(const EcmaString * string)1415 static bool CanBeCompressed(const EcmaString *string) 1416 { 1417 return EcmaString::CanBeCompressed(string); 1418 } 1419 1420 // not change string data structure. 1421 // if string is not flat, this func has low efficiency. ToElementIndex(uint32_t * index)1422 bool ToElementIndex(uint32_t *index) 1423 { 1424 return string_->ToElementIndex(index); 1425 } 1426 1427 // not change string data structure. 1428 // if string is not flat, this func has low efficiency. ToInt(int32_t * index,bool * negative)1429 bool ToInt(int32_t *index, bool *negative) 1430 { 1431 return string_->ToInt(index, negative); 1432 } 1433 1434 // not change string data structure. 1435 // if string is not flat, this func has low efficiency. 
ToTypedArrayIndex(uint32_t * index)1436 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index) 1437 { 1438 return string_->ToTypedArrayIndex(index); 1439 } 1440 ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1441 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1442 { 1443 return EcmaString::ToLower(vm, src); 1444 } 1445 TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1446 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1447 { 1448 return EcmaString::TryToLower(vm, src); 1449 } 1450 TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1451 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1452 { 1453 return EcmaString::TryToUpper(vm, src); 1454 } 1455 ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1456 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1457 { 1458 return EcmaString::ToUpper(vm, src); 1459 } 1460 ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1461 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1462 { 1463 return EcmaString::ToLocaleLower(vm, src, locale); 1464 } 1465 ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1466 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1467 { 1468 return EcmaString::ToLocaleUpper(vm, src, locale); 1469 } 1470 1471 static EcmaString *Trim(const JSThread *thread, 1472 const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM) 1473 { 1474 return EcmaString::Trim(thread, src, mode); 1475 } 1476 IsASCIICharacter(uint16_t data)1477 static bool IsASCIICharacter(uint16_t data) 1478 { 1479 if (data == 0) { 1480 return false; 1481 } 1482 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 1483 return data 
<= base::utf_helper::UTF8_1B_MAX; 1484 } 1485 IsFlat()1486 bool IsFlat() const 1487 { 1488 return string_->IsFlat(); 1489 } 1490 IsLineString()1491 bool IsLineString() const 1492 { 1493 return string_->IsLineString(); 1494 } 1495 IsConstantString()1496 bool IsConstantString() const 1497 { 1498 return string_->IsConstantString(); 1499 } 1500 IsSlicedString()1501 bool IsSlicedString() const 1502 { 1503 return string_->IsSlicedString(); 1504 } 1505 IsLineOrConstantString()1506 bool IsLineOrConstantString() const 1507 { 1508 return string_->IsLineOrConstantString(); 1509 } 1510 GetStringType()1511 JSType GetStringType() const 1512 { 1513 return string_->GetStringType(); 1514 } 1515 IsTreeString()1516 bool IsTreeString() const 1517 { 1518 return string_->IsTreeString(); 1519 } 1520 NotTreeString()1521 bool NotTreeString() const 1522 { 1523 return string_->NotTreeString(); 1524 } 1525 1526 // the returned string may be a linestring, constantstring, or slicestring!! 1527 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1528 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1529 { 1530 return EcmaString::Flatten(vm, string, type); 1531 } 1532 1533 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1534 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1535 { 1536 return EcmaString::FlattenAllString(vm, string, type); 1537 } 1538 1539 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1540 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1541 { 1542 return EcmaString::SlowFlatten(vm, string, type); 1543 } 1544 FlattenNoGC(const EcmaVM * vm,EcmaString * string)1545 static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string) 1546 { 1547 return EcmaString::FlattenNoGC(vm, string); 1548 } 1549 GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1550 static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf) 
1551 { 1552 return EcmaString::GetUtf8DataFlat(src, buf); 1553 } 1554 GetNonTreeUtf8Data(const EcmaString * src)1555 static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src) 1556 { 1557 return EcmaString::GetNonTreeUtf8Data(src); 1558 } 1559 GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1560 static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf) 1561 { 1562 return EcmaString::GetUtf16DataFlat(src, buf); 1563 } 1564 GetNonTreeUtf16Data(const EcmaString * src)1565 static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src) 1566 { 1567 return EcmaString::GetNonTreeUtf16Data(src); 1568 } 1569 1570 static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str); 1571 1572 private: 1573 EcmaString *string_ {nullptr}; 1574 }; 1575 } // namespace ecmascript 1576 } // namespace panda 1577 #endif // ECMASCRIPT_STRING_H 1578