/* * Copyright (c) 2021 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "ecmascript/ecma_string-inl.h" #include "ecmascript/js_symbol.h" #include "ecmascript/mem/c_containers.h" namespace panda::ecmascript { EcmaString *EcmaString::Concat(const EcmaVM *vm, const JSHandle &left, const JSHandle &right, MemSpaceType type) { // allocator may trig gc and move src, need to hold it EcmaString *strLeft = *left; EcmaString *strRight = *right; uint32_t leftLength = strLeft->GetLength(); uint32_t rightLength = strRight->GetLength(); uint32_t newLength = leftLength + rightLength; if (newLength == 0) { return vm->GetFactory()->GetEmptyString().GetObject(); } if (leftLength == 0) { if (type == MemSpaceType::OLD_SPACE) { Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast(*right)); if (objectRegion->InYoungSpace()) { return CopyStringToOldSpace(vm, right, rightLength, strRight->IsUtf8()); } } return strRight; } if (rightLength == 0) { if (type == MemSpaceType::OLD_SPACE) { Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast(*left)); if (objectRegion->InYoungSpace()) { return CopyStringToOldSpace(vm, left, leftLength, strLeft->IsUtf8()); } } return strLeft; } // if the result string is small, make a LineString bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8()); if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) { ASSERT(strLeft->IsLineOrConstantString()); ASSERT(strRight->IsLineOrConstantString()); auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type); // retrieve strings after gc strLeft = *left; strRight = *right; if (compressed) { // copy left part Span sp(newString->GetDataUtf8Writable(), newLength); Span srcLeft(strLeft->GetDataUtf8(), leftLength); EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength); // copy right part sp = sp.SubSpan(leftLength); Span srcRight(strRight->GetDataUtf8(), rightLength); EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength); } else { // copy left part Span sp(newString->GetDataUtf16Writable(), newLength); if (strLeft->IsUtf8()) { EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength); } else { Span srcLeft(strLeft->GetDataUtf16(), leftLength); EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U); } // copy right part sp = sp.SubSpan(leftLength); if (strRight->IsUtf8()) { EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength); } else { Span srcRight(strRight->GetDataUtf16(), rightLength); EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U); } } ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!"); return newString; } return CreateTreeString(vm, left, right, newLength, compressed); } /* static */ EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle &original, uint32_t length, bool compressed) { if (original->IsConstantString()) { return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE); } JSHandle newString(vm->GetJSThread(), CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE)); auto strOrigin = FlattenAllString(vm, original); if (compressed) { // copy Span sp(newString->GetDataUtf8Writable(), length); Span srcSp(strOrigin.GetDataUtf8(), length); EcmaString::MemCopyChars(sp, length, srcSp, length); } else { // copy left part Span sp(newString->GetDataUtf16Writable(), length); if (strOrigin.IsUtf8()) { EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length); } else { Span srcSp(strOrigin.GetDataUtf16(), length); EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U); } } ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!"); return *newString; } /* static */ EcmaString *EcmaString::FastSubString(const EcmaVM *vm, const JSHandle &src, uint32_t start, uint32_t length) { ASSERT((start + length) <= src->GetLength()); if (length == 0) { return *vm->GetFactory()->GetEmptyString(); } if (start == 0 && length == src->GetLength()) { return *src; } if (src->IsUtf8()) { return FastSubUtf8String(vm, src, start, length); } return FastSubUtf16String(vm, src, start, length); } /* static */ EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm, const JSHandle &src, uint32_t start, uint32_t length) { ASSERT((start + length) <= src->GetLength()); JSHandle slicedString(vm->GetJSThread(), CreateSlicedString(vm)); FlatStringInfo srcFlat = FlattenAllString(vm, src); slicedString->SetLength(length, srcFlat.GetString()->IsUtf8()); slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString())); slicedString->SetStartIndex(start + srcFlat.GetStartIndex()); return *slicedString; } /* static */ EcmaString *EcmaString::GetSubString(const EcmaVM *vm, const JSHandle &src, uint32_t start, uint32_t length) { ASSERT((start + length) <= src->GetLength()); if (static_cast(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) { if (start == 0 && length == src->GetLength()) { return *src; } if (src->IsUtf16()) { FlatStringInfo srcFlat = FlattenAllString(vm, src); bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length); if (canBeCompressed) { JSHandle string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed)); srcFlat = FlattenAllString(vm, src); CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length); return *string; } } return GetSlicedString(vm, src, start, length); } return FastSubString(vm, src, start, length); } void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length) { ASSERT(IsLineString() && !IsConstantString()); if (IsUtf8()) { ASSERT(src->IsUtf8()); CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) { LOG_FULL(FATAL) << "memcpy_s failed"; UNREACHABLE(); } } else if (src->IsUtf8()) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf); Span to(GetDataUtf16Writable() + start, length); Span from(data, length); for (uint32_t i = 0; i < length; i++) { to[i] = from[i]; } } else { CVector buf; const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (length != 0 && memcpy_s(GetDataUtf16Writable() + start, destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { LOG_FULL(FATAL) << "memcpy_s failed"; UNREACHABLE(); } } } template int32_t CompareStringSpan(Span &lhsSp, Span &rhsSp, int32_t count) { for (int32_t i = 0; i < count; ++i) { auto left = static_cast(lhsSp[i]); auto right = static_cast(rhsSp[i]); if (left != right) { return left - right; } } return 0; } int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle &left, const JSHandle &right) { if (*left == *right) { return 0; } FlatStringInfo lhs = FlattenAllString(vm, left); JSHandle string(vm->GetJSThread(), lhs.GetString()); FlatStringInfo rhs = FlattenAllString(vm, right); lhs.SetString(*string); int32_t lhsCount = static_cast(lhs.GetLength()); int32_t rhsCount = static_cast(rhs.GetLength()); int32_t countDiff = lhsCount - rhsCount; int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount; if (!lhs.IsUtf16() && !rhs.IsUtf16()) { Span lhsSp(lhs.GetDataUtf8(), lhsCount); Span rhsSp(rhs.GetDataUtf8(), rhsCount); int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); if (charDiff != 0) { return charDiff; } } else if (!lhs.IsUtf16()) { Span lhsSp(lhs.GetDataUtf8(), lhsCount); Span rhsSp(rhs.GetDataUtf16(), rhsCount); int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); if (charDiff != 0) { return charDiff; } } else if (!rhs.IsUtf16()) { Span lhsSp(lhs.GetDataUtf16(), rhsCount); Span rhsSp(rhs.GetDataUtf8(), lhsCount); int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); if (charDiff != 0) { return charDiff; } } else { Span lhsSp(lhs.GetDataUtf16(), lhsCount); Span rhsSp(rhs.GetDataUtf16(), rhsCount); int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); if (charDiff != 0) { return charDiff; } } return countDiff; } /* static */ template int32_t EcmaString::IndexOf(Span &lhsSp, Span &rhsSp, int32_t pos, int32_t max) { ASSERT(rhsSp.size() > 0); auto first = static_cast(rhsSp[0]); for (int32_t i = pos; i <= max; i++) { if (static_cast(lhsSp[i]) != first) { i++; while (i <= max && static_cast(lhsSp[i]) != first) { i++; } } /* Found first character, now look at the rest of rhsSp */ if (i <= max) { int j = i + 1; int end = j + static_cast(rhsSp.size()) - 1; for (int k = 1; j < end && static_cast(lhsSp[j]) == static_cast(rhsSp[k]); j++, k++) { } if (j == end) { /* Found whole string. */ return i; } } } return -1; } template int32_t EcmaString::LastIndexOf(Span &lhsSp, Span &rhsSp, int32_t pos) { int rhsSize = static_cast(rhsSp.size()); ASSERT(rhsSize > 0); auto first = rhsSp[0]; for (int32_t i = pos; i >= 0; i--) { if (lhsSp[i] != first) { continue; } /* Found first character, now look at the rest of rhsSp */ int j = 1; while (j < rhsSize) { if (rhsSp[j] != lhsSp[i + j]) { break; } j++; } if (j == rhsSize) { return i; } } return -1; } int32_t EcmaString::IndexOf(const EcmaVM *vm, const JSHandle &receiver, const JSHandle &search, int pos) { EcmaString *lhstring = *receiver; EcmaString *rhstring = *search; if (lhstring == nullptr || rhstring == nullptr) { return -1; } int32_t lhsCount = static_cast(lhstring->GetLength()); int32_t rhsCount = static_cast(rhstring->GetLength()); if (pos > lhsCount) { return -1; } if (rhsCount == 0) { return pos; } if (pos < 0) { pos = 0; } int32_t max = lhsCount - rhsCount; if (max < 0) { return -1; } if (pos + rhsCount > lhsCount) { return -1; } FlatStringInfo lhs = FlattenAllString(vm, receiver); JSHandle string(vm->GetJSThread(), lhs.GetString()); FlatStringInfo rhs = FlattenAllString(vm, search); lhs.SetString(*string); if (rhs.IsUtf8() && lhs.IsUtf8()) { Span lhsSp(lhs.GetDataUtf8(), lhsCount); Span rhsSp(rhs.GetDataUtf8(), rhsCount); return EcmaString::IndexOf(lhsSp, rhsSp, pos, max); } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return) Span lhsSp(lhs.GetDataUtf16(), lhsCount); Span rhsSp(rhs.GetDataUtf16(), rhsCount); return EcmaString::IndexOf(lhsSp, rhsSp, pos, max); } else if (rhs.IsUtf16()) { return -1; } else { // NOLINT(readability-else-after-return) Span lhsSp(lhs.GetDataUtf16(), lhsCount); Span rhsSp(rhs.GetDataUtf8(), rhsCount); return EcmaString::IndexOf(lhsSp, rhsSp, pos, max); } } int32_t EcmaString::LastIndexOf(const EcmaVM *vm, const JSHandle &receiver, const JSHandle &search, int pos) { EcmaString *lhstring = *receiver; EcmaString *rhstring = *search; if (lhstring == nullptr || rhstring == nullptr) { return -1; } int32_t lhsCount = static_cast(lhstring->GetLength()); int32_t rhsCount = static_cast(rhstring->GetLength()); if (lhsCount < rhsCount) { return -1; } if (pos < 0) { pos = 0; } if (pos > lhsCount) { pos = lhsCount; } if (pos + rhsCount > lhsCount) { pos = lhsCount - rhsCount; } if (rhsCount == 0) { return pos; } FlatStringInfo lhs = FlattenAllString(vm, receiver); JSHandle string(vm->GetJSThread(), lhs.GetString()); FlatStringInfo rhs = FlattenAllString(vm, search); lhs.SetString(*string); if (rhs.IsUtf8() && lhs.IsUtf8()) { Span lhsSp(lhs.GetDataUtf8(), lhsCount); Span rhsSp(rhs.GetDataUtf8(), rhsCount); return EcmaString::LastIndexOf(lhsSp, rhsSp, pos); } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return) Span lhsSp(lhs.GetDataUtf16(), lhsCount); Span rhsSp(rhs.GetDataUtf16(), rhsCount); return EcmaString::LastIndexOf(lhsSp, rhsSp, pos); } else if (rhs.IsUtf16()) { return -1; } else { // NOLINT(readability-else-after-return) Span lhsSp(lhs.GetDataUtf16(), lhsCount); Span rhsSp(rhs.GetDataUtf8(), rhsCount); return EcmaString::LastIndexOf(lhsSp, rhsSp, pos); } } std::u16string EcmaString::ToU16String(uint32_t len) { uint32_t length = len > 0 ? len : GetLength(); std::u16string result; if (IsUtf16()) { CVector buf; const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); result = base::StringHelper::Utf16ToU16String(data, length); } else { CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); result = base::StringHelper::Utf8ToU16String(data, length); } return result; } // static bool EcmaString::CanBeCompressed(const EcmaString *string) { ASSERT(string->IsLineOrConstantString()); if (string->IsUtf8()) { return CanBeCompressed(string->GetDataUtf8(), string->GetLength()); } return CanBeCompressed(string->GetDataUtf16(), string->GetLength()); } // static bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) { bool isCompressed = true; uint32_t index = 0; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) while (index < utf8Len) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (!IsASCIICharacter(utf8Data[index])) { isCompressed = false; break; } ++index; } return isCompressed; } /* static */ bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) { bool isCompressed = true; Span data(utf16Data, utf16Len); for (uint32_t i = 0; i < utf16Len; i++) { if (!IsASCIICharacter(data[i])) { isCompressed = false; break; } } return isCompressed; } bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) { ASSERT(NotTreeString()); ASSERT(str1->NotTreeString() && str2->NotTreeString()); if (GetLength() != str1->GetLength() + str2->GetLength()) { return false; } if (IsUtf16()) { CVector buf; const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) { return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength()); } } else { CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) { return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(), str2->GetLength(), this->IsUtf8()); } } return false; } /* static */ bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right) { CVector bufLeftUft16; CVector bufRightUft16; CVector bufLeftUft8; CVector bufRightUft8; int32_t lhsCount = static_cast(left->GetLength()); int32_t rhsCount = static_cast(right->GetLength()); if (!left->IsUtf16() && !right->IsUtf16()) { const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8); const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8); Span lhsSp(data1, lhsCount); Span rhsSp(data2, rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } else if (!left->IsUtf16()) { const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8); const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16); Span lhsSp(data1, lhsCount); Span rhsSp(data2, rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } else if (!right->IsUtf16()) { const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16); const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8); Span lhsSp(data1, lhsCount); Span rhsSp(data2, rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } else { const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16); const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16); Span lhsSp(data1, lhsCount); Span rhsSp(data2, rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } } /* static */ bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right) { int32_t lhsCount = static_cast(left.GetLength()); int32_t rhsCount = static_cast(right.GetLength()); if (!left.IsUtf16() && !right.IsUtf16()) { Span lhsSp(left.GetDataUtf8(), lhsCount); Span rhsSp(right.GetDataUtf8(), rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } else if (!left.IsUtf16()) { Span lhsSp(left.GetDataUtf8(), lhsCount); Span rhsSp(right.GetDataUtf16(), rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } else if (!right.IsUtf16()) { Span lhsSp(left.GetDataUtf16(), rhsCount); Span rhsSp(right.GetDataUtf8(), lhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } else { Span lhsSp(left.GetDataUtf16(), lhsCount); Span rhsSp(right.GetDataUtf16(), rhsCount); return EcmaString::StringsAreEquals(lhsSp, rhsSp); } } bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle &str1, const JSHandle &str2) { if (str1 == str2) { return true; } if (str1->IsInternString() && str2->IsInternString()) { return false; } uint32_t str1Len = str1->GetLength(); if (str1Len != str2->GetLength()) { return false; } if (str1Len == 0) { return true; } uint32_t str1Hash; uint32_t str2Hash; if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) { if (str1Hash != str2Hash) { return false; } } FlatStringInfo str1Flat = FlattenAllString(vm, str1); JSHandle string(vm->GetJSThread(), str1Flat.GetString()); FlatStringInfo str2Flat = FlattenAllString(vm, str2); str1Flat.SetString(*string); return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat); } /* static */ bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2) { if (str1 == str2) { return true; } uint32_t str1Len = str1->GetLength(); if (str1Len != str2->GetLength()) { return false; } if (str1Len == 0) { return true; } uint32_t str1Hash; uint32_t str2Hash; if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) { if (str1Hash != str2Hash) { return false; } } return StringsAreEqualDiffUtfEncoding(str1, str2); } /* static */ bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, bool canBeCompressToUtf8) { if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) { return false; } if (canBeCompressToUtf8 && str1->GetLength() != dataLen) { return false; } if (str1->IsUtf8()) { CVector buf; Span data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen); Span data2(dataAddr, dataLen); return EcmaString::StringsAreEquals(data1, data2); } CVector buf; uint32_t length = str1->GetLength(); const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf); return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length); } /* static */ bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) { uint32_t length = str1->GetLength(); if (length != utf16Len) { return false; } if (str1->IsUtf8()) { CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf); return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len); } else { CVector buf; Span data1(EcmaString::GetUtf16DataFlat(str1, buf), length); Span data2(utf16Data, utf16Len); return EcmaString::StringsAreEquals(data1, data2); } } template bool EcmaString::MemCopyChars(Span &dst, size_t dstMax, Span &src, size_t count) { ASSERT(dstMax >= count); ASSERT(dst.Size() >= src.Size()); if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) { LOG_FULL(FATAL) << "memcpy_s failed"; UNREACHABLE(); } return true; } bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const { ASSERT(length >= 0); Span str = FastToUtf8Span(); return HashIntegerString(str.data(), length, hash, hashSeed); } uint32_t EcmaString::ComputeHashcode() const { auto [hash, isInteger] = ComputeRawHashcode(); return MixHashcode(hash, isInteger); } // hashSeed only be used when computing two separate strings merged hashcode. std::pair EcmaString::ComputeRawHashcode() const { uint32_t hash = 0; uint32_t length = GetLength(); if (length == 0) { return {hash, false}; } if (IsUtf8()) { // String using UTF8 encoding, and length smaller than 10, try to compute integer hash. if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) { return {hash, true}; } CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); // String can not convert to integer number, using normal hashcode computing algorithm. hash = this->ComputeHashForData(data, length, 0); return {hash, false}; } else { CVector buf; const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); // If rawSeed has certain value, and second string uses UTF16 encoding, // then merged string can not be small integer number. hash = this->ComputeHashForData(data, length, 0); return {hash, false}; } } // hashSeed only be used when computing two separate strings merged hashcode. uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const { uint32_t hash; uint32_t length = GetLength(); if (length == 0) { return MixHashcode(rawHashSeed, isInteger); } if (IsUtf8()) { // String using UTF8 encoding, and length smaller than 10, try to compute integer hash. if ((rawHashSeed == 0 || isInteger) && length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) { return hash; } CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); // String can not convert to integer number, using normal hashcode computing algorithm. hash = this->ComputeHashForData(data, length, rawHashSeed); return MixHashcode(hash, NOT_INTEGER); } else { CVector buf; const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); // If rawSeed has certain value, and second string uses UTF16 encoding, // then merged string can not be small integer number. hash = this->ComputeHashForData(data, length, rawHashSeed); return MixHashcode(hash, NOT_INTEGER); } } /* static */ uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) { uint32_t mixHash = 0; if (canBeCompress) { // String using UTF8 encoding, and length smaller than 10, try to compute integer hash. if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) { return mixHash; } uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0); return MixHashcode(hash, NOT_INTEGER); } else { auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len); CVector tmpBuffer(utf16Len); [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf16Len, 0); ASSERT(len == utf16Len); uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0); return MixHashcode(hash, NOT_INTEGER); } LOG_ECMA(FATAL) << "this branch is unreachable"; UNREACHABLE(); } /* static */ uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) { uint32_t mixHash = 0; // String length smaller than 10, try to compute integer hash. if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) { return mixHash; } uint32_t hash = ComputeHashForData(utf16Data, length, 0); return MixHashcode(hash, NOT_INTEGER); } /* static */ bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, uint32_t utf16Len) { size_t utf8Pos = 0; size_t utf16Pos = 0; while (utf8Pos < utf8Len) { auto [pair, nbytes] = utf::ConvertMUtf8ToUtf16Pair(utf8Data, utf8Len - utf8Pos); auto [pHigh, pLow] = utf::SplitUtf16Pair(pair); utf8Data += nbytes; utf8Pos += nbytes; if (pHigh != 0) { if (utf16Pos >= utf16Len - 1 || *utf16Data != pHigh) { return false; } ++utf16Pos; ++utf16Data; } if (utf16Pos >= utf16Len || *utf16Data != pLow) { return false; } ++utf16Pos; ++utf16Data; } return true; } bool EcmaString::ToElementIndex(uint32_t *index) { uint32_t len = GetLength(); if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers) return false; } if (UNLIKELY(IsUtf16())) { return false; } // fast path: get integer from string's hash value if (TryToGetInteger(index)) { return true; } CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); uint32_t c = data[0]; uint64_t n = 0; if (c == '0') { *index = 0; return len == 1; } uint32_t loopStart = 0; if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) { *index = n; return true; } return false; } bool EcmaString::ToInt(int32_t *index, bool *negative) { uint32_t len = GetLength(); if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers) return false; } if (UNLIKELY(IsUtf16())) { return false; } CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); uint32_t c = data[0]; uint32_t loopStart = 0; uint64_t n = 0; if (c == '0') { *index = 0; return len == 1; } if (c == '-' && len > 1) { *negative = true; loopStart = 1; } if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits::max()) { *index = *negative ? -n : n; return true; } return false; } bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data) { uint64_t n = 0; uint32_t len = GetLength(); if (UNLIKELY(loopStart >= len)) { return false; } for (uint32_t i = loopStart; i < len; i++) { uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (c < '0' || c > '9') { return false; } // NOLINTNEXTLINE(readability-magic-numbers) n = n * 10 + (c - '0'); // 10: decimal factor } *index = n; return true; } bool EcmaString::ToTypedArrayIndex(uint32_t *index) { uint32_t len = GetLength(); if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { return false; } if (UNLIKELY(IsUtf16())) { return false; } CVector buf; const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) uint64_t n = 0; if (c == '0') { *index = 0; return len == 1; } if (c > '0' && c <= '9') { n = c - '0'; for (uint32_t i = 1; i < len; i++) { c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (c >= '0' && c <= '9') { // NOLINTNEXTLINE(readability-magic-numbers) n = n * 10 + (c - '0'); // 10: decimal factor } else if (c == '.') { n = JSObject::MAX_ELEMENT_INDEX; break; } else { return false; } } if (n < JSObject::MAX_ELEMENT_INDEX) { *index = n; return true; } else { *index = JSObject::MAX_ELEMENT_INDEX; return true; } } else if (c == '-') { *index = JSObject::MAX_ELEMENT_INDEX; return true; } return false; } template EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle &src, Span &data, TrimMode mode) { uint32_t srcLen = src->GetLength(); int32_t start = 0; int32_t end = static_cast(srcLen) - 1; if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) { start = static_cast(base::StringHelper::GetStart(data, srcLen)); } if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) { end = base::StringHelper::GetEnd(data, start, srcLen); } EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast(end - start + 1)); return res; } /* static */ EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle &src) { auto srcFlat = FlattenAllString(vm, src); uint32_t srcLength = srcFlat.GetLength(); auto factory = vm->GetFactory(); if (srcFlat.IsUtf16()) { std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength); std::string res = base::StringHelper::ToLower(u16str); return *(factory->NewFromStdString(res)); } else { return ConvertUtf8ToLowerOrUpper(vm, src, true); } } /* static */ EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle &src) { auto srcFlat = FlattenAllString(vm, src); uint32_t srcLength = srcFlat.GetLength(); const char start = 'A'; const char end = 'Z'; uint32_t upperIndex = srcLength; Span data(srcFlat.GetDataUtf8Writable(), srcLength); for (uint32_t index = 0; index < srcLength; ++index) { if (base::StringHelper::Utf8CharInRange(data[index], start, end)) { upperIndex = index; break; } } if (upperIndex == srcLength) { return *src; } return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex); } /* static */ EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle &src, bool toLower, uint32_t startIndex) { const char start = toLower ? 'A' : 'a'; const char end = toLower ? 'Z' : 'z'; uint32_t srcLength = src->GetLength(); JSHandle newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true)); auto srcFlat = FlattenAllString(vm, src); Span data(srcFlat.GetDataUtf8Writable(), srcLength); auto newStringPtr = newString->GetDataUtf8Writable(); if (startIndex > 0) { if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) { LOG_FULL(FATAL) << "memcpy_s failed"; UNREACHABLE(); } } for (uint32_t index = startIndex; index < srcLength; ++index) { if (base::StringHelper::Utf8CharInRange(data[index], start, end)) { *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower } else { *(newStringPtr + index) = data[index]; } } return *newString; } /* static */ EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle &src) { FlatStringInfo srcFlat = FlattenAllString(vm, src); uint32_t srcLength = srcFlat.GetLength(); auto factory = vm->GetFactory(); if (srcFlat.IsUtf16()) { std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength); std::string res = base::StringHelper::ToUpper(u16str); return *(factory->NewFromStdString(res)); } else { return ConvertUtf8ToLowerOrUpper(vm, src, false); } } /* static */ EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle &src, const icu::Locale &locale) { auto factory = vm->GetFactory(); FlatStringInfo srcFlat = FlattenAllString(vm, src); std::u16string utf16 = srcFlat.ToU16String(); std::string res = base::StringHelper::ToLocaleLower(utf16, locale); return *(factory->NewFromStdString(res)); } /* static */ EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle &src, const icu::Locale &locale) { auto factory = vm->GetFactory(); FlatStringInfo srcFlat = FlattenAllString(vm, src); std::u16string utf16 = srcFlat.ToU16String(); std::string res = base::StringHelper::ToLocaleUpper(utf16, locale); return *(factory->NewFromStdString(res)); } EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle &src, TrimMode mode) { FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src); uint32_t srcLen = srcFlat.GetLength(); if (UNLIKELY(srcLen == 0)) { return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject()); } if (srcFlat.IsUtf8()) { Span data(srcFlat.GetDataUtf8(), srcLen); return TrimBody(thread, src, data, mode); } else { Span data(srcFlat.GetDataUtf16(), srcLen); return TrimBody(thread, src, data, mode); } } EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle &string, MemSpaceType type) { ASSERT(string->IsTreeString() || string->IsSlicedString()); auto thread = vm->GetJSThread(); uint32_t length = string->GetLength(); EcmaString *result = nullptr; if (string->IsUtf8()) { result = CreateLineStringWithSpaceType(vm, length, true, type); WriteToFlat(*string, result->GetDataUtf8Writable(), length); } else { result = CreateLineStringWithSpaceType(vm, length, false, type); WriteToFlat(*string, result->GetDataUtf16Writable(), length); } if (string->IsTreeString()) { JSHandle tree(string); ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0); tree->SetFirst(thread, JSTaggedValue(result)); tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString())); } return result; } EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle &string, MemSpaceType type) { EcmaString *s = *string; if (s->IsLineOrConstantString() || s->IsSlicedString()) { return s; } if (s->IsTreeString()) { JSHandle tree = JSHandle::Cast(string); if (!tree->IsFlat()) { return SlowFlatten(vm, string, type); } s = EcmaString::Cast(tree->GetFirst()); } return s; } FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle &string, MemSpaceType type) { EcmaString *s = *string; uint32_t startIndex = 0; if (s->IsLineOrConstantString()) { return FlatStringInfo(s, startIndex, s->GetLength()); } if (string->IsTreeString()) { JSHandle tree = JSHandle::Cast(string); if (!tree->IsFlat()) { s = SlowFlatten(vm, string, type); } else { s = EcmaString::Cast(tree->GetFirst()); } } else if (string->IsSlicedString()) { s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent()); startIndex = SlicedString::Cast(*string)->GetStartIndex(); } return FlatStringInfo(s, startIndex, string->GetLength()); } EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string) { DISALLOW_GARBAGE_COLLECTION; if (string->IsLineOrConstantString()) { return string; } if (string->IsTreeString()) { TreeEcmaString *tree = TreeEcmaString::Cast(string); if (tree->IsFlat()) { string = EcmaString::Cast(tree->GetFirst()); } else { uint32_t length = tree->GetLength(); EcmaString *result = nullptr; if (tree->IsUtf8()) { result = CreateLineStringNoGC(vm, length, true); WriteToFlat(tree, result->GetDataUtf8Writable(), length); } else { result = CreateLineStringNoGC(vm, length, false); WriteToFlat(tree, result->GetDataUtf16Writable(), length); } tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result)); tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString())); return result; } } else if (string->IsSlicedString()) { SlicedString *str = SlicedString::Cast(string); uint32_t length = str->GetLength(); EcmaString *result = nullptr; if (str->IsUtf8()) { result = CreateLineStringNoGC(vm, length, true); WriteToFlat(str, result->GetDataUtf8Writable(), length); } else { result = CreateLineStringNoGC(vm, length, false); WriteToFlat(str, result->GetDataUtf16Writable(), length); } return result; } return string; } const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector &buf) { ASSERT(src->IsUtf8()); uint32_t length = src->GetLength(); EcmaString *string = const_cast(src); if (string->IsTreeString()) { if (string->IsFlat()) { string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); } else { buf.reserve(length); WriteToFlat(string, buf.data(), length); return buf.data(); } } else if (string->IsSlicedString()) { SlicedString *str = SlicedString::Cast(string); return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex(); } return string->GetDataUtf8(); } const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector &buf) { ASSERT(src->IsUtf16()); uint32_t length = src->GetLength(); EcmaString *string = const_cast(src); if (string->IsTreeString()) { if (string->IsFlat()) { string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); } else { buf.reserve(length); WriteToFlat(string, buf.data(), length); return buf.data(); } } else if (string->IsSlicedString()) { SlicedString *str = SlicedString::Cast(string); return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex(); } return string->GetDataUtf16(); } std::u16string FlatStringInfo::ToU16String(uint32_t len) { uint32_t length = len > 0 ? len : GetLength(); std::u16string result; if (IsUtf16()) { const uint16_t *data = this->GetDataUtf16(); result = base::StringHelper::Utf16ToU16String(data, length); } else { const uint8_t *data = this->GetDataUtf8(); result = base::StringHelper::Utf8ToU16String(data, length); } return result; } EcmaStringAccessor::EcmaStringAccessor(EcmaString *string) { ASSERT(string != nullptr); string_ = string; } EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj) { ASSERT(obj != nullptr); string_ = EcmaString::Cast(obj); } EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value) { ASSERT(value.IsString()); string_ = EcmaString::Cast(value.GetTaggedObject()); } EcmaStringAccessor::EcmaStringAccessor(const JSHandle &strHandle) : string_(*strHandle) { } std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage) { if (string_ == nullptr) { return ""; } bool modify = (usage != StringConvertedUsage::PRINT); CVector buf; Span sp = string_->ToUtf8Span(buf, modify); std::string res; res.reserve(sp.size()); for (const auto &c : sp) { res.push_back(c); } return res; } std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage) { if (string_ == nullptr) { return ""; } bool modify = (usage != StringConvertedUsage::PRINT); CVector buf; Span sp = string_->DebuggerToUtf8Span(buf, modify); std::string res; res.reserve(sp.size()); for (const auto &c : sp) { res.push_back(c); } return res; } CString EcmaStringAccessor::ToCString(StringConvertedUsage usage) { if (string_ == nullptr) { return ""; } bool modify = (usage != StringConvertedUsage::PRINT); CVector buf; Span sp = string_->ToUtf8Span(buf, modify); CString res; res.reserve(sp.size()); for (const auto &c : sp) { res.push_back(c); } return res; } } // namespace panda::ecmascript