1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H
17 #define COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H
18
19 #include "common_interfaces/objects/string/base_string_declare.h"
20 #include "common_interfaces/objects/string/line_string.h"
21 #include "common_interfaces/objects/string/sliced_string.h"
22 #include "common_interfaces/objects/string/tree_string.h"
23
24 namespace common {
// Conversion helpers; only declared here, the definitions live elsewhere.
// NOTE(review): judging by the names and parameters, each builds a std::u16string from dataLen
// input units (UTF-16 code units / UTF-8 bytes respectively) — confirm against the definitions.
std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen);
std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen);

// Span helpers; only declared here, the definitions live elsewhere.
// NOTE(review): presumably CompareStringSpan orders the first `count` elements of both spans and
// IsSubStringAtSpan tests whether rhsSp occurs in lhsSp at `offset` — confirm against the definitions.
template <typename T1, typename T2>
int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count);
template <typename T1, typename T2>
bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset);
32
33 template<typename ReadBarrier>
ComputeHashcode(ReadBarrier && readBarrier)34 uint32_t BaseString::ComputeHashcode(ReadBarrier &&readBarrier) const
35 {
36 auto [hash, isInteger] = ComputeRawHashcode(readBarrier);
37 return MixHashcode(hash, isInteger);
38 }
39
40 template <typename ReadBarrier>
ComputeRawHashcode(ReadBarrier && readBarrier)41 std::pair<uint32_t, bool> BaseString::ComputeRawHashcode(ReadBarrier &&readBarrier) const
42 {
43 uint32_t hash = 0;
44 uint32_t length = GetLength();
45 if (length == 0) {
46 return {hash, false};
47 }
48
49 if (IsUtf8()) {
50 std::vector<uint8_t> buf;
51 const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
52 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
53 if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(data, length, &hash, 0)) {
54 return {hash, true};
55 }
56 // String can not convert to integer number, using normal hashcode computing algorithm.
57 hash = ComputeHashForData(data, length, 0);
58 return {hash, false};
59 } else {
60 std::vector<uint16_t> buf;
61 const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
62 // If rawSeed has certain value, and second string uses UTF16 encoding,
63 // then merged string can not be small integer number.
64 hash = ComputeHashForData(data, length, 0);
65 return {hash, false};
66 }
67 }
68
// Returns true when c is one of the ASCII characters '0' through '9'.
template <typename T>
inline static bool IsDecimalDigitChar(const T c)
{
    const bool belowZero = c < '0';
    const bool aboveNine = c > '9';
    return !(belowZero || aboveNine);
}
74
// Appends one decimal digit to the running value: *num = *num * 10 + digit.
// Returns false and leaves *num unchanged when c is not an ASCII digit '0'..'9'.
inline bool ComputeIntegerHash(uint32_t *num, uint8_t c)
{
    // Same range test as IsDecimalDigitChar, written out inline.
    if (c < '0' || c > '9') {
        return false;
    }
    const uint32_t digit = static_cast<uint32_t>(c - '0');
    *num = *num * 10 + digit; // 10: decimal factor
    return true;
}
84
85 template<typename T>
HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)86 bool BaseString::HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
87 {
88 ASSERT(size >= 0);
89 if (hashSeed == 0) {
90 if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
91 uint32_t num = data[0] - '0';
92 uint32_t i = 1;
93 do {
94 if (i == size) {
95 // compute mix hash
96 if (num <= MAX_INTEGER_HASH_NUMBER) {
97 *hash = MixHashcode(num, IS_INTEGER);
98 return true;
99 }
100 return false;
101 }
102 } while (ComputeIntegerHash(&num, data[i++]));
103 }
104 if (size == 1 && (data[0] == '0')) {
105 *hash = MixHashcode(0, IS_INTEGER);
106 return true;
107 }
108 } else {
109 if (IsDecimalDigitChar(data[0])) {
110 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
111 uint32_t i = 1;
112 do {
113 if (i == size) {
114 // compute mix hash
115 if (num <= MAX_INTEGER_HASH_NUMBER) {
116 *hash = MixHashcode(num, IS_INTEGER);
117 return true;
118 }
119 return false;
120 }
121 } while (ComputeIntegerHash(&num, data[i++]));
122 }
123 }
124 return false;
125 }
126
127 template <typename ReadBarrier>
EqualToSplicedString(ReadBarrier && readBarrier,const BaseString * str1,const BaseString * str2)128 bool BaseString::EqualToSplicedString(ReadBarrier &&readBarrier, const BaseString *str1, const BaseString *str2)
129 {
130 DCHECK_CC(NotTreeString());
131 DCHECK_CC(str1->NotTreeString() && str2->NotTreeString());
132 if (GetLength() != str1->GetLength() + str2->GetLength()) {
133 return false;
134 }
135 if (IsUtf16()) {
136 std::vector<uint16_t> buf;
137 const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
138 if (BaseString::StringsAreEqualUtf16(std::forward<ReadBarrier>(readBarrier), str1, data, str1->GetLength())) {
139 return BaseString::StringsAreEqualUtf16(std::forward<ReadBarrier>(readBarrier), str2,
140 data + str1->GetLength(), str2->GetLength());
141 }
142 } else {
143 std::vector<uint8_t> buf;
144 const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
145 if (BaseString::StringIsEqualUint8Data(std::forward<ReadBarrier>(readBarrier), str1, data, str1->GetLength(),
146 this->IsUtf8())) {
147 return BaseString::StringIsEqualUint8Data(std::forward<ReadBarrier>(readBarrier), str2,
148 data + str1->GetLength(),
149 str2->GetLength(), this->IsUtf8());
150 }
151 }
152 return false;
153 }
154
155 template <typename ReadBarrier>
ToU16String(ReadBarrier && readBarrier,uint32_t len)156 std::u16string BaseString::ToU16String(ReadBarrier &&readBarrier, uint32_t len)
157 {
158 uint32_t length = len > 0 ? len : GetLength();
159 std::u16string result;
160 if (IsUtf16()) {
161 std::vector<uint16_t> buf;
162 const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
163 result = Utf16ToU16String(data, length);
164 } else {
165 std::vector<uint8_t> buf;
166 const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
167 result = Utf8ToU16String(data, length);
168 }
169 return result;
170 }
171
172
173 template <typename ReadBarrier>
WriteData(ReadBarrier && readBarrier,BaseString * src,uint32_t start,uint32_t destSize,uint32_t length)174 void BaseString::WriteData(ReadBarrier &&readBarrier, BaseString *src, uint32_t start, uint32_t destSize,
175 uint32_t length)
176 {
177 DCHECK_CC(IsLineString());
178 if (IsUtf8()) {
179 DCHECK_CC(src->IsUtf8());
180 std::vector<uint8_t> buf;
181 const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), src, buf);
182 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
183 if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
184 UNREACHABLE();
185 }
186 } else if (src->IsUtf8()) {
187 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
188 std::vector<uint8_t> buf;
189 const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), src, buf);
190 Span<uint16_t> to(GetDataUtf16Writable() + start, length);
191 Span<const uint8_t> from(data, length);
192 for (uint32_t i = 0; i < length; i++) {
193 to[i] = from[i];
194 }
195 } else {
196 std::vector<uint16_t> buf;
197 const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), src, buf);
198 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
199 if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
200 destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
201 UNREACHABLE();
202 }
203 }
204 }
205
NotTreeString()206 inline bool BaseString::NotTreeString() const
207 {
208 return !IsTreeString();
209 }
210
211 template <typename ReadBarrier>
GetNonTreeUtf8Data(ReadBarrier && readBarrier,const BaseString * src)212 const uint8_t *BaseString::GetNonTreeUtf8Data(ReadBarrier &&readBarrier, const BaseString *src)
213 {
214 DCHECK_CC(src->IsUtf8());
215 DCHECK_CC(!src->IsTreeString());
216 BaseString *string = const_cast<BaseString *>(src);
217 if (string->IsSlicedString()) {
218 SlicedString *str = SlicedString::Cast(string);
219 return BaseString::Cast(str->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf8() +
220 str->GetStartIndex();
221 }
222 DCHECK_CC(src->IsLineString());
223 return string->GetDataUtf8();
224 }
225
226
227 template <typename ReadBarrier>
GetNonTreeUtf16Data(ReadBarrier && readBarrier,const BaseString * src)228 const uint16_t *BaseString::GetNonTreeUtf16Data(ReadBarrier &&readBarrier, const BaseString *src)
229 {
230 DCHECK_CC(src->IsUtf16());
231 DCHECK_CC(!src->IsTreeString());
232 BaseString *string = const_cast<BaseString *>(src);
233 if (string->IsSlicedString()) {
234 SlicedString *str = SlicedString::Cast(string);
235 return BaseString::Cast(str->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf16() +
236 str->GetStartIndex();
237 }
238 DCHECK_CC(src->IsLineString());
239 return string->GetDataUtf16();
240 }
241
242
243 /* static */
244 template <typename ReadBarrier>
StringsAreEqualDiffUtfEncoding(ReadBarrier && readBarrier,BaseString * left,BaseString * right)245 bool BaseString::StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *left, BaseString *right)
246 {
247 std::vector<uint16_t> bufLeftUft16;
248 std::vector<uint16_t> bufRightUft16;
249 std::vector<uint8_t> bufLeftUft8;
250 std::vector<uint8_t> bufRightUft8;
251 int32_t lhsCount = static_cast<int32_t>(left->GetLength());
252 int32_t rhsCount = static_cast<int32_t>(right->GetLength());
253 if (!left->IsUtf16() && !right->IsUtf16()) {
254 const uint8_t *data1 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft8);
255 const uint8_t *data2 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), right, bufRightUft8);
256 Span<const uint8_t> lhsSp(data1, lhsCount);
257 Span<const uint8_t> rhsSp(data2, rhsCount);
258 return BaseString::StringsAreEquals(lhsSp, rhsSp);
259 } else if (!left->IsUtf16()) {
260 const uint8_t *data1 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft8);
261 const uint16_t *data2 = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), right,
262 bufRightUft16);
263 Span<const uint8_t> lhsSp(data1, lhsCount);
264 Span<const uint16_t> rhsSp(data2, rhsCount);
265 return BaseString::StringsAreEquals(lhsSp, rhsSp);
266 } else if (!right->IsUtf16()) {
267 const uint16_t *data1 =
268 BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft16);
269 const uint8_t *data2 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), right, bufRightUft8);
270 Span<const uint16_t> lhsSp(data1, lhsCount);
271 Span<const uint8_t> rhsSp(data2, rhsCount);
272 return BaseString::StringsAreEquals(lhsSp, rhsSp);
273 } else {
274 const uint16_t *data1 =
275 BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft16);
276 const uint16_t *data2 = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), right,
277 bufRightUft16);
278 Span<const uint16_t> lhsSp(data1, lhsCount);
279 Span<const uint16_t> rhsSp(data2, rhsCount);
280 return BaseString::StringsAreEquals(lhsSp, rhsSp);
281 }
282 }
283
284 /* static */
285 template <typename ReadBarrier>
StringsAreEqual(ReadBarrier && readBarrier,BaseString * str1,BaseString * str2)286 bool BaseString::StringsAreEqual(ReadBarrier &&readBarrier, BaseString *str1, BaseString *str2)
287 {
288 DCHECK_CC(str1 != nullptr);
289 DCHECK_CC(str2 != nullptr);
290 if (str1 == str2) {
291 return true;
292 }
293 uint32_t str1Len = str1->GetLength();
294 if (str1Len != str2->GetLength()) {
295 return false;
296 }
297 if (str1Len == 0) {
298 return true;
299 }
300
301 uint32_t str1Hash;
302 uint32_t str2Hash;
303 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
304 if (str1Hash != str2Hash) {
305 return false;
306 }
307 }
308 return StringsAreEqualDiffUtfEncoding(std::forward<ReadBarrier>(readBarrier), str1, str2);
309 }
310
311
312 /* static */
313 template <typename ReadBarrier>
StringIsEqualUint8Data(ReadBarrier && readBarrier,const BaseString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)314 bool BaseString::StringIsEqualUint8Data(ReadBarrier &&readBarrier, const BaseString *str1, const uint8_t *dataAddr,
315 uint32_t dataLen,
316 bool canBeCompressToUtf8)
317 {
318 if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
319 return false;
320 }
321 if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
322 return false;
323 }
324 if (str1->IsUtf8()) {
325 std::vector<uint8_t> buf;
326 Span<const uint8_t> data1(BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf),
327 dataLen);
328 Span<const uint8_t> data2(dataAddr, dataLen);
329 return BaseString::StringsAreEquals(data1, data2);
330 }
331 std::vector<uint16_t> buf;
332 uint32_t length = str1->GetLength();
333 const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf);
334 return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
335 }
336
337 /* static */
338 template <typename ReadBarrier>
StringsAreEqualUtf16(ReadBarrier && readBarrier,const BaseString * str1,const uint16_t * utf16Data,uint32_t utf16Len)339 bool BaseString::StringsAreEqualUtf16(ReadBarrier &&readBarrier, const BaseString *str1, const uint16_t *utf16Data,
340 uint32_t utf16Len)
341 {
342 uint32_t length = str1->GetLength();
343 if (length != utf16Len) {
344 return false;
345 }
346 if (str1->IsUtf8()) {
347 std::vector<uint8_t> buf;
348 const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf);
349 return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
350 } else {
351 std::vector<uint16_t> buf;
352 Span<const uint16_t> data1(BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf),
353 length);
354 Span<const uint16_t> data2(utf16Data, utf16Len);
355 return BaseString::StringsAreEquals(data1, data2);
356 }
357 }
358 } // namespace common
359
360 #endif //COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H