• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H
17 #define COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H
18 
19 #include "common_interfaces/objects/string/base_string_declare.h"
20 #include "common_interfaces/objects/string/line_string.h"
21 #include "common_interfaces/objects/string/sliced_string.h"
22 #include "common_interfaces/objects/string/tree_string.h"
23 
24 namespace common {
25 std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen);
26 std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen);
27 
28 template <typename T1, typename T2>
29 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count);
30 template <typename T1, typename T2>
31 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset);
32 
33 template<typename ReadBarrier>
ComputeHashcode(ReadBarrier && readBarrier)34 uint32_t BaseString::ComputeHashcode(ReadBarrier &&readBarrier) const
35 {
36     auto [hash, isInteger] = ComputeRawHashcode(readBarrier);
37     return MixHashcode(hash, isInteger);
38 }
39 
40 template <typename ReadBarrier>
ComputeRawHashcode(ReadBarrier && readBarrier)41 std::pair<uint32_t, bool> BaseString::ComputeRawHashcode(ReadBarrier &&readBarrier) const
42 {
43     uint32_t hash = 0;
44     uint32_t length = GetLength();
45     if (length == 0) {
46         return {hash, false};
47     }
48 
49     if (IsUtf8()) {
50         std::vector<uint8_t> buf;
51         const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
52         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
53         if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(data, length, &hash, 0)) {
54             return {hash, true};
55         }
56         // String can not convert to integer number, using normal hashcode computing algorithm.
57         hash = ComputeHashForData(data, length, 0);
58         return {hash, false};
59     } else {
60         std::vector<uint16_t> buf;
61         const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
62         // If rawSeed has certain value, and second string uses UTF16 encoding,
63         // then merged string can not be small integer number.
64         hash = ComputeHashForData(data, length, 0);
65         return {hash, false};
66     }
67 }
68 
// Returns true when c is one of the ASCII characters '0' through '9'.
template <typename T>
inline static bool IsDecimalDigitChar(const T c)
{
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
74 
ComputeIntegerHash(uint32_t * num,uint8_t c)75 inline bool ComputeIntegerHash(uint32_t *num, uint8_t c)
76 {
77     if (!IsDecimalDigitChar(c)) {
78         return false;
79     }
80     int charDate = c - '0';
81     *num = (*num) * 10 + charDate; // 10: decimal factor
82     return true;
83 }
84 
85 template<typename T>
HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)86 bool BaseString::HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
87 {
88     ASSERT(size >= 0);
89     if (hashSeed == 0) {
90         if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
91             uint32_t num = data[0] - '0';
92             uint32_t i = 1;
93             do {
94                 if (i == size) {
95                     // compute mix hash
96                     if (num <= MAX_INTEGER_HASH_NUMBER) {
97                         *hash = MixHashcode(num, IS_INTEGER);
98                         return true;
99                     }
100                     return false;
101                 }
102             } while (ComputeIntegerHash(&num, data[i++]));
103         }
104         if (size == 1 && (data[0] == '0')) {
105             *hash = MixHashcode(0, IS_INTEGER);
106             return true;
107         }
108     } else {
109         if (IsDecimalDigitChar(data[0])) {
110             uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
111             uint32_t i = 1;
112             do {
113                 if (i == size) {
114                     // compute mix hash
115                     if (num <= MAX_INTEGER_HASH_NUMBER) {
116                         *hash = MixHashcode(num, IS_INTEGER);
117                         return true;
118                     }
119                     return false;
120                 }
121             } while (ComputeIntegerHash(&num, data[i++]));
122         }
123     }
124     return false;
125 }
126 
127 template <typename ReadBarrier>
EqualToSplicedString(ReadBarrier && readBarrier,const BaseString * str1,const BaseString * str2)128 bool BaseString::EqualToSplicedString(ReadBarrier &&readBarrier, const BaseString *str1, const BaseString *str2)
129 {
130     DCHECK_CC(NotTreeString());
131     DCHECK_CC(str1->NotTreeString() && str2->NotTreeString());
132     if (GetLength() != str1->GetLength() + str2->GetLength()) {
133         return false;
134     }
135     if (IsUtf16()) {
136         std::vector<uint16_t> buf;
137         const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
138         if (BaseString::StringsAreEqualUtf16(std::forward<ReadBarrier>(readBarrier), str1, data, str1->GetLength())) {
139             return BaseString::StringsAreEqualUtf16(std::forward<ReadBarrier>(readBarrier), str2,
140                                                     data + str1->GetLength(), str2->GetLength());
141         }
142     } else {
143         std::vector<uint8_t> buf;
144         const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
145         if (BaseString::StringIsEqualUint8Data(std::forward<ReadBarrier>(readBarrier), str1, data, str1->GetLength(),
146                                                this->IsUtf8())) {
147             return BaseString::StringIsEqualUint8Data(std::forward<ReadBarrier>(readBarrier), str2,
148                                                       data + str1->GetLength(),
149                                                       str2->GetLength(), this->IsUtf8());
150         }
151     }
152     return false;
153 }
154 
155 template <typename ReadBarrier>
ToU16String(ReadBarrier && readBarrier,uint32_t len)156 std::u16string BaseString::ToU16String(ReadBarrier &&readBarrier, uint32_t len)
157 {
158     uint32_t length = len > 0 ? len : GetLength();
159     std::u16string result;
160     if (IsUtf16()) {
161         std::vector<uint16_t> buf;
162         const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
163         result = Utf16ToU16String(data, length);
164     } else {
165         std::vector<uint8_t> buf;
166         const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
167         result = Utf8ToU16String(data, length);
168     }
169     return result;
170 }
171 
172 
173 template <typename ReadBarrier>
WriteData(ReadBarrier && readBarrier,BaseString * src,uint32_t start,uint32_t destSize,uint32_t length)174 void BaseString::WriteData(ReadBarrier &&readBarrier, BaseString *src, uint32_t start, uint32_t destSize,
175                            uint32_t length)
176 {
177     DCHECK_CC(IsLineString());
178     if (IsUtf8()) {
179         DCHECK_CC(src->IsUtf8());
180         std::vector<uint8_t> buf;
181         const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), src, buf);
182         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
183         if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
184             UNREACHABLE();
185         }
186     } else if (src->IsUtf8()) {
187         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
188         std::vector<uint8_t> buf;
189         const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), src, buf);
190         Span<uint16_t> to(GetDataUtf16Writable() + start, length);
191         Span<const uint8_t> from(data, length);
192         for (uint32_t i = 0; i < length; i++) {
193             to[i] = from[i];
194         }
195     } else {
196         std::vector<uint16_t> buf;
197         const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), src, buf);
198         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
199         if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
200                                     destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
201             UNREACHABLE();
202         }
203     }
204 }
205 
NotTreeString()206 inline bool BaseString::NotTreeString() const
207 {
208     return !IsTreeString();
209 }
210 
211 template <typename ReadBarrier>
GetNonTreeUtf8Data(ReadBarrier && readBarrier,const BaseString * src)212 const uint8_t *BaseString::GetNonTreeUtf8Data(ReadBarrier &&readBarrier, const BaseString *src)
213 {
214     DCHECK_CC(src->IsUtf8());
215     DCHECK_CC(!src->IsTreeString());
216     BaseString *string = const_cast<BaseString *>(src);
217     if (string->IsSlicedString()) {
218         SlicedString *str = SlicedString::Cast(string);
219         return BaseString::Cast(str->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf8() +
220                str->GetStartIndex();
221     }
222     DCHECK_CC(src->IsLineString());
223     return string->GetDataUtf8();
224 }
225 
226 
227 template <typename ReadBarrier>
GetNonTreeUtf16Data(ReadBarrier && readBarrier,const BaseString * src)228 const uint16_t *BaseString::GetNonTreeUtf16Data(ReadBarrier &&readBarrier, const BaseString *src)
229 {
230     DCHECK_CC(src->IsUtf16());
231     DCHECK_CC(!src->IsTreeString());
232     BaseString *string = const_cast<BaseString *>(src);
233     if (string->IsSlicedString()) {
234         SlicedString *str = SlicedString::Cast(string);
235         return BaseString::Cast(str->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf16() +
236                str->GetStartIndex();
237     }
238     DCHECK_CC(src->IsLineString());
239     return string->GetDataUtf16();
240 }
241 
242 
243 /* static */
244 template <typename ReadBarrier>
StringsAreEqualDiffUtfEncoding(ReadBarrier && readBarrier,BaseString * left,BaseString * right)245 bool BaseString::StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *left, BaseString *right)
246 {
247     std::vector<uint16_t> bufLeftUft16;
248     std::vector<uint16_t> bufRightUft16;
249     std::vector<uint8_t> bufLeftUft8;
250     std::vector<uint8_t> bufRightUft8;
251     int32_t lhsCount = static_cast<int32_t>(left->GetLength());
252     int32_t rhsCount = static_cast<int32_t>(right->GetLength());
253     if (!left->IsUtf16() && !right->IsUtf16()) {
254         const uint8_t *data1 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft8);
255         const uint8_t *data2 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), right, bufRightUft8);
256         Span<const uint8_t> lhsSp(data1, lhsCount);
257         Span<const uint8_t> rhsSp(data2, rhsCount);
258         return BaseString::StringsAreEquals(lhsSp, rhsSp);
259     } else if (!left->IsUtf16()) {
260         const uint8_t *data1 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft8);
261         const uint16_t *data2 = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), right,
262                                                              bufRightUft16);
263         Span<const uint8_t> lhsSp(data1, lhsCount);
264         Span<const uint16_t> rhsSp(data2, rhsCount);
265         return BaseString::StringsAreEquals(lhsSp, rhsSp);
266     } else if (!right->IsUtf16()) {
267         const uint16_t *data1 =
268             BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft16);
269         const uint8_t *data2 = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), right, bufRightUft8);
270         Span<const uint16_t> lhsSp(data1, lhsCount);
271         Span<const uint8_t> rhsSp(data2, rhsCount);
272         return BaseString::StringsAreEquals(lhsSp, rhsSp);
273     } else {
274         const uint16_t *data1 =
275             BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), left, bufLeftUft16);
276         const uint16_t *data2 = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), right,
277                                                              bufRightUft16);
278         Span<const uint16_t> lhsSp(data1, lhsCount);
279         Span<const uint16_t> rhsSp(data2, rhsCount);
280         return BaseString::StringsAreEquals(lhsSp, rhsSp);
281     }
282 }
283 
284 /* static */
285 template <typename ReadBarrier>
StringsAreEqual(ReadBarrier && readBarrier,BaseString * str1,BaseString * str2)286 bool BaseString::StringsAreEqual(ReadBarrier &&readBarrier, BaseString *str1, BaseString *str2)
287 {
288     DCHECK_CC(str1 != nullptr);
289     DCHECK_CC(str2 != nullptr);
290     if (str1 == str2) {
291         return true;
292     }
293     uint32_t str1Len = str1->GetLength();
294     if (str1Len != str2->GetLength()) {
295         return false;
296     }
297     if (str1Len == 0) {
298         return true;
299     }
300 
301     uint32_t str1Hash;
302     uint32_t str2Hash;
303     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
304         if (str1Hash != str2Hash) {
305             return false;
306         }
307     }
308     return StringsAreEqualDiffUtfEncoding(std::forward<ReadBarrier>(readBarrier), str1, str2);
309 }
310 
311 
312 /* static */
313 template <typename ReadBarrier>
StringIsEqualUint8Data(ReadBarrier && readBarrier,const BaseString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)314 bool BaseString::StringIsEqualUint8Data(ReadBarrier &&readBarrier, const BaseString *str1, const uint8_t *dataAddr,
315                                         uint32_t dataLen,
316                                         bool canBeCompressToUtf8)
317 {
318     if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
319         return false;
320     }
321     if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
322         return false;
323     }
324     if (str1->IsUtf8()) {
325         std::vector<uint8_t> buf;
326         Span<const uint8_t> data1(BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf),
327                                   dataLen);
328         Span<const uint8_t> data2(dataAddr, dataLen);
329         return BaseString::StringsAreEquals(data1, data2);
330     }
331     std::vector<uint16_t> buf;
332     uint32_t length = str1->GetLength();
333     const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf);
334     return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
335 }
336 
337 /* static */
338 template <typename ReadBarrier>
StringsAreEqualUtf16(ReadBarrier && readBarrier,const BaseString * str1,const uint16_t * utf16Data,uint32_t utf16Len)339 bool BaseString::StringsAreEqualUtf16(ReadBarrier &&readBarrier, const BaseString *str1, const uint16_t *utf16Data,
340                                       uint32_t utf16Len)
341 {
342     uint32_t length = str1->GetLength();
343     if (length != utf16Len) {
344         return false;
345     }
346     if (str1->IsUtf8()) {
347         std::vector<uint8_t> buf;
348         const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf);
349         return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
350     } else {
351         std::vector<uint16_t> buf;
352         Span<const uint16_t> data1(BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf),
353                                    length);
354         Span<const uint16_t> data2(utf16Data, utf16Len);
355         return BaseString::StringsAreEquals(data1, data2);
356     }
357 }
358 } // namespace common
359 
360 #endif //COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H