• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_DECLARE_H
17 #define COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_DECLARE_H
18 
19 #include "common_interfaces/base/bit_field.h"
20 #include "common_interfaces/base/mem.h"
21 #include "common_interfaces/objects/base_object.h"
22 #include "common_interfaces/objects/utils/field_macro.h"
23 #include "common_interfaces/objects/utils/objects_traits.h"
24 #include "common_interfaces/objects/readonly_handle.h"
25 #include "libpandabase/utils/span.h"
26 
27 #include <type_traits>
28 
29 namespace common {
30 class LineString;
31 class TreeString;
32 class SlicedString;
33 
34 using ::panda::Span;
35 
36 /*
37  +-----------------------------+ <-- offset 0
38  |      BaseObject fields      |
39  +-----------------------------+ <-- offset = BaseObjectSize()
40  | LengthAndFlags (uint32_t)   | <-- LENGTH_AND_FLAGS_OFFSET
41  +-----------------------------+
 | MixHashcode (uint32_t)      | <-- MIX_HASHCODE_OFFSET
43  +-----------------------------+ <-- SIZE (== BaseString::SIZE)
44  */
45 /*
46  +-----------------------------+
47  |   LengthAndFlags (uint32_t) |
48  +-----------------------------+
49  Bit layout:
50    [0]         : CompressedStatusBit         (1 bit)
51    [1]         : IsInternBit                 (1 bit)
52    [2 - 31]    : LengthBits                  (30 bits)
53  */
54 class BaseString : public BaseObject {
55 public:
56     BASE_CAST_CHECK(BaseString, IsString);
57     NO_MOVE_SEMANTIC_CC(BaseString);
58     NO_COPY_SEMANTIC_CC(BaseString);
59     static constexpr uint32_t RAW_HASH_LENGTH = 31;
60     static constexpr uint32_t IS_INTEGER_MASK = 1U << RAW_HASH_LENGTH;
61     static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF;
62     static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9;
63     static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning
64     static constexpr uint32_t MAX_ELEMENT_INDEX_LEN = 10;
65     static constexpr size_t HASH_SHIFT = 5;
66     static constexpr size_t LENGTH_AND_FLAGS_OFFSET = BaseObjectSize();
67     static constexpr uint32_t STRING_LENGTH_BITS_NUM = 30;
68 
69     enum CompressedStatus {
70         STRING_COMPRESSED,
71         STRING_UNCOMPRESSED,
72     };
73 
74     enum TrimMode : uint8_t {
75         TRIM,
76         TRIM_START,
77         TRIM_END,
78     };
79 
80     enum IsIntegerStatus {
81         NOT_INTEGER = 0,
82         IS_INTEGER,
83     };
84 
85     enum ConcatOptStatus {
86         BEGIN_STRING_ADD = 1,
87         IN_STRING_ADD,
88         CONFIRMED_IN_STRING_ADD,
89         END_STRING_ADD,
90         INVALID_STRING_ADD,
91     };
92 
93     using CompressedStatusBit = BitField<CompressedStatus, 0>;                   // 1
94     using IsInternBit = CompressedStatusBit::NextFlag;                           // 1
95     using LengthBits = IsInternBit::NextField<uint32_t, STRING_LENGTH_BITS_NUM>; // 30
96     static_assert(LengthBits::START_BIT + LengthBits::SIZE == sizeof(uint32_t) * BITS_PER_BYTE,
97                   "LengthBits does not match the field size");
98 
99     PRIMITIVE_FIELD(LengthAndFlags, uint32_t, LENGTH_AND_FLAGS_OFFSET, MIX_HASHCODE_OFFSET)
100 
101     using RawHashcode = BitField<uint32_t, 0, RAW_HASH_LENGTH>;                   // 31
102     using IsIntegerBit = RawHashcode::NextField<IsIntegerStatus, 1>;              // 1
103     // In last bit of mix_hash we store if this string is small-integer number or not.
104     PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE)
105 
106     static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger);
107 
108     template <typename ReadBarrier>
IsInteger(ReadBarrier && readBarrier)109     inline bool IsInteger(ReadBarrier &&readBarrier)
110     {
111         uint32_t hashcode = GetHashcode(std::forward<ReadBarrier>(readBarrier));
112         return IsIntegerBit::Decode(hashcode) == IS_INTEGER;
113     }
114 
IsString()115     bool IsString() const
116     {
117         return GetBaseClass()->IsString();
118     }
119 
IsLineString()120     bool IsLineString() const
121     {
122         return GetBaseClass()->IsLineString();
123     }
124 
IsTreeString()125     bool IsTreeString() const
126     {
127         return GetBaseClass()->IsTreeString();
128     }
129 
IsSlicedString()130     bool IsSlicedString() const
131     {
132         return GetBaseClass()->IsSlicedString();
133     }
134 
135     bool IsUtf8() const;
136 
137     bool IsUtf16() const;
138 
139     // require is LineString
140     uint16_t *GetData() const;
141     const uint8_t *GetDataUtf8() const;
142     const uint16_t *GetDataUtf16() const;
143 
144     // require is LineString
145     uint8_t *GetDataUtf8Writable();
146     uint16_t *GetDataUtf16Writable();
147 
148     uint32_t GetLength() const;
149 
150     void InitLengthAndFlags(uint32_t length, bool compressed = false, bool isIntern = false);
151 
152     template <typename ReadBarrier>
153     size_t GetUtf8Length(ReadBarrier &&readBarrier, bool modify = true, bool isGetBufferSize = false) const;
154 
155     void SetIsInternString();
156 
157     bool IsInternString() const;
158 
159     void ClearInternStringFlag();
160 
161     bool TryGetHashCode(uint32_t *hash) const;
162 
163     // not change this data structure.
164     // if string is not flat, this func has low efficiency.
165     template <typename ReadBarrier>
166     uint32_t PUBLIC_API GetHashcode(ReadBarrier &&readBarrier);
167 
168     template<class ReadBarrier>
169     uint32_t ComputeHashcode(ReadBarrier &&readBarrier) const;
170 
171     template <typename ReadBarrier>
172     std::pair<uint32_t, bool>  PUBLIC_API ComputeRawHashcode(ReadBarrier &&readBarrier) const;
173 
174     template <bool verify = true, typename ReadBarrier>
175     uint16_t At(ReadBarrier &&readBarrier, int32_t index) const;
176 
177     // require is LineString
178     void WriteData(uint32_t index, uint16_t src);
179 
180     // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
181     template <typename ReadBarrier>
182     bool EqualToSplicedString(ReadBarrier &&readBarrier, const BaseString *str1, const BaseString *str2);
183 
184     // It allows user to copy into buffer even if maxLength < length
185     template <typename ReadBarrier>
186     size_t WriteUtf8(ReadBarrier &&readBarrier, uint8_t *buf, size_t maxLength,
187                      bool isWriteBuffer = false) const;
188     template <typename ReadBarrier>
189     size_t CopyDataToUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t length, uint32_t bufLength) const;
190 
191     // It allows user to copy into buffer even if maxLength < length
192     template <typename ReadBarrier>
193     size_t WriteUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const;
194 
195     template <typename ReadBarrier>
196     size_t WriteOneByte(ReadBarrier &&readBarrier, uint8_t *buf, size_t maxLength) const;
197     template <typename ReadBarrier>
198     size_t CopyDataRegionUtf8(ReadBarrier &&readBarrier, uint8_t *buf, size_t start, size_t length,
199                               size_t maxLength,
200                               bool modify = true, bool isWriteBuffer = false) const;
201     template <typename ReadBarrier>
202     uint32_t CopyDataUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t maxLength) const;
203     template <typename ReadBarrier>
204     std::u16string ToU16String(ReadBarrier &&readBarrier, uint32_t len = 0);
205 
206     template <typename ReadBarrier, typename Vec,
207               std::enable_if_t<objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint8_t>, int>  = 0>
208     Span<const uint8_t> ToUtf8Span(ReadBarrier &&readBarrier, Vec &buf, bool modify = true, bool cesu8 = false);
209 
210     template <typename ReadBarrier, typename Vec,
211               std::enable_if_t<objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint8_t>, int>  = 0>
212     Span<const uint8_t> DebuggerToUtf8Span(ReadBarrier &&readBarrier, Vec &buf, bool modify = true);
213 
214     template <typename ReadBarrier>
215     void WriteData(ReadBarrier &&readBarrier, BaseString *src, uint32_t start, uint32_t destSize,
216                    uint32_t length);
217 
218     template <typename ReadBarrier>
219     bool IsFlat(ReadBarrier &&readBarrier) const;
220 
221     bool NotTreeString() const;
222 
223     CommonType GetStringType() const;
224 
225     template <class T1, class T2>
226     static uint32_t CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst, const T2 *dataSecond,
227                                                 size_t sizeSecond);
228 
229     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
230     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
231 
232     template <typename Allocator, objects_traits::enable_if_is_allocate<Allocator, BaseObject *>  = 0>
233     static BaseString *CreateFromUtf8(Allocator &&allocate, const uint8_t *utf8Data, uint32_t utf8Len,
234                                       bool canBeCompress);
235 
236     template <typename Allocator, objects_traits::enable_if_is_allocate<Allocator, BaseObject *>  = 0>
237     static BaseString *CreateFromUtf8CompressedSubString(Allocator &&allocate,
238                                                          const ReadOnlyHandle<BaseString> string,
239                                                          uint32_t offset, uint32_t utf8Len);
240 
241     template <typename Allocator, objects_traits::enable_if_is_allocate<Allocator, BaseObject *>  = 0>
242     static LineString *CreateLineString(Allocator &&allocator, size_t length, bool compressed);
243 
244     template <typename Allocator, objects_traits::enable_if_is_allocate<Allocator, BaseObject *>  = 0>
245     static BaseString *CreateFromUtf16(Allocator &&allocator, const uint16_t *utf16Data, uint32_t utf16Len,
246                                        bool canBeCompress);
247 
248     template <typename Allocator, typename WriteBarrier,
249               objects_traits::enable_if_is_allocate<Allocator, BaseObject *>  = 0,
250               objects_traits::enable_if_is_write_barrier<WriteBarrier>  = 0>
251     static SlicedString *CreateSlicedString(Allocator &&allocator, WriteBarrier &&writeBarrier,
252                                             ReadOnlyHandle<BaseString> parent);
253 
254     template <typename Allocator, typename WriteBarrier,
255               objects_traits::enable_if_is_allocate<Allocator, BaseObject *>  = 0,
256               objects_traits::enable_if_is_write_barrier<WriteBarrier>  = 0>
257     static TreeString *CreateTreeString(Allocator &&allocator, WriteBarrier &&writeBarrier,
258                                         ReadOnlyHandle<BaseString> left, ReadOnlyHandle<BaseString> right,
259                                         uint32_t length, bool compressed);
260 
261     // Check that two spans are equal. Should have the same length.
262     /* static */
263     template <typename T, typename T1>
264     static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2);
265 
266     // Converts utf8Data to utf16 and compare it with given utf16_data.
267     static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
268                                   uint32_t utf16Len);
269 
270     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
271     template <typename ReadBarrier>
272     static PUBLIC_API bool StringsAreEqual(ReadBarrier &&readBarrier, BaseString *str1, BaseString *str2);
273     // Two strings have the same type of utf encoding format.
274     template <typename ReadBarrier>
275     static bool StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *str1, BaseString *str2);
276 
277     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
278     // not change str1 data structure.
279     // if str1 is not flat, this func has low efficiency.
280     template <typename ReadBarrier>
281     static bool StringIsEqualUint8Data(ReadBarrier &&readBarrier, const BaseString *str1,
282                                        const uint8_t *dataAddr, uint32_t dataLen,
283                                        bool canBeCompress);
284     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
285     // not change str1 data structure.
286     // if str1 is not flat, this func has low efficiency.
287     template <typename ReadBarrier>
288     static bool StringsAreEqualUtf16(ReadBarrier &&readBarrier, const BaseString *str1,
289                                      const uint16_t *utf16Data, uint32_t utf16Len);
290 
291     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
292 
293     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
294 
295     static bool CanBeCompressed(const BaseString *string);
296 
297     // single char copy for loop
298     template <typename DstType, typename SrcType>
299     static void CopyChars(DstType *dst, SrcType *src, uint32_t count);
300 
301     // To change the hash algorithm of BaseString, please modify BaseString::CalculateConcatHashCode
302     // and BaseStringHashHelper::ComputeHashForDataPlatform simultaneously!!
303     static PUBLIC_API uint32_t ComputeHashForData(const uint8_t *data, size_t size, uint32_t hashSeed);
304 
305     static PUBLIC_API uint32_t ComputeHashForData(const uint16_t *data, size_t size, uint32_t hashSeed);
306 
307     static bool IsASCIICharacter(uint16_t data);
308 
309     template <typename T1, typename T2>
310     static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
311 
312     template <typename T1, typename T2>
313     static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos);
314 
315     template <typename Char, typename ReadBarrier>
316     static void WriteToFlat(ReadBarrier &&readBarrier, BaseString *src, Char *buf, uint32_t maxLength);
317 
318     template <typename Char, typename ReadBarrier>
319     static void WriteToFlatWithPos(ReadBarrier &&readBarrier, BaseString *src, Char *buf, uint32_t length,
320                                    uint32_t pos);
321 
322     template <typename ReadBarrier, typename Vec,
323               std::enable_if_t<objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint8_t>, int>  = 0>
324     static const uint8_t *PUBLIC_API GetUtf8DataFlat(ReadBarrier &&readBarrier, const BaseString *src, Vec &buf);
325 
326     template <typename ReadBarrier>
327     static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(ReadBarrier &&readBarrier, const BaseString *src);
328 
329     template <typename ReadBarrier, typename Vec,
330               std::enable_if_t<objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint16_t>, int>  = 0>
331     static const uint16_t *PUBLIC_API GetUtf16DataFlat(ReadBarrier &&readBarrier, const BaseString *src,
332                                                        Vec &buf);
333 
334     template <typename ReadBarrier>
335     static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(ReadBarrier &&readBarrier, const BaseString *src);
336 
337     template <typename T>
338     static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed);
339 };
340 } // namespace common
341 #endif // COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_DECLARE_H