• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <codecvt>
17 #include <locale>
18 
19 #include "common_components/base/utf_helper.h"
20 #include "common_interfaces/objects/base_string.h"
21 
22 #include "common_components/platform/string_hash.h"
23 #include "common_components/platform/string_hash_helper.h"
24 
25 namespace common {
26     constexpr size_t LOW_3BITS = 0x7;
27     constexpr size_t LOW_4BITS = 0xF;
28     constexpr size_t LOW_5BITS = 0x1F;
29     constexpr size_t LOW_6BITS = 0x3F;
30     constexpr size_t L_SURROGATE_START = 0xDC00;
31     constexpr size_t H_SURROGATE_START = 0xD800;
32     constexpr size_t SURROGATE_RAIR_START = 0x10000;
33     constexpr size_t OFFSET_18POS = 18;
34     constexpr size_t OFFSET_12POS = 12;
35     constexpr size_t OFFSET_10POS = 10;
36     constexpr size_t OFFSET_6POS = 6;
37 
DebuggerConvertRegionUtf16ToUtf8(const uint16_t * utf16In,uint8_t * utf8Out,size_t utf16Len,size_t utf8Len,size_t start,bool modify,bool isWriteBuffer)38     size_t UtfUtils::DebuggerConvertRegionUtf16ToUtf8(const uint16_t* utf16In, uint8_t* utf8Out, size_t utf16Len,
39                                                       size_t utf8Len, size_t start, bool modify, bool isWriteBuffer)
40     {
41         return common::utf_helper::DebuggerConvertRegionUtf16ToUtf8(utf16In, utf8Out, utf16Len, utf8Len,
42                                                                     start, modify, isWriteBuffer);
43     }
44 
Utf8ToUtf16Size(const uint8_t * utf8,size_t utf8Len)45     size_t UtfUtils::Utf8ToUtf16Size(const uint8_t* utf8, size_t utf8Len)
46     {
47         return common::utf_helper::Utf8ToUtf16Size(utf8, utf8Len);
48     }
49 
Utf16ToUtf8Size(const uint16_t * utf16,uint32_t length,bool modify,bool isGetBufferSize,bool cesu8)50     size_t UtfUtils::Utf16ToUtf8Size(const uint16_t* utf16, uint32_t length, bool modify, bool isGetBufferSize,
51                                      bool cesu8)
52     {
53         return common::utf_helper::Utf16ToUtf8Size(utf16, length, modify, isGetBufferSize, cesu8);
54     }
55 
ConvertRegionUtf8ToUtf16(const uint8_t * utf8In,uint16_t * utf16Out,size_t utf8Len,size_t utf16Len)56     size_t UtfUtils::ConvertRegionUtf8ToUtf16(const uint8_t* utf8In, uint16_t* utf16Out, size_t utf8Len,
57                                               size_t utf16Len)
58     {
59         return common::utf_helper::ConvertRegionUtf8ToUtf16(utf8In, utf16Out, utf8Len, utf16Len);
60     }
61 
ConvertRegionUtf16ToLatin1(const uint16_t * utf16In,uint8_t * latin1Out,size_t utf16Len,size_t latin1Len)62     size_t UtfUtils::ConvertRegionUtf16ToLatin1(const uint16_t* utf16In, uint8_t* latin1Out, size_t utf16Len,
63                                                 size_t latin1Len)
64     {
65         return common::utf_helper::ConvertRegionUtf16ToLatin1(utf16In, latin1Out, utf16Len, latin1Len);
66     }
67 
ConvertRegionUtf16ToUtf8(const uint16_t * utf16In,uint8_t * utf8Out,size_t utf16Len,size_t utf8Len,size_t start,bool modify,bool isWriteBuffer,bool cesu)68     size_t UtfUtils::ConvertRegionUtf16ToUtf8(const uint16_t* utf16In, uint8_t* utf8Out, size_t utf16Len,
69                                               size_t utf8Len, size_t start, bool modify, bool isWriteBuffer, bool cesu)
70     {
71         return common::utf_helper::ConvertRegionUtf16ToUtf8(
72             utf16In, utf8Out, utf16Len, utf8Len, start, modify, isWriteBuffer, cesu);
73     }
74 
75 
76     // To change the hash algorithm of BaseString, please modify BaseString::CalculateConcatHashCode
77     // and BaseStringHashHelper::ComputeHashForDataPlatform simultaneously!!
78     template<typename T>
ComputeHashForDataInternal(const T * data,size_t size,uint32_t hashSeed)79     uint32_t ComputeHashForDataInternal(const T *data, size_t size, uint32_t hashSeed)
80     {
81         if (size <= static_cast<size_t>(StringHash::MIN_SIZE_FOR_UNROLLING)) {
82             uint32_t hash = hashSeed;
83             for (uint32_t i = 0; i < size; i++) {
84                 hash = (hash << static_cast<uint32_t>(StringHash::HASH_SHIFT)) - hash + data[i];
85             }
86             return hash;
87         }
88         return StringHashHelper::ComputeHashForDataPlatform(data, size, hashSeed);
89     }
90 
ComputeHashForData(const uint8_t * data,size_t size,uint32_t hashSeed)91     PUBLIC_API uint32_t BaseString::ComputeHashForData(const uint8_t *data, size_t size, uint32_t hashSeed)
92     {
93         return ComputeHashForDataInternal(data, size, hashSeed);
94     }
95 
ComputeHashForData(const uint16_t * data,size_t size,uint32_t hashSeed)96     PUBLIC_API uint32_t BaseString::ComputeHashForData(const uint16_t *data, size_t size, uint32_t hashSeed)
97     {
98         return ComputeHashForDataInternal(data, size, hashSeed);
99     }
100 
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)101     uint32_t BaseString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
102     {
103         if (utf8Len == 0) {
104             return MixHashcode(0, NOT_INTEGER);
105         }
106         if (canBeCompress) {
107             uint32_t mixHash = 0;
108             // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
109             if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
110                 return mixHash;
111             }
112             uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
113             return MixHashcode(hash, NOT_INTEGER);
114         }
115         auto utf16Len = UtfUtils::Utf8ToUtf16Size(utf8Data, utf8Len);
116         std::vector<uint16_t> tmpBuffer(utf16Len);
117         [[maybe_unused]] auto len = UtfUtils::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
118                                                                        utf16Len);
119         DCHECK_CC(len == utf16Len);
120         uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
121         return MixHashcode(hash, NOT_INTEGER);
122     }
123 
124     /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)125     uint32_t BaseString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
126     {
127         if (length == 0) {
128             return MixHashcode(0, NOT_INTEGER);
129         }
130         uint32_t mixHash = 0;
131         // String length smaller than 10, try to compute integer hash.
132         if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
133             return mixHash;
134         }
135         uint32_t hash = ComputeHashForData(utf16Data, length, 0);
136         return MixHashcode(hash, NOT_INTEGER);
137     }
138 
139 
140     // drop the tail bytes if the remain length can't fill the length it represents.
FixUtf8Len(const uint8_t * utf8,size_t utf8Len)141     static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
142     {
143         constexpr size_t TWO_BYTES_LENGTH = 2;
144         constexpr size_t THREE_BYTES_LENGTH = 3;
145         size_t trimSize = 0;
146         if (utf8Len >= 1 && utf8[utf8Len - 1] >= 0xC0) {
147             // The last one char claim there are more than 1 byte next to it, it's invalid, so drop the last one.
148             trimSize = 1;
149         }
150         if (utf8Len >= TWO_BYTES_LENGTH && utf8[utf8Len - TWO_BYTES_LENGTH] >= 0xE0) {
151             // The second to last char claim there are more than 2 bytes next to it, it's invalid, so drop the last two.
152             trimSize = TWO_BYTES_LENGTH;
153         }
154         if (utf8Len >= THREE_BYTES_LENGTH && utf8[utf8Len - THREE_BYTES_LENGTH] >= 0xF0) {
155             // The third to last char claim there are more than 3 bytes next to it, it's invalid, so drop the last
156             // three.
157             trimSize = THREE_BYTES_LENGTH;
158         }
159         return utf8Len - trimSize;
160     }
161 
162     /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)163     bool BaseString::IsUtf8EqualsUtf16(const uint8_t* utf8Data, size_t utf8Len,
164                                        const uint16_t* utf16Data, uint32_t utf16Len)
165     {
166         size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
167         const uint8_t* utf8End = utf8Data + utf8Len;
168         const uint8_t* utf8SafeEnd = utf8Data + safeUtf8Len;
169         const uint16_t* utf16End = utf16Data + utf16Len;
170         while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
171             uint8_t src = *utf8Data;
172             switch (src & 0xF0) {
173                 case 0xF0:
174                     {
175                         const uint8_t c2 = *(++utf8Data);
176                         const uint8_t c3 = *(++utf8Data);
177                         const uint8_t c4 = *(++utf8Data);
178                         uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
179                             ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
180                         if (codePoint >= SURROGATE_RAIR_START) {
181                             if (utf16Data >= utf16End - 1) {
182                                 return false;
183                             }
184                             codePoint -= SURROGATE_RAIR_START;
185                             if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) |
186                                 H_SURROGATE_START)) {
187                                 return false;
188                             } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
189                                 return false;
190                             }
191                         } else {
192                             if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
193                                 return false;
194                             }
195                         }
196                         utf8Data++;
197                         break;
198                     }
199                 case 0xE0:
200                     {
201                         const uint8_t c2 = *(++utf8Data);
202                         const uint8_t c3 = *(++utf8Data);
203                         if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
204                             ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
205                             return false;
206                         }
207                         utf8Data++;
208                         break;
209                     }
210                 case 0xD0:
211                 case 0xC0:
212                     {
213                         const uint8_t c2 = *(++utf8Data);
214                         if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 &
215                             LOW_6BITS))) {
216                             return false;
217                         }
218                         utf8Data++;
219                         break;
220                     }
221                 default:
222                     do {
223                         if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
224                             return false;
225                         }
226                     }
227                     while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
228                     break;
229             }
230         }
231         // The remain chars should be treated as single byte char.
232         while (utf8Data < utf8End && utf16Data < utf16End) {
233             if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
234                 return false;
235             }
236         }
237         return utf8Data == utf8End && utf16Data == utf16End;
238     }
239 
240     // static
241     template <typename T1, typename T2>
CalculateDataConcatHashCode(const T1 * dataFirst,size_t sizeFirst,const T2 * dataSecond,size_t sizeSecond)242     uint32_t BaseString::CalculateDataConcatHashCode(const T1* dataFirst, size_t sizeFirst,
243                                                      const T2* dataSecond, size_t sizeSecond)
244     {
245         uint32_t totalHash = ComputeHashForData(dataFirst, sizeFirst, 0);
246         totalHash = ComputeHashForData(dataSecond, sizeSecond, totalHash);
247         return MixHashcode(totalHash, NOT_INTEGER);
248     }
249 
250     template
251     uint32_t BaseString::CalculateDataConcatHashCode<uint8_t, uint8_t>(const uint8_t* dataFirst, size_t sizeFirst,
252                                                                        const uint8_t* dataSecond, size_t sizeSecond);
253     template
254     uint32_t BaseString::CalculateDataConcatHashCode<uint16_t, uint16_t>(const uint16_t* dataFirst, size_t sizeFirst,
255                                                                          const uint16_t* dataSecond, size_t sizeSecond);
256     template
257     uint32_t BaseString::CalculateDataConcatHashCode<uint8_t, uint16_t>(const uint8_t* dataFirst, size_t sizeFirst,
258                                                                         const uint16_t* dataSecond, size_t sizeSecond);
259     template
260     uint32_t BaseString::CalculateDataConcatHashCode<uint16_t, uint8_t>(const uint16_t* dataFirst, size_t sizeFirst,
261                                                                         const uint8_t* dataSecond, size_t sizeSecond);
262 
263 
CanBeCompressed(const BaseString * string)264     bool BaseString::CanBeCompressed(const BaseString* string)
265     {
266         DCHECK_CC(string->IsLineString());
267         if (string->IsUtf8()) {
268             return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
269         }
270         return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
271     }
272 
273     // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)274     bool BaseString::CanBeCompressed(const uint8_t* utf8Data, uint32_t utf8Len)
275     {
276         uint32_t index = 0;
277         for (; index + 4 <= utf8Len; index += 4) {
278             // 4: process the data in chunks of 4 elements to improve speed
279             // Check if all four characters in the current block are ASCII characters
280             if (!IsASCIICharacter(utf8Data[index]) ||
281                 !IsASCIICharacter(utf8Data[index + 1]) || // 1: the second element of the block
282                 !IsASCIICharacter(utf8Data[index + 2]) || // 2: the third element of the block
283                 !IsASCIICharacter(utf8Data[index + 3])) {
284                 // 3: the fourth element of the block
285                 return false;
286             }
287         }
288         // Check remaining characters if they are ASCII
289         for (; index < utf8Len; ++index) {
290             if (!IsASCIICharacter(utf8Data[index])) {
291                 return false;
292             }
293         }
294         return true;
295     }
296 
297     /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)298     bool BaseString::CanBeCompressed(const uint16_t* utf16Data, uint32_t utf16Len)
299     {
300         uint32_t index = 0;
301         for (; index + 4 <= utf16Len; index += 4) {
302             // 4: process the data in chunks of 4 elements to improve speed
303             // Check if all four characters in the current block are ASCII characters
304             if (!IsASCIICharacter(utf16Data[index]) ||
305                 !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
306                 !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
307                 !IsASCIICharacter(utf16Data[index + 3])) {
308                 // 3: the fourth element of the block
309                 return false;
310             }
311         }
312         // Check remaining characters if they are ASCII
313         for (; index < utf16Len; ++index) {
314             if (!IsASCIICharacter(utf16Data[index])) {
315                 return false;
316             }
317         }
318         return true;
319     }
320 
321 
IsASCIICharacter(uint16_t data)322     bool BaseString::IsASCIICharacter(uint16_t data)
323     {
324         if (data == 0) {
325             return false;
326         }
327         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
328         return data <= UtfUtils::UTF8_1B_MAX;
329     }
330 
331 
332     /* static */
333     template <typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)334     int32_t BaseString::IndexOf(Span<const T1>& lhsSp, Span<const T2>& rhsSp, int32_t pos, int32_t max)
335     {
336         DCHECK_CC(rhsSp.size() > 0);
337         auto first = static_cast<int32_t>(rhsSp[0]);
338         for (int32_t i = pos; i <= max; i++) {
339             if (static_cast<int32_t>(lhsSp[i]) != first) {
340                 i++;
341                 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
342                     i++;
343                 }
344             }
345             /* Found first character, now look at the rest of rhsSp */
346             if (i <= max) {
347                 int j = i + 1;
348                 int end = j + static_cast<int>(rhsSp.size()) - 1;
349 
350                 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
351                 }
352                 if (j == end) {
353                     /* Found whole string. */
354                     return i;
355                 }
356             }
357         }
358         return -1;
359     }
360 
361     template
362     int32_t BaseString::IndexOf<uint8_t, uint8_t>(Span<const uint8_t>& lhsSp, Span<const uint8_t>& rhsSp, int32_t pos,
363                                                   int32_t max);
364     template
365     int32_t BaseString::IndexOf<uint16_t, uint16_t>(Span<const uint16_t>& lhsSp, Span<const uint16_t>& rhsSp,
366                                                     int32_t pos, int32_t max);
367 
368     template
369     int32_t BaseString::IndexOf<uint8_t, uint16_t>(Span<const uint8_t>& lhsSp, Span<const uint16_t>& rhsSp, int32_t pos,
370                                                    int32_t max);
371 
372     template
373     int32_t BaseString::IndexOf<uint16_t, uint8_t>(Span<const uint16_t>& lhsSp, Span<const uint8_t>& rhsSp, int32_t pos,
374                                                    int32_t max);
375 
376 
377     template <typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)378     int32_t BaseString::LastIndexOf(Span<const T1>& lhsSp, Span<const T2>& rhsSp, int32_t pos)
379     {
380         int rhsSize = static_cast<int>(rhsSp.size());
381         DCHECK_CC(rhsSize > 0);
382         auto first = rhsSp[0];
383         for (int32_t i = pos; i >= 0; i--) {
384             if (lhsSp[i] != first) {
385                 continue;
386             }
387             /* Found first character, now look at the rest of rhsSp */
388             int j = 1;
389             while (j < rhsSize) {
390                 if (rhsSp[j] != lhsSp[i + j]) {
391                     break;
392                 }
393                 j++;
394             }
395             if (j == rhsSize) {
396                 return i;
397             }
398         }
399         return -1;
400     }
401 
402     template
403     int32_t BaseString::LastIndexOf<uint8_t, uint8_t>(Span<const uint8_t>& lhsSp, Span<const uint8_t>& rhsSp,
404                                                       int32_t pos);
405     template
406     int32_t BaseString::LastIndexOf<uint16_t, uint16_t>(Span<const uint16_t>& lhsSp, Span<const uint16_t>& rhsSp,
407                                                         int32_t pos);
408     template
409     int32_t BaseString::LastIndexOf<uint8_t, uint16_t>(Span<const uint8_t>& lhsSp, Span<const uint16_t>& rhsSp,
410                                                        int32_t pos);
411     template
412     int32_t BaseString::LastIndexOf<uint16_t, uint8_t>(Span<const uint16_t>& lhsSp, Span<const uint8_t>& rhsSp,
413                                                        int32_t pos);
414 
415 
416     template <typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)417     int32_t CompareStringSpan(Span<T1>& lhsSp, Span<T2>& rhsSp, int32_t count)
418     {
419         for (int32_t i = 0; i < count; ++i) {
420             auto left = static_cast<int32_t>(lhsSp[i]);
421             auto right = static_cast<int32_t>(rhsSp[i]);
422             if (left != right) {
423                 return left - right;
424             }
425         }
426         return 0;
427     }
428 
429     template
430     int32_t CompareStringSpan<const uint8_t, const uint8_t>(Span<const uint8_t>& lhsSp, Span<const uint8_t>& rhsSp,
431                                                             int32_t count);
432     template
433     int32_t CompareStringSpan<const uint16_t, const uint16_t>(Span<const uint16_t>& lhsSp, Span<const uint16_t>& rhsSp,
434                                                               int32_t count);
435     template
436     int32_t CompareStringSpan<const uint8_t, const uint16_t>(Span<const uint8_t>& lhsSp, Span<const uint16_t>& rhsSp,
437                                                              int32_t count);
438     template
439     int32_t CompareStringSpan<const uint16_t, const uint8_t>(Span<const uint16_t>& lhsSp, Span<const uint8_t>& rhsSp,
440                                                              int32_t count);
441 
442 
443     template <typename T1, typename T2>
IsSubStringAtSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,uint32_t offset)444     bool IsSubStringAtSpan(Span<T1>& lhsSp, Span<T2>& rhsSp, uint32_t offset)
445     {
446         int rhsSize = static_cast<int>(rhsSp.size());
447         DCHECK_CC(rhsSize + offset <= lhsSp.size());
448         for (int i = 0; i < rhsSize; ++i) {
449             auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
450             auto right = static_cast<int32_t>(rhsSp[i]);
451             if (left != right) {
452                 return false;
453             }
454         }
455         return true;
456     }
457 
458     template
459     bool IsSubStringAtSpan<const uint8_t, const uint8_t>(Span<const uint8_t>& lhsSp, Span<const uint8_t>& rhsSp,
460                                                          uint32_t offset);
461     template
462     bool IsSubStringAtSpan<const uint16_t, const uint16_t>(Span<const uint16_t>& lhsSp, Span<const uint16_t>& rhsSp,
463                                                            uint32_t offset);
464     template
465     bool IsSubStringAtSpan<const uint8_t, const uint16_t>(Span<const uint8_t>& lhsSp, Span<const uint16_t>& rhsSp,
466                                                           uint32_t offset);
467     template
468     bool IsSubStringAtSpan<const uint16_t, const uint8_t>(Span<const uint16_t>& lhsSp, Span<const uint8_t>& rhsSp,
469                                                           uint32_t offset);
470 
471 
Utf16ToU16String(const uint16_t * utf16Data,uint32_t dataLen)472     std::u16string Utf16ToU16String(const uint16_t* utf16Data, uint32_t dataLen)
473     {
474         auto* char16tData = reinterpret_cast<const char16_t*>(utf16Data);
475         std::u16string u16str(char16tData, dataLen);
476         return u16str;
477     }
478 
Utf8ToU16String(const uint8_t * utf8Data,uint32_t dataLen)479     std::u16string Utf8ToU16String(const uint8_t* utf8Data, uint32_t dataLen)
480     {
481         auto* charData = reinterpret_cast<const char*>(utf8Data);
482         std::string str(charData, dataLen);
483         std::u16string u16str = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.from_bytes(str);
484         return u16str;
485     }
486 }  // namespace common
487