• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include <vector>

#include "ecmascript/base/utf_helper.h"
#include "ecmascript/tests/test_helper.h"
18 
19 using namespace panda::ecmascript;
20 using namespace panda::ecmascript::base;
21 using namespace panda::ecmascript::base::utf_helper;
22 
23 namespace panda::test {
24 class UtfHelperTest : public testing::Test {
25 public:
SetUpTestCase()26     static void SetUpTestCase()
27     {
28         GTEST_LOG_(INFO) << "SetUpTestCase";
29     }
30 
TearDownTestCase()31     static void TearDownTestCase()
32     {
33         GTEST_LOG_(INFO) << "TearDownCase";
34     }
35 
SetUp()36     void SetUp() override
37     {
38         TestHelper::CreateEcmaVMWithScope(instance, thread, scope);
39     }
40 
TearDown()41     void TearDown() override
42     {
43         TestHelper::DestroyEcmaVMWithScope(instance, scope);
44     }
45 
46     EcmaVM *instance {nullptr};
47     EcmaHandleScope *scope {nullptr};
48     JSThread *thread {nullptr};
49 };
50 
/*
* @tc.name: CombineTwoU16
* @tc.desc: Given the lead surrogate and the trail surrogate of a UTF16-encoded surrogate pair,
*           return the corresponding Unicode code point value.
* @tc.type: FUNC
*/
HWTEST_F_L0(UtfHelperTest, CombineTwoU16)
{
    // Reference formula used by this test to derive the code point of a surrogate pair.
    auto referenceCodePoint = [](uint16_t lead, uint16_t trail) -> uint32_t {
        return static_cast<uint32_t>(((lead - 0xD800U) << 10) | (trail - 0xDC00U)) + 0x10000U;
    };

    struct SurrogateCase {
        uint16_t lead;
        uint16_t trail;
        uint32_t codePoint;
    };
    // Corner values of the lead/trail ranges plus one arbitrary pair from the middle.
    const SurrogateCase cases[] = {
        {0xD800, 0xDC00, 0x10000},
        {0xD800, 0xDFFF, 0x103FF},
        {0xDBFF, 0xDFFF, 0x10FFFF},
        {0xDBFF, 0xDC00, 0x10FC00},
        {0xD950, 0xDF21, 0x64321},
    };

    for (const SurrogateCase &item : cases) {
        const uint32_t reference = referenceCodePoint(item.lead, item.trail);
        const uint32_t combined = CombineTwoU16(item.lead, item.trail);
        // The helper must agree with the reference formula, and the formula with the literal value.
        EXPECT_EQ(reference, combined);
        EXPECT_EQ(reference, static_cast<uint32_t>(item.codePoint));
    }
}
87 
/*
* @tc.name: UTF16Decode
* @tc.desc: Given the lead surrogate and the trail surrogate of a UTF16-encoded surrogate pair,
*           decode them into the corresponding Unicode code point value and return it.
* @tc.type: FUNC
*/
HWTEST_F_L0(UtfHelperTest, UTF16Decode)
{
    struct DecodeCase {
        uint16_t lead;
        uint16_t trail;
        uint32_t expected;
    };
    const DecodeCase cases[] = {
        {0xD950, 0xDF21, 0x64321U},
        {0xD85D, 0xDFCC, 0x277CCU},
    };

    for (const DecodeCase &item : cases) {
        // Sanity check: both units must lie inside the ranges declared by utf_helper.
        EXPECT_TRUE(item.lead >= DECODE_LEAD_LOW && item.lead <= DECODE_LEAD_HIGH);
        EXPECT_TRUE(item.trail >= DECODE_TRAIL_LOW && item.trail <= DECODE_TRAIL_HIGH);
        const uint32_t decoded = utf_helper::UTF16Decode(item.lead, item.trail);
        EXPECT_EQ(decoded, item.expected);
    }
}
109 
110 /*
111  * @tc.name: IsValidUTF8
112  * @tc.desc: Judge whether an input group of symbols is a valid UTF8 coding sequence.
113  * @tc.type: FUNC
114  */
HWTEST_F_L0(UtfHelperTest,IsValidUTF8)115 HWTEST_F_L0(UtfHelperTest, IsValidUTF8)
116 {
117     // 0xxxxxxx, min:0, max:127
118     const std::vector<uint8_t> utfDataOneBitVaild1 = {0x00};
119     const std::vector<uint8_t> utfDataOneBitVaild2 = {BIT_MASK_1 - 0x01};
120     const std::vector<uint8_t> utfDataOneBitInvaild = {BIT_MASK_1};
121     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataOneBitVaild1));
122     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataOneBitVaild2));
123     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataOneBitInvaild));
124     // 110xxxxx 10xxxxxx, min:128, max:2047
125     const std::vector<uint8_t> utfDataTwoBitVaild1 = {BIT_MASK_2 + 0x02, BIT_MASK_1};
126     const std::vector<uint8_t> utfDataTwoBitVaild2 = {BIT_MASK_3 - 0x01, BIT_MASK_2 - 0x01};
127     const std::vector<uint8_t> utfDataTwoBitInvaild1 = {BIT_MASK_2, BIT_MASK_2};
128     const std::vector<uint8_t> utfDataTwoBitInvaild2 = {BIT_MASK_3, BIT_MASK_1};
129     const std::vector<uint8_t> utfDataTwoBitInvaild3 = {BIT_MASK_2, BIT_MASK_1};
130     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataTwoBitVaild1));
131     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataTwoBitVaild2));
132     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataTwoBitInvaild1));
133     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataTwoBitInvaild2));
134     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataTwoBitInvaild3));
135     // 1110xxxx 10xxxxxx 10xxxxxx, min:2048, max:65535
136     const std::vector<uint8_t> utfDataThreeBitVaild1 = {BIT_MASK_3, BIT_MASK_1 + 0x20, BIT_MASK_1};
137     const std::vector<uint8_t> utfDataThreeBitVaild2 = {BIT_MASK_4 - 0x01, BIT_MASK_2 - 0x01, BIT_MASK_2 - 0x01};
138     const std::vector<uint8_t> utfDataThreeBitVaild3 = {BIT_MASK_3 + 0x01, BIT_MASK_1, BIT_MASK_1};
139     const std::vector<uint8_t> utfDataThreeBitInvaild1 = {BIT_MASK_3, BIT_MASK_1, BIT_MASK_2};
140     const std::vector<uint8_t> utfDataThreeBitInvaild2 = {BIT_MASK_3, BIT_MASK_2, BIT_MASK_1};
141     const std::vector<uint8_t> utfDataThreeBitInvaild3 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_1};
142     const std::vector<uint8_t> utfDataThreeBitInvaild4 = {BIT_MASK_4, BIT_MASK_2, BIT_MASK_2};
143     const std::vector<uint8_t> utfDataThreeBitInvaild5 = {BIT_MASK_3, BIT_MASK_1, BIT_MASK_1};
144     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataThreeBitVaild1));
145     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataThreeBitVaild2));
146     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataThreeBitVaild3));
147     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild1));
148     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild2));
149     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild3));
150     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild4));
151     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild5));
152     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, min:65536, max:2097151
153     const std::vector<uint8_t> utfDataFourBitVaild1 = {BIT_MASK_4, BIT_MASK_1 + 0x10, BIT_MASK_1, BIT_MASK_1};
154     const std::vector<uint8_t> utfDataFourBitVaild2 =
155         {BIT_MASK_5 - 0x01, BIT_MASK_2 - 0x01, BIT_MASK_2 - 0x01, BIT_MASK_2 - 0x01};
156     const std::vector<uint8_t> utfDataFourBitVaild3 = {BIT_MASK_4 + 0x01, BIT_MASK_1, BIT_MASK_1, BIT_MASK_1};
157     const std::vector<uint8_t> utfDataFourBitInvaild1 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_1, BIT_MASK_2};
158     const std::vector<uint8_t> utfDataFourBitInvaild2 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_2, BIT_MASK_1};
159     const std::vector<uint8_t> utfDataFourBitInvaild3 = {BIT_MASK_4, BIT_MASK_2, BIT_MASK_1, BIT_MASK_1};
160     const std::vector<uint8_t> utfDataFourBitInvaild4 = {BIT_MASK_5, BIT_MASK_1, BIT_MASK_1, BIT_MASK_1};
161     const std::vector<uint8_t> utfDataFourBitInvaild5 = {BIT_MASK_5, BIT_MASK_2, BIT_MASK_2, BIT_MASK_2};
162     const std::vector<uint8_t> utfDataFourBitInvaild6 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_1, BIT_MASK_1};
163     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataFourBitVaild1));
164     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataFourBitVaild2));
165     EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataFourBitVaild3));
166     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild1));
167     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild2));
168     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild3));
169     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild4));
170     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild5));
171     EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild6));
172 }
173 
174 /*
175 * @tc.name: ConvertUtf16ToUtf8
176 * @tc.desc: Converts a UTF16 encoding sequence encoding a character into a UTF8 encoding sequence,
177 *           and returns the sequence and the byte length of the sequence. The parameter "modify"
178 *           indicates whether to perform special conversion for 0.
179 * @tc.type: FUNC
180 */
HWTEST_F_L0(UtfHelperTest,ConvertUtf16ToUtf8_001)181 HWTEST_F_L0(UtfHelperTest, ConvertUtf16ToUtf8_001)
182 {
183     // codePoint lie in [0,0x7F]--->UTF-8(length:1)
184     {
185         uint16_t utf16Data0 = 0x00;
186         uint16_t utf16Data1 = 0x00;
187         Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
188         Utf8Char utf8CharTemp = {0, {0x00U}};
189         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
190         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
191     }
192 
193     // special case for \u0000 ==> Co80- 1100'0000 1000'0000
194     {
195         uint16_t utf16Data0 = 0x00;
196         uint16_t utf16Data1 = 0x00;
197         Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, true);
198         Utf8Char utf8CharTemp = {2, {UTF8_2B_FIRST, UTF8_2B_SECOND}};
199         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
200         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
201         utf16Data0 = 0x7F;
202         utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
203         utf8CharTemp = {1, {0x7F}};
204         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
205         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
206 
207         // codePoint lie in [0x80,0x7FF]--> UTF-8(length:2)
208         utf16Data0 = 0x80;
209         utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
210         utf8CharTemp = {2, {UTF8_2B_FIRST + 0x02U, UTF8_2B_SECOND}};
211         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
212         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
213         utf16Data0 = 0x7FF;
214         utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
215         utf8CharTemp = {2, {BIT_MASK_3 - 0x01, BIT_MASK_2 - 0x01}};
216         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
217         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
218     }
219 
220     // codePoint lie in [0xD800,0xDFFF]--> UTF-8(length:3)
221     {
222         uint16_t utf16Data0 = 0xD800;
223         uint16_t utf16Data1 = 0x00;
224         Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
225         Utf8Char utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xD800 >> 12),
226                             UTF8_3B_SECOND | (static_cast<uint8_t>(0xD800 >> 6) & utf::MASK_6BIT),
227                             UTF8_3B_THIRD | (static_cast<uint8_t>(0xD800) & utf::MASK_6BIT)}};
228         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
229         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
230         utf16Data0 = 0xDFFF;
231         utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
232         utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xDFFF >> 12),
233                             UTF8_3B_SECOND | (static_cast<uint8_t>(0xDFFF >> 6) & utf::MASK_6BIT),
234                             UTF8_3B_THIRD | (static_cast<uint8_t>(0xDFFF) & utf::MASK_6BIT)}};
235         EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
236         EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
237     }
238 }
239 
HWTEST_F_L0(UtfHelperTest, ConvertUtf16ToUtf8_002)
{
    // Builds the expected three-byte character for a single UTF-16 unit.
    auto threeByteChar = [](uint16_t unit) -> Utf8Char {
        return {3, {static_cast<uint8_t>(UTF8_3B_FIRST | static_cast<uint8_t>(unit >> 12)),
                    static_cast<uint8_t>(UTF8_3B_SECOND | (static_cast<uint8_t>(unit >> 6) & utf::MASK_6BIT)),
                    static_cast<uint8_t>(UTF8_3B_THIRD | (static_cast<uint8_t>(unit) & utf::MASK_6BIT))}};
    };

    // codePoint lie in [0x800,0xD7FF]&&[0xE000,0xFFFF] --> UTF-8(length:3)
    // Both ends of each of the two ranges are exercised.
    const uint16_t boundaryUnits[] = {0x800, 0xD7FF, 0xE000, 0xFFFF};
    const uint16_t secondUnit = 0x00;
    for (const uint16_t unit : boundaryUnits) {
        const Utf8Char actual = ConvertUtf16ToUtf8(unit, secondUnit, false);
        const Utf8Char expected = threeByteChar(unit);
        EXPECT_EQ(actual.n, expected.n);
        EXPECT_EQ(actual.ch, expected.ch);
    }
}
273 
HWTEST_F_L0(UtfHelperTest, ConvertUtf16ToUtf8_003)
{
    // Builds the expected four-byte character for a code point.
    auto fourByteChar = [](uint32_t codePoint) -> Utf8Char {
        return {4, {static_cast<uint8_t>((codePoint >> 18) | UTF8_4B_FIRST),
                    static_cast<uint8_t>(((codePoint >> 12) & utf::MASK_6BIT) | utf::MASK1),
                    static_cast<uint8_t>(((codePoint >> 6) & utf::MASK_6BIT) | utf::MASK1),
                    static_cast<uint8_t>((codePoint & utf::MASK_6BIT) | utf::MASK1)}};
    };
    auto expectSame = [](const Utf8Char &actual, const Utf8Char &expected) {
        EXPECT_EQ(actual.n, expected.n);
        EXPECT_EQ(actual.ch, expected.ch);
    };

    // codePoint lie in [0x10000,0x10FFFF] --> UTF-8(length:4)
    const uint16_t lowestLead = 0xD800;
    const uint16_t lowestTrail = 0xDC00;
    const Utf8Char lowestActual = ConvertUtf16ToUtf8(lowestLead, lowestTrail, false);
    expectSame(lowestActual, fourByteChar(CombineTwoU16(lowestLead, lowestTrail)));

    // 0xD950 0xDF21 --> 0x64321 --> 0xf1 0xa4 0x8c 0xa1
    const uint16_t sampleLead = 0xD950;
    const uint16_t sampleTrail = 0xDF21;
    const Utf8Char sampleActual = ConvertUtf16ToUtf8(sampleLead, sampleTrail, false);
    expectSame(sampleActual, fourByteChar(CombineTwoU16(sampleLead, sampleTrail)));
    // Same character again, this time against the literal byte values.
    expectSame(sampleActual, {4, {0xf1, 0xa4, 0x8c, 0xa1}});
}
307 
/*
* @tc.name: Utf16ToUtf8Size
* @tc.desc: Enter a UTF16-encoded sequence and return the length that sequence has once converted into a
*           UTF8-encoded sequence. "length" indicates the length of the input UTF16 sequence, and "modify"
*           indicates whether to perform special conversion for 0.
* @tc.type: FUNC
*/
HWTEST_F_L0(UtfHelperTest,Utf16ToUtf8Size_001)315 HWTEST_F_L0(UtfHelperTest, Utf16ToUtf8Size_001)
316 {
317     // when utf16 data length is only 1 and code in 0xd800-0xdfff, means that is a single code point, it needs to be
318     // represented by three UTF8 code.
319     uint32_t length = 0;
320     uint16_t utf16Value1[1] = {0xD800};
321     const uint16_t *utf16ValuePtr1 = utf16Value1;
322     length = Utf16ToUtf8Size(utf16ValuePtr1, 1, false);
323     EXPECT_EQ(length - 1, UtfLength::THREE);
324     length = 1;
325     uint16_t utf16Value2[1] = {0xDFFF};
326     const uint16_t *utf16ValuePtr2 = utf16Value2;
327     length = Utf16ToUtf8Size(utf16ValuePtr2, 1, false);
328     EXPECT_EQ(length - 1, UtfLength::THREE);
329 
330     // special case for U+0000 => c0 80
331     uint16_t utf16Value3[1] = {0x00};
332     const uint16_t *utf16ValuePtr3 = utf16Value3;
333     length = Utf16ToUtf8Size(utf16ValuePtr3, 1, false);
334     EXPECT_EQ(length - 1, 0U);
335     length = Utf16ToUtf8Size(utf16ValuePtr3, 1, true);
336     EXPECT_EQ(length - 1, 2U);
337 
338     // code point lie in [0x00, 0x7F], it needs to be represented by one UTF8 code.
339     uint16_t utf16Value4[1] = {0x00};
340     uint16_t utf16Value5[1] = {0x7F};
341     const uint16_t *utf16ValuePtr4 = utf16Value4;
342     const uint16_t *utf16ValuePtr5 = utf16Value5;
343     length = Utf16ToUtf8Size(utf16ValuePtr4, 1, false);
344     EXPECT_EQ(length - 1, 0U);
345     length = Utf16ToUtf8Size(utf16ValuePtr5, 1, false);
346     EXPECT_EQ(length - 1, 1U);
347 
348     // code point lie in [0x80, 0x7FF], it needs to be represented by two UTF8 code,
349     uint16_t utf16Value6[1] = {0x80};
350     uint16_t utf16Value7[1] = {0x7FF};
351     const uint16_t *utf16ValuePtr6 = utf16Value6;
352     const uint16_t *utf16ValuePtr7 = utf16Value7;
353     length = Utf16ToUtf8Size(utf16ValuePtr6, 1, false);
354     EXPECT_EQ(length - 1, 2U);
355     length = Utf16ToUtf8Size(utf16ValuePtr7, 1, false);
356     EXPECT_EQ(length - 1, 2U);
357     // code point lie in [0x800, 0xD7FF] or [0xDCoo, 0xFFFF], it needs to be represented by three UTF8 code.
358     uint16_t utf16Value8[1] = {0x800};
359     uint16_t utf16Value9[1] = {0xD7FF};
360     uint16_t utf16Value10[1] = {0xDC00};
361     uint16_t utf16Value11[1] = {0xFFFF};
362     const uint16_t *utf16ValuePtr8 = utf16Value8;
363     const uint16_t *utf16ValuePtr9 = utf16Value9;
364     const uint16_t *utf16ValuePtr10 = utf16Value10;
365     const uint16_t *utf16ValuePtr11 = utf16Value11;
366     length = Utf16ToUtf8Size(utf16ValuePtr8, 1, false);
367     EXPECT_EQ(length - 1, 3U);
368     length = Utf16ToUtf8Size(utf16ValuePtr9, 1, false);
369     EXPECT_EQ(length - 1, 3U);
370     length = Utf16ToUtf8Size(utf16ValuePtr10, 1, false);
371     EXPECT_EQ(length-1, 3U);
372     length = Utf16ToUtf8Size(utf16ValuePtr11, 1, false);
373     EXPECT_EQ(length - 1, 3U);
374 }
375 
HWTEST_F_L0(UtfHelperTest, Utf16ToUtf8Size_002)
{
    // Sizes a two-unit UTF-16 sequence without "modify"; the returned size is reduced by one before
    // it is compared, as every expectation in this test does.
    auto pairSize = [](uint16_t first, uint16_t second) -> uint32_t {
        const uint16_t units[2] = {first, second};
        const uint32_t reported = Utf16ToUtf8Size(units, 2, false);
        return reported - 1;
    };

    // The trail value is valid, located in [0xDC00, 0xDFFF]. It needs to be represented by four UTF8 code.
    EXPECT_EQ(pairSize(0xD800, 0xDC00), 4U);
    EXPECT_EQ(pairSize(0xD800, 0xDFFF), 4U);
    EXPECT_EQ(pairSize(0xDBFF, 0xDC00), 4U);
    EXPECT_EQ(pairSize(0xDBFF, 0xDFFF), 4U);

    // The trail value of Bad sequence is invalid, not located in [0xDC00, 0xDFFF].
    // Need to return 6 bytes length
    EXPECT_EQ(pairSize(0xD800, 0xDBFF), 6U);
    EXPECT_EQ(pairSize(0xDC00, 0xDFFF), 6U);

    // 0(or 2) + 1 + 2 + 3 + 4 = 10(or 12)
    const uint16_t mixedUnits[6] = {0x00, 0x7F, 0x80, 0x800, 0xD800, 0xDC00};
    const uint32_t plainSize = Utf16ToUtf8Size(mixedUnits, 6, false);
    EXPECT_EQ(plainSize - 1, 10U);
    const uint32_t modifiedSize = Utf16ToUtf8Size(mixedUnits, 6, true);
    EXPECT_EQ(modifiedSize - 1, 12U);
}
415 
/*
* @tc.name: ConvertUtf8ToUtf16Pair
* @tc.desc: Converts a UTF8 encoding sequence encoding a character into a UTF16 encoding sequence, and returns the
*           sequence and the byte length of the UTF8 encoding sequence. The parameter "combine" identifies whether
*           to return a surrogate pair for Unicode values in the supplementary planes, or the Unicode value itself.
* @tc.type: FUNC
*/
HWTEST_F_L0(UtfHelperTest,ConvertUtf8ToUtf16Pair)423 HWTEST_F_L0(UtfHelperTest, ConvertUtf8ToUtf16Pair)
424 {
425     // code point lie in [0x00, 0x7F], the length of utf8 code element byte is 1
426     uint8_t utf8Value1[1] = {0x00};
427     uint8_t utf8Value2[1] = {UTF8_1B_MAX};
428     const uint8_t *utf8ValuePtr1 = utf8Value1;
429     const uint8_t *utf8ValuePtr2 = utf8Value2;
430     std::pair<uint32_t, size_t> utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr1);
431     std::pair<uint32_t, size_t> utf16Value = {utf8Value1[0], 1};
432     EXPECT_EQ(utf16Res, utf16Value);
433     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr2);
434     utf16Value = {utf8Value2[0], 1};
435     EXPECT_EQ(utf16Res, utf16Value);
436     // code point lie in [0x80, 0x7FF], the length of utf8 code element byte is 2
437     uint8_t utf8Value3[2] = {0xc2, 0x80}; // 0x80
438     uint8_t utf8Value4[2] = {0xDF, 0xBF}; // 0x7FF
439     const uint8_t *utf8ValuePtr3 = utf8Value3;
440     const uint8_t *utf8ValuePtr4 = utf8Value4;
441     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr3);
442     utf16Value = {0x80, 2};
443     EXPECT_EQ(utf16Res, utf16Value);
444     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr4);
445     utf16Value = {0x7FF, 2};
446     EXPECT_EQ(utf16Res, utf16Value);
447 
448     // code point lie in [0x800, 0xD7FF] or [0xDC00,0xFFFF], the length of utf8 code element byte is 3.
449     // when code point lie in [0xD800, 0xDBFF], due to the use of UCS-2, it corresponds to 3 utf8 symbols.
450     uint8_t utf8Value5[3] = {0xE0, 0xA0, 0x80}; // 0x800
451     uint8_t utf8Value6[3] = {0xEF, 0xBF, 0xBF}; // 0xFFFF
452     const uint8_t *utf8ValuePtr5 = utf8Value5;
453     const uint8_t *utf8ValuePtr6 = utf8Value6;
454     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr5);
455     utf16Value = {0x800, 3};
456     EXPECT_EQ(utf16Res, utf16Value);
457     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr6);
458     utf16Value = {0xFFFF, 3};
459     EXPECT_EQ(utf16Res, utf16Value);
460     // code point lie in [0x10000, 0x10FFFF], the length of utf8 code element byte is 4.
461     uint8_t utf8Value9[4] = {0xF0, 0x90, 0x80, 0x80}; // 0x10000
462     uint8_t utf8Value10[4] = {0xF4, 0x8F, 0xBF, 0xBF}; // 0x10FFFF
463     const uint8_t *utf8ValuePtr9 = utf8Value9;
464     const uint8_t *utf8ValuePtr10 = utf8Value10;
465     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr9);
466     utf16Value = {0xD800 << 16 | 0xDC00U, 4};
467     EXPECT_EQ(utf16Res, utf16Value);
468     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr10);
469     utf16Value = {0xDBFF << 16 | 0xDFFF, 4};
470     EXPECT_EQ(utf16Res, utf16Value);
471     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr9, true);
472     utf16Value = {0x10000, 4};
473     EXPECT_EQ(utf16Res, utf16Value);
474     utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr10, true);
475     utf16Value = {0x10FFFF, 4};
476     EXPECT_EQ(utf16Res, utf16Value);
477 }
478 
479 /*
480 * @tc.name: Utf8ToUtf16Size
481 * @tc.desc: Enter a string of UTF8 coded sequences and return the length of the sequence converted into UTF16 coded
482 *           sequences.
483 * @tc.type: FUNC
484 */
HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size)
{
    // when code point lie in (0x00, 0xFFFF], the required utf16 code element length is 1.
    // The multi-byte inputs below end with an extra 0x00 byte, which is part of the measured buffer.
    uint8_t singleZeroByte[1] = {0x00};
    size_t utf16Units = Utf8ToUtf16Size(singleZeroByte, sizeof(singleZeroByte));
    EXPECT_EQ(utf16Units, 1U);

    uint8_t highestBmpChar[4] = {0xEF, 0xBF, 0xBF, 0x00}; // 0xFFFF
    utf16Units = Utf8ToUtf16Size(highestBmpChar, sizeof(highestBmpChar));
    EXPECT_EQ(utf16Units, 2U);

    // when code point lie in [0x10000, 0x10FFFF], the required utf16 code element length is 2.
    const uint8_t lowestPairChar[5] = {0xF0, 0x90, 0x80, 0x80, 0x00}; // 0x10000
    utf16Units = Utf8ToUtf16Size(lowestPairChar, sizeof(lowestPairChar));
    EXPECT_EQ(utf16Units, 3U);

    const uint8_t highestPairChar[5] = {0xF4, 0x8F, 0xBF, 0xBF, 0x00}; // 0x10FFFF
    utf16Units = Utf8ToUtf16Size(highestPairChar, sizeof(highestPairChar));
    EXPECT_EQ(utf16Units, 3U);

    // The three characters above back to back, followed by one 0x00 byte: 1 + 2 + 2 + 1 = 6
    uint8_t combinedChars[12] = {
        0xEF, 0xBF, 0xBF, 0xF0,
        0x90, 0x80, 0x80, 0xF4,
        0x8F, 0xBF, 0xBF, 0x00};
    utf16Units = Utf8ToUtf16Size(combinedChars, sizeof(combinedChars));
    EXPECT_EQ(utf16Units, 6U);
}
513 
/*
* @tc.name: ConvertRegionUtf16ToUtf8
* @tc.desc: Input a UTF16-encoded sequence (the length is "utf16Len"), convert part of the sequence into a
*           UTF8-encoded sequence, and save it to "utf8Out" (the maximum length is "utf8Len"). The "start"
*           parameter indicates the start position of the conversion. The "modify" parameter indicates whether
*           to perform special processing for 0.
* @tc.type: FUNC
*/
HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf16ToUtf8)
{
    size_t utf16Len = 8;
    size_t utf8Len = 100;
    size_t start = 0;
    bool modify = false;
    uint16_t utf16Value[8] = {
        0x00, // 0 or 2 (special case for \u0000 ==> C080 - 1100'0000 1000'0000)
        0x7F, // 1 (0x00, 0x7F]
        0x7FF, // 2 [0x80, 0x7FF]
        0x800, // 3 [0x800, 0xD7FF]
        0xD800, // 3 [0xD800, 0xDFFF]
        0xFFFF, // 3 [0xE000, 0xFFFF]
        0xD800, 0xDFFF}; // 4 [0x10000, 0x10FFFF]
    const uint16_t *utf16ValuePtr = utf16Value;
    // The output buffer is owned by a vector: no unchecked malloc result and no manual free.
    std::vector<uint8_t> utf8Out(utf8Len);
    size_t utf8Pos = ConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out.data(), utf16Len, utf8Len, start, modify);
    // 0 + 1 + 2 + (3 * 3) + 4 = 16
    EXPECT_EQ(utf8Pos, 16U);
    // 2 + 1 + 2 + (3 * 3) + 4 = 18
    modify = true;
    utf8Pos = ConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out.data(), utf16Len, utf8Len, start, modify);
    EXPECT_EQ(utf8Pos, 18U);
}
546 
/*
* Same input and expectations as the ConvertRegionUtf16ToUtf8 test, but run through
* DebuggerConvertRegionUtf16ToUtf8.
*/
HWTEST_F_L0(UtfHelperTest, DebuggerConvertRegionUtf16ToUtf8)
{
    size_t utf16Len = 8;
    size_t utf8Len = 100;
    size_t start = 0;
    bool modify = false;
    uint16_t utf16Value[8] = {
        0x00, // 0 or 2 (special case for \u0000 ==> C080 - 1100'0000 1000'0000)
        0x7F, // 1 (0x00, 0x7F]
        0x7FF, // 2 [0x80, 0x7FF]
        0x800, // 3 [0x800, 0xD7FF]
        0xD800, // 3 [0xD800, 0xDFFF]  ---> replace by 0xFFFD
        0xFFFF, // 3 [0xE000, 0xFFFF]  ---> replace by 0xFFFD
        0xD800, 0xDFFF}; // 4 [0x10000, 0x10FFFF]
    const uint16_t *utf16ValuePtr = utf16Value;
    // The output buffer is owned by a vector: no unchecked malloc result and no manual free.
    std::vector<uint8_t> utf8Out(utf8Len);
    size_t utf8Pos =
        DebuggerConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out.data(), utf16Len, utf8Len, start, modify);
    // 0 + 1 + 2 + (3 * 3) + 4 = 16
    EXPECT_EQ(utf8Pos, 16U);
    // 2 + 1 + 2 + (3 * 3) + 4 = 18
    modify = true;
    utf8Pos = DebuggerConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out.data(), utf16Len, utf8Len, start, modify);
    EXPECT_EQ(utf8Pos, 18U);
}
572 
/*
* @tc.name: ConvertRegionUtf8ToUtf16
* @tc.desc: Input a UTF8-encoded sequence, convert part of the sequence into a UTF16-encoded sequence, and save it
*           to "utf16Out" (the maximum length is "utf16Len"). The "start" parameter indicates the start position
*           of the conversion.
* @tc.type: FUNC
*/
HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16)
{
    size_t utf16Len = 100;
    size_t start = 0;
    uint8_t utf8Value[10] = {
        0x7F, // 1-length UTF16 encoding
        0xDF, 0xBF, // 1-length UTF16 encoding
        0xEF, 0xBF, 0xBF, // 1-length UTF16 encoding
        0xF4, 0x8F, 0xBF, 0xBF}; // 2-length UTF16 encoding
    const uint8_t *utf8ValuePtr = utf8Value;
    // "utf16Len" is passed to the converter as the capacity of the output in UTF-16 units, so the
    // buffer must hold that many uint16_t elements (not that many bytes).
    std::vector<uint16_t> utf16Out(utf16Len);
    size_t outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out.data(), sizeof(utf8Value), utf16Len, start);
    // 1 + 1 + 1 + 2 = 5
    EXPECT_EQ(outPos, 5U);
    // 1 + 2 = 3
    start = 3;
    outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out.data(), sizeof(utf8Value), utf16Len, start);
    EXPECT_EQ(outPos, 3U);

    // When "start" is in the middle of a symbol sequence
    start = 2;
    outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out.data(), sizeof(utf8Value), utf16Len, start);
    EXPECT_EQ(outPos, 0U);
    start = 4;
    outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out.data(), sizeof(utf8Value), utf16Len, start);
    EXPECT_EQ(outPos, 0U);
    start = 7;
    outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out.data(), sizeof(utf8Value), utf16Len, start);
    EXPECT_EQ(outPos, 0U);
}
611 
612 /*
613 * @tc.name: ConvertUtf8ToUnicodeChar
614 * @tc.desc: Converts a UTF8 encoding sequence encoding a character into a unicode point, and returns the
615 *           unicode point and the byte length of the utf8 encoding sequence.
616 * @tc.type: FUNC
617 */
HWTEST_F_L0(UtfHelperTest,ConvertUtf8ToUnicodeChar)618 HWTEST_F_L0(UtfHelperTest, ConvertUtf8ToUnicodeChar)
619 {
620     std::pair<int32_t, size_t> invalidValue = {INVALID_UTF8, 0};
621     // utf-8 is one byte, code point lie in [0x00, 0x7F]
622     uint8_t utf8Value1[1] = {0x00}; // 0x00
623     uint8_t utf8Value2[1] = {0x7F}; // 0x7F
624     const uint8_t *utf8ValuePtr1 = utf8Value1;
625     const uint8_t *utf8ValuePtr2 = utf8Value2;
626     std::pair<int32_t, size_t> unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr1, UtfLength::ONE);
627     std::pair<int32_t, size_t> unicodeValue = {0x00, 1};
628     EXPECT_EQ(unicodeRes, unicodeValue);
629     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr2, UtfLength::ONE);
630     unicodeValue = {0x7F, 1};
631     EXPECT_EQ(unicodeRes, unicodeValue);
632     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr2, 0);
633     EXPECT_EQ(unicodeRes, invalidValue);
634 
635     // utf-8 is two bytes, code point lie in [0x80, 0x7FF]
636     uint8_t utf8Value3[2] = {0xC2, 0x80}; // 0x80
637     uint8_t utf8Value4[2] = {0xDF, 0xBF}; // 0x7FF
638     const uint8_t *utf8ValuePtr3 = utf8Value3;
639     const uint8_t *utf8ValuePtr4 = utf8Value4;
640     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr3, UtfLength::TWO);
641     unicodeValue = {0x80, 2};
642     EXPECT_EQ(unicodeRes, unicodeValue);
643     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr4, UtfLength::TWO);
644     unicodeValue = {0x7FF, 2};
645     EXPECT_EQ(unicodeRes, unicodeValue);
646     uint8_t utf8Value5[2] = {0xD0, 0x00}; // invalid
647     const uint8_t *utf8ValuePtr5 = utf8Value5;
648     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr5, UtfLength::TWO);
649     EXPECT_EQ(unicodeRes, invalidValue);
650     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr4, UtfLength::ONE);
651     EXPECT_EQ(unicodeRes, invalidValue);
652 
653     // utf-8 is three bytes, code point lie in [0x800, 0xFFFF]
654     uint8_t utf8Value6[3] = {0xE0, 0xA0, 0x80}; // 0x800
655     uint8_t utf8Value7[3] = {0xED, 0x9F, 0xBF}; // 0xD7FF
656     const uint8_t *utf8ValuePtr6 = utf8Value6;
657     const uint8_t *utf8ValuePtr7 = utf8Value7;
658     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr6, UtfLength::THREE);
659     unicodeValue = {0x800, 3};
660     EXPECT_EQ(unicodeRes, unicodeValue);
661     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr7, UtfLength::THREE);
662     unicodeValue = {0xD7FF, 3};
663     EXPECT_EQ(unicodeRes, unicodeValue);
664     uint8_t utf8Value8[3] = {0xEB, 0x80, 0x40}; // invalid
665     const uint8_t *utf8ValuePtr8 = utf8Value8;
666     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr8, UtfLength::THREE);
667     EXPECT_EQ(unicodeRes, invalidValue);
668     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr7, UtfLength::TWO);
669     EXPECT_EQ(unicodeRes, invalidValue);
670 
671     // utf-8 is four bytes, code point lie in [0x10000, 0x10FFFF].
672     uint8_t utf8Value9[4] = {0xF0, 0x90, 0x80, 0x80}; // 0x10000
673     uint8_t utf8Value10[4] = {0xF4, 0x8F, 0xBF, 0xBF}; // 0x10FFFF
674     const uint8_t *utf8ValuePtr9 = utf8Value9;
675     const uint8_t *utf8ValuePtr10 = utf8Value10;
676     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr9, UtfLength::FOUR);
677     unicodeValue = {0x10000, 4};
678     EXPECT_EQ(unicodeRes, unicodeValue);
679     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr10, UtfLength::FOUR);
680     unicodeValue = {0x10FFFF, 4};
681     EXPECT_EQ(unicodeRes, unicodeValue);
682     uint8_t utf8Value11[4] = {0xF4, 0x80, 0x80, 0x40}; // invalid
683     const uint8_t *utf8ValuePtr11 = utf8Value11;
684     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr11, UtfLength::FOUR);
685     EXPECT_EQ(unicodeRes, invalidValue);
686     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr10, UtfLength::THREE);
687     EXPECT_EQ(unicodeRes, invalidValue);
688 
689     // other exception
690     uint8_t utf8Value12[2] = {0x90, 0x00}; // invalid
691     const uint8_t *utf8ValuePtr12 = utf8Value12;
692     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr12, UtfLength::FOUR);
693     EXPECT_EQ(unicodeRes, invalidValue);
694     uint8_t utf8Value13[2] = {0xF8, 0x00}; // invalid
695     const uint8_t *utf8ValuePtr13 = utf8Value13;
696     unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr13, UtfLength::FOUR);
697     EXPECT_EQ(unicodeRes, invalidValue);
698 }
699 } // namespace panda::test
700