• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "utils/utf.h"
17 
18 #include <cstdint>
19 
20 #include <vector>
21 
22 #include <gtest/gtest.h>
23 
24 namespace panda::utf::test {
25 
26 HWTEST(Utf, ConvertMUtf8ToUtf16, testing::ext::TestSize.Level0)
27 {
28     // 2-byte mutf-8 U+0000
29     {
30         const std::vector<uint8_t> in {0xc0, 0x80, 0x00};
31         const std::vector<uint16_t> res {0x0};
32         std::vector<uint16_t> out(res.size());
33         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
34         EXPECT_EQ(out, res);
35     }
36 
37     // 1-byte mutf-8: 0xxxxxxx
38     {
39         const std::vector<uint8_t> in {0x7f, 0x00};
40         const std::vector<uint16_t> res {0x7f};
41         std::vector<uint16_t> out(res.size());
42         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
43         EXPECT_EQ(out, res);
44     }
45 
46     // 2-byte mutf-8: 110xxxxx 10xxxxxx
47     {
48         const std::vector<uint8_t> in {0xc2, 0xa7, 0x33, 0x00};
49         const std::vector<uint16_t> res {0xa7, 0x33};
50         std::vector<uint16_t> out(res.size());
51         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
52         EXPECT_EQ(out, res);
53     }
54 
55     // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
56     {
57         const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33, 0x00};
58         const std::vector<uint16_t> res {0xffc3, 0x33};
59         std::vector<uint16_t> out(res.size());
60         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
61         EXPECT_EQ(out, res);
62     }
63 
64     // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
65     {
66         const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x00};
67         const std::vector<uint16_t> res {0xd801, 0xdc37};
68         std::vector<uint16_t> out(res.size());
69         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
70         EXPECT_EQ(out, res);
71     }
72 
73     {
74         const std::vector<uint8_t> in {0x5b, 0x61, 0x62, 0x63, 0xed, 0xa3, 0x92, 0x5d, 0x00};
75         const std::vector<uint16_t> res {0x5b, 0x61, 0x62, 0x63, 0xd8d2, 0x5d};
76         std::vector<uint16_t> out(res.size());
77         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
78         EXPECT_EQ(out, res);
79     }
80 
81     {
82         const std::vector<uint8_t> in {0xF0, 0x9F, 0x91, 0xB3, 0x00};
83         const std::vector<uint16_t> res {0xD83D, 0xDC73};
84         std::vector<uint16_t> out(res.size());
85         ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
86         EXPECT_EQ(out, res);
87     }
88 }
89 
90 HWTEST(Utf, Utf16ToMUtf8Size, testing::ext::TestSize.Level0)
91 {
92     // 2-byte mutf-8 U+0000
93     {
94         const std::vector<uint16_t> in {0x0};
95         size_t res = Utf16ToMUtf8Size(in.data(), in.size());
96         EXPECT_EQ(res, 3U);
97     }
98 
99     // 1-byte mutf-8: 0xxxxxxx
100     {
101         const std::vector<uint16_t> in {0x7f};
102         size_t res = Utf16ToMUtf8Size(in.data(), in.size());
103         EXPECT_EQ(res, 2U);
104     }
105 
106     {
107         const std::vector<uint16_t> in {0x7f};
108         size_t res = Utf16ToMUtf8Size(in.data(), in.size());
109         EXPECT_EQ(res, 2U);
110     }
111 
112     // 2-byte mutf-8: 110xxxxx 10xxxxxx
113     {
114         const std::vector<uint16_t> in {0xa7, 0x33};
115         size_t res = Utf16ToMUtf8Size(in.data(), in.size());
116         EXPECT_EQ(res, 4U);
117     }
118 
119     // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
120     {
121         const std::vector<uint16_t> in {0xffc3, 0x33};
122         size_t res = Utf16ToMUtf8Size(in.data(), in.size());
123         EXPECT_EQ(res, 5U);
124     }
125 
126     // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
127     {
128         const std::vector<uint16_t> in {0xd801, 0xdc37};
129         size_t res = Utf16ToMUtf8Size(in.data(), in.size());
130         EXPECT_EQ(res, 5U);
131     }
132 }
133 
134 HWTEST(Utf, ConvertRegionUtf16ToMUtf8, testing::ext::TestSize.Level0)
135 {
136     // 2-byte mutf-8 U+0000
137     {
138         const std::vector<uint16_t> in {0x0};
139         const std::vector<uint8_t> res {0xc0, 0x80, 0x00};
140         std::vector<uint8_t> out(res.size());
141         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
142         EXPECT_EQ(sz, 2U);
143         out[out.size() - 1] = '\0';
144         EXPECT_EQ(out, res);
145     }
146 
147     // 1-byte mutf-8: 0xxxxxxx
148     {
149         const std::vector<uint16_t> in {0x7f};
150         const std::vector<uint8_t> res {0x7f, 0x00};
151         std::vector<uint8_t> out(res.size());
152         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
153         EXPECT_EQ(sz, 1U);
154         out[out.size() - 1] = '\0';
155         EXPECT_EQ(out, res);
156     }
157 
158     // 2-byte mutf-8: 110xxxxx 10xxxxxx
159     {
160         const std::vector<uint16_t> in {0xa7, 0x33};
161         const std::vector<uint8_t> res {0xc2, 0xa7, 0x33, 0x00};
162         std::vector<uint8_t> out(res.size());
163         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
164         EXPECT_EQ(sz, 3U);
165         out[out.size() - 1] = '\0';
166         EXPECT_EQ(out, res);
167     }
168 
169     // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
170     {
171         const std::vector<uint16_t> in {0xffc3, 0x33};
172         const std::vector<uint8_t> res {0xef, 0xbf, 0x83, 0x33, 0x00};
173         std::vector<uint8_t> out(res.size());
174         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
175         EXPECT_EQ(sz, 4U);
176         out[out.size() - 1] = '\0';
177         EXPECT_EQ(out, res);
178     }
179 
180     // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
181     // utf-16 data in 0xd800-0xdfff
182     {
183         const std::vector<uint16_t> in {0xd834, 0x33};
184         const std::vector<uint8_t> res {0xed, 0xa0, 0xb4, 0x33, 0x00};
185         std::vector<uint8_t> out(res.size());
186         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
187         EXPECT_EQ(sz, 4U);
188         out[out.size() - 1] = '\0';
189         EXPECT_EQ(out, res);
190     }
191 
192     // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
193     // utf-16 data in 0xd800-0xdfff
194     {
195         const std::vector<uint16_t> in {0xdf06, 0x33};
196         const std::vector<uint8_t> res {0xed, 0xbc, 0x86, 0x33, 0x00};
197         std::vector<uint8_t> out(res.size());
198         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
199         EXPECT_EQ(sz, 4U);
200         out[out.size() - 1] = '\0';
201         EXPECT_EQ(out, res);
202     }
203 
204     // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
205     {
206         const std::vector<uint16_t> in {0xd801, 0xdc37};
207         const std::vector<uint8_t> res {0xf0, 0x90, 0x90, 0xb7, 0x00};
208         std::vector<uint8_t> out(res.size());
209         size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
210         EXPECT_EQ(sz, 4U);
211         out[out.size() - 1] = '\0';
212         EXPECT_EQ(out, res);
213     }
214 }
215 
216 HWTEST(Utf, CompareMUtf8ToMUtf8, testing::ext::TestSize.Level0)
217 {
218     // 1-byte utf-8: 0xxxxxxx
219     {
220         const std::vector<uint8_t> v1 {0x00};
221         const std::vector<uint8_t> v2 {0x7f, 0x00};
222         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
223     }
224 
225     {
226         const std::vector<uint8_t> v1 {0x02, 0x00};
227         const std::vector<uint8_t> v2 {0x00};
228         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
229     }
230 
231     {
232         const std::vector<uint8_t> v1 {0x7f, 0x00};
233         const std::vector<uint8_t> v2 {0x7f, 0x00};
234         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
235     }
236 
237     {
238         const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00};
239         const std::vector<uint8_t> v2 {0x01, 0x70, 0x00};
240         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
241     }
242 
243     {
244         const std::vector<uint8_t> v1 {0x01, 0x71, 0x00};
245         const std::vector<uint8_t> v2 {0x01, 0x73, 0x00};
246         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
247     }
248 
249     // 2-byte utf-8: 110xxxxx 10xxxxxx
250     {
251         const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00};
252         const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00};
253         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
254     }
255 
256     {
257         const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00};
258         const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00};
259         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
260     }
261 
262     {
263         const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00};
264         const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00};
265         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
266     }
267 
268     // 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
269     {
270         const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00};
271         const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00};
272         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
273     }
274 
275     {
276         const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00};
277         const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00};
278         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
279     }
280 
281     {
282         const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00};
283         const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00};
284         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
285     }
286 
287     // 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
288     {
289         const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
290         const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
291         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
292     }
293 
294     {
295         const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00};
296         const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
297         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
298     }
299 
300     {
301         const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
302         const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00};
303         EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
304     }
305 }
306 
307 HWTEST(Utf, CompareUtf8ToUtf8, testing::ext::TestSize.Level0)
308 {
309     // 1-byte utf-8: 0xxxxxxx
310     {
311         const std::vector<uint8_t> v1 {0x00};
312         const std::vector<uint8_t> v2 {0x7f, 0x00};
313         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
314     }
315 
316     {
317         const std::vector<uint8_t> v1 {0x02, 0x00};
318         const std::vector<uint8_t> v2 {0x00};
319         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
320     }
321 
322     {
323         const std::vector<uint8_t> v1 {0x7f, 0x00};
324         const std::vector<uint8_t> v2 {0x7f, 0x00};
325         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
326     }
327 
328     {
329         const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00};
330         const std::vector<uint8_t> v2 {0x01, 0x70, 0x00};
331         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
332     }
333 
334     {
335         const std::vector<uint8_t> v1 {0x01, 0x71, 0x00};
336         const std::vector<uint8_t> v2 {0x01, 0x73, 0x00};
337         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
338     }
339 
340     // 2-byte utf-8: 110xxxxx 10xxxxxx
341     {
342         const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00};
343         const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00};
344         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
345     }
346 
347     {
348         const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00};
349         const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00};
350         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
351     }
352 
353     {
354         const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00};
355         const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00};
356         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
357     }
358 
359     // 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
360     {
361         const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00};
362         const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00};
363         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
364     }
365 
366     {
367         const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00};
368         const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00};
369         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
370     }
371 
372     {
373         const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00};
374         const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00};
375         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
376     }
377 
378     // 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
379     {
380         const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
381         const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
382         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
383     }
384 
385     {
386         const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00};
387         const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
388         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
389     }
390 
391     {
392         const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
393         const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00};
394         EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
395     }
396 }
397 
398 HWTEST(Utf, IsMUtf8OnlySingleBytes, testing::ext::TestSize.Level0)
399 {
400     const std::vector<uint8_t> v1 {0x02, 0x00};
401     EXPECT_TRUE(IsMUtf8OnlySingleBytes(v1.data()));
402 
403     const std::vector<uint8_t> v2 {0x90, 0x00};
404     EXPECT_FALSE(IsMUtf8OnlySingleBytes(v2.data()));
405 }
406 
407 HWTEST(Utf, IsValidModifiedUTF8, testing::ext::TestSize.Level0)
408 {
409     const std::vector<uint8_t> v1 {0x31, 0x00};
410     EXPECT_TRUE(IsValidModifiedUTF8(v1.data()));
411 
412     const std::vector<uint8_t> v2 {0x9f, 0x00};
413     EXPECT_FALSE(IsValidModifiedUTF8(v2.data()));
414 
415     const std::vector<uint8_t> v3 {0xf7, 0x00};
416     EXPECT_FALSE(IsValidModifiedUTF8(v3.data()));
417 
418     const std::vector<uint8_t> v4 {0xe0, 0x00};
419     EXPECT_FALSE(IsValidModifiedUTF8(v4.data()));
420 
421     const std::vector<uint8_t> v5 {0xd4, 0x00};
422     EXPECT_FALSE(IsValidModifiedUTF8(v5.data()));
423 
424     const std::vector<uint8_t> v6 {0x11, 0x31, 0x00};
425     EXPECT_TRUE(IsValidModifiedUTF8(v6.data()));
426 
427     const std::vector<uint8_t> v7 {0xf8, 0x00};
428     EXPECT_FALSE(IsValidModifiedUTF8(v7.data()));
429 }
430 
431 HWTEST(Utf, ConvertMUtf8ToUtf16Pair, testing::ext::TestSize.Level0)
432 {
433     const uint8_t data = 0x11;
434     std::pair<uint32_t, size_t> p1 = ConvertMUtf8ToUtf16Pair(&data, 2U);
435     ASSERT_EQ(17U, p1.first);
436     ASSERT_EQ(1U, p1.second);
437 
438     std::pair<uint32_t, size_t> p2 = ConvertMUtf8ToUtf16Pair(&data, 3U);
439     ASSERT_EQ(17U, p2.first);
440     ASSERT_EQ(1U, p2.second);
441 }
442 
443 HWTEST(Utf, IsEqualTest, testing::ext::TestSize.Level0)
444 {
445     {
446         const std::vector<uint8_t> v1 {0x7f, 0x00};
447         const std::vector<uint8_t> v2 {0x7f, 0x00};
448         Span<const uint8_t> utf8_1(v1.data(), v1.size());
449         Span<const uint8_t> utf8_2(v2.data(), v2.size());
450         ASSERT_TRUE(IsEqual(utf8_1, utf8_2));
451     }
452 
453     {
454         const std::vector<uint8_t> v1 {0x7f, 0x7f, 0x00};
455         const std::vector<uint8_t> v2 {0x7f, 0x00};
456         Span<const uint8_t> utf8_1(v1.data(), v1.size());
457         Span<const uint8_t> utf8_2(v2.data(), v2.size());
458         ASSERT_FALSE(IsEqual(utf8_1, utf8_2));
459     }
460 
461     {
462         const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00};
463         const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00};
464         EXPECT_TRUE(IsEqual(v1.data(), v2.data()));
465     }
466 }
467 
468 }  // namespace panda::utf::test
469