1 /* 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "utils/utf.h" 17 18 #include <cstdint> 19 20 #include <vector> 21 22 #include <gtest/gtest.h> 23 24 namespace panda::utf::test { 25 26 HWTEST(Utf, ConvertMUtf8ToUtf16, testing::ext::TestSize.Level0) 27 { 28 // 2-byte mutf-8 U+0000 29 { 30 const std::vector<uint8_t> in {0xc0, 0x80, 0x00}; 31 const std::vector<uint16_t> res {0x0}; 32 std::vector<uint16_t> out(res.size()); 33 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 34 EXPECT_EQ(out, res); 35 } 36 37 // 1-byte mutf-8: 0xxxxxxx 38 { 39 const std::vector<uint8_t> in {0x7f, 0x00}; 40 const std::vector<uint16_t> res {0x7f}; 41 std::vector<uint16_t> out(res.size()); 42 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 43 EXPECT_EQ(out, res); 44 } 45 46 // 2-byte mutf-8: 110xxxxx 10xxxxxx 47 { 48 const std::vector<uint8_t> in {0xc2, 0xa7, 0x33, 0x00}; 49 const std::vector<uint16_t> res {0xa7, 0x33}; 50 std::vector<uint16_t> out(res.size()); 51 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 52 EXPECT_EQ(out, res); 53 } 54 55 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 56 { 57 const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33, 0x00}; 58 const std::vector<uint16_t> res {0xffc3, 0x33}; 59 std::vector<uint16_t> out(res.size()); 60 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 61 EXPECT_EQ(out, res); 62 } 63 64 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 65 { 66 const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x00}; 67 const std::vector<uint16_t> res {0xd801, 0xdc37}; 68 std::vector<uint16_t> out(res.size()); 69 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 70 EXPECT_EQ(out, res); 71 } 72 73 { 74 const std::vector<uint8_t> in {0x5b, 0x61, 0x62, 0x63, 0xed, 0xa3, 0x92, 0x5d, 0x00}; 75 const std::vector<uint16_t> res {0x5b, 0x61, 0x62, 0x63, 0xd8d2, 0x5d}; 76 std::vector<uint16_t> out(res.size()); 77 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 78 EXPECT_EQ(out, res); 79 } 80 81 { 82 const std::vector<uint8_t> in {0xF0, 0x9F, 0x91, 0xB3, 0x00}; 83 const std::vector<uint16_t> res {0xD83D, 0xDC73}; 84 std::vector<uint16_t> out(res.size()); 85 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 86 EXPECT_EQ(out, res); 87 } 88 } 89 90 HWTEST(Utf, Utf16ToMUtf8Size, testing::ext::TestSize.Level0) 91 { 92 // 2-byte mutf-8 U+0000 93 { 94 const std::vector<uint16_t> in {0x0}; 95 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 96 EXPECT_EQ(res, 3U); 97 } 98 99 // 1-byte mutf-8: 0xxxxxxx 100 { 101 const std::vector<uint16_t> in {0x7f}; 102 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 103 EXPECT_EQ(res, 2U); 104 } 105 106 { 107 const std::vector<uint16_t> in {0x7f}; 108 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 109 EXPECT_EQ(res, 2U); 110 } 111 112 // 2-byte mutf-8: 110xxxxx 10xxxxxx 113 { 114 const std::vector<uint16_t> in {0xa7, 0x33}; 115 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 116 EXPECT_EQ(res, 4U); 117 } 118 119 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 120 { 121 const std::vector<uint16_t> in {0xffc3, 0x33}; 122 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 123 EXPECT_EQ(res, 5U); 124 } 125 126 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 127 { 128 const std::vector<uint16_t> in {0xd801, 0xdc37}; 129 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 130 EXPECT_EQ(res, 5U); 131 } 132 } 133 134 HWTEST(Utf, ConvertRegionUtf16ToMUtf8, testing::ext::TestSize.Level0) 135 { 136 // 2-byte mutf-8 U+0000 137 { 138 const std::vector<uint16_t> in {0x0}; 139 const std::vector<uint8_t> res {0xc0, 0x80, 0x00}; 140 std::vector<uint8_t> out(res.size()); 141 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 142 EXPECT_EQ(sz, 2U); 143 out[out.size() - 1] = '\0'; 144 EXPECT_EQ(out, res); 145 } 146 147 // 1-byte mutf-8: 0xxxxxxx 148 { 149 const std::vector<uint16_t> in {0x7f}; 150 const std::vector<uint8_t> res {0x7f, 0x00}; 151 std::vector<uint8_t> out(res.size()); 152 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 153 EXPECT_EQ(sz, 1U); 154 out[out.size() - 1] = '\0'; 155 EXPECT_EQ(out, res); 156 } 157 158 // 2-byte mutf-8: 110xxxxx 10xxxxxx 159 { 160 const std::vector<uint16_t> in {0xa7, 0x33}; 161 const std::vector<uint8_t> res {0xc2, 0xa7, 0x33, 0x00}; 162 std::vector<uint8_t> out(res.size()); 163 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 164 EXPECT_EQ(sz, 3U); 165 out[out.size() - 1] = '\0'; 166 EXPECT_EQ(out, res); 167 } 168 169 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 170 { 171 const std::vector<uint16_t> in {0xffc3, 0x33}; 172 const std::vector<uint8_t> res {0xef, 0xbf, 0x83, 0x33, 0x00}; 173 std::vector<uint8_t> out(res.size()); 174 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 175 EXPECT_EQ(sz, 4U); 176 out[out.size() - 1] = '\0'; 177 EXPECT_EQ(out, res); 178 } 179 180 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 181 // utf-16 data in 0xd800-0xdfff 182 { 183 const std::vector<uint16_t> in {0xd834, 0x33}; 184 const std::vector<uint8_t> res {0xed, 0xa0, 0xb4, 0x33, 0x00}; 185 std::vector<uint8_t> out(res.size()); 186 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 187 EXPECT_EQ(sz, 4U); 188 out[out.size() - 1] = '\0'; 189 EXPECT_EQ(out, res); 190 } 191 192 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 193 // utf-16 data in 0xd800-0xdfff 194 { 195 const std::vector<uint16_t> in {0xdf06, 0x33}; 196 const std::vector<uint8_t> res {0xed, 0xbc, 0x86, 0x33, 0x00}; 197 std::vector<uint8_t> out(res.size()); 198 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 199 EXPECT_EQ(sz, 4U); 200 out[out.size() - 1] = '\0'; 201 EXPECT_EQ(out, res); 202 } 203 204 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 205 { 206 const std::vector<uint16_t> in {0xd801, 0xdc37}; 207 const std::vector<uint8_t> res {0xf0, 0x90, 0x90, 0xb7, 0x00}; 208 std::vector<uint8_t> out(res.size()); 209 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 210 EXPECT_EQ(sz, 4U); 211 out[out.size() - 1] = '\0'; 212 EXPECT_EQ(out, res); 213 } 214 } 215 216 HWTEST(Utf, CompareMUtf8ToMUtf8, testing::ext::TestSize.Level0) 217 { 218 // 1-byte utf-8: 0xxxxxxx 219 { 220 const std::vector<uint8_t> v1 {0x00}; 221 const std::vector<uint8_t> v2 {0x7f, 0x00}; 222 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 223 } 224 225 { 226 const std::vector<uint8_t> v1 {0x02, 0x00}; 227 const std::vector<uint8_t> v2 {0x00}; 228 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 229 } 230 231 { 232 const std::vector<uint8_t> v1 {0x7f, 0x00}; 233 const std::vector<uint8_t> v2 {0x7f, 0x00}; 234 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 235 } 236 237 { 238 const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00}; 239 const std::vector<uint8_t> v2 {0x01, 0x70, 0x00}; 240 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 241 } 242 243 { 244 const std::vector<uint8_t> v1 {0x01, 0x71, 0x00}; 245 const std::vector<uint8_t> v2 {0x01, 0x73, 0x00}; 246 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 247 } 248 249 // 2-byte utf-8: 110xxxxx 10xxxxxx 250 { 251 const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00}; 252 const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00}; 253 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 254 } 255 256 { 257 const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00}; 258 const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00}; 259 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 260 } 261 262 { 263 const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00}; 264 const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00}; 265 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 266 } 267 268 // 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx 269 { 270 const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00}; 271 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00}; 272 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 273 } 274 275 { 276 const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00}; 277 const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00}; 278 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 279 } 280 281 { 282 const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00}; 283 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00}; 284 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 285 } 286 287 // 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 288 { 289 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 290 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 291 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 292 } 293 294 { 295 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00}; 296 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 297 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 298 } 299 300 { 301 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 302 const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 303 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 304 } 305 } 306 307 HWTEST(Utf, CompareUtf8ToUtf8, testing::ext::TestSize.Level0) 308 { 309 // 1-byte utf-8: 0xxxxxxx 310 { 311 const std::vector<uint8_t> v1 {0x00}; 312 const std::vector<uint8_t> v2 {0x7f, 0x00}; 313 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 314 } 315 316 { 317 const std::vector<uint8_t> v1 {0x02, 0x00}; 318 const std::vector<uint8_t> v2 {0x00}; 319 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 320 } 321 322 { 323 const std::vector<uint8_t> v1 {0x7f, 0x00}; 324 const std::vector<uint8_t> v2 {0x7f, 0x00}; 325 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 326 } 327 328 { 329 const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00}; 330 const std::vector<uint8_t> v2 {0x01, 0x70, 0x00}; 331 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 332 } 333 334 { 335 const std::vector<uint8_t> v1 {0x01, 0x71, 0x00}; 336 const std::vector<uint8_t> v2 {0x01, 0x73, 0x00}; 337 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 338 } 339 340 // 2-byte utf-8: 110xxxxx 10xxxxxx 341 { 342 const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00}; 343 const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00}; 344 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 345 } 346 347 { 348 const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00}; 349 const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00}; 350 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 351 } 352 353 { 354 const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00}; 355 const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00}; 356 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 357 } 358 359 // 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx 360 { 361 const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00}; 362 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00}; 363 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 364 } 365 366 { 367 const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00}; 368 const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00}; 369 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 370 } 371 372 { 373 const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00}; 374 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00}; 375 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 376 } 377 378 // 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 379 { 380 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 381 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 382 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 383 } 384 385 { 386 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00}; 387 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 388 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 389 } 390 391 { 392 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 393 const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 394 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 395 } 396 } 397 398 HWTEST(Utf, IsMUtf8OnlySingleBytes, testing::ext::TestSize.Level0) 399 { 400 const std::vector<uint8_t> v1 {0x02, 0x00}; 401 EXPECT_TRUE(IsMUtf8OnlySingleBytes(v1.data())); 402 403 const std::vector<uint8_t> v2 {0x90, 0x00}; 404 EXPECT_FALSE(IsMUtf8OnlySingleBytes(v2.data())); 405 } 406 407 HWTEST(Utf, IsValidModifiedUTF8, testing::ext::TestSize.Level0) 408 { 409 const std::vector<uint8_t> v1 {0x31, 0x00}; 410 EXPECT_TRUE(IsValidModifiedUTF8(v1.data())); 411 412 const std::vector<uint8_t> v2 {0x9f, 0x00}; 413 EXPECT_FALSE(IsValidModifiedUTF8(v2.data())); 414 415 const std::vector<uint8_t> v3 {0xf7, 0x00}; 416 EXPECT_FALSE(IsValidModifiedUTF8(v3.data())); 417 418 const std::vector<uint8_t> v4 {0xe0, 0x00}; 419 EXPECT_FALSE(IsValidModifiedUTF8(v4.data())); 420 421 const std::vector<uint8_t> v5 {0xd4, 0x00}; 422 EXPECT_FALSE(IsValidModifiedUTF8(v5.data())); 423 424 const std::vector<uint8_t> v6 {0x11, 0x31, 0x00}; 425 EXPECT_TRUE(IsValidModifiedUTF8(v6.data())); 426 427 const std::vector<uint8_t> v7 {0xf8, 0x00}; 428 EXPECT_FALSE(IsValidModifiedUTF8(v7.data())); 429 } 430 431 HWTEST(Utf, ConvertMUtf8ToUtf16Pair, testing::ext::TestSize.Level0) 432 { 433 const uint8_t data = 0x11; 434 std::pair<uint32_t, size_t> p1 = ConvertMUtf8ToUtf16Pair(&data, 2U); 435 ASSERT_EQ(17U, p1.first); 436 ASSERT_EQ(1U, p1.second); 437 438 std::pair<uint32_t, size_t> p2 = ConvertMUtf8ToUtf16Pair(&data, 3U); 439 ASSERT_EQ(17U, p2.first); 440 ASSERT_EQ(1U, p2.second); 441 } 442 443 HWTEST(Utf, IsEqualTest, testing::ext::TestSize.Level0) 444 { 445 { 446 const std::vector<uint8_t> v1 {0x7f, 0x00}; 447 const std::vector<uint8_t> v2 {0x7f, 0x00}; 448 Span<const uint8_t> utf8_1(v1.data(), v1.size()); 449 Span<const uint8_t> utf8_2(v2.data(), v2.size()); 450 ASSERT_TRUE(IsEqual(utf8_1, utf8_2)); 451 } 452 453 { 454 const std::vector<uint8_t> v1 {0x7f, 0x7f, 0x00}; 455 const std::vector<uint8_t> v2 {0x7f, 0x00}; 456 Span<const uint8_t> utf8_1(v1.data(), v1.size()); 457 Span<const uint8_t> utf8_2(v2.data(), v2.size()); 458 ASSERT_FALSE(IsEqual(utf8_1, utf8_2)); 459 } 460 461 { 462 const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00}; 463 const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00}; 464 EXPECT_TRUE(IsEqual(v1.data(), v2.data())); 465 } 466 } 467 468 } // namespace panda::utf::test 469