1 // 2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 // 7 // Official repository: https://github.com/boostorg/beast 8 // 9 10 // Test that header file is self-contained. 11 #include <boost/beast/websocket/detail/utf8_checker.hpp> 12 13 #include <boost/beast/core/buffers_suffix.hpp> 14 #include <boost/beast/core/multi_buffer.hpp> 15 #include <boost/beast/_experimental/unit_test/suite.hpp> 16 #include <array> 17 18 namespace boost { 19 namespace beast { 20 namespace websocket { 21 namespace detail { 22 23 class utf8_checker_test : public beast::unit_test::suite 24 { 25 public: 26 void testOneByteSequence()27 testOneByteSequence() 28 { 29 // valid single-char code points 30 for(unsigned char c = 0; c < 128; ++c) 31 { 32 utf8_checker u; 33 BEAST_EXPECT(u.write(&c, 1)); 34 BEAST_EXPECT(u.finish()); 35 } 36 37 // invalid lead bytes 38 for(unsigned char c = 128; c < 192; ++c) 39 { 40 utf8_checker u; 41 BEAST_EXPECT(! u.write(&c, 1)); 42 } 43 44 // two byte sequences 45 for(unsigned char c = 192; c < 224; ++c) 46 { 47 // fail fast 48 utf8_checker u; 49 if (c < 194) 50 BEAST_EXPECT(! u.write(&c, 1)); 51 else 52 { 53 BEAST_EXPECT(u.write(&c, 1)); 54 BEAST_EXPECT(! u.finish()); 55 } 56 } 57 58 // three byte sequences 59 for(unsigned char c = 224; c < 240; ++c) 60 { 61 utf8_checker u; 62 BEAST_EXPECT(u.write(&c, 1)); 63 BEAST_EXPECT(! u.finish()); 64 } 65 66 // four byte sequences 67 for(unsigned char c = 240; c < 245; ++c) 68 { 69 // fail fast 70 utf8_checker u; 71 BEAST_EXPECT(u.write(&c, 1)); 72 BEAST_EXPECT(! u.finish()); 73 } 74 75 // invalid lead bytes 76 for(unsigned char c = 245; c; ++c) 77 { 78 utf8_checker u; 79 BEAST_EXPECT(! u.write(&c, 1)); 80 } 81 } 82 83 void testTwoByteSequence()84 testTwoByteSequence() 85 { 86 // Autobahn 6.18.1 87 { 88 utf8_checker u; 89 BEAST_EXPECT(! u.write(net::buffer("\xc1\xbf", 2))); 90 } 91 92 utf8_checker u; 93 std::uint8_t buf[2]; 94 // First byte valid range 194-223 95 for(auto i : {194, 223}) 96 { 97 buf[0] = static_cast<std::uint8_t>(i); 98 99 // Second byte valid range 128-191 100 for(auto j : {128, 191}) 101 { 102 buf[1] = static_cast<std::uint8_t>(j); 103 BEAST_EXPECT(u.write(buf, 2)); 104 BEAST_EXPECT(u.finish()); 105 } 106 107 // Second byte invalid range 0-127 108 for(auto j : {0, 127}) 109 { 110 buf[1] = static_cast<std::uint8_t>(j); 111 BEAST_EXPECT(! u.write(buf, 2)); 112 u.reset(); 113 } 114 115 // Second byte invalid range 192-255 116 for(auto j : {192, 255}) 117 { 118 buf[1] = static_cast<std::uint8_t>(j); 119 BEAST_EXPECT(! u.write(buf, 2)); 120 u.reset(); 121 } 122 123 // Segmented sequence second byte invalid 124 BEAST_EXPECT(u.write(buf, 1)); 125 BEAST_EXPECT(! u.write(&buf[1], 1)); 126 u.reset(); 127 } 128 } 129 130 void testThreeByteSequence()131 testThreeByteSequence() 132 { 133 { 134 utf8_checker u; 135 BEAST_EXPECT(u.write(net::buffer("\xef\xbf\xbf", 3))); 136 BEAST_EXPECT(u.finish()); 137 } 138 utf8_checker u; 139 std::uint8_t buf[3]; 140 // First byte valid range 224-239 141 for(auto i : {224, 239}) 142 { 143 buf[0] = static_cast<std::uint8_t>(i); 144 145 // Second byte valid range 128-191 or 160-191 or 128-159 146 std::int32_t const b = (i == 224 ? 160 : 128); 147 std::int32_t const e = (i == 237 ? 159 : 191); 148 for(auto j : {b, e}) 149 { 150 buf[1] = static_cast<std::uint8_t>(j); 151 152 // Third byte valid range 128-191 153 for(auto k : {128, 191}) 154 { 155 buf[2] = static_cast<std::uint8_t>(k); 156 BEAST_EXPECT(u.write(buf, 3)); 157 BEAST_EXPECT(u.finish()); 158 // Segmented sequence 159 if (i == 224) 160 { 161 BEAST_EXPECT(u.write(buf, 1)); 162 BEAST_EXPECT(!u.finish()); 163 } 164 else 165 { 166 BEAST_EXPECT(u.write(buf, 1)); 167 BEAST_EXPECT(u.write(&buf[1], 2)); 168 } 169 u.reset(); 170 // Segmented sequence 171 BEAST_EXPECT(u.write(buf, 2)); 172 BEAST_EXPECT(u.write(&buf[2], 1)); 173 u.reset(); 174 175 if (i == 224) 176 { 177 // Second byte invalid range 0-159 178 for (auto l : {0, 159}) 179 { 180 buf[1] = static_cast<std::uint8_t>(l); 181 BEAST_EXPECT(! u.write(buf, 3)); 182 u.reset(); 183 // Segmented sequence second byte invalid 184 BEAST_EXPECT(!u.write(buf, 2)); 185 u.reset(); 186 } 187 // Second byte invalid range 192-255 188 for(auto l : {192, 255}) 189 { 190 buf[1] = static_cast<std::uint8_t>(l); 191 BEAST_EXPECT(! u.write(buf, 3)); 192 u.reset(); 193 // Segmented sequence second byte invalid 194 BEAST_EXPECT(!u.write(buf, 2)); 195 u.reset(); 196 } 197 buf[1] = static_cast<std::uint8_t>(j); 198 } 199 else if (i == 237) 200 { 201 // Second byte invalid range 0-127 202 for(auto l : {0, 127}) 203 { 204 buf[1] = static_cast<std::uint8_t>(l); 205 BEAST_EXPECT(! u.write(buf, 3)); 206 u.reset(); 207 // Segmented sequence second byte invalid 208 BEAST_EXPECT(!u.write(buf, 2)); 209 u.reset(); 210 } 211 212 // Second byte invalid range 160-255 213 for(auto l : {160, 255}) 214 { 215 buf[1] = static_cast<std::uint8_t>(l); 216 BEAST_EXPECT(! u.write(buf, 3)); 217 u.reset(); 218 // Segmented sequence second byte invalid 219 BEAST_EXPECT(! u.write(buf, 2)); 220 u.reset(); 221 } 222 buf[1] = static_cast<std::uint8_t>(j); 223 } 224 } 225 226 // Third byte invalid range 0-127 227 for(auto k : {0, 127}) 228 { 229 buf[2] = static_cast<std::uint8_t>(k); 230 BEAST_EXPECT(! u.write(buf, 3)); 231 u.reset(); 232 } 233 234 // Third byte invalid range 192-255 235 for(auto k : {192, 255}) 236 { 237 buf[2] = static_cast<std::uint8_t>(k); 238 BEAST_EXPECT(! u.write(buf, 3)); 239 u.reset(); 240 } 241 242 // Segmented sequence third byte invalid 243 BEAST_EXPECT(u.write(buf, 2)); 244 BEAST_EXPECT(! u.write(&buf[2], 1)); 245 u.reset(); 246 } 247 248 // Second byte invalid range 0-127 or 0-159 249 for(auto j : {0, b - 1}) 250 { 251 buf[1] = static_cast<std::uint8_t>(j); 252 BEAST_EXPECT(! u.write(buf, 3)); 253 u.reset(); 254 } 255 256 // Second byte invalid range 160-255 or 192-255 257 for(auto j : {e + 1, 255}) 258 { 259 buf[1] = static_cast<std::uint8_t>(j); 260 BEAST_EXPECT(! u.write(buf, 3)); 261 u.reset(); 262 } 263 264 // Segmented sequence second byte invalid 265 if (i == 224) { 266 BEAST_EXPECT(u.write(buf, 1)); 267 BEAST_EXPECT(!u.finish()); 268 } 269 else 270 { 271 BEAST_EXPECT(u.write(buf, 1)); 272 BEAST_EXPECT(!u.write(&buf[1], 1)); 273 } 274 u.reset(); 275 } 276 } 277 278 void testFourByteSequence()279 testFourByteSequence() 280 { 281 using net::const_buffer; 282 utf8_checker u; 283 std::uint8_t buf[4]; 284 // First byte valid range 240-244 285 for(auto i : {240, 244}) 286 { 287 buf[0] = static_cast<std::uint8_t>(i); 288 289 std::int32_t const b = (i == 240 ? 144 : 128); 290 std::int32_t const e = (i == 244 ? 143 : 191); 291 for(auto j = b; j <= e; ++j) 292 { 293 buf[1] = static_cast<std::uint8_t>(j); 294 295 // Second byte valid range 144-191 or 128-191 or 128-143 296 for(auto k : {128, 191}) 297 { 298 buf[2] = static_cast<std::uint8_t>(k); 299 300 // Third byte valid range 128-191 301 for(auto n : {128, 191}) 302 { 303 // Fourth byte valid range 128-191 304 buf[3] = static_cast<std::uint8_t>(n); 305 BEAST_EXPECT(u.write(buf, 4)); 306 BEAST_EXPECT(u.finish()); 307 // Segmented sequence 308 BEAST_EXPECT(u.write(buf, 1)); 309 BEAST_EXPECT(u.write(&buf[1], 3)); 310 u.reset(); 311 // Segmented sequence 312 BEAST_EXPECT(u.write(buf, 2)); 313 BEAST_EXPECT(u.write(&buf[2], 2)); 314 u.reset(); 315 // Segmented sequence 316 BEAST_EXPECT(u.write(buf, 3)); 317 BEAST_EXPECT(u.write(&buf[3], 1)); 318 u.reset(); 319 320 if (i == 240) 321 { 322 // Second byte invalid range 0-143 323 for(auto r : {0, 143}) 324 { 325 buf[1] = static_cast<std::uint8_t>(r); 326 BEAST_EXPECT(! u.write(buf, 4)); 327 u.reset(); 328 // Segmented sequence second byte invalid 329 BEAST_EXPECT(! u.write(buf, 2)); 330 u.reset(); 331 } 332 333 // Second byte invalid range 192-255 334 for(auto r : {192, 255}) 335 { 336 buf[1] = static_cast<std::uint8_t>(r); 337 BEAST_EXPECT(! u.write(buf, 4)); 338 u.reset(); 339 // Segmented sequence second byte invalid 340 BEAST_EXPECT(!u.write(buf, 2)); 341 u.reset(); 342 } 343 buf[1] = static_cast<std::uint8_t>(j); 344 } 345 else if (i == 244) 346 { 347 // Second byte invalid range 0-127 348 for(auto r : {0, 127}) 349 { 350 buf[1] = static_cast<std::uint8_t>(r); 351 BEAST_EXPECT(! u.write(buf, 4)); 352 u.reset(); 353 // Segmented sequence second byte invalid 354 BEAST_EXPECT(! u.write(buf, 2)); 355 u.reset(); 356 } 357 // Second byte invalid range 144-255 358 for(auto r : {144, 255}) 359 { 360 buf[1] = static_cast<std::uint8_t>(r); 361 BEAST_EXPECT(! u.write(buf, 4)); 362 u.reset(); 363 // Segmented sequence second byte invalid 364 BEAST_EXPECT(! u.write(buf, 2)); 365 u.reset(); 366 } 367 buf[1] = static_cast<std::uint8_t>(j); 368 } 369 } 370 371 // Fourth byte invalid ranges 0-127, 192-255 372 for(auto r : {0, 127, 192, 255}) 373 { 374 buf[3] = static_cast<std::uint8_t>(r); 375 BEAST_EXPECT(! u.write(buf, 4)); 376 u.reset(); 377 } 378 379 // Segmented sequence fourth byte invalid 380 BEAST_EXPECT(u.write(buf, 3)); 381 BEAST_EXPECT(! u.write(&buf[3], 1)); 382 u.reset(); 383 } 384 385 // Third byte invalid ranges 0-127, 192-255 386 for(auto r : {0, 127, 192, 255}) 387 { 388 buf[2] = static_cast<std::uint8_t>(r); 389 BEAST_EXPECT(! u.write(buf, 4)); 390 u.reset(); 391 } 392 393 // Segmented sequence third byte invalid 394 BEAST_EXPECT(u.write(buf, 2)); 395 BEAST_EXPECT(! u.write(&buf[2], 1)); 396 u.reset(); 397 } 398 399 // Second byte invalid range 0-127 or 0-143 400 for(auto r : {0, b - 1}) 401 { 402 buf[1] = static_cast<std::uint8_t>(r); 403 BEAST_EXPECT(! u.write(buf, 4)); 404 u.reset(); 405 } 406 407 // Second byte invalid range 144-255 or 192-255 408 for(auto r : {e + 1, 255}) 409 { 410 buf[1] = static_cast<std::uint8_t>(r); 411 BEAST_EXPECT(! u.write(buf, 4)); 412 u.reset(); 413 } 414 415 // Segmented sequence second byte invalid 416 BEAST_EXPECT(u.write(buf, 1)); 417 BEAST_EXPECT(! u.write(&buf[1], 1)); 418 419 u.reset(); 420 } 421 422 // First byte invalid range 245-255 423 for(auto r : {245, 255}) 424 { 425 buf[0] = static_cast<std::uint8_t>(r); 426 BEAST_EXPECT(! u.write(buf, 4)); 427 u.reset(); 428 } 429 } 430 431 void testWithStreamBuffer()432 testWithStreamBuffer() 433 { 434 { 435 // Valid UTF8 encoded text 436 std::vector<std::vector<std::uint8_t>> const data{{ 437 0x48,0x65,0x69,0x7A,0xC3,0xB6,0x6C,0x72,0xC3,0xBC,0x63,0x6B, 438 0x73,0x74,0x6F,0xC3,0x9F,0x61,0x62,0x64,0xC3,0xA4,0x6D,0x70, 439 0x66,0x75,0x6E,0x67 440 }, { 441 0xCE,0x93,0xCE,0xB1,0xCE,0xB6,0xCE,0xAD,0xCE,0xB5,0xCF,0x82, 442 0x20,0xCE,0xBA,0xCE,0xB1,0xE1,0xBD,0xB6,0x20,0xCE,0xBC,0xCF, 443 0x85,0xCF,0x81,0xCF,0x84,0xCE,0xB9,0xE1,0xBD,0xB2,0xCF,0x82, 444 0x20,0xCE,0xB4,0xE1,0xBD,0xB2,0xCE,0xBD,0x20,0xCE,0xB8,0xE1, 445 0xBD,0xB0,0x20,0xCE,0xB2,0xCF,0x81,0xE1,0xBF,0xB6,0x20,0xCF, 446 0x80,0xCE,0xB9,0xE1,0xBD,0xB0,0x20,0xCF,0x83,0xCF,0x84,0xE1, 447 0xBD,0xB8,0x20,0xCF,0x87,0xCF,0x81,0xCF,0x85,0xCF,0x83,0xCE, 448 0xB1,0xCF,0x86,0xE1,0xBD,0xB6,0x20,0xCE,0xBE,0xCE,0xAD,0xCF, 449 0x86,0xCF,0x89,0xCF,0x84,0xCE,0xBF 450 }, { 451 0xC3,0x81,0x72,0x76,0xC3,0xAD,0x7A,0x74,0xC5,0xB1,0x72,0xC5, 452 0x91,0x20,0x74,0xC3,0xBC,0x6B,0xC3,0xB6,0x72,0x66,0xC3,0xBA, 453 0x72,0xC3,0xB3,0x67,0xC3,0xA9,0x70 454 }, { 455 240, 144, 128, 128 456 } 457 }; 458 utf8_checker u; 459 for(auto const& s : data) 460 { 461 static std::size_t constexpr size = 3; 462 std::size_t n = s.size(); 463 buffers_suffix< 464 net::const_buffer> cb{ 465 net::const_buffer(s.data(), n)}; 466 multi_buffer b; 467 while(n) 468 { 469 auto const amount = (std::min)(n, size); 470 b.commit(net::buffer_copy( 471 b.prepare(amount), cb)); 472 cb.consume(amount); 473 n -= amount; 474 } 475 BEAST_EXPECT(u.write(b.data())); 476 BEAST_EXPECT(u.finish()); 477 } 478 } 479 } 480 481 void testBranches()482 testBranches() 483 { 484 // switch to slow loop from alignment loop 485 { 486 char buf[32]; 487 for(unsigned i = 0; i < sizeof(buf); i += 2) 488 { 489 buf[i ] = '\xc2'; 490 buf[i+1] = '\x80'; 491 } 492 auto p = reinterpret_cast<char const*>(sizeof(std::size_t) * ( 493 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) / 494 sizeof(std::size_t))) + 2; 495 utf8_checker u; 496 BEAST_EXPECT(u.write( 497 reinterpret_cast<std::uint8_t const*>(p), 498 sizeof(buf)-(p-buf))); 499 BEAST_EXPECT(u.finish()); 500 } 501 502 // invalid code point in the last dword of a fast run 503 { 504 char buf[20]; 505 auto p = reinterpret_cast<char*>(sizeof(std::size_t) * ( 506 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) / 507 sizeof(std::size_t))); 508 BOOST_ASSERT(p + 12 <= buf + sizeof(buf)); 509 auto const in = p; 510 *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*'; 511 *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*'; 512 p[0] = '\x80'; // invalid 513 p[1] = '*'; 514 p[2] = '*'; 515 p[3] = '*'; 516 utf8_checker u; 517 BEAST_EXPECT(! u.write(reinterpret_cast< 518 std::uint8_t const*>(in), 12)); 519 } 520 } 521 522 void AutodeskTests()523 AutodeskTests() 524 { 525 std::vector<std::vector<std::uint8_t>> const data{ 526 { 's','t','a','r','t', 0xE0 }, 527 { 0xA6, 0x81, 'e','n','d' } }; 528 utf8_checker u; 529 for(auto const& s : data) 530 { 531 std::size_t n = s.size(); 532 buffers_suffix<net::const_buffer> cb{net::const_buffer(s.data(), n)}; 533 multi_buffer b; 534 while(n) 535 { 536 auto const amount = (std::min)(n, std::size_t(3)/*size*/); 537 b.commit(net::buffer_copy(b.prepare(amount), cb)); 538 cb.consume(amount); 539 n -= amount; 540 } 541 BEAST_EXPECT(u.write(b.data())); 542 } 543 BEAST_EXPECT(u.finish()); 544 } 545 546 void AutobahnTest(std::vector<std::vector<std::uint8_t>> && data,std::vector<bool> result)547 AutobahnTest(std::vector<std::vector<std::uint8_t>>&& data, std::vector<bool> result) 548 { 549 BEAST_EXPECT(data.size() == result.size()); 550 utf8_checker u; 551 for(std::size_t i = 0; i < data.size(); ++i) 552 { 553 auto const& s = data[i]; 554 555 std::size_t n = s.size(); 556 buffers_suffix<net::const_buffer> cb{net::const_buffer(s.data(), n)}; 557 multi_buffer b; 558 while(n) 559 { 560 auto const amount = (std::min)(n, std::size_t(3)/*size*/); 561 b.commit(net::buffer_copy(b.prepare(amount), cb)); 562 cb.consume(amount); 563 n -= amount; 564 } 565 BEAST_EXPECT(u.write(b.data()) == result[i]); 566 } 567 } 568 569 void run()570 run() override 571 { 572 testOneByteSequence(); 573 testTwoByteSequence(); 574 testThreeByteSequence(); 575 testFourByteSequence(); 576 testWithStreamBuffer(); 577 testBranches(); 578 AutodeskTests(); 579 // 6.4.2 580 AutobahnTest(std::vector<std::vector<std::uint8_t>>{ 581 { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 }, 582 { 0x90 }, { 0x80, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 } }, 583 { true, false, false}); 584 // 6.4.4 585 AutobahnTest(std::vector<std::vector<std::uint8_t>>{ 586 { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 }, 587 { 0x90 } }, 588 { true, false }); 589 } 590 }; 591 592 BEAST_DEFINE_TESTSUITE(beast,websocket,utf8_checker); 593 594 } // detail 595 } // websocket 596 } // beast 597 } // boost 598