• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9 
10 // Test that header file is self-contained.
11 #include <boost/beast/websocket/detail/utf8_checker.hpp>
12 
13 #include <boost/beast/core/buffers_suffix.hpp>
14 #include <boost/beast/core/multi_buffer.hpp>
15 #include <boost/beast/_experimental/unit_test/suite.hpp>
16 #include <array>
17 
18 namespace boost {
19 namespace beast {
20 namespace websocket {
21 namespace detail {
22 
23 class utf8_checker_test : public beast::unit_test::suite
24 {
25 public:
26     void
testOneByteSequence()27     testOneByteSequence()
28     {
29         // valid single-char code points
30         for(unsigned char c = 0; c < 128; ++c)
31         {
32             utf8_checker u;
33             BEAST_EXPECT(u.write(&c, 1));
34             BEAST_EXPECT(u.finish());
35         }
36 
37         // invalid lead bytes
38         for(unsigned char c = 128; c < 192; ++c)
39         {
40             utf8_checker u;
41             BEAST_EXPECT(! u.write(&c, 1));
42         }
43 
44         // two byte sequences
45         for(unsigned char c = 192; c < 224; ++c)
46         {
47             // fail fast
48             utf8_checker u;
49             if (c < 194)
50                 BEAST_EXPECT(! u.write(&c, 1));
51             else
52             {
53                 BEAST_EXPECT(u.write(&c, 1));
54                 BEAST_EXPECT(! u.finish());
55             }
56         }
57 
58         // three byte sequences
59         for(unsigned char c = 224; c < 240; ++c)
60         {
61             utf8_checker u;
62             BEAST_EXPECT(u.write(&c, 1));
63             BEAST_EXPECT(! u.finish());
64         }
65 
66         // four byte sequences
67         for(unsigned char c = 240; c < 245; ++c)
68         {
69             // fail fast
70             utf8_checker u;
71             BEAST_EXPECT(u.write(&c, 1));
72             BEAST_EXPECT(! u.finish());
73         }
74 
75         // invalid lead bytes
76         for(unsigned char c = 245; c; ++c)
77         {
78             utf8_checker u;
79             BEAST_EXPECT(! u.write(&c, 1));
80         }
81     }
82 
83     void
testTwoByteSequence()84     testTwoByteSequence()
85     {
86         // Autobahn 6.18.1
87         {
88             utf8_checker u;
89             BEAST_EXPECT(! u.write(net::buffer("\xc1\xbf", 2)));
90         }
91 
92         utf8_checker u;
93         std::uint8_t buf[2];
94         // First byte valid range 194-223
95         for(auto i : {194, 223})
96         {
97             buf[0] = static_cast<std::uint8_t>(i);
98 
99             // Second byte valid range 128-191
100             for(auto j : {128, 191})
101             {
102                 buf[1] = static_cast<std::uint8_t>(j);
103                 BEAST_EXPECT(u.write(buf, 2));
104                 BEAST_EXPECT(u.finish());
105             }
106 
107             // Second byte invalid range 0-127
108             for(auto j : {0, 127})
109             {
110                 buf[1] = static_cast<std::uint8_t>(j);
111                 BEAST_EXPECT(! u.write(buf, 2));
112                 u.reset();
113             }
114 
115             // Second byte invalid range 192-255
116             for(auto j : {192, 255})
117             {
118                 buf[1] = static_cast<std::uint8_t>(j);
119                 BEAST_EXPECT(! u.write(buf, 2));
120                 u.reset();
121             }
122 
123             // Segmented sequence second byte invalid
124             BEAST_EXPECT(u.write(buf, 1));
125             BEAST_EXPECT(! u.write(&buf[1], 1));
126             u.reset();
127         }
128     }
129 
130     void
testThreeByteSequence()131     testThreeByteSequence()
132     {
133         {
134             utf8_checker u;
135             BEAST_EXPECT(u.write(net::buffer("\xef\xbf\xbf", 3)));
136             BEAST_EXPECT(u.finish());
137         }
138         utf8_checker u;
139         std::uint8_t buf[3];
140         // First byte valid range 224-239
141         for(auto i : {224, 239})
142         {
143             buf[0] = static_cast<std::uint8_t>(i);
144 
145             // Second byte valid range 128-191 or 160-191 or 128-159
146             std::int32_t const b = (i == 224 ? 160 : 128);
147             std::int32_t const e = (i == 237 ? 159 : 191);
148             for(auto j : {b, e})
149             {
150                 buf[1] = static_cast<std::uint8_t>(j);
151 
152                 // Third byte valid range 128-191
153                 for(auto k : {128, 191})
154                 {
155                     buf[2] = static_cast<std::uint8_t>(k);
156                     BEAST_EXPECT(u.write(buf, 3));
157                     BEAST_EXPECT(u.finish());
158                     // Segmented sequence
159                     if (i == 224)
160                     {
161                         BEAST_EXPECT(u.write(buf, 1));
162                         BEAST_EXPECT(!u.finish());
163                     }
164                     else
165                     {
166                         BEAST_EXPECT(u.write(buf, 1));
167                         BEAST_EXPECT(u.write(&buf[1], 2));
168                     }
169                     u.reset();
170                     // Segmented sequence
171                     BEAST_EXPECT(u.write(buf, 2));
172                     BEAST_EXPECT(u.write(&buf[2], 1));
173                     u.reset();
174 
175                     if (i == 224)
176                     {
177                         // Second byte invalid range 0-159
178                         for (auto l : {0, 159})
179                         {
180                             buf[1] = static_cast<std::uint8_t>(l);
181                             BEAST_EXPECT(! u.write(buf, 3));
182                             u.reset();
183                             // Segmented sequence second byte invalid
184                             BEAST_EXPECT(!u.write(buf, 2));
185                             u.reset();
186                         }
187                         // Second byte invalid range 192-255
188                         for(auto l : {192, 255})
189                         {
190                             buf[1] = static_cast<std::uint8_t>(l);
191                             BEAST_EXPECT(! u.write(buf, 3));
192                             u.reset();
193                             // Segmented sequence second byte invalid
194                             BEAST_EXPECT(!u.write(buf, 2));
195                             u.reset();
196                         }
197                         buf[1] = static_cast<std::uint8_t>(j);
198                     }
199                     else if (i == 237)
200                     {
201                         // Second byte invalid range 0-127
202                         for(auto l : {0, 127})
203                         {
204                             buf[1] = static_cast<std::uint8_t>(l);
205                             BEAST_EXPECT(! u.write(buf, 3));
206                             u.reset();
207                             // Segmented sequence second byte invalid
208                             BEAST_EXPECT(!u.write(buf, 2));
209                             u.reset();
210                         }
211 
212                         // Second byte invalid range 160-255
213                         for(auto l : {160, 255})
214                         {
215                             buf[1] = static_cast<std::uint8_t>(l);
216                             BEAST_EXPECT(! u.write(buf, 3));
217                             u.reset();
218                             // Segmented sequence second byte invalid
219                             BEAST_EXPECT(! u.write(buf, 2));
220                             u.reset();
221                         }
222                         buf[1] = static_cast<std::uint8_t>(j);
223                     }
224                 }
225 
226                 // Third byte invalid range 0-127
227                 for(auto k : {0, 127})
228                 {
229                     buf[2] = static_cast<std::uint8_t>(k);
230                     BEAST_EXPECT(! u.write(buf, 3));
231                     u.reset();
232                 }
233 
234                 // Third byte invalid range 192-255
235                 for(auto k : {192, 255})
236                 {
237                     buf[2] = static_cast<std::uint8_t>(k);
238                     BEAST_EXPECT(! u.write(buf, 3));
239                     u.reset();
240                 }
241 
242                 // Segmented sequence third byte invalid
243                 BEAST_EXPECT(u.write(buf, 2));
244                 BEAST_EXPECT(! u.write(&buf[2], 1));
245                 u.reset();
246             }
247 
248             // Second byte invalid range 0-127 or 0-159
249             for(auto j : {0, b - 1})
250             {
251                 buf[1] = static_cast<std::uint8_t>(j);
252                 BEAST_EXPECT(! u.write(buf, 3));
253                 u.reset();
254             }
255 
256             // Second byte invalid range 160-255 or 192-255
257             for(auto j : {e + 1, 255})
258             {
259                 buf[1] = static_cast<std::uint8_t>(j);
260                 BEAST_EXPECT(! u.write(buf, 3));
261                 u.reset();
262             }
263 
264             // Segmented sequence second byte invalid
265             if (i == 224) {
266                 BEAST_EXPECT(u.write(buf, 1));
267                 BEAST_EXPECT(!u.finish());
268             }
269             else
270             {
271                 BEAST_EXPECT(u.write(buf, 1));
272                 BEAST_EXPECT(!u.write(&buf[1], 1));
273             }
274             u.reset();
275         }
276     }
277 
278     void
testFourByteSequence()279     testFourByteSequence()
280     {
281         using net::const_buffer;
282         utf8_checker u;
283         std::uint8_t buf[4];
284         // First byte valid range 240-244
285         for(auto i : {240, 244})
286         {
287             buf[0] = static_cast<std::uint8_t>(i);
288 
289             std::int32_t const b = (i == 240 ? 144 : 128);
290             std::int32_t const e = (i == 244 ? 143 : 191);
291             for(auto j = b; j <= e; ++j)
292             {
293                 buf[1] = static_cast<std::uint8_t>(j);
294 
295                 // Second byte valid range 144-191 or 128-191 or 128-143
296                 for(auto k : {128, 191})
297                 {
298                     buf[2] = static_cast<std::uint8_t>(k);
299 
300                     // Third byte valid range 128-191
301                     for(auto n : {128, 191})
302                     {
303                         // Fourth byte valid range 128-191
304                         buf[3] = static_cast<std::uint8_t>(n);
305                         BEAST_EXPECT(u.write(buf, 4));
306                         BEAST_EXPECT(u.finish());
307                         // Segmented sequence
308                         BEAST_EXPECT(u.write(buf, 1));
309                         BEAST_EXPECT(u.write(&buf[1], 3));
310                         u.reset();
311                         // Segmented sequence
312                         BEAST_EXPECT(u.write(buf, 2));
313                         BEAST_EXPECT(u.write(&buf[2], 2));
314                         u.reset();
315                         // Segmented sequence
316                         BEAST_EXPECT(u.write(buf, 3));
317                         BEAST_EXPECT(u.write(&buf[3], 1));
318                         u.reset();
319 
320                         if (i == 240)
321                         {
322                             // Second byte invalid range 0-143
323                             for(auto r : {0, 143})
324                             {
325                                 buf[1] = static_cast<std::uint8_t>(r);
326                                 BEAST_EXPECT(! u.write(buf, 4));
327                                 u.reset();
328                                 // Segmented sequence second byte invalid
329                                 BEAST_EXPECT(! u.write(buf, 2));
330                                 u.reset();
331                             }
332 
333                             // Second byte invalid range 192-255
334                             for(auto r : {192, 255})
335                             {
336                                 buf[1] = static_cast<std::uint8_t>(r);
337                                 BEAST_EXPECT(! u.write(buf, 4));
338                                 u.reset();
339                                 // Segmented sequence second byte invalid
340                                 BEAST_EXPECT(!u.write(buf, 2));
341                                 u.reset();
342                             }
343                             buf[1] = static_cast<std::uint8_t>(j);
344                         }
345                         else if (i == 244)
346                         {
347                             // Second byte invalid range 0-127
348                             for(auto r : {0, 127})
349                             {
350                                 buf[1] = static_cast<std::uint8_t>(r);
351                                 BEAST_EXPECT(! u.write(buf, 4));
352                                 u.reset();
353                                 // Segmented sequence second byte invalid
354                                 BEAST_EXPECT(! u.write(buf, 2));
355                                 u.reset();
356                             }
357                             // Second byte invalid range 144-255
358                             for(auto r : {144, 255})
359                             {
360                                 buf[1] = static_cast<std::uint8_t>(r);
361                                 BEAST_EXPECT(! u.write(buf, 4));
362                                 u.reset();
363                                 // Segmented sequence second byte invalid
364                                 BEAST_EXPECT(! u.write(buf, 2));
365                                 u.reset();
366                             }
367                             buf[1] = static_cast<std::uint8_t>(j);
368                         }
369                     }
370 
371                     // Fourth byte invalid ranges 0-127, 192-255
372                     for(auto r : {0, 127, 192, 255})
373                     {
374                         buf[3] = static_cast<std::uint8_t>(r);
375                         BEAST_EXPECT(! u.write(buf, 4));
376                         u.reset();
377                     }
378 
379                     // Segmented sequence fourth byte invalid
380                     BEAST_EXPECT(u.write(buf, 3));
381                     BEAST_EXPECT(! u.write(&buf[3], 1));
382                     u.reset();
383                 }
384 
385                 // Third byte invalid ranges 0-127, 192-255
386                 for(auto r : {0, 127, 192, 255})
387                 {
388                     buf[2] = static_cast<std::uint8_t>(r);
389                     BEAST_EXPECT(! u.write(buf, 4));
390                     u.reset();
391                 }
392 
393                 // Segmented sequence third byte invalid
394                 BEAST_EXPECT(u.write(buf, 2));
395                 BEAST_EXPECT(! u.write(&buf[2], 1));
396                 u.reset();
397             }
398 
399             // Second byte invalid range 0-127 or 0-143
400             for(auto r : {0, b - 1})
401             {
402                 buf[1] = static_cast<std::uint8_t>(r);
403                 BEAST_EXPECT(! u.write(buf, 4));
404                 u.reset();
405             }
406 
407             // Second byte invalid range 144-255 or 192-255
408             for(auto r : {e + 1, 255})
409             {
410                 buf[1] = static_cast<std::uint8_t>(r);
411                 BEAST_EXPECT(! u.write(buf, 4));
412                 u.reset();
413             }
414 
415             // Segmented sequence second byte invalid
416             BEAST_EXPECT(u.write(buf, 1));
417             BEAST_EXPECT(! u.write(&buf[1], 1));
418 
419             u.reset();
420         }
421 
422         // First byte invalid range 245-255
423         for(auto r : {245, 255})
424         {
425             buf[0] = static_cast<std::uint8_t>(r);
426             BEAST_EXPECT(! u.write(buf, 4));
427             u.reset();
428         }
429     }
430 
431     void
testWithStreamBuffer()432     testWithStreamBuffer()
433     {
434         {
435             // Valid UTF8 encoded text
436             std::vector<std::vector<std::uint8_t>> const data{{
437                     0x48,0x65,0x69,0x7A,0xC3,0xB6,0x6C,0x72,0xC3,0xBC,0x63,0x6B,
438                     0x73,0x74,0x6F,0xC3,0x9F,0x61,0x62,0x64,0xC3,0xA4,0x6D,0x70,
439                     0x66,0x75,0x6E,0x67
440                 }, {
441                     0xCE,0x93,0xCE,0xB1,0xCE,0xB6,0xCE,0xAD,0xCE,0xB5,0xCF,0x82,
442                     0x20,0xCE,0xBA,0xCE,0xB1,0xE1,0xBD,0xB6,0x20,0xCE,0xBC,0xCF,
443                     0x85,0xCF,0x81,0xCF,0x84,0xCE,0xB9,0xE1,0xBD,0xB2,0xCF,0x82,
444                     0x20,0xCE,0xB4,0xE1,0xBD,0xB2,0xCE,0xBD,0x20,0xCE,0xB8,0xE1,
445                     0xBD,0xB0,0x20,0xCE,0xB2,0xCF,0x81,0xE1,0xBF,0xB6,0x20,0xCF,
446                     0x80,0xCE,0xB9,0xE1,0xBD,0xB0,0x20,0xCF,0x83,0xCF,0x84,0xE1,
447                     0xBD,0xB8,0x20,0xCF,0x87,0xCF,0x81,0xCF,0x85,0xCF,0x83,0xCE,
448                     0xB1,0xCF,0x86,0xE1,0xBD,0xB6,0x20,0xCE,0xBE,0xCE,0xAD,0xCF,
449                     0x86,0xCF,0x89,0xCF,0x84,0xCE,0xBF
450                 }, {
451                     0xC3,0x81,0x72,0x76,0xC3,0xAD,0x7A,0x74,0xC5,0xB1,0x72,0xC5,
452                     0x91,0x20,0x74,0xC3,0xBC,0x6B,0xC3,0xB6,0x72,0x66,0xC3,0xBA,
453                     0x72,0xC3,0xB3,0x67,0xC3,0xA9,0x70
454                 }, {
455                     240, 144, 128, 128
456                 }
457             };
458             utf8_checker u;
459             for(auto const& s : data)
460             {
461                 static std::size_t constexpr size = 3;
462                 std::size_t n = s.size();
463                 buffers_suffix<
464                     net::const_buffer> cb{
465                         net::const_buffer(s.data(), n)};
466                 multi_buffer b;
467                 while(n)
468                 {
469                     auto const amount = (std::min)(n, size);
470                     b.commit(net::buffer_copy(
471                         b.prepare(amount), cb));
472                     cb.consume(amount);
473                     n -= amount;
474                 }
475                 BEAST_EXPECT(u.write(b.data()));
476                 BEAST_EXPECT(u.finish());
477             }
478         }
479     }
480 
481     void
testBranches()482     testBranches()
483     {
484         // switch to slow loop from alignment loop
485         {
486             char buf[32];
487             for(unsigned i = 0; i < sizeof(buf); i += 2)
488             {
489                 buf[i  ] = '\xc2';
490                 buf[i+1] = '\x80';
491             }
492             auto p = reinterpret_cast<char const*>(sizeof(std::size_t) * (
493                 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) /
494                     sizeof(std::size_t))) + 2;
495             utf8_checker u;
496             BEAST_EXPECT(u.write(
497                 reinterpret_cast<std::uint8_t const*>(p),
498                     sizeof(buf)-(p-buf)));
499             BEAST_EXPECT(u.finish());
500         }
501 
502         // invalid code point in the last dword of a fast run
503         {
504             char buf[20];
505             auto p = reinterpret_cast<char*>(sizeof(std::size_t) * (
506                 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) /
507                     sizeof(std::size_t)));
508             BOOST_ASSERT(p + 12 <= buf + sizeof(buf));
509             auto const in = p;
510             *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*';
511             *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*';
512             p[0] = '\x80'; // invalid
513             p[1] = '*';
514             p[2] = '*';
515             p[3] = '*';
516             utf8_checker u;
517             BEAST_EXPECT(! u.write(reinterpret_cast<
518                 std::uint8_t const*>(in), 12));
519         }
520     }
521 
522     void
AutodeskTests()523     AutodeskTests()
524     {
525         std::vector<std::vector<std::uint8_t>> const data{
526             { 's','t','a','r','t', 0xE0 },
527             { 0xA6, 0x81, 'e','n','d' } };
528         utf8_checker u;
529         for(auto const& s : data)
530         {
531             std::size_t n = s.size();
532             buffers_suffix<net::const_buffer> cb{net::const_buffer(s.data(), n)};
533             multi_buffer b;
534             while(n)
535             {
536                 auto const amount = (std::min)(n, std::size_t(3)/*size*/);
537                 b.commit(net::buffer_copy(b.prepare(amount), cb));
538                 cb.consume(amount);
539                 n -= amount;
540             }
541             BEAST_EXPECT(u.write(b.data()));
542         }
543         BEAST_EXPECT(u.finish());
544     }
545 
546     void
AutobahnTest(std::vector<std::vector<std::uint8_t>> && data,std::vector<bool> result)547     AutobahnTest(std::vector<std::vector<std::uint8_t>>&& data, std::vector<bool> result)
548     {
549         BEAST_EXPECT(data.size() == result.size());
550         utf8_checker u;
551         for(std::size_t i = 0; i < data.size(); ++i)
552         {
553             auto const& s = data[i];
554 
555             std::size_t n = s.size();
556             buffers_suffix<net::const_buffer> cb{net::const_buffer(s.data(), n)};
557             multi_buffer b;
558             while(n)
559             {
560                 auto const amount = (std::min)(n, std::size_t(3)/*size*/);
561                 b.commit(net::buffer_copy(b.prepare(amount), cb));
562                 cb.consume(amount);
563                 n -= amount;
564             }
565             BEAST_EXPECT(u.write(b.data()) == result[i]);
566         }
567     }
568 
569     void
run()570     run() override
571     {
572         testOneByteSequence();
573         testTwoByteSequence();
574         testThreeByteSequence();
575         testFourByteSequence();
576         testWithStreamBuffer();
577         testBranches();
578         AutodeskTests();
579         // 6.4.2
580         AutobahnTest(std::vector<std::vector<std::uint8_t>>{
581             { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
582             { 0x90 }, { 0x80, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 } },
583             { true, false, false});
584         // 6.4.4
585         AutobahnTest(std::vector<std::vector<std::uint8_t>>{
586             { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
587             { 0x90 } },
588             { true, false });
589     }
590 };
591 
592 BEAST_DEFINE_TESTSUITE(beast,websocket,utf8_checker);
593 
594 } // detail
595 } // websocket
596 } // beast
597 } // boost
598