1 // 2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 // 7 // Official repository: https://github.com/boostorg/beast 8 // 9 10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP 11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP 12 13 #include <boost/beast/core/detail/config.hpp> 14 #include <boost/beast/core/error.hpp> 15 #include <boost/beast/core/string.hpp> 16 #include <boost/beast/http/field.hpp> 17 #include <boost/beast/http/verb.hpp> 18 #include <boost/beast/http/detail/basic_parser.hpp> 19 #include <boost/asio/buffer.hpp> 20 #include <boost/optional.hpp> 21 #include <boost/assert.hpp> 22 #include <limits> 23 #include <memory> 24 #include <type_traits> 25 #include <utility> 26 27 namespace boost { 28 namespace beast { 29 namespace http { 30 31 /** A parser for decoding HTTP/1 wire format messages. 32 33 This parser is designed to efficiently parse messages in the 34 HTTP/1 wire format. It allocates no memory when input is 35 presented as a single contiguous buffer, and uses minimal 36 state. It will handle chunked encoding and it understands 37 the semantics of the Connection, Content-Length, and Upgrade 38 fields. 39 The parser is optimized for the case where the input buffer 40 sequence consists of a single contiguous buffer. The 41 @ref beast::basic_flat_buffer class is provided, which guarantees 42 that the input sequence of the stream buffer will be represented 43 by exactly one contiguous buffer. To ensure the optimum performance 44 of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms 45 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some. 46 Alternatively, the caller may use custom techniques to ensure that 47 the structured portion of the HTTP message (header or chunk header) 48 is contained in a linear buffer. 49 50 The interface to the parser uses virtual member functions. 51 To use this class, derive your type from @ref basic_parser. When 52 bytes are presented, the implementation will make a series of zero 53 or more calls to virtual functions, which the derived class must 54 implement. 55 56 Every virtual function must be provided by the derived class, 57 or else a compilation error will be generated. The implementation 58 will make sure that `ec` is clear before each virtual function 59 is invoked. If a virtual function sets an error, it is propagated 60 out of the parser to the caller. 61 62 @tparam isRequest A `bool` indicating whether the parser will be 63 presented with request or response message. 64 65 @note If the parser encounters a field value with obs-fold 66 longer than 4 kilobytes in length, an error is generated. 67 */ 68 template<bool isRequest> 69 class basic_parser 70 : private detail::basic_parser_base 71 { 72 boost::optional<std::uint64_t> 73 body_limit_ = 74 boost::optional<std::uint64_t>( 75 default_body_limit(is_request{})); // max payload body 76 std::uint64_t len_ = 0; // size of chunk or body 77 std::uint64_t len0_ = 0; // content length if known 78 std::unique_ptr<char[]> buf_; // temp storage 79 std::size_t buf_len_ = 0; // size of buf_ 80 std::size_t skip_ = 0; // resume search here 81 std::uint32_t header_limit_ = 8192; // max header size 82 unsigned short status_ = 0; // response status 83 state state_ = state::nothing_yet; // initial state 84 unsigned f_ = 0; // flags 85 86 // limit on the size of the stack flat buffer 87 static std::size_t constexpr max_stack_buffer = 8192; 88 89 // Message will be complete after reading header 90 static unsigned constexpr flagSkipBody = 1<< 0; 91 92 // Consume input buffers across semantic boundaries 93 static unsigned constexpr flagEager = 1<< 1; 94 95 // The parser has read at least one byte 96 static unsigned constexpr flagGotSome = 1<< 2; 97 98 // Message semantics indicate a body is expected. 99 // cleared if flagSkipBody set 100 // 101 static unsigned constexpr flagHasBody = 1<< 3; 102 103 static unsigned constexpr flagHTTP11 = 1<< 4; 104 static unsigned constexpr flagNeedEOF = 1<< 5; 105 static unsigned constexpr flagExpectCRLF = 1<< 6; 106 static unsigned constexpr flagConnectionClose = 1<< 7; 107 static unsigned constexpr flagConnectionUpgrade = 1<< 8; 108 static unsigned constexpr flagConnectionKeepAlive = 1<< 9; 109 static unsigned constexpr flagContentLength = 1<< 10; 110 static unsigned constexpr flagChunked = 1<< 11; 111 static unsigned constexpr flagUpgrade = 1<< 12; 112 static unsigned constexpr flagFinalChunk = 1<< 13; 113 114 static constexpr 115 std::uint64_t default_body_limit(std::true_type)116 default_body_limit(std::true_type) 117 { 118 // limit for requests 119 return 1 * 1024 * 1024; // 1MB 120 } 121 122 static constexpr 123 std::uint64_t default_body_limit(std::false_type)124 default_body_limit(std::false_type) 125 { 126 // limit for responses 127 return 8 * 1024 * 1024; // 8MB 128 } 129 130 template<bool OtherIsRequest> 131 friend class basic_parser; 132 133 friend class basic_parser_test; 134 135 protected: 136 /// Default constructor 137 basic_parser() = default; 138 139 /** Move constructor 140 141 @note 142 143 After the move, the only valid operation on the 144 moved-from object is destruction. 145 */ 146 basic_parser(basic_parser &&) = default; 147 148 /// Move assignment 149 basic_parser& operator=(basic_parser &&) = default; 150 151 public: 152 /// `true` if this parser parses requests, `false` for responses. 153 using is_request = 154 std::integral_constant<bool, isRequest>; 155 156 /// Destructor 157 virtual ~basic_parser() = default; 158 159 /// Copy constructor 160 basic_parser(basic_parser const&) = delete; 161 162 /// Copy assignment 163 basic_parser& operator=(basic_parser const&) = delete; 164 165 /// Returns `true` if the parser has received at least one byte of input. 166 bool got_some() const167 got_some() const 168 { 169 return state_ != state::nothing_yet; 170 } 171 172 /** Returns `true` if the message is complete. 173 174 The message is complete after the full header is prduced 175 and one of the following is true: 176 177 @li The skip body option was set. 178 179 @li The semantics of the message indicate there is no body. 180 181 @li The semantics of the message indicate a body is expected, 182 and the entire body was parsed. 183 */ 184 bool is_done() const185 is_done() const 186 { 187 return state_ == state::complete; 188 } 189 190 /** Returns `true` if a the parser has produced the full header. 191 */ 192 bool is_header_done() const193 is_header_done() const 194 { 195 return state_ > state::fields; 196 } 197 198 /** Returns `true` if the message is an upgrade message. 199 200 @note The return value is undefined unless 201 @ref is_header_done would return `true`. 202 */ 203 bool upgrade() const204 upgrade() const 205 { 206 return (f_ & flagConnectionUpgrade) != 0; 207 } 208 209 /** Returns `true` if the last value for Transfer-Encoding is "chunked". 210 211 @note The return value is undefined unless 212 @ref is_header_done would return `true`. 213 */ 214 bool chunked() const215 chunked() const 216 { 217 return (f_ & flagChunked) != 0; 218 } 219 220 /** Returns `true` if the message has keep-alive connection semantics. 221 222 This function always returns `false` if @ref need_eof would return 223 `false`. 224 225 @note The return value is undefined unless 226 @ref is_header_done would return `true`. 227 */ 228 bool 229 keep_alive() const; 230 231 /** Returns the optional value of Content-Length if known. 232 233 @note The return value is undefined unless 234 @ref is_header_done would return `true`. 235 */ 236 boost::optional<std::uint64_t> 237 content_length() const; 238 239 /** Returns the remaining content length if known 240 241 If the message header specifies a Content-Length, 242 the return value will be the number of bytes remaining 243 in the payload body have not yet been parsed. 244 245 @note The return value is undefined unless 246 @ref is_header_done would return `true`. 247 */ 248 boost::optional<std::uint64_t> 249 content_length_remaining() const; 250 251 /** Returns `true` if the message semantics require an end of file. 252 253 Depending on the contents of the header, the parser may 254 require and end of file notification to know where the end 255 of the body lies. If this function returns `true` it will be 256 necessary to call @ref put_eof when there will never be additional 257 data from the input. 258 */ 259 bool need_eof() const260 need_eof() const 261 { 262 return (f_ & flagNeedEOF) != 0; 263 } 264 265 /** Set the limit on the payload body. 266 267 This function sets the maximum allowed size of the payload body, 268 before any encodings except chunked have been removed. Depending 269 on the message semantics, one of these cases will apply: 270 271 @li The Content-Length is specified and exceeds the limit. In 272 this case the result @ref error::body_limit is returned 273 immediately after the header is parsed. 274 275 @li The Content-Length is unspecified and the chunked encoding 276 is not specified as the last encoding. In this case the end of 277 message is determined by the end of file indicator on the 278 associated stream or input source. If a sufficient number of 279 body payload octets are presented to the parser to exceed the 280 configured limit, the parse fails with the result 281 @ref error::body_limit 282 283 @li The Transfer-Encoding specifies the chunked encoding as the 284 last encoding. In this case, when the number of payload body 285 octets produced by removing the chunked encoding exceeds 286 the configured limit, the parse fails with the result 287 @ref error::body_limit. 288 289 Setting the limit after any body octets have been parsed 290 results in undefined behavior. 291 292 The default limit is 1MB for requests and 8MB for responses. 293 294 @param v An optional integral value representing the body limit. 295 If this is equal to `boost::none`, then the body limit is disabled. 296 */ 297 void body_limit(boost::optional<std::uint64_t> v)298 body_limit(boost::optional<std::uint64_t> v) 299 { 300 body_limit_ = v; 301 } 302 303 /** Set a limit on the total size of the header. 304 305 This function sets the maximum allowed size of the header 306 including all field name, value, and delimiter characters 307 and also including the CRLF sequences in the serialized 308 input. If the end of the header is not found within the 309 limit of the header size, the error @ref error::header_limit 310 is returned by @ref put. 311 312 Setting the limit after any header octets have been parsed 313 results in undefined behavior. 314 */ 315 void header_limit(std::uint32_t v)316 header_limit(std::uint32_t v) 317 { 318 header_limit_ = v; 319 } 320 321 /// Returns `true` if the eager parse option is set. 322 bool eager() const323 eager() const 324 { 325 return (f_ & flagEager) != 0; 326 } 327 328 /** Set the eager parse option. 329 330 Normally the parser returns after successfully parsing a structured 331 element (header, chunk header, or chunk body) even if there are octets 332 remaining in the input. This is necessary when attempting to parse the 333 header first, or when the caller wants to inspect information which may 334 be invalidated by subsequent parsing, such as a chunk extension. The 335 `eager` option controls whether the parser keeps going after parsing 336 structured element if there are octets remaining in the buffer and no 337 error occurs. This option is automatically set or cleared during certain 338 stream operations to improve performance with no change in functionality. 339 340 The default setting is `false`. 341 342 @param v `true` to set the eager parse option or `false` to disable it. 343 */ 344 void eager(bool v)345 eager(bool v) 346 { 347 if(v) 348 f_ |= flagEager; 349 else 350 f_ &= ~flagEager; 351 } 352 353 /// Returns `true` if the skip parse option is set. 354 bool skip() const355 skip() const 356 { 357 return (f_ & flagSkipBody) != 0; 358 } 359 360 /** Set the skip parse option. 361 362 This option controls whether or not the parser expects to see an HTTP 363 body, regardless of the presence or absence of certain fields such as 364 Content-Length or a chunked Transfer-Encoding. Depending on the request, 365 some responses do not carry a body. For example, a 200 response to a 366 CONNECT request from a tunneling proxy, or a response to a HEAD request. 367 In these cases, callers may use this function inform the parser that 368 no body is expected. The parser will consider the message complete 369 after the header has been received. 370 371 @param v `true` to set the skip body option or `false` to disable it. 372 373 @note This function must called before any bytes are processed. 374 */ 375 void 376 skip(bool v); 377 378 /** Write a buffer sequence to the parser. 379 380 This function attempts to incrementally parse the HTTP 381 message data stored in the caller provided buffers. Upon 382 success, a positive return value indicates that the parser 383 made forward progress, consuming that number of 384 bytes. 385 386 In some cases there may be an insufficient number of octets 387 in the input buffer in order to make forward progress. This 388 is indicated by the code @ref error::need_more. When 389 this happens, the caller should place additional bytes into 390 the buffer sequence and call @ref put again. 391 392 The error code @ref error::need_more is special. When this 393 error is returned, a subsequent call to @ref put may succeed 394 if the buffers have been updated. Otherwise, upon error 395 the parser may not be restarted. 396 397 @param buffers An object meeting the requirements of 398 <em>ConstBufferSequence</em> that represents the next chunk of 399 message data. If the length of this buffer sequence is 400 one, the implementation will not allocate additional memory. 401 The class @ref beast::basic_flat_buffer is provided as one way to 402 meet this requirement 403 404 @param ec Set to the error, if any occurred. 405 406 @return The number of octets consumed in the buffer 407 sequence. The caller should remove these octets even if the 408 error is set. 409 */ 410 template<class ConstBufferSequence> 411 std::size_t 412 put(ConstBufferSequence const& buffers, error_code& ec); 413 414 #if ! BOOST_BEAST_DOXYGEN 415 std::size_t 416 put(net::const_buffer buffer, 417 error_code& ec); 418 #endif 419 420 /** Inform the parser that the end of stream was reached. 421 422 In certain cases, HTTP needs to know where the end of 423 the stream is. For example, sometimes servers send 424 responses without Content-Length and expect the client 425 to consume input (for the body) until EOF. Callbacks 426 and errors will still be processed as usual. 427 428 This is typically called when a read from the 429 underlying stream object sets the error code to 430 `net::error::eof`. 431 432 @note Only valid after parsing a complete header. 433 434 @param ec Set to the error, if any occurred. 435 */ 436 void 437 put_eof(error_code& ec); 438 439 protected: 440 /** Called after receiving the request-line. 441 442 This virtual function is invoked after receiving a request-line 443 when parsing HTTP requests. 444 It can only be called when `isRequest == true`. 445 446 @param method The verb enumeration. If the method string is not 447 one of the predefined strings, this value will be @ref verb::unknown. 448 449 @param method_str The unmodified string representing the verb. 450 451 @param target The request-target. 452 453 @param version The HTTP-version. This will be 10 for HTTP/1.0, 454 and 11 for HTTP/1.1. 455 456 @param ec An output parameter which the function may set to indicate 457 an error. The error will be clear before this function is invoked. 458 */ 459 virtual 460 void 461 on_request_impl( 462 verb method, 463 string_view method_str, 464 string_view target, 465 int version, 466 error_code& ec) = 0; 467 468 /** Called after receiving the status-line. 469 470 This virtual function is invoked after receiving a status-line 471 when parsing HTTP responses. 472 It can only be called when `isRequest == false`. 473 474 @param code The numeric status code. 475 476 @param reason The reason-phrase. Note that this value is 477 now obsolete, and only provided for historical or diagnostic 478 purposes. 479 480 @param version The HTTP-version. This will be 10 for HTTP/1.0, 481 and 11 for HTTP/1.1. 482 483 @param ec An output parameter which the function may set to indicate 484 an error. The error will be clear before this function is invoked. 485 */ 486 virtual 487 void 488 on_response_impl( 489 int code, 490 string_view reason, 491 int version, 492 error_code& ec) = 0; 493 494 /** Called once for each complete field in the HTTP header. 495 496 This virtual function is invoked for each field that is received 497 while parsing an HTTP message. 498 499 @param name The known field enum value. If the name of the field 500 is not recognized, this value will be @ref field::unknown. 501 502 @param name_string The exact name of the field as received from 503 the input, represented as a string. 504 505 @param value A string holding the value of the field. 506 507 @param ec An output parameter which the function may set to indicate 508 an error. The error will be clear before this function is invoked. 509 */ 510 virtual 511 void 512 on_field_impl( 513 field name, 514 string_view name_string, 515 string_view value, 516 error_code& ec) = 0; 517 518 /** Called once after the complete HTTP header is received. 519 520 This virtual function is invoked once, after the complete HTTP 521 header is received while parsing a message. 522 523 @param ec An output parameter which the function may set to indicate 524 an error. The error will be clear before this function is invoked. 525 */ 526 virtual 527 void 528 on_header_impl(error_code& ec) = 0; 529 530 /** Called once before the body is processed. 531 532 This virtual function is invoked once, before the content body is 533 processed (but after the complete header is received). 534 535 @param content_length A value representing the content length in 536 bytes if the length is known (this can include a zero length). 537 Otherwise, the value will be `boost::none`. 538 539 @param ec An output parameter which the function may set to indicate 540 an error. The error will be clear before this function is invoked. 541 */ 542 virtual 543 void 544 on_body_init_impl( 545 boost::optional<std::uint64_t> const& content_length, 546 error_code& ec) = 0; 547 548 /** Called each time additional data is received representing the content body. 549 550 This virtual function is invoked for each piece of the body which is 551 received while parsing of a message. This function is only used when 552 no chunked transfer encoding is present. 553 554 @param body A string holding the additional body contents. This may 555 contain nulls or unprintable characters. 556 557 @param ec An output parameter which the function may set to indicate 558 an error. The error will be clear before this function is invoked. 559 560 @see on_chunk_body_impl 561 */ 562 virtual 563 std::size_t 564 on_body_impl( 565 string_view body, 566 error_code& ec) = 0; 567 568 /** Called each time a new chunk header of a chunk encoded body is received. 569 570 This function is invoked each time a new chunk header is received. 571 The function is only used when the chunked transfer encoding is present. 572 573 @param size The size of this chunk, in bytes. 574 575 @param extensions A string containing the entire chunk extensions. 576 This may be empty, indicating no extensions are present. 577 578 @param ec An output parameter which the function may set to indicate 579 an error. The error will be clear before this function is invoked. 580 */ 581 virtual 582 void 583 on_chunk_header_impl( 584 std::uint64_t size, 585 string_view extensions, 586 error_code& ec) = 0; 587 588 /** Called each time additional data is received representing part of a body chunk. 589 590 This virtual function is invoked for each piece of the body which is 591 received while parsing of a message. This function is only used when 592 no chunked transfer encoding is present. 593 594 @param remain The number of bytes remaining in this chunk. This includes 595 the contents of passed `body`. If this value is zero, then this represents 596 the final chunk. 597 598 @param body A string holding the additional body contents. This may 599 contain nulls or unprintable characters. 600 601 @param ec An output parameter which the function may set to indicate 602 an error. The error will be clear before this function is invoked. 603 604 @return This function should return the number of bytes actually consumed 605 from the `body` value. Any bytes that are not consumed on this call 606 will be presented in a subsequent call. 607 608 @see on_body_impl 609 */ 610 virtual 611 std::size_t 612 on_chunk_body_impl( 613 std::uint64_t remain, 614 string_view body, 615 error_code& ec) = 0; 616 617 /** Called once when the complete message is received. 618 619 This virtual function is invoked once, after successfully parsing 620 a complete HTTP message. 621 622 @param ec An output parameter which the function may set to indicate 623 an error. The error will be clear before this function is invoked. 624 */ 625 virtual 626 void 627 on_finish_impl(error_code& ec) = 0; 628 629 private: 630 631 boost::optional<std::uint64_t> 632 content_length_unchecked() const; 633 634 template<class ConstBufferSequence> 635 std::size_t 636 put_from_stack( 637 std::size_t size, 638 ConstBufferSequence const& buffers, 639 error_code& ec); 640 641 void 642 maybe_need_more( 643 char const* p, std::size_t n, 644 error_code& ec); 645 646 void 647 parse_start_line( 648 char const*& p, char const* last, 649 error_code& ec, std::true_type); 650 651 void 652 parse_start_line( 653 char const*& p, char const* last, 654 error_code& ec, std::false_type); 655 656 void 657 parse_fields( 658 char const*& p, char const* last, 659 error_code& ec); 660 661 void 662 finish_header( 663 error_code& ec, std::true_type); 664 665 void 666 finish_header( 667 error_code& ec, std::false_type); 668 669 void 670 parse_body(char const*& p, 671 std::size_t n, error_code& ec); 672 673 void 674 parse_body_to_eof(char const*& p, 675 std::size_t n, error_code& ec); 676 677 void 678 parse_chunk_header(char const*& p, 679 std::size_t n, error_code& ec); 680 681 void 682 parse_chunk_body(char const*& p, 683 std::size_t n, error_code& ec); 684 685 void 686 do_field(field f, 687 string_view value, error_code& ec); 688 }; 689 690 } // http 691 } // beast 692 } // boost 693 694 #include <boost/beast/http/impl/basic_parser.hpp> 695 #ifdef BOOST_BEAST_HEADER_ONLY 696 #include <boost/beast/http/impl/basic_parser.ipp> 697 #endif 698 699 #endif 700