• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9 
10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
12 
13 #include <boost/beast/core/detail/config.hpp>
14 #include <boost/beast/core/error.hpp>
15 #include <boost/beast/core/string.hpp>
16 #include <boost/beast/http/field.hpp>
17 #include <boost/beast/http/verb.hpp>
18 #include <boost/beast/http/detail/basic_parser.hpp>
19 #include <boost/asio/buffer.hpp>
20 #include <boost/optional.hpp>
21 #include <boost/assert.hpp>
22 #include <limits>
23 #include <memory>
24 #include <type_traits>
25 #include <utility>
26 
27 namespace boost {
28 namespace beast {
29 namespace http {
30 
31 /** A parser for decoding HTTP/1 wire format messages.
32 
33     This parser is designed to efficiently parse messages in the
34     HTTP/1 wire format. It allocates no memory when input is
35     presented as a single contiguous buffer, and uses minimal
36     state. It will handle chunked encoding and it understands
37     the semantics of the Connection, Content-Length, and Upgrade
38     fields.
39     The parser is optimized for the case where the input buffer
40     sequence consists of a single contiguous buffer. The
41     @ref beast::basic_flat_buffer class is provided, which guarantees
42     that the input sequence of the stream buffer will be represented
43     by exactly one contiguous buffer. To ensure the optimum performance
44     of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
45     such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
46     Alternatively, the caller may use custom techniques to ensure that
47     the structured portion of the HTTP message (header or chunk header)
48     is contained in a linear buffer.
49 
50     The interface to the parser uses virtual member functions.
51     To use this class, derive your type from @ref basic_parser. When
52     bytes are presented, the implementation will make a series of zero
53     or more calls to virtual functions, which the derived class must
54     implement.
55 
56     Every virtual function must be provided by the derived class,
57     or else a compilation error will be generated. The implementation
58     will make sure that `ec` is clear before each virtual function
59     is invoked. If a virtual function sets an error, it is propagated
60     out of the parser to the caller.
61 
62     @tparam isRequest A `bool` indicating whether the parser will be
63     presented with a request or a response message.
64 
65     @note If the parser encounters a field value with obs-fold
66     longer than 4 kilobytes in length, an error is generated.
67 */
68 template<bool isRequest>
69 class basic_parser
70     : private detail::basic_parser_base
71 {
72     boost::optional<std::uint64_t>
73         body_limit_ =
74             boost::optional<std::uint64_t>(
75                 default_body_limit(is_request{}));   // max payload body
76     std::uint64_t len_ = 0;                 // size of chunk or body
77     std::uint64_t len0_ = 0;                // content length if known
78     std::unique_ptr<char[]> buf_;           // temp storage
79     std::size_t buf_len_ = 0;               // size of buf_
80     std::size_t skip_ = 0;                  // resume search here
81     std::uint32_t header_limit_ = 8192;     // max header size
82     unsigned short status_ = 0;             // response status
83     state state_ = state::nothing_yet;      // initial state
84     unsigned f_ = 0;                        // flags
85 
86     // limit on the size of the stack flat buffer
87     static std::size_t constexpr max_stack_buffer = 8192;
88 
89     // Message will be complete after reading header
90     static unsigned constexpr flagSkipBody              = 1<<  0;
91 
92     // Consume input buffers across semantic boundaries
93     static unsigned constexpr flagEager                 = 1<<  1;
94 
95     // The parser has read at least one byte
96     static unsigned constexpr flagGotSome               = 1<<  2;
97 
98     // Message semantics indicate a body is expected.
99     // cleared if flagSkipBody set
100     //
101     static unsigned constexpr flagHasBody               = 1<<  3;
102 
103     static unsigned constexpr flagHTTP11                = 1<<  4;
104     static unsigned constexpr flagNeedEOF               = 1<<  5;
105     static unsigned constexpr flagExpectCRLF            = 1<<  6;
106     static unsigned constexpr flagConnectionClose       = 1<<  7;
107     static unsigned constexpr flagConnectionUpgrade     = 1<<  8;
108     static unsigned constexpr flagConnectionKeepAlive   = 1<<  9;
109     static unsigned constexpr flagContentLength         = 1<< 10;
110     static unsigned constexpr flagChunked               = 1<< 11;
111     static unsigned constexpr flagUpgrade               = 1<< 12;
112     static unsigned constexpr flagFinalChunk            = 1<< 13;
113 
114     static constexpr
115     std::uint64_t
default_body_limit(std::true_type)116     default_body_limit(std::true_type)
117     {
118         // limit for requests
119         return 1 * 1024 * 1024; // 1MB
120     }
121 
122     static constexpr
123     std::uint64_t
default_body_limit(std::false_type)124     default_body_limit(std::false_type)
125     {
126         // limit for responses
127         return 8 * 1024 * 1024; // 8MB
128     }
129 
130     template<bool OtherIsRequest>
131     friend class basic_parser;
132 
133     friend class basic_parser_test;
134 
135 protected:
136     /// Default constructor
137     basic_parser() = default;
138 
139     /** Move constructor
140 
141         @note
142 
143         After the move, the only valid operation on the
144         moved-from object is destruction.
145     */
146     basic_parser(basic_parser &&) = default;
147 
148     /// Move assignment
149     basic_parser& operator=(basic_parser &&) = default;
150 
151 public:
152     /// `true` if this parser parses requests, `false` for responses.
153     using is_request =
154         std::integral_constant<bool, isRequest>;
155 
156     /// Destructor
157     virtual ~basic_parser() = default;
158 
159     /// Copy constructor
160     basic_parser(basic_parser const&) = delete;
161 
162     /// Copy assignment
163     basic_parser& operator=(basic_parser const&) = delete;
164 
165     /// Returns `true` if the parser has received at least one byte of input.
166     bool
got_some() const167     got_some() const
168     {
169         return state_ != state::nothing_yet;
170     }
171 
172     /** Returns `true` if the message is complete.
173 
174         The message is complete after the full header is prduced
175         and one of the following is true:
176 
177         @li The skip body option was set.
178 
179         @li The semantics of the message indicate there is no body.
180 
181         @li The semantics of the message indicate a body is expected,
182         and the entire body was parsed.
183     */
184     bool
is_done() const185     is_done() const
186     {
187         return state_ == state::complete;
188     }
189 
190     /** Returns `true` if a the parser has produced the full header.
191     */
192     bool
is_header_done() const193     is_header_done() const
194     {
195         return state_ > state::fields;
196     }
197 
198     /** Returns `true` if the message is an upgrade message.
199 
200         @note The return value is undefined unless
201         @ref is_header_done would return `true`.
202     */
203     bool
upgrade() const204     upgrade() const
205     {
206         return (f_ & flagConnectionUpgrade) != 0;
207     }
208 
209     /** Returns `true` if the last value for Transfer-Encoding is "chunked".
210 
211         @note The return value is undefined unless
212         @ref is_header_done would return `true`.
213     */
214     bool
chunked() const215     chunked() const
216     {
217         return (f_ & flagChunked) != 0;
218     }
219 
220     /** Returns `true` if the message has keep-alive connection semantics.
221 
222         This function always returns `false` if @ref need_eof would return
223         `true`.
224 
225         @note The return value is undefined unless
226         @ref is_header_done would return `true`.
227     */
228     bool
229     keep_alive() const;
230 
231     /** Returns the optional value of Content-Length if known.
232 
233         @note The return value is undefined unless
234         @ref is_header_done would return `true`.
235     */
236     boost::optional<std::uint64_t>
237     content_length() const;
238 
239     /** Returns the remaining content length if known
240 
241         If the message header specifies a Content-Length,
242         the return value will be the number of bytes remaining
243         in the payload body that have not yet been parsed.
244 
245         @note The return value is undefined unless
246               @ref is_header_done would return `true`.
247     */
248     boost::optional<std::uint64_t>
249     content_length_remaining() const;
250 
251     /** Returns `true` if the message semantics require an end of file.
252 
253         Depending on the contents of the header, the parser may
254         require and end of file notification to know where the end
255         of the body lies. If this function returns `true` it will be
256         necessary to call @ref put_eof when there will never be additional
257         data from the input.
258     */
259     bool
need_eof() const260     need_eof() const
261     {
262         return (f_ & flagNeedEOF) != 0;
263     }
264 
265     /** Set the limit on the payload body.
266 
267         This function sets the maximum allowed size of the payload body,
268         before any encodings except chunked have been removed. Depending
269         on the message semantics, one of these cases will apply:
270 
271         @li The Content-Length is specified and exceeds the limit. In
272         this case the result @ref error::body_limit is returned
273         immediately after the header is parsed.
274 
275         @li The Content-Length is unspecified and the chunked encoding
276         is not specified as the last encoding. In this case the end of
277         message is determined by the end of file indicator on the
278         associated stream or input source. If a sufficient number of
279         body payload octets are presented to the parser to exceed the
280         configured limit, the parse fails with the result
281         @ref error::body_limit
282 
283         @li The Transfer-Encoding specifies the chunked encoding as the
284         last encoding. In this case, when the number of payload body
285         octets produced by removing the chunked encoding  exceeds
286         the configured limit, the parse fails with the result
287         @ref error::body_limit.
288 
289         Setting the limit after any body octets have been parsed
290         results in undefined behavior.
291 
292         The default limit is 1MB for requests and 8MB for responses.
293 
294         @param v An optional integral value representing the body limit.
295         If this is equal to `boost::none`, then the body limit is disabled.
296     */
297     void
body_limit(boost::optional<std::uint64_t> v)298     body_limit(boost::optional<std::uint64_t> v)
299     {
300         body_limit_ = v;
301     }
302 
303     /** Set a limit on the total size of the header.
304 
305         This function sets the maximum allowed size of the header
306         including all field name, value, and delimiter characters
307         and also including the CRLF sequences in the serialized
308         input. If the end of the header is not found within the
309         limit of the header size, the error @ref error::header_limit
310         is returned by @ref put.
311 
312         Setting the limit after any header octets have been parsed
313         results in undefined behavior.
314     */
315     void
header_limit(std::uint32_t v)316     header_limit(std::uint32_t v)
317     {
318         header_limit_ = v;
319     }
320 
321     /// Returns `true` if the eager parse option is set.
322     bool
eager() const323     eager() const
324     {
325         return (f_ & flagEager) != 0;
326     }
327 
328     /** Set the eager parse option.
329 
330         Normally the parser returns after successfully parsing a structured
331         element (header, chunk header, or chunk body) even if there are octets
332         remaining in the input. This is necessary when attempting to parse the
333         header first, or when the caller wants to inspect information which may
334         be invalidated by subsequent parsing, such as a chunk extension. The
335         `eager` option controls whether the parser keeps going after parsing
336         structured element if there are octets remaining in the buffer and no
337         error occurs. This option is automatically set or cleared during certain
338         stream operations to improve performance with no change in functionality.
339 
340         The default setting is `false`.
341 
342         @param v `true` to set the eager parse option or `false` to disable it.
343     */
344     void
eager(bool v)345     eager(bool v)
346     {
347         if(v)
348             f_ |= flagEager;
349         else
350             f_ &= ~flagEager;
351     }
352 
353     /// Returns `true` if the skip parse option is set.
354     bool
skip() const355     skip() const
356     {
357         return (f_ & flagSkipBody) != 0;
358     }
359 
360     /** Set the skip parse option.
361 
362         This option controls whether or not the parser expects to see an HTTP
363         body, regardless of the presence or absence of certain fields such as
364         Content-Length or a chunked Transfer-Encoding. Depending on the request,
365         some responses do not carry a body. For example, a 200 response to a
366         CONNECT request from a tunneling proxy, or a response to a HEAD request.
367         In these cases, callers may use this function to inform the parser that
368         no body is expected. The parser will consider the message complete
369         after the header has been received.
370 
371         @param v `true` to set the skip body option or `false` to disable it.
372 
373         @note This function must be called before any bytes are processed.
374     */
375     void
376     skip(bool v);
377 
378     /** Write a buffer sequence to the parser.
379 
380         This function attempts to incrementally parse the HTTP
381         message data stored in the caller provided buffers. Upon
382         success, a positive return value indicates that the parser
383         made forward progress, consuming that number of
384         bytes.
385 
386         In some cases there may be an insufficient number of octets
387         in the input buffer in order to make forward progress. This
388         is indicated by the code @ref error::need_more. When
389         this happens, the caller should place additional bytes into
390         the buffer sequence and call @ref put again.
391 
392         The error code @ref error::need_more is special. When this
393         error is returned, a subsequent call to @ref put may succeed
394         if the buffers have been updated. Otherwise, upon error
395         the parser may not be restarted.
396 
397         @param buffers An object meeting the requirements of
398         <em>ConstBufferSequence</em> that represents the next chunk of
399         message data. If the length of this buffer sequence is
400         one, the implementation will not allocate additional memory.
401         The class @ref beast::basic_flat_buffer is provided as one way to
402         meet this requirement.
403 
404         @param ec Set to the error, if any occurred.
405 
406         @return The number of octets consumed in the buffer
407         sequence. The caller should remove these octets even if the
408         error is set.
409     */
410     template<class ConstBufferSequence>
411     std::size_t
412     put(ConstBufferSequence const& buffers, error_code& ec);
413 
414 #if ! BOOST_BEAST_DOXYGEN
415     std::size_t
416     put(net::const_buffer buffer,
417         error_code& ec);
418 #endif
419 
420     /** Inform the parser that the end of stream was reached.
421 
422         In certain cases, HTTP needs to know where the end of
423         the stream is. For example, sometimes servers send
424         responses without Content-Length and expect the client
425         to consume input (for the body) until EOF. Callbacks
426         and errors will still be processed as usual.
427 
428         This is typically called when a read from the
429         underlying stream object sets the error code to
430         `net::error::eof`.
431 
432         @note Only valid after parsing a complete header.
433 
434         @param ec Set to the error, if any occurred.
435     */
436     void
437     put_eof(error_code& ec);
438 
439 protected:
440     /** Called after receiving the request-line.
441 
442         This virtual function is invoked after receiving a request-line
443         when parsing HTTP requests.
444         It can only be called when `isRequest == true`.
445 
446         @param method The verb enumeration. If the method string is not
447         one of the predefined strings, this value will be @ref verb::unknown.
448 
449         @param method_str The unmodified string representing the verb.
450 
451         @param target The request-target.
452 
453         @param version The HTTP-version. This will be 10 for HTTP/1.0,
454         and 11 for HTTP/1.1.
455 
456         @param ec An output parameter which the function may set to indicate
457         an error. The error will be clear before this function is invoked.
458     */
459     virtual
460     void
461     on_request_impl(
462         verb method,
463         string_view method_str,
464         string_view target,
465         int version,
466         error_code& ec) = 0;
467 
468     /** Called after receiving the status-line.
469 
470         This virtual function is invoked after receiving a status-line
471         when parsing HTTP responses.
472         It can only be called when `isRequest == false`.
473 
474         @param code The numeric status code.
475 
476         @param reason The reason-phrase. Note that this value is
477         now obsolete, and only provided for historical or diagnostic
478         purposes.
479 
480         @param version The HTTP-version. This will be 10 for HTTP/1.0,
481         and 11 for HTTP/1.1.
482 
483         @param ec An output parameter which the function may set to indicate
484         an error. The error will be clear before this function is invoked.
485     */
486     virtual
487     void
488     on_response_impl(
489         int code,
490         string_view reason,
491         int version,
492         error_code& ec) = 0;
493 
494     /** Called once for each complete field in the HTTP header.
495 
496         This virtual function is invoked for each field that is received
497         while parsing an HTTP message.
498 
499         @param name The known field enum value. If the name of the field
500         is not recognized, this value will be @ref field::unknown.
501 
502         @param name_string The exact name of the field as received from
503         the input, represented as a string.
504 
505         @param value A string holding the value of the field.
506 
507         @param ec An output parameter which the function may set to indicate
508         an error. The error will be clear before this function is invoked.
509     */
510     virtual
511     void
512     on_field_impl(
513         field name,
514         string_view name_string,
515         string_view value,
516         error_code& ec) = 0;
517 
518     /** Called once after the complete HTTP header is received.
519 
520         This virtual function is invoked once, after the complete HTTP
521         header is received while parsing a message.
522 
523         @param ec An output parameter which the function may set to indicate
524         an error. The error will be clear before this function is invoked.
525     */
526     virtual
527     void
528     on_header_impl(error_code& ec) = 0;
529 
530     /** Called once before the body is processed.
531 
532         This virtual function is invoked once, before the content body is
533         processed (but after the complete header is received).
534 
535         @param content_length A value representing the content length in
536         bytes if the length is known (this can include a zero length).
537         Otherwise, the value will be `boost::none`.
538 
539         @param ec An output parameter which the function may set to indicate
540         an error. The error will be clear before this function is invoked.
541     */
542     virtual
543     void
544     on_body_init_impl(
545         boost::optional<std::uint64_t> const& content_length,
546         error_code& ec) = 0;
547 
548     /** Called each time additional data is received representing the content body.
549 
550         This virtual function is invoked for each piece of the body which is
551         received while parsing a message. This function is only used when
552         no chunked transfer encoding is present.
553 
554         @param body A string holding the additional body contents. This may
555         contain nulls or unprintable characters.
556 
557         @param ec An output parameter which the function may set to indicate
558         an error. The error will be clear before this function is invoked.
559 
560         @see on_chunk_body_impl
561     */
562     virtual
563     std::size_t
564     on_body_impl(
565         string_view body,
566         error_code& ec) = 0;
567 
568     /** Called each time a new chunk header of a chunk encoded body is received.
569 
570         This function is invoked each time a new chunk header is received.
571         The function is only used when the chunked transfer encoding is present.
572 
573         @param size The size of this chunk, in bytes.
574 
575         @param extensions A string containing the entire chunk extensions.
576         This may be empty, indicating no extensions are present.
577 
578         @param ec An output parameter which the function may set to indicate
579         an error. The error will be clear before this function is invoked.
580     */
581     virtual
582     void
583     on_chunk_header_impl(
584         std::uint64_t size,
585         string_view extensions,
586         error_code& ec) = 0;
587 
588     /** Called each time additional data is received representing part of a body chunk.
589 
590         This virtual function is invoked for each piece of the body which is
591         received while parsing a message. This function is only used when
592         the chunked transfer encoding is present.
593 
594         @param remain The number of bytes remaining in this chunk. This includes
595         the contents of passed `body`. If this value is zero, then this represents
596         the final chunk.
597 
598         @param body A string holding the additional body contents. This may
599         contain nulls or unprintable characters.
600 
601         @param ec An output parameter which the function may set to indicate
602         an error. The error will be clear before this function is invoked.
603 
604         @return This function should return the number of bytes actually consumed
605         from the `body` value. Any bytes that are not consumed on this call
606         will be presented in a subsequent call.
607 
608         @see on_body_impl
609     */
610     virtual
611     std::size_t
612     on_chunk_body_impl(
613         std::uint64_t remain,
614         string_view body,
615         error_code& ec) = 0;
616 
617     /** Called once when the complete message is received.
618 
619         This virtual function is invoked once, after successfully parsing
620         a complete HTTP message.
621 
622         @param ec An output parameter which the function may set to indicate
623         an error. The error will be clear before this function is invoked.
624     */
625     virtual
626     void
627     on_finish_impl(error_code& ec) = 0;
628 
629 private:
630 
631     boost::optional<std::uint64_t>
632     content_length_unchecked() const;
633 
634     template<class ConstBufferSequence>
635     std::size_t
636     put_from_stack(
637         std::size_t size,
638         ConstBufferSequence const& buffers,
639         error_code& ec);
640 
641     void
642     maybe_need_more(
643         char const* p, std::size_t n,
644             error_code& ec);
645 
646     void
647     parse_start_line(
648         char const*& p, char const* last,
649             error_code& ec, std::true_type);
650 
651     void
652     parse_start_line(
653         char const*& p, char const* last,
654             error_code& ec, std::false_type);
655 
656     void
657     parse_fields(
658         char const*& p, char const* last,
659             error_code& ec);
660 
661     void
662     finish_header(
663         error_code& ec, std::true_type);
664 
665     void
666     finish_header(
667         error_code& ec, std::false_type);
668 
669     void
670     parse_body(char const*& p,
671         std::size_t n, error_code& ec);
672 
673     void
674     parse_body_to_eof(char const*& p,
675         std::size_t n, error_code& ec);
676 
677     void
678     parse_chunk_header(char const*& p,
679         std::size_t n, error_code& ec);
680 
681     void
682     parse_chunk_body(char const*& p,
683         std::size_t n, error_code& ec);
684 
685     void
686     do_field(field f,
687         string_view value, error_code& ec);
688 };
689 
690 } // http
691 } // beast
692 } // boost
693 
694 #include <boost/beast/http/impl/basic_parser.hpp>
695 #ifdef BOOST_BEAST_HEADER_ONLY
696 #include <boost/beast/http/impl/basic_parser.ipp>
697 #endif
698 
699 #endif
700