• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright Joyent, Inc. and other Node contributors.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to
5  * deal in the Software without restriction, including without limitation the
6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE.
20  */
21 #include "http_parser.h"
22 #include <assert.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include <limits.h>
27 
/* Limit that COUNT_HEADER_SIZE() compares the running header byte count
 * against. Starts out as the compile-time HTTP_MAX_HEADER_SIZE.
 * NOTE(review): not const, so presumably adjustable at run time by code
 * outside this chunk -- confirm.
 */
static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;

/* Fallback for environments where <limits.h> does not provide ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif
33 
/* Smaller of two values. Either argument may be evaluated twice. */
#if !defined(MIN)
# define MIN(lhs, rhs) ((lhs) < (rhs) ? (lhs) : (rhs))
#endif

/* Number of elements in a true array (not valid for pointers). */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* 1 when bit number `pos` of the byte array `bytes` is set, 0 otherwise.
 * Byte index is pos / 8; bit index within the byte is pos % 8 (LSB first).
 */
#if !defined(BIT_AT)
# define BIT_AT(bytes, pos)                                          \
  ((((unsigned int) (bytes)[(unsigned int) (pos) >> 3]) &            \
    (1 << ((unsigned int) (pos) & 7))) != 0)
#endif

/* Bounds-checked array read: arr[idx] when idx is inside the array,
 * `fallback` otherwise. Negative indices wrap to large unsigned values and
 * therefore yield `fallback` as well.
 */
#if !defined(ELEM_AT)
# define ELEM_AT(arr, idx, fallback)                                 \
  ((unsigned int) (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : (fallback))
#endif
51 
/* Record error code `e` on the parser. The local byte counter `nread` is
 * written back to parser->nread first. Requires `parser` and `nread` to be
 * in scope at the expansion site.
 */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on the local variable `p_state`; these two
 * macros read and assign it.
 * NOTE(review): UPDATE_STATE already ends in ';', so `UPDATE_STATE(x);`
 * expands to two statements -- unsafe as the unbraced body of an
 * if/else.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) p_state = (enum state) (V);
/* Write the local `nread` and state back to the parser, then return V from
 * the enclosing function.
 * NOTE(review): the trailing ';' after `while (0)` defeats the usual
 * do/while(0) idiom (same if/else hazard as UPDATE_STATE).
 */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0);
/* Jump to the `reexecute` label, i.e. dispatch on the (possibly updated)
 * state again without advancing to the next input byte. The definition
 * ends with a line continuation onto the blank line below.
 */
#define REEXECUTE()                                                  \
  goto reexecute;                                                    \


/* Branch-prediction hints; plain pass-through on non-GCC compilers. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
77 
78 
/* Run the notify callback FOR, returning ER if it fails.
 *
 * When settings->on_FOR is set: the local state is published to
 * parser->state, the callback is invoked, a non-zero result is recorded as
 * HPE_CB_FOR, and the local state is reloaded from parser->state afterwards.
 * If the parser then carries any error code, the enclosing function returns
 * ER. Does nothing when the callback pointer is null.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* We either errored above or got paused; get out */             \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Run the notify callback FOR and consume the current byte
 * (on failure the enclosing function returns p - data + 1).
 */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte
 * (on failure the enclosing function returns p - data).
 */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Only acts when FOR_mark is non-NULL. The callback, if set, receives
 * (parser, FOR_mark, LEN); error handling mirrors CALLBACK_NOTIFY_. The mark
 * is reset to NULL afterwards, unless the macro already returned ER.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* We either errored above or got paused; get out */           \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Run the data callback FOR and consume the current byte.
 * The data handed over spans from FOR_mark up to, but excluding, `p`.
 */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR and don't consume the current byte */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR to the current position `p`; non-destructive if the mark
 * is already set.
 */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
142 
/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed max_header_size.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  max_header_size is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Adds V to the local 32-bit counter `nread`; once the counter exceeds
 * max_header_size the macro records HPE_HEADER_OVERFLOW and jumps to the
 * `error` label. Requires `nread`, `parser` and an `error` label at the
 * expansion site.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  nread += (uint32_t)(V);                                            \
  if (UNLIKELY(nread > max_header_size)) {                           \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
162 
163 
/* Lower-case spellings of header names and header values.
 * NOTE(review): presumably matched against lower-cased input by the header
 * state machine -- the matching code is not part of this chunk.
 */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"


/* One string per HTTP_METHOD_MAP entry, produced by stringifying the third
 * column of the map; entries appear in map order.
 */
static const char *method_strings[] =
  {
#define XX(num, name, string) #string,
  HTTP_METHOD_MAP(XX)
#undef XX
  };
180 
181 
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Lookup by unsigned byte value: 0 means "not a token character", any other
 * value is the character to use (upper-case letters map to lower case).
 * Only entries 0..127 are spelled out; entries 128..255 are implicitly 0.
 * The entry for SP (32) is ' ', so the table itself accepts a space;
 * STRICT_TOKEN() filters that case out before consulting the table.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
222 
223 
/* Hexadecimal digit value of a byte, indexed by its unsigned value:
 * '0'-'9' -> 0..9, 'A'-'F' / 'a'-'f' -> 10..15, everything else -> -1.
 *
 * All 256 entries are listed explicitly. Leaving the upper half to implicit
 * zero-initialisation would make every byte >= 0x80 look like the digit '0'
 * to callers that test the result against -1.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 .. 0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
234 
235 
/* T(v) marks bitmap bits that are only set in non-strict builds: it
 * evaluates to 0 under HTTP_PARSER_STRICT and to v otherwise.
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* 128-bit bitmap, one bit per ASCII code, marking the characters accepted
 * by IS_URL_CHAR(). Each initializer row is a single byte assembled by
 * OR-ing the bit values of the eight codes named in the comment above it
 * (code N lives in byte N / 8, bit N % 8 -- the layout BIT_AT() reads).
 * Cleared bits: all control characters, SP, '#', '?' and DEL; HT and
 * form feed (np) are set only in non-strict builds via T().
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T
278 
/* States of the main parser state machine.
 *
 * Enumerator order is significant: PARSING_HEADER() is an ordinal
 * comparison against s_headers_done, so every state declared up to and
 * including s_headers_done counts toward the header size limit and every
 * state after it does not.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start line: type not yet known, or response status line */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_http_major
  , s_res_http_dot
  , s_res_http_minor
  , s_res_http_end
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  , s_start_req

  /* request line; the s_req_schema .. s_req_fragment range is what
   * parse_url_char() transitions between */
  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_http_I
  , s_req_http_IC
  , s_req_http_major
  , s_req_http_dot
  , s_req_http_minor
  , s_req_http_end
  , s_req_line_almost_done

  /* header fields and values */
  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  /* chunk size line; placed before s_headers_done, so it is covered by
   * PARSING_HEADER() as well */
  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };
360 
361 
/* Non-zero while `state` is one of the states declared up to and including
 * s_headers_done in enum state, i.e. while input bytes still count toward
 * the header size limit. The argument is parenthesised so that compound
 * expressions (e.g. `a & b`) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
363 
364 
/* Sub-states for recognising specific header names and values.
 * NOTE(review): judging by the enumerator names these track the Connection,
 * Proxy-Connection, Content-Length, Transfer-Encoding and Upgrade headers;
 * the code that drives them is outside this chunk -- confirm there.
 */
enum header_states
  { h_general = 0
  , h_C
  , h_CO
  , h_CON

  , h_matching_connection
  , h_matching_proxy_connection
  , h_matching_content_length
  , h_matching_transfer_encoding
  , h_matching_upgrade

  , h_connection
  , h_content_length
  , h_content_length_num
  , h_content_length_ws
  , h_transfer_encoding
  , h_upgrade

  , h_matching_transfer_encoding_token_start
  , h_matching_transfer_encoding_chunked
  , h_matching_transfer_encoding_token

  , h_matching_connection_token_start
  , h_matching_connection_keep_alive
  , h_matching_connection_close
  , h_matching_connection_upgrade
  , h_matching_connection_token

  , h_transfer_encoding_chunked
  , h_connection_keep_alive
  , h_connection_close
  , h_connection_upgrade
  };

/* States for parsing the authority part of a URL (userinfo, host, IPv6
 * literal with optional zone, port). Numbering starts at 1 with the "dead"
 * state, like enum state.
 * NOTE(review): the consumer of these states is outside this chunk.
 */
enum http_host_state
  {
    s_http_host_dead = 1
  , s_http_userinfo_start
  , s_http_userinfo
  , s_http_host_start
  , s_http_host_v6_start
  , s_http_host
  , s_http_host_v6
  , s_http_host_v6_end
  , s_http_host_v6_zone_start
  , s_http_host_v6_zone
  , s_http_host_port_start
  , s_http_host_port
};
415 
/* Macros for character classes; some depend on strict-mode.
 *
 * Every macro parenthesises its argument and its expansion, so compound
 * argument expressions behave like a function call would. Arguments may be
 * evaluated more than once -- do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* tokens[] lookup that additionally rejects SP (the table accepts it). */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            (tokens[(unsigned char)(c)])
/* Non-strict: any byte with the high bit set is also a URL character. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and HT (9) are accepted as well.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* Initial state for a new message, chosen by parser->type; requires
 * `parser` in scope.
 */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
453 
454 
/* STRICT_CHECK(cond): in strict builds, when `cond` is true record
 * HPE_STRICT and jump to the `error` label; in non-strict builds it expands
 * to nothing, so `cond` is not evaluated at all.
 *
 * NEW_MESSAGE(): state to enter for the next message. Strict builds go to
 * s_dead unless http_should_keep_alive(parser) is true; non-strict builds
 * always restart at start_state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
468 
469 
/* Map errno values to strings for human-readable output.
 * One { name, description } entry is generated per HTTP_ERRNO_MAP row; the
 * name is the row's identifier with an "HPE_" prefix.
 */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static struct {
  const char *name;
  const char *description;
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN

/* Forward declaration; the definition is not part of this chunk. */
int http_message_needs_eof(const http_parser *parser);
481 
482 /* Our URL parser.
483  *
484  * This is designed to be shared by http_parser_execute() for URL validation,
485  * hence it has a state transition + byte-for-byte interface. In addition, it
486  * is meant to be embedded in http_parser_parse_url(), which does the dirty
487  * work of turning state transitions URL components for its API.
488  *
489  * This function should only be invoked with non-space characters. It is
490  * assumed that the caller cares about (and can detect) the transition between
491  * URL and non-URL states by looking for these.
492  */
493 static enum state
parse_url_char(enum state s,const char ch)494 parse_url_char(enum state s, const char ch)
495 {
496   if (ch == ' ' || ch == '\r' || ch == '\n') {
497     return s_dead;
498   }
499 
500 #if HTTP_PARSER_STRICT
501   if (ch == '\t' || ch == '\f') {
502     return s_dead;
503   }
504 #endif
505 
506   switch (s) {
507     case s_req_spaces_before_url:
508       /* Proxied requests are followed by scheme of an absolute URI (alpha).
509        * All methods except CONNECT are followed by '/' or '*'.
510        */
511 
512       if (ch == '/' || ch == '*') {
513         return s_req_path;
514       }
515 
516       if (IS_ALPHA(ch)) {
517         return s_req_schema;
518       }
519 
520       break;
521 
522     case s_req_schema:
523       if (IS_ALPHA(ch)) {
524         return s;
525       }
526 
527       if (ch == ':') {
528         return s_req_schema_slash;
529       }
530 
531       break;
532 
533     case s_req_schema_slash:
534       if (ch == '/') {
535         return s_req_schema_slash_slash;
536       }
537 
538       break;
539 
540     case s_req_schema_slash_slash:
541       if (ch == '/') {
542         return s_req_server_start;
543       }
544 
545       break;
546 
547     case s_req_server_with_at:
548       if (ch == '@') {
549         return s_dead;
550       }
551 
552     /* fall through */
553     case s_req_server_start:
554     case s_req_server:
555       if (ch == '/') {
556         return s_req_path;
557       }
558 
559       if (ch == '?') {
560         return s_req_query_string_start;
561       }
562 
563       if (ch == '@') {
564         return s_req_server_with_at;
565       }
566 
567       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
568         return s_req_server;
569       }
570 
571       break;
572 
573     case s_req_path:
574       if (IS_URL_CHAR(ch)) {
575         return s;
576       }
577 
578       switch (ch) {
579         case '?':
580           return s_req_query_string_start;
581 
582         case '#':
583           return s_req_fragment_start;
584       }
585 
586       break;
587 
588     case s_req_query_string_start:
589     case s_req_query_string:
590       if (IS_URL_CHAR(ch)) {
591         return s_req_query_string;
592       }
593 
594       switch (ch) {
595         case '?':
596           /* allow extra '?' in query string */
597           return s_req_query_string;
598 
599         case '#':
600           return s_req_fragment_start;
601       }
602 
603       break;
604 
605     case s_req_fragment_start:
606       if (IS_URL_CHAR(ch)) {
607         return s_req_fragment;
608       }
609 
610       switch (ch) {
611         case '?':
612           return s_req_fragment;
613 
614         case '#':
615           return s;
616       }
617 
618       break;
619 
620     case s_req_fragment:
621       if (IS_URL_CHAR(ch)) {
622         return s;
623       }
624 
625       switch (ch) {
626         case '?':
627         case '#':
628           return s;
629       }
630 
631       break;
632 
633     default:
634       break;
635   }
636 
637   /* We should never fall out of the switch above unless there's an error */
638   return s_dead;
639 }
640 
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)641 size_t http_parser_execute (http_parser *parser,
642                             const http_parser_settings *settings,
643                             const char *data,
644                             size_t len)
645 {
646   char c, ch;
647   int8_t unhex_val;
648   const char *p = data;
649   const char *header_field_mark = 0;
650   const char *header_value_mark = 0;
651   const char *url_mark = 0;
652   const char *body_mark = 0;
653   const char *status_mark = 0;
654   enum state p_state = (enum state) parser->state;
655   const unsigned int lenient = parser->lenient_http_headers;
656   const unsigned int allow_chunked_length = parser->allow_chunked_length;
657 
658   uint32_t nread = parser->nread;
659 
660   /* We're in an error state. Don't bother doing anything. */
661   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
662     return 0;
663   }
664 
665   if (len == 0) {
666     switch (CURRENT_STATE()) {
667       case s_body_identity_eof:
668         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
669          * we got paused.
670          */
671         CALLBACK_NOTIFY_NOADVANCE(message_complete);
672         return 0;
673 
674       case s_dead:
675       case s_start_req_or_res:
676       case s_start_res:
677       case s_start_req:
678         return 0;
679 
680       default:
681         SET_ERRNO(HPE_INVALID_EOF_STATE);
682         return 1;
683     }
684   }
685 
686 
687   if (CURRENT_STATE() == s_header_field)
688     header_field_mark = data;
689   if (CURRENT_STATE() == s_header_value)
690     header_value_mark = data;
691   switch (CURRENT_STATE()) {
692   case s_req_path:
693   case s_req_schema:
694   case s_req_schema_slash:
695   case s_req_schema_slash_slash:
696   case s_req_server_start:
697   case s_req_server:
698   case s_req_server_with_at:
699   case s_req_query_string_start:
700   case s_req_query_string:
701   case s_req_fragment_start:
702   case s_req_fragment:
703     url_mark = data;
704     break;
705   case s_res_status:
706     status_mark = data;
707     break;
708   default:
709     break;
710   }
711 
712   for (p=data; p != data + len; p++) {
713     ch = *p;
714 
715     if (PARSING_HEADER(CURRENT_STATE()))
716       COUNT_HEADER_SIZE(1);
717 
718 reexecute:
719     switch (CURRENT_STATE()) {
720 
721       case s_dead:
722         /* this state is used after a 'Connection: close' message
723          * the parser will error out if it reads another message
724          */
725         if (LIKELY(ch == CR || ch == LF))
726           break;
727 
728         SET_ERRNO(HPE_CLOSED_CONNECTION);
729         goto error;
730 
731       case s_start_req_or_res:
732       {
733         if (ch == CR || ch == LF)
734           break;
735         parser->flags = 0;
736         parser->uses_transfer_encoding = 0;
737         parser->content_length = ULLONG_MAX;
738 
739         if (ch == 'H') {
740           UPDATE_STATE(s_res_or_resp_H);
741 
742           CALLBACK_NOTIFY(message_begin);
743         } else {
744           parser->type = HTTP_REQUEST;
745           UPDATE_STATE(s_start_req);
746           REEXECUTE();
747         }
748 
749         break;
750       }
751 
752       case s_res_or_resp_H:
753         if (ch == 'T') {
754           parser->type = HTTP_RESPONSE;
755           UPDATE_STATE(s_res_HT);
756         } else {
757           if (UNLIKELY(ch != 'E')) {
758             SET_ERRNO(HPE_INVALID_CONSTANT);
759             goto error;
760           }
761 
762           parser->type = HTTP_REQUEST;
763           parser->method = HTTP_HEAD;
764           parser->index = 2;
765           UPDATE_STATE(s_req_method);
766         }
767         break;
768 
769       case s_start_res:
770       {
771         if (ch == CR || ch == LF)
772           break;
773         parser->flags = 0;
774         parser->uses_transfer_encoding = 0;
775         parser->content_length = ULLONG_MAX;
776 
777         if (ch == 'H') {
778           UPDATE_STATE(s_res_H);
779         } else {
780           SET_ERRNO(HPE_INVALID_CONSTANT);
781           goto error;
782         }
783 
784         CALLBACK_NOTIFY(message_begin);
785         break;
786       }
787 
788       case s_res_H:
789         STRICT_CHECK(ch != 'T');
790         UPDATE_STATE(s_res_HT);
791         break;
792 
793       case s_res_HT:
794         STRICT_CHECK(ch != 'T');
795         UPDATE_STATE(s_res_HTT);
796         break;
797 
798       case s_res_HTT:
799         STRICT_CHECK(ch != 'P');
800         UPDATE_STATE(s_res_HTTP);
801         break;
802 
803       case s_res_HTTP:
804         STRICT_CHECK(ch != '/');
805         UPDATE_STATE(s_res_http_major);
806         break;
807 
808       case s_res_http_major:
809         if (UNLIKELY(!IS_NUM(ch))) {
810           SET_ERRNO(HPE_INVALID_VERSION);
811           goto error;
812         }
813 
814         parser->http_major = ch - '0';
815         UPDATE_STATE(s_res_http_dot);
816         break;
817 
818       case s_res_http_dot:
819       {
820         if (UNLIKELY(ch != '.')) {
821           SET_ERRNO(HPE_INVALID_VERSION);
822           goto error;
823         }
824 
825         UPDATE_STATE(s_res_http_minor);
826         break;
827       }
828 
829       case s_res_http_minor:
830         if (UNLIKELY(!IS_NUM(ch))) {
831           SET_ERRNO(HPE_INVALID_VERSION);
832           goto error;
833         }
834 
835         parser->http_minor = ch - '0';
836         UPDATE_STATE(s_res_http_end);
837         break;
838 
839       case s_res_http_end:
840       {
841         if (UNLIKELY(ch != ' ')) {
842           SET_ERRNO(HPE_INVALID_VERSION);
843           goto error;
844         }
845 
846         UPDATE_STATE(s_res_first_status_code);
847         break;
848       }
849 
850       case s_res_first_status_code:
851       {
852         if (!IS_NUM(ch)) {
853           if (ch == ' ') {
854             break;
855           }
856 
857           SET_ERRNO(HPE_INVALID_STATUS);
858           goto error;
859         }
860         parser->status_code = ch - '0';
861         UPDATE_STATE(s_res_status_code);
862         break;
863       }
864 
865       case s_res_status_code:
866       {
867         if (!IS_NUM(ch)) {
868           switch (ch) {
869             case ' ':
870               UPDATE_STATE(s_res_status_start);
871               break;
872             case CR:
873             case LF:
874               UPDATE_STATE(s_res_status_start);
875               REEXECUTE();
876               break;
877             default:
878               SET_ERRNO(HPE_INVALID_STATUS);
879               goto error;
880           }
881           break;
882         }
883 
884         parser->status_code *= 10;
885         parser->status_code += ch - '0';
886 
887         if (UNLIKELY(parser->status_code > 999)) {
888           SET_ERRNO(HPE_INVALID_STATUS);
889           goto error;
890         }
891 
892         break;
893       }
894 
895       case s_res_status_start:
896       {
897         MARK(status);
898         UPDATE_STATE(s_res_status);
899         parser->index = 0;
900 
901         if (ch == CR || ch == LF)
902           REEXECUTE();
903 
904         break;
905       }
906 
907       case s_res_status:
908         if (ch == CR) {
909           UPDATE_STATE(s_res_line_almost_done);
910           CALLBACK_DATA(status);
911           break;
912         }
913 
914         if (ch == LF) {
915           UPDATE_STATE(s_header_field_start);
916           CALLBACK_DATA(status);
917           break;
918         }
919 
920         break;
921 
922       case s_res_line_almost_done:
923         STRICT_CHECK(ch != LF);
924         UPDATE_STATE(s_header_field_start);
925         break;
926 
927       case s_start_req:
928       {
929         if (ch == CR || ch == LF)
930           break;
931         parser->flags = 0;
932         parser->uses_transfer_encoding = 0;
933         parser->content_length = ULLONG_MAX;
934 
935         if (UNLIKELY(!IS_ALPHA(ch))) {
936           SET_ERRNO(HPE_INVALID_METHOD);
937           goto error;
938         }
939 
940         parser->method = (enum http_method) 0;
941         parser->index = 1;
942         switch (ch) {
943           case 'A': parser->method = HTTP_ACL; break;
944           case 'B': parser->method = HTTP_BIND; break;
945           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
946           case 'D': parser->method = HTTP_DELETE; break;
947           case 'G': parser->method = HTTP_GET; break;
948           case 'H': parser->method = HTTP_HEAD; break;
949           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
950           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
951           case 'N': parser->method = HTTP_NOTIFY; break;
952           case 'O': parser->method = HTTP_OPTIONS; break;
953           case 'P': parser->method = HTTP_POST;
954             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
955             break;
956           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
957           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
958           case 'T': parser->method = HTTP_TRACE; break;
959           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
960           default:
961             SET_ERRNO(HPE_INVALID_METHOD);
962             goto error;
963         }
964         UPDATE_STATE(s_req_method);
965 
966         CALLBACK_NOTIFY(message_begin);
967 
968         break;
969       }
970 
971       case s_req_method:
972       {
973         const char *matcher;
974         if (UNLIKELY(ch == '\0')) {
975           SET_ERRNO(HPE_INVALID_METHOD);
976           goto error;
977         }
978 
979         matcher = method_strings[parser->method];
980         if (ch == ' ' && matcher[parser->index] == '\0') {
981           UPDATE_STATE(s_req_spaces_before_url);
982         } else if (ch == matcher[parser->index]) {
983           ; /* nada */
984         } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
985 
986           switch (parser->method << 16 | parser->index << 8 | ch) {
987 #define XX(meth, pos, ch, new_meth) \
988             case (HTTP_##meth << 16 | pos << 8 | ch): \
989               parser->method = HTTP_##new_meth; break;
990 
991             XX(POST,      1, 'U', PUT)
992             XX(POST,      1, 'A', PATCH)
993             XX(POST,      1, 'R', PROPFIND)
994             XX(PUT,       2, 'R', PURGE)
995             XX(CONNECT,   1, 'H', CHECKOUT)
996             XX(CONNECT,   2, 'P', COPY)
997             XX(MKCOL,     1, 'O', MOVE)
998             XX(MKCOL,     1, 'E', MERGE)
999             XX(MKCOL,     1, '-', MSEARCH)
1000             XX(MKCOL,     2, 'A', MKACTIVITY)
1001             XX(MKCOL,     3, 'A', MKCALENDAR)
1002             XX(SUBSCRIBE, 1, 'E', SEARCH)
1003             XX(SUBSCRIBE, 1, 'O', SOURCE)
1004             XX(REPORT,    2, 'B', REBIND)
1005             XX(PROPFIND,  4, 'P', PROPPATCH)
1006             XX(LOCK,      1, 'I', LINK)
1007             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1008             XX(UNLOCK,    2, 'B', UNBIND)
1009             XX(UNLOCK,    3, 'I', UNLINK)
1010 #undef XX
1011             default:
1012               SET_ERRNO(HPE_INVALID_METHOD);
1013               goto error;
1014           }
1015         } else {
1016           SET_ERRNO(HPE_INVALID_METHOD);
1017           goto error;
1018         }
1019 
1020         ++parser->index;
1021         break;
1022       }
1023 
1024       case s_req_spaces_before_url:
1025       {
1026         if (ch == ' ') break;
1027 
1028         MARK(url);
1029         if (parser->method == HTTP_CONNECT) {
1030           UPDATE_STATE(s_req_server_start);
1031         }
1032 
1033         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1034         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1035           SET_ERRNO(HPE_INVALID_URL);
1036           goto error;
1037         }
1038 
1039         break;
1040       }
1041 
1042       case s_req_schema:
1043       case s_req_schema_slash:
1044       case s_req_schema_slash_slash:
1045       case s_req_server_start:
1046       {
1047         switch (ch) {
1048           /* No whitespace allowed here */
1049           case ' ':
1050           case CR:
1051           case LF:
1052             SET_ERRNO(HPE_INVALID_URL);
1053             goto error;
1054           default:
1055             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1056             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1057               SET_ERRNO(HPE_INVALID_URL);
1058               goto error;
1059             }
1060         }
1061 
1062         break;
1063       }
1064 
1065       case s_req_server:
1066       case s_req_server_with_at:
1067       case s_req_path:
1068       case s_req_query_string_start:
1069       case s_req_query_string:
1070       case s_req_fragment_start:
1071       case s_req_fragment:
1072       {
1073         switch (ch) {
1074           case ' ':
1075             UPDATE_STATE(s_req_http_start);
1076             CALLBACK_DATA(url);
1077             break;
1078           case CR:
1079           case LF:
1080             parser->http_major = 0;
1081             parser->http_minor = 9;
1082             UPDATE_STATE((ch == CR) ?
1083               s_req_line_almost_done :
1084               s_header_field_start);
1085             CALLBACK_DATA(url);
1086             break;
1087           default:
1088             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1090               SET_ERRNO(HPE_INVALID_URL);
1091               goto error;
1092             }
1093         }
1094         break;
1095       }
1096 
1097       case s_req_http_start:
1098         switch (ch) {
1099           case ' ':
1100             break;
1101           case 'H':
1102             UPDATE_STATE(s_req_http_H);
1103             break;
1104           case 'I':
1105             if (parser->method == HTTP_SOURCE) {
1106               UPDATE_STATE(s_req_http_I);
1107               break;
1108             }
1109             /* fall through */
1110           default:
1111             SET_ERRNO(HPE_INVALID_CONSTANT);
1112             goto error;
1113         }
1114         break;
1115 
1116       case s_req_http_H:
1117         STRICT_CHECK(ch != 'T');
1118         UPDATE_STATE(s_req_http_HT);
1119         break;
1120 
1121       case s_req_http_HT:
1122         STRICT_CHECK(ch != 'T');
1123         UPDATE_STATE(s_req_http_HTT);
1124         break;
1125 
1126       case s_req_http_HTT:
1127         STRICT_CHECK(ch != 'P');
1128         UPDATE_STATE(s_req_http_HTTP);
1129         break;
1130 
1131       case s_req_http_I:
1132         STRICT_CHECK(ch != 'C');
1133         UPDATE_STATE(s_req_http_IC);
1134         break;
1135 
1136       case s_req_http_IC:
1137         STRICT_CHECK(ch != 'E');
1138         UPDATE_STATE(s_req_http_HTTP);  /* Treat "ICE" as "HTTP". */
1139         break;
1140 
1141       case s_req_http_HTTP:
1142         STRICT_CHECK(ch != '/');
1143         UPDATE_STATE(s_req_http_major);
1144         break;
1145 
1146       case s_req_http_major:
1147         if (UNLIKELY(!IS_NUM(ch))) {
1148           SET_ERRNO(HPE_INVALID_VERSION);
1149           goto error;
1150         }
1151 
1152         parser->http_major = ch - '0';
1153         UPDATE_STATE(s_req_http_dot);
1154         break;
1155 
1156       case s_req_http_dot:
1157       {
1158         if (UNLIKELY(ch != '.')) {
1159           SET_ERRNO(HPE_INVALID_VERSION);
1160           goto error;
1161         }
1162 
1163         UPDATE_STATE(s_req_http_minor);
1164         break;
1165       }
1166 
1167       case s_req_http_minor:
1168         if (UNLIKELY(!IS_NUM(ch))) {
1169           SET_ERRNO(HPE_INVALID_VERSION);
1170           goto error;
1171         }
1172 
1173         parser->http_minor = ch - '0';
1174         UPDATE_STATE(s_req_http_end);
1175         break;
1176 
1177       case s_req_http_end:
1178       {
1179         if (ch == CR) {
1180           UPDATE_STATE(s_req_line_almost_done);
1181           break;
1182         }
1183 
1184         if (ch == LF) {
1185           UPDATE_STATE(s_header_field_start);
1186           break;
1187         }
1188 
1189         SET_ERRNO(HPE_INVALID_VERSION);
1190         goto error;
1191         break;
1192       }
1193 
1194       /* end of request line */
1195       case s_req_line_almost_done:
1196       {
1197         if (UNLIKELY(ch != LF)) {
1198           SET_ERRNO(HPE_LF_EXPECTED);
1199           goto error;
1200         }
1201 
1202         UPDATE_STATE(s_header_field_start);
1203         break;
1204       }
1205 
1206       case s_header_field_start:
1207       {
1208         if (ch == CR) {
1209           UPDATE_STATE(s_headers_almost_done);
1210           break;
1211         }
1212 
1213         if (ch == LF) {
1214           /* they might be just sending \n instead of \r\n so this would be
1215            * the second \n to denote the end of headers*/
1216           UPDATE_STATE(s_headers_almost_done);
1217           REEXECUTE();
1218         }
1219 
1220         c = TOKEN(ch);
1221 
1222         if (UNLIKELY(!c)) {
1223           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1224           goto error;
1225         }
1226 
1227         MARK(header_field);
1228 
1229         parser->index = 0;
1230         UPDATE_STATE(s_header_field);
1231 
1232         switch (c) {
1233           case 'c':
1234             parser->header_state = h_C;
1235             break;
1236 
1237           case 'p':
1238             parser->header_state = h_matching_proxy_connection;
1239             break;
1240 
1241           case 't':
1242             parser->header_state = h_matching_transfer_encoding;
1243             break;
1244 
1245           case 'u':
1246             parser->header_state = h_matching_upgrade;
1247             break;
1248 
1249           default:
1250             parser->header_state = h_general;
1251             break;
1252         }
1253         break;
1254       }
1255 
1256       case s_header_field:
1257       {
1258         const char* start = p;
1259         for (; p != data + len; p++) {
1260           ch = *p;
1261           c = TOKEN(ch);
1262 
1263           if (!c)
1264             break;
1265 
1266           switch (parser->header_state) {
1267             case h_general: {
1268               size_t left = data + len - p;
1269               const char* pe = p + MIN(left, max_header_size);
1270               while (p+1 < pe && TOKEN(p[1])) {
1271                 p++;
1272               }
1273               break;
1274             }
1275 
1276             case h_C:
1277               parser->index++;
1278               parser->header_state = (c == 'o' ? h_CO : h_general);
1279               break;
1280 
1281             case h_CO:
1282               parser->index++;
1283               parser->header_state = (c == 'n' ? h_CON : h_general);
1284               break;
1285 
1286             case h_CON:
1287               parser->index++;
1288               switch (c) {
1289                 case 'n':
1290                   parser->header_state = h_matching_connection;
1291                   break;
1292                 case 't':
1293                   parser->header_state = h_matching_content_length;
1294                   break;
1295                 default:
1296                   parser->header_state = h_general;
1297                   break;
1298               }
1299               break;
1300 
1301             /* connection */
1302 
1303             case h_matching_connection:
1304               parser->index++;
1305               if (parser->index > sizeof(CONNECTION)-1
1306                   || c != CONNECTION[parser->index]) {
1307                 parser->header_state = h_general;
1308               } else if (parser->index == sizeof(CONNECTION)-2) {
1309                 parser->header_state = h_connection;
1310               }
1311               break;
1312 
1313             /* proxy-connection */
1314 
1315             case h_matching_proxy_connection:
1316               parser->index++;
1317               if (parser->index > sizeof(PROXY_CONNECTION)-1
1318                   || c != PROXY_CONNECTION[parser->index]) {
1319                 parser->header_state = h_general;
1320               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1321                 parser->header_state = h_connection;
1322               }
1323               break;
1324 
1325             /* content-length */
1326 
1327             case h_matching_content_length:
1328               parser->index++;
1329               if (parser->index > sizeof(CONTENT_LENGTH)-1
1330                   || c != CONTENT_LENGTH[parser->index]) {
1331                 parser->header_state = h_general;
1332               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1333                 parser->header_state = h_content_length;
1334               }
1335               break;
1336 
1337             /* transfer-encoding */
1338 
1339             case h_matching_transfer_encoding:
1340               parser->index++;
1341               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1342                   || c != TRANSFER_ENCODING[parser->index]) {
1343                 parser->header_state = h_general;
1344               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1345                 parser->header_state = h_transfer_encoding;
1346                 parser->uses_transfer_encoding = 1;
1347 
1348                 /* Multiple `Transfer-Encoding` headers should be treated as
1349                  * one, but with values separated by a comma.
1350                  *
1351                  * See: https://tools.ietf.org/html/rfc7230#section-3.2.2
1352                  */
1353                 parser->flags &= ~F_CHUNKED;
1354               }
1355               break;
1356 
1357             /* upgrade */
1358 
1359             case h_matching_upgrade:
1360               parser->index++;
1361               if (parser->index > sizeof(UPGRADE)-1
1362                   || c != UPGRADE[parser->index]) {
1363                 parser->header_state = h_general;
1364               } else if (parser->index == sizeof(UPGRADE)-2) {
1365                 parser->header_state = h_upgrade;
1366               }
1367               break;
1368 
1369             case h_connection:
1370             case h_content_length:
1371             case h_transfer_encoding:
1372             case h_upgrade:
1373               if (ch != ' ') parser->header_state = h_general;
1374               break;
1375 
1376             default:
1377               assert(0 && "Unknown header_state");
1378               break;
1379           }
1380         }
1381 
1382         if (p == data + len) {
1383           --p;
1384           COUNT_HEADER_SIZE(p - start);
1385           break;
1386         }
1387 
1388         COUNT_HEADER_SIZE(p - start);
1389 
1390         if (ch == ':') {
1391           UPDATE_STATE(s_header_value_discard_ws);
1392           CALLBACK_DATA(header_field);
1393           break;
1394         }
1395 
1396         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1397         goto error;
1398       }
1399 
1400       case s_header_value_discard_ws:
1401         if (ch == ' ' || ch == '\t') break;
1402 
1403         if (ch == CR) {
1404           UPDATE_STATE(s_header_value_discard_ws_almost_done);
1405           break;
1406         }
1407 
1408         if (ch == LF) {
1409           UPDATE_STATE(s_header_value_discard_lws);
1410           break;
1411         }
1412 
1413         /* fall through */
1414 
1415       case s_header_value_start:
1416       {
1417         MARK(header_value);
1418 
1419         UPDATE_STATE(s_header_value);
1420         parser->index = 0;
1421 
1422         c = LOWER(ch);
1423 
1424         switch (parser->header_state) {
1425           case h_upgrade:
1426             parser->flags |= F_UPGRADE;
1427             parser->header_state = h_general;
1428             break;
1429 
1430           case h_transfer_encoding:
1431             /* looking for 'Transfer-Encoding: chunked' */
1432             if ('c' == c) {
1433               parser->header_state = h_matching_transfer_encoding_chunked;
1434             } else {
1435               parser->header_state = h_matching_transfer_encoding_token;
1436             }
1437             break;
1438 
1439           /* Multi-value `Transfer-Encoding` header */
1440           case h_matching_transfer_encoding_token_start:
1441             break;
1442 
1443           case h_content_length:
1444             if (UNLIKELY(!IS_NUM(ch))) {
1445               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446               goto error;
1447             }
1448 
1449             if (parser->flags & F_CONTENTLENGTH) {
1450               SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1451               goto error;
1452             }
1453 
1454             parser->flags |= F_CONTENTLENGTH;
1455             parser->content_length = ch - '0';
1456             parser->header_state = h_content_length_num;
1457             break;
1458 
1459           /* when obsolete line folding is encountered for content length
1460            * continue to the s_header_value state */
1461           case h_content_length_ws:
1462             break;
1463 
1464           case h_connection:
1465             /* looking for 'Connection: keep-alive' */
1466             if (c == 'k') {
1467               parser->header_state = h_matching_connection_keep_alive;
1468             /* looking for 'Connection: close' */
1469             } else if (c == 'c') {
1470               parser->header_state = h_matching_connection_close;
1471             } else if (c == 'u') {
1472               parser->header_state = h_matching_connection_upgrade;
1473             } else {
1474               parser->header_state = h_matching_connection_token;
1475             }
1476             break;
1477 
1478           /* Multi-value `Connection` header */
1479           case h_matching_connection_token_start:
1480             break;
1481 
1482           default:
1483             parser->header_state = h_general;
1484             break;
1485         }
1486         break;
1487       }
1488 
1489       case s_header_value:
1490       {
1491         const char* start = p;
1492         enum header_states h_state = (enum header_states) parser->header_state;
1493         for (; p != data + len; p++) {
1494           ch = *p;
1495           if (ch == CR) {
1496             UPDATE_STATE(s_header_almost_done);
1497             parser->header_state = h_state;
1498             CALLBACK_DATA(header_value);
1499             break;
1500           }
1501 
1502           if (ch == LF) {
1503             UPDATE_STATE(s_header_almost_done);
1504             COUNT_HEADER_SIZE(p - start);
1505             parser->header_state = h_state;
1506             CALLBACK_DATA_NOADVANCE(header_value);
1507             REEXECUTE();
1508           }
1509 
1510           if (!lenient && !IS_HEADER_CHAR(ch)) {
1511             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1512             goto error;
1513           }
1514 
1515           c = LOWER(ch);
1516 
1517           switch (h_state) {
1518             case h_general:
1519               {
1520                 size_t left = data + len - p;
1521                 const char* pe = p + MIN(left, max_header_size);
1522 
1523                 for (; p != pe; p++) {
1524                   ch = *p;
1525                   if (ch == CR || ch == LF) {
1526                     --p;
1527                     break;
1528                   }
1529                   if (!lenient && !IS_HEADER_CHAR(ch)) {
1530                     SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1531                     goto error;
1532                   }
1533                 }
1534                 if (p == data + len)
1535                   --p;
1536                 break;
1537               }
1538 
1539             case h_connection:
1540             case h_transfer_encoding:
1541               assert(0 && "Shouldn't get here.");
1542               break;
1543 
1544             case h_content_length:
1545               if (ch == ' ') break;
1546               h_state = h_content_length_num;
1547               /* fall through */
1548 
1549             case h_content_length_num:
1550             {
1551               uint64_t t;
1552 
1553               if (ch == ' ') {
1554                 h_state = h_content_length_ws;
1555                 break;
1556               }
1557 
1558               if (UNLIKELY(!IS_NUM(ch))) {
1559                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1560                 parser->header_state = h_state;
1561                 goto error;
1562               }
1563 
1564               t = parser->content_length;
1565               t *= 10;
1566               t += ch - '0';
1567 
1568               /* Overflow? Test against a conservative limit for simplicity. */
1569               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1570                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1571                 parser->header_state = h_state;
1572                 goto error;
1573               }
1574 
1575               parser->content_length = t;
1576               break;
1577             }
1578 
1579             case h_content_length_ws:
1580               if (ch == ' ') break;
1581               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1582               parser->header_state = h_state;
1583               goto error;
1584 
1585             /* Transfer-Encoding: chunked */
1586             case h_matching_transfer_encoding_token_start:
1587               /* looking for 'Transfer-Encoding: chunked' */
1588               if ('c' == c) {
1589                 h_state = h_matching_transfer_encoding_chunked;
1590               } else if (STRICT_TOKEN(c)) {
1591                 /* TODO(indutny): similar code below does this, but why?
1592                  * At the very least it seems to be inconsistent given that
1593                  * h_matching_transfer_encoding_token does not check for
1594                  * `STRICT_TOKEN`
1595                  */
1596                 h_state = h_matching_transfer_encoding_token;
1597               } else if (c == ' ' || c == '\t') {
1598                 /* Skip lws */
1599               } else {
1600                 h_state = h_general;
1601               }
1602               break;
1603 
1604             case h_matching_transfer_encoding_chunked:
1605               parser->index++;
1606               if (parser->index > sizeof(CHUNKED)-1
1607                   || c != CHUNKED[parser->index]) {
1608                 h_state = h_matching_transfer_encoding_token;
1609               } else if (parser->index == sizeof(CHUNKED)-2) {
1610                 h_state = h_transfer_encoding_chunked;
1611               }
1612               break;
1613 
1614             case h_matching_transfer_encoding_token:
1615               if (ch == ',') {
1616                 h_state = h_matching_transfer_encoding_token_start;
1617                 parser->index = 0;
1618               }
1619               break;
1620 
1621             case h_matching_connection_token_start:
1622               /* looking for 'Connection: keep-alive' */
1623               if (c == 'k') {
1624                 h_state = h_matching_connection_keep_alive;
1625               /* looking for 'Connection: close' */
1626               } else if (c == 'c') {
1627                 h_state = h_matching_connection_close;
1628               } else if (c == 'u') {
1629                 h_state = h_matching_connection_upgrade;
1630               } else if (STRICT_TOKEN(c)) {
1631                 h_state = h_matching_connection_token;
1632               } else if (c == ' ' || c == '\t') {
1633                 /* Skip lws */
1634               } else {
1635                 h_state = h_general;
1636               }
1637               break;
1638 
1639             /* looking for 'Connection: keep-alive' */
1640             case h_matching_connection_keep_alive:
1641               parser->index++;
1642               if (parser->index > sizeof(KEEP_ALIVE)-1
1643                   || c != KEEP_ALIVE[parser->index]) {
1644                 h_state = h_matching_connection_token;
1645               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1646                 h_state = h_connection_keep_alive;
1647               }
1648               break;
1649 
1650             /* looking for 'Connection: close' */
1651             case h_matching_connection_close:
1652               parser->index++;
1653               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1654                 h_state = h_matching_connection_token;
1655               } else if (parser->index == sizeof(CLOSE)-2) {
1656                 h_state = h_connection_close;
1657               }
1658               break;
1659 
1660             /* looking for 'Connection: upgrade' */
1661             case h_matching_connection_upgrade:
1662               parser->index++;
1663               if (parser->index > sizeof(UPGRADE) - 1 ||
1664                   c != UPGRADE[parser->index]) {
1665                 h_state = h_matching_connection_token;
1666               } else if (parser->index == sizeof(UPGRADE)-2) {
1667                 h_state = h_connection_upgrade;
1668               }
1669               break;
1670 
1671             case h_matching_connection_token:
1672               if (ch == ',') {
1673                 h_state = h_matching_connection_token_start;
1674                 parser->index = 0;
1675               }
1676               break;
1677 
1678             case h_transfer_encoding_chunked:
1679               if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1680               break;
1681 
1682             case h_connection_keep_alive:
1683             case h_connection_close:
1684             case h_connection_upgrade:
1685               if (ch == ',') {
1686                 if (h_state == h_connection_keep_alive) {
1687                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
1688                 } else if (h_state == h_connection_close) {
1689                   parser->flags |= F_CONNECTION_CLOSE;
1690                 } else if (h_state == h_connection_upgrade) {
1691                   parser->flags |= F_CONNECTION_UPGRADE;
1692                 }
1693                 h_state = h_matching_connection_token_start;
1694                 parser->index = 0;
1695               } else if (ch != ' ') {
1696                 h_state = h_matching_connection_token;
1697               }
1698               break;
1699 
1700             default:
1701               UPDATE_STATE(s_header_value);
1702               h_state = h_general;
1703               break;
1704           }
1705         }
1706         parser->header_state = h_state;
1707 
1708         if (p == data + len)
1709           --p;
1710 
1711         COUNT_HEADER_SIZE(p - start);
1712         break;
1713       }
1714 
1715       case s_header_almost_done:
1716       {
1717         if (UNLIKELY(ch != LF)) {
1718           SET_ERRNO(HPE_LF_EXPECTED);
1719           goto error;
1720         }
1721 
1722         UPDATE_STATE(s_header_value_lws);
1723         break;
1724       }
1725 
1726       case s_header_value_lws:
1727       {
1728         if (ch == ' ' || ch == '\t') {
1729           if (parser->header_state == h_content_length_num) {
1730               /* treat obsolete line folding as space */
1731               parser->header_state = h_content_length_ws;
1732           }
1733           UPDATE_STATE(s_header_value_start);
1734           REEXECUTE();
1735         }
1736 
1737         /* finished the header */
1738         switch (parser->header_state) {
1739           case h_connection_keep_alive:
1740             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1741             break;
1742           case h_connection_close:
1743             parser->flags |= F_CONNECTION_CLOSE;
1744             break;
1745           case h_transfer_encoding_chunked:
1746             parser->flags |= F_CHUNKED;
1747             break;
1748           case h_connection_upgrade:
1749             parser->flags |= F_CONNECTION_UPGRADE;
1750             break;
1751           default:
1752             break;
1753         }
1754 
1755         UPDATE_STATE(s_header_field_start);
1756         REEXECUTE();
1757       }
1758 
1759       case s_header_value_discard_ws_almost_done:
1760       {
1761         STRICT_CHECK(ch != LF);
1762         UPDATE_STATE(s_header_value_discard_lws);
1763         break;
1764       }
1765 
1766       case s_header_value_discard_lws:
1767       {
1768         if (ch == ' ' || ch == '\t') {
1769           UPDATE_STATE(s_header_value_discard_ws);
1770           break;
1771         } else {
1772           switch (parser->header_state) {
1773             case h_connection_keep_alive:
1774               parser->flags |= F_CONNECTION_KEEP_ALIVE;
1775               break;
1776             case h_connection_close:
1777               parser->flags |= F_CONNECTION_CLOSE;
1778               break;
1779             case h_connection_upgrade:
1780               parser->flags |= F_CONNECTION_UPGRADE;
1781               break;
1782             case h_transfer_encoding_chunked:
1783               parser->flags |= F_CHUNKED;
1784               break;
1785             case h_content_length:
1786               /* do not allow empty content length */
1787               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788               goto error;
1789               break;
1790             default:
1791               break;
1792           }
1793 
1794           /* header value was empty */
1795           MARK(header_value);
1796           UPDATE_STATE(s_header_field_start);
1797           CALLBACK_DATA_NOADVANCE(header_value);
1798           REEXECUTE();
1799         }
1800       }
1801 
1802       case s_headers_almost_done:
1803       {
1804         STRICT_CHECK(ch != LF);
1805 
1806         if (parser->flags & F_TRAILING) {
1807           /* End of a chunked request */
1808           UPDATE_STATE(s_message_done);
1809           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1810           REEXECUTE();
1811         }
1812 
1813         /* Cannot use transfer-encoding and a content-length header together
1814            per the HTTP specification. (RFC 7230 Section 3.3.3) */
1815         if ((parser->uses_transfer_encoding == 1) &&
1816             (parser->flags & F_CONTENTLENGTH)) {
1817           /* With `chunked` Transfer-Encoding, allow it only when
1818            * allow_chunked_length is set; otherwise only when lenient.
1819            */
1820           if (parser->flags & F_CHUNKED) {
1821             if (!allow_chunked_length) {
1822               SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1823               goto error;
1824             }
1825           } else if (!lenient) {
1826             SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1827             goto error;
1828           }
1829         }
1830 
1831         UPDATE_STATE(s_headers_done);
1832 
1833         /* Set this here so that on_headers_complete() callbacks can see it */
1834         if ((parser->flags & F_UPGRADE) &&
1835             (parser->flags & F_CONNECTION_UPGRADE)) {
1836           /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1837            * mandatory only when it is a 101 Switching Protocols response,
1838            * otherwise it is purely informational, to announce support.
1839            */
1840           parser->upgrade =
1841               (parser->type == HTTP_REQUEST || parser->status_code == 101);
1842         } else {
1843           parser->upgrade = (parser->method == HTTP_CONNECT);
1844         }
1845 
1846         /* Here we call the headers_complete callback. This is somewhat
1847          * different than other callbacks because if the user returns 1, we
1848          * will interpret that as saying that this message has no body. This
1849          * is needed for the annoying case of receiving a response to a HEAD
1850          * request.
1851          *
1852          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1853          * we have to simulate it by handling a change in errno below.
1854          */
1855         if (settings->on_headers_complete) {
1856           switch (settings->on_headers_complete(parser)) {
1857             case 0:
1858               break;
1859 
1860             case 2:
1861               parser->upgrade = 1;
1862 
1863               /* fall through */
1864             case 1:
1865               parser->flags |= F_SKIPBODY;
1866               break;
1867 
1868             default:
1869               SET_ERRNO(HPE_CB_headers_complete);
1870               RETURN(p - data); /* Error */
1871           }
1872         }
1873 
1874         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1875           RETURN(p - data);
1876         }
1877 
1878         REEXECUTE();
1879       }
1880 
1881       case s_headers_done:
1882       {
1883         int hasBody;
1884         STRICT_CHECK(ch != LF);
1885 
1886         parser->nread = 0;
1887         nread = 0;
1888 
1889         hasBody = parser->flags & F_CHUNKED ||
1890           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1891         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1892                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
1893           /* Exit, the rest of the message is in a different protocol. */
1894           UPDATE_STATE(NEW_MESSAGE());
1895           CALLBACK_NOTIFY(message_complete);
1896           RETURN((p - data) + 1);
1897         }
1898 
1899         if (parser->flags & F_SKIPBODY) {
1900           UPDATE_STATE(NEW_MESSAGE());
1901           CALLBACK_NOTIFY(message_complete);
1902         } else if (parser->flags & F_CHUNKED) {
1903           /* chunked encoding - ignore Content-Length header,
1904            * prepare for a chunk */
1905           UPDATE_STATE(s_chunk_size_start);
1906         } else if (parser->uses_transfer_encoding == 1) {
1907           if (parser->type == HTTP_REQUEST && !lenient) {
1908             /* RFC 7230 3.3.3 */
1909 
1910             /* If a Transfer-Encoding header field
1911              * is present in a request and the chunked transfer coding is not
1912              * the final encoding, the message body length cannot be determined
1913              * reliably; the server MUST respond with the 400 (Bad Request)
1914              * status code and then close the connection.
1915              */
1916             SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1917             RETURN(p - data); /* Error */
1918           } else {
1919             /* RFC 7230 3.3.3 */
1920 
1921             /* If a Transfer-Encoding header field is present in a response and
1922              * the chunked transfer coding is not the final encoding, the
1923              * message body length is determined by reading the connection until
1924              * it is closed by the server.
1925              */
1926             UPDATE_STATE(s_body_identity_eof);
1927           }
1928         } else {
1929           if (parser->content_length == 0) {
1930             /* Content-Length header given but zero: Content-Length: 0\r\n */
1931             UPDATE_STATE(NEW_MESSAGE());
1932             CALLBACK_NOTIFY(message_complete);
1933           } else if (parser->content_length != ULLONG_MAX) {
1934             /* Content-Length header given and non-zero */
1935             UPDATE_STATE(s_body_identity);
1936           } else {
1937             if (!http_message_needs_eof(parser)) {
1938               /* Assume content-length 0 - read the next */
1939               UPDATE_STATE(NEW_MESSAGE());
1940               CALLBACK_NOTIFY(message_complete);
1941             } else {
1942               /* Read body until EOF */
1943               UPDATE_STATE(s_body_identity_eof);
1944             }
1945           }
1946         }
1947 
1948         break;
1949       }
1950 
1951       case s_body_identity:
1952       {
1953         uint64_t to_read = MIN(parser->content_length,
1954                                (uint64_t) ((data + len) - p));
1955 
1956         assert(parser->content_length != 0
1957             && parser->content_length != ULLONG_MAX);
1958 
1959         /* The difference between advancing content_length and p is because
1960          * the latter will automatically advance on the next loop iteration.
1961          * Further, if content_length ends up at 0, we want to see the last
1962          * byte again for our message complete callback.
1963          */
1964         MARK(body);
1965         parser->content_length -= to_read;
1966         p += to_read - 1;
1967 
1968         if (parser->content_length == 0) {
1969           UPDATE_STATE(s_message_done);
1970 
1971           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1972            *
1973            * The alternative to doing this is to wait for the next byte to
1974            * trigger the data callback, just as in every other case. The
1975            * problem with this is that this makes it difficult for the test
1976            * harness to distinguish between complete-on-EOF and
1977            * complete-on-length. It's not clear that this distinction is
1978            * important for applications, but let's keep it for now.
1979            */
1980           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1981           REEXECUTE();
1982         }
1983 
1984         break;
1985       }
1986 
1987       /* read until EOF */
1988       case s_body_identity_eof:
1989         MARK(body);
1990         p = data + len - 1;
1991 
1992         break;
1993 
1994       case s_message_done:
1995         UPDATE_STATE(NEW_MESSAGE());
1996         CALLBACK_NOTIFY(message_complete);
1997         if (parser->upgrade) {
1998           /* Exit, the rest of the message is in a different protocol. */
1999           RETURN((p - data) + 1);
2000         }
2001         break;
2002 
2003       case s_chunk_size_start:
2004       {
2005         assert(nread == 1);
2006         assert(parser->flags & F_CHUNKED);
2007 
2008         unhex_val = unhex[(unsigned char)ch];
2009         if (UNLIKELY(unhex_val == -1)) {
2010           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2011           goto error;
2012         }
2013 
2014         parser->content_length = unhex_val;
2015         UPDATE_STATE(s_chunk_size);
2016         break;
2017       }
2018 
2019       case s_chunk_size:
2020       {
2021         uint64_t t;
2022 
2023         assert(parser->flags & F_CHUNKED);
2024 
2025         if (ch == CR) {
2026           UPDATE_STATE(s_chunk_size_almost_done);
2027           break;
2028         }
2029 
2030         unhex_val = unhex[(unsigned char)ch];
2031 
2032         if (unhex_val == -1) {
2033           if (ch == ';' || ch == ' ') {
2034             UPDATE_STATE(s_chunk_parameters);
2035             break;
2036           }
2037 
2038           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2039           goto error;
2040         }
2041 
2042         t = parser->content_length;
2043         t *= 16;
2044         t += unhex_val;
2045 
2046         /* Overflow? Test against a conservative limit for simplicity. */
2047         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2048           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2049           goto error;
2050         }
2051 
2052         parser->content_length = t;
2053         break;
2054       }
2055 
2056       case s_chunk_parameters:
2057       {
2058         assert(parser->flags & F_CHUNKED);
2059         /* just ignore this shit. TODO check for overflow */
2060         if (ch == CR) {
2061           UPDATE_STATE(s_chunk_size_almost_done);
2062           break;
2063         }
2064         break;
2065       }
2066 
2067       case s_chunk_size_almost_done:
2068       {
2069         assert(parser->flags & F_CHUNKED);
2070         STRICT_CHECK(ch != LF);
2071 
2072         parser->nread = 0;
2073         nread = 0;
2074 
2075         if (parser->content_length == 0) {
2076           parser->flags |= F_TRAILING;
2077           UPDATE_STATE(s_header_field_start);
2078         } else {
2079           UPDATE_STATE(s_chunk_data);
2080         }
2081         CALLBACK_NOTIFY(chunk_header);
2082         break;
2083       }
2084 
2085       case s_chunk_data:
2086       {
2087         uint64_t to_read = MIN(parser->content_length,
2088                                (uint64_t) ((data + len) - p));
2089 
2090         assert(parser->flags & F_CHUNKED);
2091         assert(parser->content_length != 0
2092             && parser->content_length != ULLONG_MAX);
2093 
2094         /* See the explanation in s_body_identity for why the content
2095          * length and data pointers are managed this way.
2096          */
2097         MARK(body);
2098         parser->content_length -= to_read;
2099         p += to_read - 1;
2100 
2101         if (parser->content_length == 0) {
2102           UPDATE_STATE(s_chunk_data_almost_done);
2103         }
2104 
2105         break;
2106       }
2107 
2108       case s_chunk_data_almost_done:
2109         assert(parser->flags & F_CHUNKED);
2110         assert(parser->content_length == 0);
2111         STRICT_CHECK(ch != CR);
2112         UPDATE_STATE(s_chunk_data_done);
2113         CALLBACK_DATA(body);
2114         break;
2115 
2116       case s_chunk_data_done:
2117         assert(parser->flags & F_CHUNKED);
2118         STRICT_CHECK(ch != LF);
2119         parser->nread = 0;
2120         nread = 0;
2121         UPDATE_STATE(s_chunk_size_start);
2122         CALLBACK_NOTIFY(chunk_complete);
2123         break;
2124 
2125       default:
2126         assert(0 && "unhandled state");
2127         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2128         goto error;
2129     }
2130   }
2131 
2132   /* Run callbacks for any marks that we have leftover after we ran out of
2133    * bytes. There should be at most one of these set, so it's OK to invoke
2134    * them in series (unset marks will not result in callbacks).
2135    *
2136    * We use the NOADVANCE() variety of callbacks here because 'p' has already
2137    * overflowed 'data' and this allows us to correct for the off-by-one that
2138    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2139    * value that's in-bounds).
2140    */
2141 
2142   assert(((header_field_mark ? 1 : 0) +
2143           (header_value_mark ? 1 : 0) +
2144           (url_mark ? 1 : 0)  +
2145           (body_mark ? 1 : 0) +
2146           (status_mark ? 1 : 0)) <= 1);
2147 
2148   CALLBACK_DATA_NOADVANCE(header_field);
2149   CALLBACK_DATA_NOADVANCE(header_value);
2150   CALLBACK_DATA_NOADVANCE(url);
2151   CALLBACK_DATA_NOADVANCE(body);
2152   CALLBACK_DATA_NOADVANCE(status);
2153 
2154   RETURN(len);
2155 
2156 error:
2157   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2158     SET_ERRNO(HPE_UNKNOWN);
2159   }
2160 
2161   RETURN(p - data);
2162 }
2163 
2164 
2165 /* Does the parser need to see an EOF to find the end of the message? */
2166 int
http_message_needs_eof(const http_parser * parser)2167 http_message_needs_eof (const http_parser *parser)
2168 {
2169   if (parser->type == HTTP_REQUEST) {
2170     return 0;
2171   }
2172 
2173   /* See RFC 2616 section 4.4 */
2174   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2175       parser->status_code == 204 ||     /* No Content */
2176       parser->status_code == 304 ||     /* Not Modified */
2177       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2178     return 0;
2179   }
2180 
2181   /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2182   if ((parser->uses_transfer_encoding == 1) &&
2183       (parser->flags & F_CHUNKED) == 0) {
2184     return 1;
2185   }
2186 
2187   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2188     return 0;
2189   }
2190 
2191   return 1;
2192 }
2193 
2194 
2195 int
http_should_keep_alive(const http_parser * parser)2196 http_should_keep_alive (const http_parser *parser)
2197 {
2198   if (parser->http_major > 0 && parser->http_minor > 0) {
2199     /* HTTP/1.1 */
2200     if (parser->flags & F_CONNECTION_CLOSE) {
2201       return 0;
2202     }
2203   } else {
2204     /* HTTP/1.0 or earlier */
2205     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2206       return 0;
2207     }
2208   }
2209 
2210   return !http_message_needs_eof(parser);
2211 }
2212 
2213 
2214 const char *
http_method_str(enum http_method m)2215 http_method_str (enum http_method m)
2216 {
2217   return ELEM_AT(method_strings, m, "<unknown>");
2218 }
2219 
2220 const char *
http_status_str(enum http_status s)2221 http_status_str (enum http_status s)
2222 {
2223   switch (s) {
2224 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2225     HTTP_STATUS_MAP(XX)
2226 #undef XX
2227     default: return "<unknown>";
2228   }
2229 }
2230 
2231 void
http_parser_init(http_parser * parser,enum http_parser_type t)2232 http_parser_init (http_parser *parser, enum http_parser_type t)
2233 {
2234   void *data = parser->data; /* preserve application data */
2235   memset(parser, 0, sizeof(*parser));
2236   parser->data = data;
2237   parser->type = t;
2238   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2239   parser->http_errno = HPE_OK;
2240 }
2241 
2242 void
http_parser_settings_init(http_parser_settings * settings)2243 http_parser_settings_init(http_parser_settings *settings)
2244 {
2245   memset(settings, 0, sizeof(*settings));
2246 }
2247 
2248 const char *
http_errno_name(enum http_errno err)2249 http_errno_name(enum http_errno err) {
2250   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2251   return http_strerror_tab[err].name;
2252 }
2253 
2254 const char *
http_errno_description(enum http_errno err)2255 http_errno_description(enum http_errno err) {
2256   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2257   return http_strerror_tab[err].description;
2258 }
2259 
2260 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2261 http_parse_host_char(enum http_host_state s, const char ch) {
2262   switch(s) {
2263     case s_http_userinfo:
2264     case s_http_userinfo_start:
2265       if (ch == '@') {
2266         return s_http_host_start;
2267       }
2268 
2269       if (IS_USERINFO_CHAR(ch)) {
2270         return s_http_userinfo;
2271       }
2272       break;
2273 
2274     case s_http_host_start:
2275       if (ch == '[') {
2276         return s_http_host_v6_start;
2277       }
2278 
2279       if (IS_HOST_CHAR(ch)) {
2280         return s_http_host;
2281       }
2282 
2283       break;
2284 
2285     case s_http_host:
2286       if (IS_HOST_CHAR(ch)) {
2287         return s_http_host;
2288       }
2289 
2290     /* fall through */
2291     case s_http_host_v6_end:
2292       if (ch == ':') {
2293         return s_http_host_port_start;
2294       }
2295 
2296       break;
2297 
2298     case s_http_host_v6:
2299       if (ch == ']') {
2300         return s_http_host_v6_end;
2301       }
2302 
2303     /* fall through */
2304     case s_http_host_v6_start:
2305       if (IS_HEX(ch) || ch == ':' || ch == '.') {
2306         return s_http_host_v6;
2307       }
2308 
2309       if (s == s_http_host_v6 && ch == '%') {
2310         return s_http_host_v6_zone_start;
2311       }
2312       break;
2313 
2314     case s_http_host_v6_zone:
2315       if (ch == ']') {
2316         return s_http_host_v6_end;
2317       }
2318 
2319     /* fall through */
2320     case s_http_host_v6_zone_start:
2321       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2322       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2323           ch == '~') {
2324         return s_http_host_v6_zone;
2325       }
2326       break;
2327 
2328     case s_http_host_port:
2329     case s_http_host_port_start:
2330       if (IS_NUM(ch)) {
2331         return s_http_host_port;
2332       }
2333 
2334       break;
2335 
2336     default:
2337       break;
2338   }
2339   return s_http_host_dead;
2340 }
2341 
2342 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2343 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2344   enum http_host_state s;
2345 
2346   const char *p;
2347   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2348 
2349   assert(u->field_set & (1 << UF_HOST));
2350 
2351   u->field_data[UF_HOST].len = 0;
2352 
2353   s = found_at ? s_http_userinfo_start : s_http_host_start;
2354 
2355   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2356     enum http_host_state new_s = http_parse_host_char(s, *p);
2357 
2358     if (new_s == s_http_host_dead) {
2359       return 1;
2360     }
2361 
2362     switch(new_s) {
2363       case s_http_host:
2364         if (s != s_http_host) {
2365           u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2366         }
2367         u->field_data[UF_HOST].len++;
2368         break;
2369 
2370       case s_http_host_v6:
2371         if (s != s_http_host_v6) {
2372           u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2373         }
2374         u->field_data[UF_HOST].len++;
2375         break;
2376 
2377       case s_http_host_v6_zone_start:
2378       case s_http_host_v6_zone:
2379         u->field_data[UF_HOST].len++;
2380         break;
2381 
2382       case s_http_host_port:
2383         if (s != s_http_host_port) {
2384           u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2385           u->field_data[UF_PORT].len = 0;
2386           u->field_set |= (1 << UF_PORT);
2387         }
2388         u->field_data[UF_PORT].len++;
2389         break;
2390 
2391       case s_http_userinfo:
2392         if (s != s_http_userinfo) {
2393           u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2394           u->field_data[UF_USERINFO].len = 0;
2395           u->field_set |= (1 << UF_USERINFO);
2396         }
2397         u->field_data[UF_USERINFO].len++;
2398         break;
2399 
2400       default:
2401         break;
2402     }
2403     s = new_s;
2404   }
2405 
2406   /* Make sure we don't end somewhere unexpected */
2407   switch (s) {
2408     case s_http_host_start:
2409     case s_http_host_v6_start:
2410     case s_http_host_v6:
2411     case s_http_host_v6_zone_start:
2412     case s_http_host_v6_zone:
2413     case s_http_host_port_start:
2414     case s_http_userinfo:
2415     case s_http_userinfo_start:
2416       return 1;
2417     default:
2418       break;
2419   }
2420 
2421   return 0;
2422 }
2423 
2424 void
http_parser_url_init(struct http_parser_url * u)2425 http_parser_url_init(struct http_parser_url *u) {
2426   memset(u, 0, sizeof(*u));
2427 }
2428 
2429 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2430 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2431                       struct http_parser_url *u)
2432 {
2433   enum state s;
2434   const char *p;
2435   enum http_parser_url_fields uf, old_uf;
2436   int found_at = 0;
2437 
2438   if (buflen == 0) {
2439     return 1;
2440   }
2441 
2442   u->port = u->field_set = 0;
2443   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2444   old_uf = UF_MAX;
2445 
2446   for (p = buf; p < buf + buflen; p++) {
2447     s = parse_url_char(s, *p);
2448 
2449     /* Figure out the next field that we're operating on */
2450     switch (s) {
2451       case s_dead:
2452         return 1;
2453 
2454       /* Skip delimeters */
2455       case s_req_schema_slash:
2456       case s_req_schema_slash_slash:
2457       case s_req_server_start:
2458       case s_req_query_string_start:
2459       case s_req_fragment_start:
2460         continue;
2461 
2462       case s_req_schema:
2463         uf = UF_SCHEMA;
2464         break;
2465 
2466       case s_req_server_with_at:
2467         found_at = 1;
2468 
2469       /* fall through */
2470       case s_req_server:
2471         uf = UF_HOST;
2472         break;
2473 
2474       case s_req_path:
2475         uf = UF_PATH;
2476         break;
2477 
2478       case s_req_query_string:
2479         uf = UF_QUERY;
2480         break;
2481 
2482       case s_req_fragment:
2483         uf = UF_FRAGMENT;
2484         break;
2485 
2486       default:
2487         assert(!"Unexpected state");
2488         return 1;
2489     }
2490 
2491     /* Nothing's changed; soldier on */
2492     if (uf == old_uf) {
2493       u->field_data[uf].len++;
2494       continue;
2495     }
2496 
2497     u->field_data[uf].off = (uint16_t)(p - buf);
2498     u->field_data[uf].len = 1;
2499 
2500     u->field_set |= (1 << uf);
2501     old_uf = uf;
2502   }
2503 
2504   /* host must be present if there is a schema */
2505   /* parsing http:///toto will fail */
2506   if ((u->field_set & (1 << UF_SCHEMA)) &&
2507       (u->field_set & (1 << UF_HOST)) == 0) {
2508     return 1;
2509   }
2510 
2511   if (u->field_set & (1 << UF_HOST)) {
2512     if (http_parse_host(buf, u, found_at) != 0) {
2513       return 1;
2514     }
2515   }
2516 
2517   /* CONNECT requests can only contain "hostname:port" */
2518   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2519     return 1;
2520   }
2521 
2522   if (u->field_set & (1 << UF_PORT)) {
2523     uint16_t off;
2524     uint16_t len;
2525     const char* p;
2526     const char* end;
2527     unsigned long v;
2528 
2529     off = u->field_data[UF_PORT].off;
2530     len = u->field_data[UF_PORT].len;
2531     end = buf + off + len;
2532 
2533     /* NOTE: The characters are already validated and are in the [0-9] range */
2534     assert((size_t) (off + len) <= buflen && "Port number overflow");
2535     v = 0;
2536     for (p = buf + off; p < end; p++) {
2537       v *= 10;
2538       v += *p - '0';
2539 
2540       /* Ports have a max value of 2^16 */
2541       if (v > 0xffff) {
2542         return 1;
2543       }
2544     }
2545 
2546     u->port = (uint16_t) v;
2547   }
2548 
2549   return 0;
2550 }
2551 
2552 void
http_parser_pause(http_parser * parser,int paused)2553 http_parser_pause(http_parser *parser, int paused) {
2554   /* Users should only be pausing/unpausing a parser that is not in an error
2555    * state. In non-debug builds, there's not much that we can do about this
2556    * other than ignore it.
2557    */
2558   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2559       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2560     uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2561     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2562   } else {
2563     assert(0 && "Attempting to pause parser in error state");
2564   }
2565 }
2566 
2567 int
http_body_is_final(const struct http_parser * parser)2568 http_body_is_final(const struct http_parser *parser) {
2569     return parser->state == s_message_done;
2570 }
2571 
2572 unsigned long
http_parser_version(void)2573 http_parser_version(void) {
2574   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2575          HTTP_PARSER_VERSION_MINOR * 0x00100 |
2576          HTTP_PARSER_VERSION_PATCH * 0x00001;
2577 }
2578 
2579 void
http_parser_set_max_header_size(uint32_t size)2580 http_parser_set_max_header_size(uint32_t size) {
2581   max_header_size = size;
2582 }
2583