• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2  *
3  * Additional changes are licensed under the same terms as NGINX and
4  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h>
31 
/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Number of elements in the array `a` (sizeof-based, so `a` must be a real
 * array and not a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Test bit `i` of the bitmap `a`, which packs eight bits per element:
 * element i >> 3, bit i & 7.  Evaluates to 0 or 1. */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* a[i] when `i` is inside the bounds of the array `a`, otherwise the
 * default value `v`. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
53 
/* Record the error code `e` on the parser.  Expects `parser` in scope. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on a local copy of the state (`p_state`).
 *
 * None of the macros below carries a trailing semicolon in its expansion:
 * the call site supplies it.  That keeps `if (x) UPDATE_STATE(y); else ...`
 * well-formed instead of producing an empty statement that orphans the
 * `else`.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) (p_state = (enum state) (V))

/* Write the local state back to the parser and return V from the
 * enclosing function. */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0)

/* Process the current byte again under the (possibly updated) state by
 * jumping to the `reexecute` label of the enclosing function. */
#define REEXECUTE()                                                  \
  goto reexecute


/* Branch-prediction hints; plain pass-through on non-GNU compilers. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
77 
78 
/* Run the notify callback FOR, returning ER if it fails.
 *
 * Expects `parser`, `settings` and `p_state` in scope.  The local state is
 * written to parser->state before the callback is invoked and read back
 * afterwards, so a state stored on the parser while the callback runs is
 * picked up.  A non-zero callback result is recorded as HPE_CB_<FOR>.  If
 * the parser's errno is anything but HPE_OK after the callback, the
 * enclosing function returns ER.  Nothing happens when no callback is
 * registered.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* We either errored above or got paused; get out */             \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Run the notify callback FOR and consume the current byte
 * (on failure the reported byte count includes the byte at `p`). */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte
 * (on failure the reported byte count stops just before `p`). */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Does nothing unless the mark `FOR_mark` is set.  When it is set and a
 * callback is registered, the callback receives the mark as the start of
 * the data and LEN as its length; state hand-over and error handling are
 * the same as in CALLBACK_NOTIFY_.  The mark is reset to NULL afterwards,
 * except on the error path, which returns ER first.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* We either errored above or got paused; get out */           \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Run the data callback FOR over [FOR_mark, p) and consume the current
 * byte */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR over [FOR_mark, p) and don't consume the
 * current byte */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR to the current position `p`; non-destructive if the
 * mark is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)

/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Mechanics: adds V to parser->nread; once the running total exceeds
 * HTTP_MAX_HEADER_SIZE the errno is set to HPE_HEADER_OVERFLOW and control
 * jumps to the `error` label of the enclosing function.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  parser->nread += (V);                                              \
  if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
162 
163 
/* Lowercase spellings of the header names and header values that get
 * special treatment.  NOTE(review): presumably compared against lowercased
 * input in http_parser_execute(); the users are outside this excerpt. */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"


/* Method names, produced by stringifying the third field of every
 * HTTP_METHOD_MAP entry, in map order. */
static const char *method_strings[] =
  {
#define XX(num, name, string) #string,
  HTTP_METHOD_MAP(XX)
#undef XX
  };
180 
181 
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by byte value: a non-zero entry is the lowercased form of a
 * valid token character, 0 marks a byte that may not appear in a token.
 * Only the first 128 entries are spelled out; the remaining entries
 * (bytes 128-255) are zero-initialized and therefore never count as token
 * characters.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
222 
223 
/* Hex digit value of each byte: 0-15 for '0'-'9', 'A'-'F' and 'a'-'f',
 * and -1 for every other byte.
 *
 * All 256 entries are listed explicitly.  With only the lower 128 spelled
 * out, the upper half would be zero-initialized, so a byte >= 0x80 would
 * look like the valid digit 0 to any caller that indexes the table with an
 * unsigned char.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xf0 */
  };
234 
235 
/* T(v) marks bits that are only accepted in non-strict builds: it expands
 * to 0 when HTTP_PARSER_STRICT is set and to v otherwise.  Below it guards
 * the bits for horizontal tab (9) and form feed (12). */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of bytes accepted as ordinary URL characters, one bit per byte
 * value and eight byte values per array element (read it with BIT_AT).
 * Each row below ORs together the bits of eight consecutive byte values;
 * a 0 in a column means that byte is rejected.  Only the first 16 elements
 * (bytes 0-127) are initialized explicitly; the remaining elements are
 * zero, so bytes 128-255 are not set in this table. */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T
278 
/* States of the main parsing state machine.
 *
 * The numeric order matters: every state up to and including
 * s_headers_done counts as a "header" state for PARSING_HEADER(), and
 * everything after it as a "body" state.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start line of a response (or not yet known which) */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  /* start line of a request */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  /* header fields and values */
  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* True while `state` is one of the header states, i.e. not past
 * s_headers_done.  The argument is parenthesized so that compound
 * expressions (for example a conditional expression) compare as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
361 
362 
/* States used while classifying a header.  Judging by the names they cover
 * matching of the header name, the recognized header itself, matching of
 * its value and the recognized value; the code that drives them is outside
 * this excerpt.  Values are consecutive, starting at h_general == 0. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* header name being matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name recognized */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* header value being matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  /* header value recognized */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
392 
/* States for walking the authority part of a URL (userinfo, host, IPv6
 * literal with optional zone, port), going by the names; the consumer is
 * outside this excerpt.  Numbering starts at 1. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
408 
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesizes its argument so that compound expressions are
 * classified as a whole.  Arguments may still be evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lowercase by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never accepts a space, regardless of build mode. */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

/* Non-strict builds additionally accept a space inside tokens, bytes with
 * the high bit set inside URLs and '_' inside host names. */
#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF.  CR, LF and horizontal tab (9) are accepted as
 * well; DEL (127) and the remaining control characters are not.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
444 
/* Initial state for a new message, picked from parser->type: s_start_req
 * for HTTP_REQUEST, s_start_res for anything else.  Expects `parser` in
 * scope. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict builds, set HPE_STRICT and jump to the
 * `error` label when `cond` is true; in non-strict builds it expands to
 * nothing, so `cond` is not evaluated at all.
 *
 * NEW_MESSAGE(): state to enter for the next message.  Strict builds move
 * to s_dead unless http_should_keep_alive() returns non-zero for the
 * parser; non-strict builds always restart at start_state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif


/* Map errno values to strings for human-readable output.  One
 * { name, description } entry is generated per HTTP_ERRNO_MAP item; the
 * name is the item's identifier prefixed with "HPE_". */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static struct {
  const char *name;
  const char *description;
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN

/* Forward declaration; the definition is outside this excerpt. */
int http_message_needs_eof(const http_parser *parser);
474 
475 /* Our URL parser.
476  *
477  * This is designed to be shared by http_parser_execute() for URL validation,
478  * hence it has a state transition + byte-for-byte interface. In addition, it
479  * is meant to be embedded in http_parser_parse_url(), which does the dirty
480  * work of turning state transitions URL components for its API.
481  *
482  * This function should only be invoked with non-space characters. It is
483  * assumed that the caller cares about (and can detect) the transition between
484  * URL and non-URL states by looking for these.
485  */
486 static enum state
parse_url_char(enum state s,const char ch)487 parse_url_char(enum state s, const char ch)
488 {
489   if (ch == ' ' || ch == '\r' || ch == '\n') {
490     return s_dead;
491   }
492 
493 #if HTTP_PARSER_STRICT
494   if (ch == '\t' || ch == '\f') {
495     return s_dead;
496   }
497 #endif
498 
499   switch (s) {
500     case s_req_spaces_before_url:
501       /* Proxied requests are followed by scheme of an absolute URI (alpha).
502        * All methods except CONNECT are followed by '/' or '*'.
503        */
504 
505       if (ch == '/' || ch == '*') {
506         return s_req_path;
507       }
508 
509       if (IS_ALPHA(ch)) {
510         return s_req_schema;
511       }
512 
513       break;
514 
515     case s_req_schema:
516       if (IS_ALPHA(ch)) {
517         return s;
518       }
519 
520       if (ch == ':') {
521         return s_req_schema_slash;
522       }
523 
524       break;
525 
526     case s_req_schema_slash:
527       if (ch == '/') {
528         return s_req_schema_slash_slash;
529       }
530 
531       break;
532 
533     case s_req_schema_slash_slash:
534       if (ch == '/') {
535         return s_req_server_start;
536       }
537 
538       break;
539 
540     case s_req_server_with_at:
541       if (ch == '@') {
542         return s_dead;
543       }
544 
545     /* FALLTHROUGH */
546     case s_req_server_start:
547     case s_req_server:
548       if (ch == '/') {
549         return s_req_path;
550       }
551 
552       if (ch == '?') {
553         return s_req_query_string_start;
554       }
555 
556       if (ch == '@') {
557         return s_req_server_with_at;
558       }
559 
560       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
561         return s_req_server;
562       }
563 
564       break;
565 
566     case s_req_path:
567       if (IS_URL_CHAR(ch)) {
568         return s;
569       }
570 
571       switch (ch) {
572         case '?':
573           return s_req_query_string_start;
574 
575         case '#':
576           return s_req_fragment_start;
577       }
578 
579       break;
580 
581     case s_req_query_string_start:
582     case s_req_query_string:
583       if (IS_URL_CHAR(ch)) {
584         return s_req_query_string;
585       }
586 
587       switch (ch) {
588         case '?':
589           /* allow extra '?' in query string */
590           return s_req_query_string;
591 
592         case '#':
593           return s_req_fragment_start;
594       }
595 
596       break;
597 
598     case s_req_fragment_start:
599       if (IS_URL_CHAR(ch)) {
600         return s_req_fragment;
601       }
602 
603       switch (ch) {
604         case '?':
605           return s_req_fragment;
606 
607         case '#':
608           return s;
609       }
610 
611       break;
612 
613     case s_req_fragment:
614       if (IS_URL_CHAR(ch)) {
615         return s;
616       }
617 
618       switch (ch) {
619         case '?':
620         case '#':
621           return s;
622       }
623 
624       break;
625 
626     default:
627       break;
628   }
629 
630   /* We should never fall out of the switch above unless there's an error */
631   return s_dead;
632 }
633 
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)634 size_t http_parser_execute (http_parser *parser,
635                             const http_parser_settings *settings,
636                             const char *data,
637                             size_t len)
638 {
639   char c, ch;
640   int8_t unhex_val;
641   const char *p = data;
642   const char *header_field_mark = 0;
643   const char *header_value_mark = 0;
644   const char *url_mark = 0;
645   const char *body_mark = 0;
646   const char *status_mark = 0;
647   enum state p_state = (enum state) parser->state;
648   const unsigned int lenient = parser->lenient_http_headers;
649 
650   /* We're in an error state. Don't bother doing anything. */
651   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
652     return 0;
653   }
654 
655   if (len == 0) {
656     switch (CURRENT_STATE()) {
657       case s_body_identity_eof:
658         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
659          * we got paused.
660          */
661         CALLBACK_NOTIFY_NOADVANCE(message_complete);
662         return 0;
663 
664       case s_dead:
665       case s_start_req_or_res:
666       case s_start_res:
667       case s_start_req:
668         return 0;
669 
670       default:
671         SET_ERRNO(HPE_INVALID_EOF_STATE);
672         return 1;
673     }
674   }
675 
676 
677   if (CURRENT_STATE() == s_header_field)
678     header_field_mark = data;
679   if (CURRENT_STATE() == s_header_value)
680     header_value_mark = data;
681   switch (CURRENT_STATE()) {
682   case s_req_path:
683   case s_req_schema:
684   case s_req_schema_slash:
685   case s_req_schema_slash_slash:
686   case s_req_server_start:
687   case s_req_server:
688   case s_req_server_with_at:
689   case s_req_query_string_start:
690   case s_req_query_string:
691   case s_req_fragment_start:
692   case s_req_fragment:
693     url_mark = data;
694     break;
695   case s_res_status:
696     status_mark = data;
697     break;
698   default:
699     break;
700   }
701 
702   for (p=data; p != data + len; p++) {
703     ch = *p;
704 
705     if (PARSING_HEADER(CURRENT_STATE()))
706       COUNT_HEADER_SIZE(1);
707 
708 reexecute:
709     switch (CURRENT_STATE()) {
710 
711       case s_dead:
712         /* this state is used after a 'Connection: close' message
713          * the parser will error out if it reads another message
714          */
715         if (LIKELY(ch == CR || ch == LF))
716           break;
717 
718         SET_ERRNO(HPE_CLOSED_CONNECTION);
719         goto error;
720 
721       case s_start_req_or_res:
722       {
723         if (ch == CR || ch == LF)
724           break;
725         parser->flags = 0;
726         parser->content_length = ULLONG_MAX;
727 
728         if (ch == 'H') {
729           UPDATE_STATE(s_res_or_resp_H);
730 
731           CALLBACK_NOTIFY(message_begin);
732         } else {
733           parser->type = HTTP_REQUEST;
734           UPDATE_STATE(s_start_req);
735           REEXECUTE();
736         }
737 
738         break;
739       }
740 
741       case s_res_or_resp_H:
742         if (ch == 'T') {
743           parser->type = HTTP_RESPONSE;
744           UPDATE_STATE(s_res_HT);
745         } else {
746           if (UNLIKELY(ch != 'E')) {
747             SET_ERRNO(HPE_INVALID_CONSTANT);
748             goto error;
749           }
750 
751           parser->type = HTTP_REQUEST;
752           parser->method = HTTP_HEAD;
753           parser->index = 2;
754           UPDATE_STATE(s_req_method);
755         }
756         break;
757 
758       case s_start_res:
759       {
760         parser->flags = 0;
761         parser->content_length = ULLONG_MAX;
762 
763         switch (ch) {
764           case 'H':
765             UPDATE_STATE(s_res_H);
766             break;
767 
768           case CR:
769           case LF:
770             break;
771 
772           default:
773             SET_ERRNO(HPE_INVALID_CONSTANT);
774             goto error;
775         }
776 
777         CALLBACK_NOTIFY(message_begin);
778         break;
779       }
780 
781       case s_res_H:
782         STRICT_CHECK(ch != 'T');
783         UPDATE_STATE(s_res_HT);
784         break;
785 
786       case s_res_HT:
787         STRICT_CHECK(ch != 'T');
788         UPDATE_STATE(s_res_HTT);
789         break;
790 
791       case s_res_HTT:
792         STRICT_CHECK(ch != 'P');
793         UPDATE_STATE(s_res_HTTP);
794         break;
795 
796       case s_res_HTTP:
797         STRICT_CHECK(ch != '/');
798         UPDATE_STATE(s_res_first_http_major);
799         break;
800 
801       case s_res_first_http_major:
802         if (UNLIKELY(ch < '0' || ch > '9')) {
803           SET_ERRNO(HPE_INVALID_VERSION);
804           goto error;
805         }
806 
807         parser->http_major = ch - '0';
808         UPDATE_STATE(s_res_http_major);
809         break;
810 
811       /* major HTTP version or dot */
812       case s_res_http_major:
813       {
814         if (ch == '.') {
815           UPDATE_STATE(s_res_first_http_minor);
816           break;
817         }
818 
819         if (!IS_NUM(ch)) {
820           SET_ERRNO(HPE_INVALID_VERSION);
821           goto error;
822         }
823 
824         parser->http_major *= 10;
825         parser->http_major += ch - '0';
826 
827         if (UNLIKELY(parser->http_major > 999)) {
828           SET_ERRNO(HPE_INVALID_VERSION);
829           goto error;
830         }
831 
832         break;
833       }
834 
835       /* first digit of minor HTTP version */
836       case s_res_first_http_minor:
837         if (UNLIKELY(!IS_NUM(ch))) {
838           SET_ERRNO(HPE_INVALID_VERSION);
839           goto error;
840         }
841 
842         parser->http_minor = ch - '0';
843         UPDATE_STATE(s_res_http_minor);
844         break;
845 
846       /* minor HTTP version or end of request line */
847       case s_res_http_minor:
848       {
849         if (ch == ' ') {
850           UPDATE_STATE(s_res_first_status_code);
851           break;
852         }
853 
854         if (UNLIKELY(!IS_NUM(ch))) {
855           SET_ERRNO(HPE_INVALID_VERSION);
856           goto error;
857         }
858 
859         parser->http_minor *= 10;
860         parser->http_minor += ch - '0';
861 
862         if (UNLIKELY(parser->http_minor > 999)) {
863           SET_ERRNO(HPE_INVALID_VERSION);
864           goto error;
865         }
866 
867         break;
868       }
869 
870       case s_res_first_status_code:
871       {
872         if (!IS_NUM(ch)) {
873           if (ch == ' ') {
874             break;
875           }
876 
877           SET_ERRNO(HPE_INVALID_STATUS);
878           goto error;
879         }
880         parser->status_code = ch - '0';
881         UPDATE_STATE(s_res_status_code);
882         break;
883       }
884 
885       case s_res_status_code:
886       {
887         if (!IS_NUM(ch)) {
888           switch (ch) {
889             case ' ':
890               UPDATE_STATE(s_res_status_start);
891               break;
892             case CR:
893               UPDATE_STATE(s_res_line_almost_done);
894               break;
895             case LF:
896               UPDATE_STATE(s_header_field_start);
897               break;
898             default:
899               SET_ERRNO(HPE_INVALID_STATUS);
900               goto error;
901           }
902           break;
903         }
904 
905         parser->status_code *= 10;
906         parser->status_code += ch - '0';
907 
908         if (UNLIKELY(parser->status_code > 999)) {
909           SET_ERRNO(HPE_INVALID_STATUS);
910           goto error;
911         }
912 
913         break;
914       }
915 
916       case s_res_status_start:
917       {
918         if (ch == CR) {
919           UPDATE_STATE(s_res_line_almost_done);
920           break;
921         }
922 
923         if (ch == LF) {
924           UPDATE_STATE(s_header_field_start);
925           break;
926         }
927 
928         MARK(status);
929         UPDATE_STATE(s_res_status);
930         parser->index = 0;
931         break;
932       }
933 
934       case s_res_status:
935         if (ch == CR) {
936           UPDATE_STATE(s_res_line_almost_done);
937           CALLBACK_DATA(status);
938           break;
939         }
940 
941         if (ch == LF) {
942           UPDATE_STATE(s_header_field_start);
943           CALLBACK_DATA(status);
944           break;
945         }
946 
947         break;
948 
949       case s_res_line_almost_done:
950         STRICT_CHECK(ch != LF);
951         UPDATE_STATE(s_header_field_start);
952         break;
953 
954       case s_start_req:
955       {
956         if (ch == CR || ch == LF)
957           break;
958         parser->flags = 0;
959         parser->content_length = ULLONG_MAX;
960 
961         if (UNLIKELY(!IS_ALPHA(ch))) {
962           SET_ERRNO(HPE_INVALID_METHOD);
963           goto error;
964         }
965 
966         parser->method = (enum http_method) 0;
967         parser->index = 1;
968         switch (ch) {
969           case 'A': parser->method = HTTP_ACL; break;
970           case 'B': parser->method = HTTP_BIND; break;
971           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
972           case 'D': parser->method = HTTP_DELETE; break;
973           case 'G': parser->method = HTTP_GET; break;
974           case 'H': parser->method = HTTP_HEAD; break;
975           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
976           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
977           case 'N': parser->method = HTTP_NOTIFY; break;
978           case 'O': parser->method = HTTP_OPTIONS; break;
979           case 'P': parser->method = HTTP_POST;
980             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
981             break;
982           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
983           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
984           case 'T': parser->method = HTTP_TRACE; break;
985           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
986           default:
987             SET_ERRNO(HPE_INVALID_METHOD);
988             goto error;
989         }
990         UPDATE_STATE(s_req_method);
991 
992         CALLBACK_NOTIFY(message_begin);
993 
994         break;
995       }
996 
997       case s_req_method:
998       {
999         const char *matcher;
1000         if (UNLIKELY(ch == '\0')) {
1001           SET_ERRNO(HPE_INVALID_METHOD);
1002           goto error;
1003         }
1004 
1005         matcher = method_strings[parser->method];
1006         if (ch == ' ' && matcher[parser->index] == '\0') {
1007           UPDATE_STATE(s_req_spaces_before_url);
1008         } else if (ch == matcher[parser->index]) {
1009           ; /* nada */
1010         } else if (IS_ALPHA(ch)) {
1011 
1012           switch (parser->method << 16 | parser->index << 8 | ch) {
1013 #define XX(meth, pos, ch, new_meth) \
1014             case (HTTP_##meth << 16 | pos << 8 | ch): \
1015               parser->method = HTTP_##new_meth; break;
1016 
1017             XX(POST,      1, 'U', PUT)
1018             XX(POST,      1, 'A', PATCH)
1019             XX(CONNECT,   1, 'H', CHECKOUT)
1020             XX(CONNECT,   2, 'P', COPY)
1021             XX(MKCOL,     1, 'O', MOVE)
1022             XX(MKCOL,     1, 'E', MERGE)
1023             XX(MKCOL,     2, 'A', MKACTIVITY)
1024             XX(MKCOL,     3, 'A', MKCALENDAR)
1025             XX(SUBSCRIBE, 1, 'E', SEARCH)
1026             XX(REPORT,    2, 'B', REBIND)
1027             XX(POST,      1, 'R', PROPFIND)
1028             XX(PROPFIND,  4, 'P', PROPPATCH)
1029             XX(PUT,       2, 'R', PURGE)
1030             XX(LOCK,      1, 'I', LINK)
1031             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1032             XX(UNLOCK,    2, 'B', UNBIND)
1033             XX(UNLOCK,    3, 'I', UNLINK)
1034 #undef XX
1035 
1036             default:
1037               SET_ERRNO(HPE_INVALID_METHOD);
1038               goto error;
1039           }
1040         } else if (ch == '-' &&
1041                    parser->index == 1 &&
1042                    parser->method == HTTP_MKCOL) {
1043           parser->method = HTTP_MSEARCH;
1044         } else {
1045           SET_ERRNO(HPE_INVALID_METHOD);
1046           goto error;
1047         }
1048 
1049         ++parser->index;
1050         break;
1051       }
1052 
1053       case s_req_spaces_before_url:
1054       {
1055         if (ch == ' ') break;
1056 
1057         MARK(url);
1058         if (parser->method == HTTP_CONNECT) {
1059           UPDATE_STATE(s_req_server_start);
1060         }
1061 
1062         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1063         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1064           SET_ERRNO(HPE_INVALID_URL);
1065           goto error;
1066         }
1067 
1068         break;
1069       }
1070 
1071       case s_req_schema:
1072       case s_req_schema_slash:
1073       case s_req_schema_slash_slash:
1074       case s_req_server_start:
1075       {
1076         switch (ch) {
1077           /* No whitespace allowed here */
1078           case ' ':
1079           case CR:
1080           case LF:
1081             SET_ERRNO(HPE_INVALID_URL);
1082             goto error;
1083           default:
1084             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1085             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1086               SET_ERRNO(HPE_INVALID_URL);
1087               goto error;
1088             }
1089         }
1090 
1091         break;
1092       }
1093 
1094       case s_req_server:
1095       case s_req_server_with_at:
1096       case s_req_path:
1097       case s_req_query_string_start:
1098       case s_req_query_string:
1099       case s_req_fragment_start:
1100       case s_req_fragment:
1101       {
1102         switch (ch) {
1103           case ' ':
1104             UPDATE_STATE(s_req_http_start);
1105             CALLBACK_DATA(url);
1106             break;
1107           case CR:
1108           case LF:
1109             parser->http_major = 0;
1110             parser->http_minor = 9;
1111             UPDATE_STATE((ch == CR) ?
1112               s_req_line_almost_done :
1113               s_header_field_start);
1114             CALLBACK_DATA(url);
1115             break;
1116           default:
1117             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1118             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1119               SET_ERRNO(HPE_INVALID_URL);
1120               goto error;
1121             }
1122         }
1123         break;
1124       }
1125 
1126       case s_req_http_start:
1127         switch (ch) {
1128           case 'H':
1129             UPDATE_STATE(s_req_http_H);
1130             break;
1131           case ' ':
1132             break;
1133           default:
1134             SET_ERRNO(HPE_INVALID_CONSTANT);
1135             goto error;
1136         }
1137         break;
1138 
1139       case s_req_http_H:
1140         STRICT_CHECK(ch != 'T');
1141         UPDATE_STATE(s_req_http_HT);
1142         break;
1143 
1144       case s_req_http_HT:
1145         STRICT_CHECK(ch != 'T');
1146         UPDATE_STATE(s_req_http_HTT);
1147         break;
1148 
1149       case s_req_http_HTT:
1150         STRICT_CHECK(ch != 'P');
1151         UPDATE_STATE(s_req_http_HTTP);
1152         break;
1153 
1154       case s_req_http_HTTP:
1155         STRICT_CHECK(ch != '/');
1156         UPDATE_STATE(s_req_first_http_major);
1157         break;
1158 
1159       /* first digit of major HTTP version */
1160       case s_req_first_http_major:
1161         if (UNLIKELY(ch < '1' || ch > '9')) {
1162           SET_ERRNO(HPE_INVALID_VERSION);
1163           goto error;
1164         }
1165 
1166         parser->http_major = ch - '0';
1167         UPDATE_STATE(s_req_http_major);
1168         break;
1169 
1170       /* major HTTP version or dot */
1171       case s_req_http_major:
1172       {
1173         if (ch == '.') {
1174           UPDATE_STATE(s_req_first_http_minor);
1175           break;
1176         }
1177 
1178         if (UNLIKELY(!IS_NUM(ch))) {
1179           SET_ERRNO(HPE_INVALID_VERSION);
1180           goto error;
1181         }
1182 
1183         parser->http_major *= 10;
1184         parser->http_major += ch - '0';
1185 
1186         if (UNLIKELY(parser->http_major > 999)) {
1187           SET_ERRNO(HPE_INVALID_VERSION);
1188           goto error;
1189         }
1190 
1191         break;
1192       }
1193 
1194       /* first digit of minor HTTP version */
1195       case s_req_first_http_minor:
1196         if (UNLIKELY(!IS_NUM(ch))) {
1197           SET_ERRNO(HPE_INVALID_VERSION);
1198           goto error;
1199         }
1200 
1201         parser->http_minor = ch - '0';
1202         UPDATE_STATE(s_req_http_minor);
1203         break;
1204 
1205       /* minor HTTP version or end of request line */
1206       case s_req_http_minor:
1207       {
1208         if (ch == CR) {
1209           UPDATE_STATE(s_req_line_almost_done);
1210           break;
1211         }
1212 
1213         if (ch == LF) {
1214           UPDATE_STATE(s_header_field_start);
1215           break;
1216         }
1217 
1218         /* XXX allow spaces after digit? */
1219 
1220         if (UNLIKELY(!IS_NUM(ch))) {
1221           SET_ERRNO(HPE_INVALID_VERSION);
1222           goto error;
1223         }
1224 
1225         parser->http_minor *= 10;
1226         parser->http_minor += ch - '0';
1227 
1228         if (UNLIKELY(parser->http_minor > 999)) {
1229           SET_ERRNO(HPE_INVALID_VERSION);
1230           goto error;
1231         }
1232 
1233         break;
1234       }
1235 
1236       /* end of request line */
1237       case s_req_line_almost_done:
1238       {
1239         if (UNLIKELY(ch != LF)) {
1240           SET_ERRNO(HPE_LF_EXPECTED);
1241           goto error;
1242         }
1243 
1244         UPDATE_STATE(s_header_field_start);
1245         break;
1246       }
1247 
1248       case s_header_field_start:
1249       {
1250         if (ch == CR) {
1251           UPDATE_STATE(s_headers_almost_done);
1252           break;
1253         }
1254 
1255         if (ch == LF) {
1256           /* they might be just sending \n instead of \r\n so this would be
1257            * the second \n to denote the end of headers*/
1258           UPDATE_STATE(s_headers_almost_done);
1259           REEXECUTE();
1260         }
1261 
1262         c = TOKEN(ch);
1263 
1264         if (UNLIKELY(!c)) {
1265           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1266           goto error;
1267         }
1268 
1269         MARK(header_field);
1270 
1271         parser->index = 0;
1272         UPDATE_STATE(s_header_field);
1273 
1274         switch (c) {
1275           case 'c':
1276             parser->header_state = h_C;
1277             break;
1278 
1279           case 'p':
1280             parser->header_state = h_matching_proxy_connection;
1281             break;
1282 
1283           case 't':
1284             parser->header_state = h_matching_transfer_encoding;
1285             break;
1286 
1287           case 'u':
1288             parser->header_state = h_matching_upgrade;
1289             break;
1290 
1291           default:
1292             parser->header_state = h_general;
1293             break;
1294         }
1295         break;
1296       }
1297 
1298       case s_header_field:
1299       {
1300         const char* start = p;
1301         for (; p != data + len; p++) {
1302           ch = *p;
1303           c = TOKEN(ch);
1304 
1305           if (!c)
1306             break;
1307 
1308           switch (parser->header_state) {
1309             case h_general:
1310               break;
1311 
1312             case h_C:
1313               parser->index++;
1314               parser->header_state = (c == 'o' ? h_CO : h_general);
1315               break;
1316 
1317             case h_CO:
1318               parser->index++;
1319               parser->header_state = (c == 'n' ? h_CON : h_general);
1320               break;
1321 
1322             case h_CON:
1323               parser->index++;
1324               switch (c) {
1325                 case 'n':
1326                   parser->header_state = h_matching_connection;
1327                   break;
1328                 case 't':
1329                   parser->header_state = h_matching_content_length;
1330                   break;
1331                 default:
1332                   parser->header_state = h_general;
1333                   break;
1334               }
1335               break;
1336 
1337             /* connection */
1338 
1339             case h_matching_connection:
1340               parser->index++;
1341               if (parser->index > sizeof(CONNECTION)-1
1342                   || c != CONNECTION[parser->index]) {
1343                 parser->header_state = h_general;
1344               } else if (parser->index == sizeof(CONNECTION)-2) {
1345                 parser->header_state = h_connection;
1346               }
1347               break;
1348 
1349             /* proxy-connection */
1350 
1351             case h_matching_proxy_connection:
1352               parser->index++;
1353               if (parser->index > sizeof(PROXY_CONNECTION)-1
1354                   || c != PROXY_CONNECTION[parser->index]) {
1355                 parser->header_state = h_general;
1356               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1357                 parser->header_state = h_connection;
1358               }
1359               break;
1360 
1361             /* content-length */
1362 
1363             case h_matching_content_length:
1364               parser->index++;
1365               if (parser->index > sizeof(CONTENT_LENGTH)-1
1366                   || c != CONTENT_LENGTH[parser->index]) {
1367                 parser->header_state = h_general;
1368               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1369                 if (parser->flags & F_CONTENTLENGTH) {
1370                   SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1371                   goto error;
1372                 }
1373                 parser->header_state = h_content_length;
1374                 parser->flags |= F_CONTENTLENGTH;
1375               }
1376               break;
1377 
1378             /* transfer-encoding */
1379 
1380             case h_matching_transfer_encoding:
1381               parser->index++;
1382               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1383                   || c != TRANSFER_ENCODING[parser->index]) {
1384                 parser->header_state = h_general;
1385               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1386                 parser->header_state = h_transfer_encoding;
1387               }
1388               break;
1389 
1390             /* upgrade */
1391 
1392             case h_matching_upgrade:
1393               parser->index++;
1394               if (parser->index > sizeof(UPGRADE)-1
1395                   || c != UPGRADE[parser->index]) {
1396                 parser->header_state = h_general;
1397               } else if (parser->index == sizeof(UPGRADE)-2) {
1398                 parser->header_state = h_upgrade;
1399               }
1400               break;
1401 
1402             case h_connection:
1403             case h_content_length:
1404             case h_transfer_encoding:
1405             case h_upgrade:
1406               if (ch != ' ') parser->header_state = h_general;
1407               break;
1408 
1409             default:
1410               assert(0 && "Unknown header_state");
1411               break;
1412           }
1413         }
1414 
1415         COUNT_HEADER_SIZE(p - start);
1416 
1417         if (p == data + len) {
1418           --p;
1419           break;
1420         }
1421 
1422         if (ch == ':') {
1423           UPDATE_STATE(s_header_value_discard_ws);
1424           CALLBACK_DATA(header_field);
1425           break;
1426         }
1427 
1428         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1429         goto error;
1430       }
1431 
1432       case s_header_value_discard_ws:
1433         if (ch == ' ' || ch == '\t') break;
1434 
1435         if (ch == CR) {
1436           UPDATE_STATE(s_header_value_discard_ws_almost_done);
1437           break;
1438         }
1439 
1440         if (ch == LF) {
1441           UPDATE_STATE(s_header_value_discard_lws);
1442           break;
1443         }
1444 
1445         /* FALLTHROUGH */
1446 
1447       case s_header_value_start:
1448       {
1449         MARK(header_value);
1450 
1451         UPDATE_STATE(s_header_value);
1452         parser->index = 0;
1453 
1454         c = LOWER(ch);
1455 
1456         switch (parser->header_state) {
1457           case h_upgrade:
1458             parser->flags |= F_UPGRADE;
1459             parser->header_state = h_general;
1460             break;
1461 
1462           case h_transfer_encoding:
1463             /* looking for 'Transfer-Encoding: chunked' */
1464             if ('c' == c) {
1465               parser->header_state = h_matching_transfer_encoding_chunked;
1466             } else {
1467               parser->header_state = h_general;
1468             }
1469             break;
1470 
1471           case h_content_length:
1472             if (UNLIKELY(!IS_NUM(ch))) {
1473               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1474               goto error;
1475             }
1476 
1477             parser->content_length = ch - '0';
1478             break;
1479 
1480           case h_connection:
1481             /* looking for 'Connection: keep-alive' */
1482             if (c == 'k') {
1483               parser->header_state = h_matching_connection_keep_alive;
1484             /* looking for 'Connection: close' */
1485             } else if (c == 'c') {
1486               parser->header_state = h_matching_connection_close;
1487             } else if (c == 'u') {
1488               parser->header_state = h_matching_connection_upgrade;
1489             } else {
1490               parser->header_state = h_matching_connection_token;
1491             }
1492             break;
1493 
1494           /* Multi-value `Connection` header */
1495           case h_matching_connection_token_start:
1496             break;
1497 
1498           default:
1499             parser->header_state = h_general;
1500             break;
1501         }
1502         break;
1503       }
1504 
1505       case s_header_value:
1506       {
1507         const char* start = p;
1508         enum header_states h_state = (enum header_states) parser->header_state;
1509         for (; p != data + len; p++) {
1510           ch = *p;
1511           if (ch == CR) {
1512             UPDATE_STATE(s_header_almost_done);
1513             parser->header_state = h_state;
1514             CALLBACK_DATA(header_value);
1515             break;
1516           }
1517 
1518           if (ch == LF) {
1519             UPDATE_STATE(s_header_almost_done);
1520             COUNT_HEADER_SIZE(p - start);
1521             parser->header_state = h_state;
1522             CALLBACK_DATA_NOADVANCE(header_value);
1523             REEXECUTE();
1524           }
1525 
1526           if (!lenient && !IS_HEADER_CHAR(ch)) {
1527             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1528             goto error;
1529           }
1530 
1531           c = LOWER(ch);
1532 
1533           switch (h_state) {
1534             case h_general:
1535             {
1536               const char* p_cr;
1537               const char* p_lf;
1538               size_t limit = data + len - p;
1539 
1540               limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1541 
1542               p_cr = (const char*) memchr(p, CR, limit);
1543               p_lf = (const char*) memchr(p, LF, limit);
1544               if (p_cr != NULL) {
1545                 if (p_lf != NULL && p_cr >= p_lf)
1546                   p = p_lf;
1547                 else
1548                   p = p_cr;
1549               } else if (UNLIKELY(p_lf != NULL)) {
1550                 p = p_lf;
1551               } else {
1552                 p = data + len;
1553               }
1554               --p;
1555 
1556               break;
1557             }
1558 
1559             case h_connection:
1560             case h_transfer_encoding:
1561               assert(0 && "Shouldn't get here.");
1562               break;
1563 
1564             case h_content_length:
1565             {
1566               uint64_t t;
1567 
1568               if (ch == ' ') break;
1569 
1570               if (UNLIKELY(!IS_NUM(ch))) {
1571                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1572                 parser->header_state = h_state;
1573                 goto error;
1574               }
1575 
1576               t = parser->content_length;
1577               t *= 10;
1578               t += ch - '0';
1579 
1580               /* Overflow? Test against a conservative limit for simplicity. */
1581               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1582                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1583                 parser->header_state = h_state;
1584                 goto error;
1585               }
1586 
1587               parser->content_length = t;
1588               break;
1589             }
1590 
1591             /* Transfer-Encoding: chunked */
1592             case h_matching_transfer_encoding_chunked:
1593               parser->index++;
1594               if (parser->index > sizeof(CHUNKED)-1
1595                   || c != CHUNKED[parser->index]) {
1596                 h_state = h_general;
1597               } else if (parser->index == sizeof(CHUNKED)-2) {
1598                 h_state = h_transfer_encoding_chunked;
1599               }
1600               break;
1601 
1602             case h_matching_connection_token_start:
1603               /* looking for 'Connection: keep-alive' */
1604               if (c == 'k') {
1605                 h_state = h_matching_connection_keep_alive;
1606               /* looking for 'Connection: close' */
1607               } else if (c == 'c') {
1608                 h_state = h_matching_connection_close;
1609               } else if (c == 'u') {
1610                 h_state = h_matching_connection_upgrade;
1611               } else if (STRICT_TOKEN(c)) {
1612                 h_state = h_matching_connection_token;
1613               } else if (c == ' ' || c == '\t') {
1614                 /* Skip lws */
1615               } else {
1616                 h_state = h_general;
1617               }
1618               break;
1619 
1620             /* looking for 'Connection: keep-alive' */
1621             case h_matching_connection_keep_alive:
1622               parser->index++;
1623               if (parser->index > sizeof(KEEP_ALIVE)-1
1624                   || c != KEEP_ALIVE[parser->index]) {
1625                 h_state = h_matching_connection_token;
1626               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1627                 h_state = h_connection_keep_alive;
1628               }
1629               break;
1630 
1631             /* looking for 'Connection: close' */
1632             case h_matching_connection_close:
1633               parser->index++;
1634               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1635                 h_state = h_matching_connection_token;
1636               } else if (parser->index == sizeof(CLOSE)-2) {
1637                 h_state = h_connection_close;
1638               }
1639               break;
1640 
1641             /* looking for 'Connection: upgrade' */
1642             case h_matching_connection_upgrade:
1643               parser->index++;
1644               if (parser->index > sizeof(UPGRADE) - 1 ||
1645                   c != UPGRADE[parser->index]) {
1646                 h_state = h_matching_connection_token;
1647               } else if (parser->index == sizeof(UPGRADE)-2) {
1648                 h_state = h_connection_upgrade;
1649               }
1650               break;
1651 
1652             case h_matching_connection_token:
1653               if (ch == ',') {
1654                 h_state = h_matching_connection_token_start;
1655                 parser->index = 0;
1656               }
1657               break;
1658 
1659             case h_transfer_encoding_chunked:
1660               if (ch != ' ') h_state = h_general;
1661               break;
1662 
1663             case h_connection_keep_alive:
1664             case h_connection_close:
1665             case h_connection_upgrade:
1666               if (ch == ',') {
1667                 if (h_state == h_connection_keep_alive) {
1668                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
1669                 } else if (h_state == h_connection_close) {
1670                   parser->flags |= F_CONNECTION_CLOSE;
1671                 } else if (h_state == h_connection_upgrade) {
1672                   parser->flags |= F_CONNECTION_UPGRADE;
1673                 }
1674                 h_state = h_matching_connection_token_start;
1675                 parser->index = 0;
1676               } else if (ch != ' ') {
1677                 h_state = h_matching_connection_token;
1678               }
1679               break;
1680 
1681             default:
1682               UPDATE_STATE(s_header_value);
1683               h_state = h_general;
1684               break;
1685           }
1686         }
1687         parser->header_state = h_state;
1688 
1689         COUNT_HEADER_SIZE(p - start);
1690 
1691         if (p == data + len)
1692           --p;
1693         break;
1694       }
1695 
1696       case s_header_almost_done:
1697       {
1698         if (UNLIKELY(ch != LF)) {
1699           SET_ERRNO(HPE_LF_EXPECTED);
1700           goto error;
1701         }
1702 
1703         UPDATE_STATE(s_header_value_lws);
1704         break;
1705       }
1706 
1707       case s_header_value_lws:
1708       {
1709         if (ch == ' ' || ch == '\t') {
1710           UPDATE_STATE(s_header_value_start);
1711           REEXECUTE();
1712         }
1713 
1714         /* finished the header */
1715         switch (parser->header_state) {
1716           case h_connection_keep_alive:
1717             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1718             break;
1719           case h_connection_close:
1720             parser->flags |= F_CONNECTION_CLOSE;
1721             break;
1722           case h_transfer_encoding_chunked:
1723             parser->flags |= F_CHUNKED;
1724             break;
1725           case h_connection_upgrade:
1726             parser->flags |= F_CONNECTION_UPGRADE;
1727             break;
1728           default:
1729             break;
1730         }
1731 
1732         UPDATE_STATE(s_header_field_start);
1733         REEXECUTE();
1734       }
1735 
1736       case s_header_value_discard_ws_almost_done:
1737       {
1738         STRICT_CHECK(ch != LF);
1739         UPDATE_STATE(s_header_value_discard_lws);
1740         break;
1741       }
1742 
1743       case s_header_value_discard_lws:
1744       {
1745         if (ch == ' ' || ch == '\t') {
1746           UPDATE_STATE(s_header_value_discard_ws);
1747           break;
1748         } else {
1749           switch (parser->header_state) {
1750             case h_connection_keep_alive:
1751               parser->flags |= F_CONNECTION_KEEP_ALIVE;
1752               break;
1753             case h_connection_close:
1754               parser->flags |= F_CONNECTION_CLOSE;
1755               break;
1756             case h_connection_upgrade:
1757               parser->flags |= F_CONNECTION_UPGRADE;
1758               break;
1759             case h_transfer_encoding_chunked:
1760               parser->flags |= F_CHUNKED;
1761               break;
1762             default:
1763               break;
1764           }
1765 
1766           /* header value was empty */
1767           MARK(header_value);
1768           UPDATE_STATE(s_header_field_start);
1769           CALLBACK_DATA_NOADVANCE(header_value);
1770           REEXECUTE();
1771         }
1772       }
1773 
1774       case s_headers_almost_done:
1775       {
1776         STRICT_CHECK(ch != LF);
1777 
1778         if (parser->flags & F_TRAILING) {
1779           /* End of a chunked request */
1780           UPDATE_STATE(s_message_done);
1781           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1782           REEXECUTE();
1783         }
1784 
1785         /* Cannot use chunked encoding and a content-length header together
1786            per the HTTP specification. */
1787         if ((parser->flags & F_CHUNKED) &&
1788             (parser->flags & F_CONTENTLENGTH)) {
1789           SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1790           goto error;
1791         }
1792 
1793         UPDATE_STATE(s_headers_done);
1794 
1795         /* Set this here so that on_headers_complete() callbacks can see it */
1796         parser->upgrade =
1797           ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1798            (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1799            parser->method == HTTP_CONNECT);
1800 
1801         /* Here we call the headers_complete callback. This is somewhat
1802          * different than other callbacks because if the user returns 1, we
1803          * will interpret that as saying that this message has no body. This
1804          * is needed for the annoying case of receiving a response to a HEAD
1805          * request.
1806          *
1807          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1808          * we have to simulate it by handling a change in errno below.
1809          */
1810         if (settings->on_headers_complete) {
1811           switch (settings->on_headers_complete(parser)) {
1812             case 0:
1813               break;
1814 
1815             case 2:
1816               parser->upgrade = 1;
1817 
1818             case 1:
1819               parser->flags |= F_SKIPBODY;
1820               break;
1821 
1822             default:
1823               SET_ERRNO(HPE_CB_headers_complete);
1824               RETURN(p - data); /* Error */
1825           }
1826         }
1827 
1828         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1829           RETURN(p - data);
1830         }
1831 
1832         REEXECUTE();
1833       }
1834 
1835       case s_headers_done:
1836       {
1837         int hasBody;
1838         STRICT_CHECK(ch != LF);
1839 
1840         parser->nread = 0;
1841 
1842         hasBody = parser->flags & F_CHUNKED ||
1843           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1844         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1845                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
1846           /* Exit, the rest of the message is in a different protocol. */
1847           UPDATE_STATE(NEW_MESSAGE());
1848           CALLBACK_NOTIFY(message_complete);
1849           RETURN((p - data) + 1);
1850         }
1851 
1852         if (parser->flags & F_SKIPBODY) {
1853           UPDATE_STATE(NEW_MESSAGE());
1854           CALLBACK_NOTIFY(message_complete);
1855         } else if (parser->flags & F_CHUNKED) {
1856           /* chunked encoding - ignore Content-Length header */
1857           UPDATE_STATE(s_chunk_size_start);
1858         } else {
1859           if (parser->content_length == 0) {
1860             /* Content-Length header given but zero: Content-Length: 0\r\n */
1861             UPDATE_STATE(NEW_MESSAGE());
1862             CALLBACK_NOTIFY(message_complete);
1863           } else if (parser->content_length != ULLONG_MAX) {
1864             /* Content-Length header given and non-zero */
1865             UPDATE_STATE(s_body_identity);
1866           } else {
1867             if (!http_message_needs_eof(parser)) {
1868               /* Assume content-length 0 - read the next */
1869               UPDATE_STATE(NEW_MESSAGE());
1870               CALLBACK_NOTIFY(message_complete);
1871             } else {
1872               /* Read body until EOF */
1873               UPDATE_STATE(s_body_identity_eof);
1874             }
1875           }
1876         }
1877 
1878         break;
1879       }
1880 
1881       case s_body_identity:
1882       {
1883         uint64_t to_read = MIN(parser->content_length,
1884                                (uint64_t) ((data + len) - p));
1885 
1886         assert(parser->content_length != 0
1887             && parser->content_length != ULLONG_MAX);
1888 
1889         /* The difference between advancing content_length and p is because
1890          * the latter will automatically advance on the next loop iteration.
1891          * Further, if content_length ends up at 0, we want to see the last
1892          * byte again for our message complete callback.
1893          */
1894         MARK(body);
1895         parser->content_length -= to_read;
1896         p += to_read - 1;
1897 
1898         if (parser->content_length == 0) {
1899           UPDATE_STATE(s_message_done);
1900 
1901           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1902            *
1903            * The alternative to doing this is to wait for the next byte to
1904            * trigger the data callback, just as in every other case. The
1905            * problem with this is that this makes it difficult for the test
1906            * harness to distinguish between complete-on-EOF and
1907            * complete-on-length. It's not clear that this distinction is
1908            * important for applications, but let's keep it for now.
1909            */
1910           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1911           REEXECUTE();
1912         }
1913 
1914         break;
1915       }
1916 
1917       /* read until EOF */
1918       case s_body_identity_eof:
1919         MARK(body);
1920         p = data + len - 1;
1921 
1922         break;
1923 
1924       case s_message_done:
1925         UPDATE_STATE(NEW_MESSAGE());
1926         CALLBACK_NOTIFY(message_complete);
1927         if (parser->upgrade) {
1928           /* Exit, the rest of the message is in a different protocol. */
1929           RETURN((p - data) + 1);
1930         }
1931         break;
1932 
1933       case s_chunk_size_start:
1934       {
1935         assert(parser->nread == 1);
1936         assert(parser->flags & F_CHUNKED);
1937 
1938         unhex_val = unhex[(unsigned char)ch];
1939         if (UNLIKELY(unhex_val == -1)) {
1940           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1941           goto error;
1942         }
1943 
1944         parser->content_length = unhex_val;
1945         UPDATE_STATE(s_chunk_size);
1946         break;
1947       }
1948 
1949       case s_chunk_size:
1950       {
1951         uint64_t t;
1952 
1953         assert(parser->flags & F_CHUNKED);
1954 
1955         if (ch == CR) {
1956           UPDATE_STATE(s_chunk_size_almost_done);
1957           break;
1958         }
1959 
1960         unhex_val = unhex[(unsigned char)ch];
1961 
1962         if (unhex_val == -1) {
1963           if (ch == ';' || ch == ' ') {
1964             UPDATE_STATE(s_chunk_parameters);
1965             break;
1966           }
1967 
1968           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1969           goto error;
1970         }
1971 
1972         t = parser->content_length;
1973         t *= 16;
1974         t += unhex_val;
1975 
1976         /* Overflow? Test against a conservative limit for simplicity. */
1977         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1978           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1979           goto error;
1980         }
1981 
1982         parser->content_length = t;
1983         break;
1984       }
1985 
1986       case s_chunk_parameters:
1987       {
1988         assert(parser->flags & F_CHUNKED);
1989         /* just ignore this shit. TODO check for overflow */
1990         if (ch == CR) {
1991           UPDATE_STATE(s_chunk_size_almost_done);
1992           break;
1993         }
1994         break;
1995       }
1996 
1997       case s_chunk_size_almost_done:
1998       {
1999         assert(parser->flags & F_CHUNKED);
2000         STRICT_CHECK(ch != LF);
2001 
2002         parser->nread = 0;
2003 
2004         if (parser->content_length == 0) {
2005           parser->flags |= F_TRAILING;
2006           UPDATE_STATE(s_header_field_start);
2007         } else {
2008           UPDATE_STATE(s_chunk_data);
2009         }
2010         CALLBACK_NOTIFY(chunk_header);
2011         break;
2012       }
2013 
2014       case s_chunk_data:
2015       {
2016         uint64_t to_read = MIN(parser->content_length,
2017                                (uint64_t) ((data + len) - p));
2018 
2019         assert(parser->flags & F_CHUNKED);
2020         assert(parser->content_length != 0
2021             && parser->content_length != ULLONG_MAX);
2022 
2023         /* See the explanation in s_body_identity for why the content
2024          * length and data pointers are managed this way.
2025          */
2026         MARK(body);
2027         parser->content_length -= to_read;
2028         p += to_read - 1;
2029 
2030         if (parser->content_length == 0) {
2031           UPDATE_STATE(s_chunk_data_almost_done);
2032         }
2033 
2034         break;
2035       }
2036 
2037       case s_chunk_data_almost_done:
2038         assert(parser->flags & F_CHUNKED);
2039         assert(parser->content_length == 0);
2040         STRICT_CHECK(ch != CR);
2041         UPDATE_STATE(s_chunk_data_done);
2042         CALLBACK_DATA(body);
2043         break;
2044 
2045       case s_chunk_data_done:
2046         assert(parser->flags & F_CHUNKED);
2047         STRICT_CHECK(ch != LF);
2048         parser->nread = 0;
2049         UPDATE_STATE(s_chunk_size_start);
2050         CALLBACK_NOTIFY(chunk_complete);
2051         break;
2052 
2053       default:
2054         assert(0 && "unhandled state");
2055         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2056         goto error;
2057     }
2058   }
2059 
2060   /* Run callbacks for any marks that we have leftover after we ran out of
2061    * bytes. There should be at most one of these set, so it's OK to invoke
2062    * them in series (unset marks will not result in callbacks).
2063    *
2064    * We use the NOADVANCE() variety of callbacks here because 'p' has already
2065    * overflowed 'data' and this allows us to correct for the off-by-one that
2066    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2067    * value that's in-bounds).
2068    */
2069 
2070   assert(((header_field_mark ? 1 : 0) +
2071           (header_value_mark ? 1 : 0) +
2072           (url_mark ? 1 : 0)  +
2073           (body_mark ? 1 : 0) +
2074           (status_mark ? 1 : 0)) <= 1);
2075 
2076   CALLBACK_DATA_NOADVANCE(header_field);
2077   CALLBACK_DATA_NOADVANCE(header_value);
2078   CALLBACK_DATA_NOADVANCE(url);
2079   CALLBACK_DATA_NOADVANCE(body);
2080   CALLBACK_DATA_NOADVANCE(status);
2081 
2082   RETURN(len);
2083 
2084 error:
2085   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2086     SET_ERRNO(HPE_UNKNOWN);
2087   }
2088 
2089   RETURN(p - data);
2090 }
2091 
2092 
2093 /* Does the parser need to see an EOF to find the end of the message? */
2094 int
http_message_needs_eof(const http_parser * parser)2095 http_message_needs_eof (const http_parser *parser)
2096 {
2097   if (parser->type == HTTP_REQUEST) {
2098     return 0;
2099   }
2100 
2101   /* See RFC 2616 section 4.4 */
2102   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2103       parser->status_code == 204 ||     /* No Content */
2104       parser->status_code == 304 ||     /* Not Modified */
2105       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2106     return 0;
2107   }
2108 
2109   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2110     return 0;
2111   }
2112 
2113   return 1;
2114 }
2115 
2116 
2117 int
http_should_keep_alive(const http_parser * parser)2118 http_should_keep_alive (const http_parser *parser)
2119 {
2120   if (parser->http_major > 0 && parser->http_minor > 0) {
2121     /* HTTP/1.1 */
2122     if (parser->flags & F_CONNECTION_CLOSE) {
2123       return 0;
2124     }
2125   } else {
2126     /* HTTP/1.0 or earlier */
2127     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2128       return 0;
2129     }
2130   }
2131 
2132   return !http_message_needs_eof(parser);
2133 }
2134 
2135 
2136 const char *
http_method_str(enum http_method m)2137 http_method_str (enum http_method m)
2138 {
2139   return ELEM_AT(method_strings, m, "<unknown>");
2140 }
2141 
2142 
2143 void
http_parser_init(http_parser * parser,enum http_parser_type t)2144 http_parser_init (http_parser *parser, enum http_parser_type t)
2145 {
2146   void *data = parser->data; /* preserve application data */
2147   memset(parser, 0, sizeof(*parser));
2148   parser->data = data;
2149   parser->type = t;
2150   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2151   parser->http_errno = HPE_OK;
2152 }
2153 
2154 void
http_parser_settings_init(http_parser_settings * settings)2155 http_parser_settings_init(http_parser_settings *settings)
2156 {
2157   memset(settings, 0, sizeof(*settings));
2158 }
2159 
2160 const char *
http_errno_name(enum http_errno err)2161 http_errno_name(enum http_errno err) {
2162   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2163   return http_strerror_tab[err].name;
2164 }
2165 
2166 const char *
http_errno_description(enum http_errno err)2167 http_errno_description(enum http_errno err) {
2168   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2169   return http_strerror_tab[err].description;
2170 }
2171 
2172 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2173 http_parse_host_char(enum http_host_state s, const char ch) {
2174   switch(s) {
2175     case s_http_userinfo:
2176     case s_http_userinfo_start:
2177       if (ch == '@') {
2178         return s_http_host_start;
2179       }
2180 
2181       if (IS_USERINFO_CHAR(ch)) {
2182         return s_http_userinfo;
2183       }
2184       break;
2185 
2186     case s_http_host_start:
2187       if (ch == '[') {
2188         return s_http_host_v6_start;
2189       }
2190 
2191       if (IS_HOST_CHAR(ch)) {
2192         return s_http_host;
2193       }
2194 
2195       break;
2196 
2197     case s_http_host:
2198       if (IS_HOST_CHAR(ch)) {
2199         return s_http_host;
2200       }
2201 
2202     /* FALLTHROUGH */
2203     case s_http_host_v6_end:
2204       if (ch == ':') {
2205         return s_http_host_port_start;
2206       }
2207 
2208       break;
2209 
2210     case s_http_host_v6:
2211       if (ch == ']') {
2212         return s_http_host_v6_end;
2213       }
2214 
2215     /* FALLTHROUGH */
2216     case s_http_host_v6_start:
2217       if (IS_HEX(ch) || ch == ':' || ch == '.') {
2218         return s_http_host_v6;
2219       }
2220 
2221       if (s == s_http_host_v6 && ch == '%') {
2222         return s_http_host_v6_zone_start;
2223       }
2224       break;
2225 
2226     case s_http_host_v6_zone:
2227       if (ch == ']') {
2228         return s_http_host_v6_end;
2229       }
2230 
2231     /* FALLTHROUGH */
2232     case s_http_host_v6_zone_start:
2233       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2234       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2235           ch == '~') {
2236         return s_http_host_v6_zone;
2237       }
2238       break;
2239 
2240     case s_http_host_port:
2241     case s_http_host_port_start:
2242       if (IS_NUM(ch)) {
2243         return s_http_host_port;
2244       }
2245 
2246       break;
2247 
2248     default:
2249       break;
2250   }
2251   return s_http_host_dead;
2252 }
2253 
2254 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2255 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2256   assert(u->field_set & (1 << UF_HOST));
2257   enum http_host_state s;
2258 
2259   const char *p;
2260   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2261 
2262   u->field_data[UF_HOST].len = 0;
2263 
2264   s = found_at ? s_http_userinfo_start : s_http_host_start;
2265 
2266   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2267     enum http_host_state new_s = http_parse_host_char(s, *p);
2268 
2269     if (new_s == s_http_host_dead) {
2270       return 1;
2271     }
2272 
2273     switch(new_s) {
2274       case s_http_host:
2275         if (s != s_http_host) {
2276           u->field_data[UF_HOST].off = p - buf;
2277         }
2278         u->field_data[UF_HOST].len++;
2279         break;
2280 
2281       case s_http_host_v6:
2282         if (s != s_http_host_v6) {
2283           u->field_data[UF_HOST].off = p - buf;
2284         }
2285         u->field_data[UF_HOST].len++;
2286         break;
2287 
2288       case s_http_host_v6_zone_start:
2289       case s_http_host_v6_zone:
2290         u->field_data[UF_HOST].len++;
2291         break;
2292 
2293       case s_http_host_port:
2294         if (s != s_http_host_port) {
2295           u->field_data[UF_PORT].off = p - buf;
2296           u->field_data[UF_PORT].len = 0;
2297           u->field_set |= (1 << UF_PORT);
2298         }
2299         u->field_data[UF_PORT].len++;
2300         break;
2301 
2302       case s_http_userinfo:
2303         if (s != s_http_userinfo) {
2304           u->field_data[UF_USERINFO].off = p - buf ;
2305           u->field_data[UF_USERINFO].len = 0;
2306           u->field_set |= (1 << UF_USERINFO);
2307         }
2308         u->field_data[UF_USERINFO].len++;
2309         break;
2310 
2311       default:
2312         break;
2313     }
2314     s = new_s;
2315   }
2316 
2317   /* Make sure we don't end somewhere unexpected */
2318   switch (s) {
2319     case s_http_host_start:
2320     case s_http_host_v6_start:
2321     case s_http_host_v6:
2322     case s_http_host_v6_zone_start:
2323     case s_http_host_v6_zone:
2324     case s_http_host_port_start:
2325     case s_http_userinfo:
2326     case s_http_userinfo_start:
2327       return 1;
2328     default:
2329       break;
2330   }
2331 
2332   return 0;
2333 }
2334 
2335 void
http_parser_url_init(struct http_parser_url * u)2336 http_parser_url_init(struct http_parser_url *u) {
2337   memset(u, 0, sizeof(*u));
2338 }
2339 
2340 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2341 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2342                       struct http_parser_url *u)
2343 {
2344   enum state s;
2345   const char *p;
2346   enum http_parser_url_fields uf, old_uf;
2347   int found_at = 0;
2348 
2349   u->port = u->field_set = 0;
2350   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2351   old_uf = UF_MAX;
2352 
2353   for (p = buf; p < buf + buflen; p++) {
2354     s = parse_url_char(s, *p);
2355 
2356     /* Figure out the next field that we're operating on */
2357     switch (s) {
2358       case s_dead:
2359         return 1;
2360 
2361       /* Skip delimeters */
2362       case s_req_schema_slash:
2363       case s_req_schema_slash_slash:
2364       case s_req_server_start:
2365       case s_req_query_string_start:
2366       case s_req_fragment_start:
2367         continue;
2368 
2369       case s_req_schema:
2370         uf = UF_SCHEMA;
2371         break;
2372 
2373       case s_req_server_with_at:
2374         found_at = 1;
2375 
2376       /* FALLTROUGH */
2377       case s_req_server:
2378         uf = UF_HOST;
2379         break;
2380 
2381       case s_req_path:
2382         uf = UF_PATH;
2383         break;
2384 
2385       case s_req_query_string:
2386         uf = UF_QUERY;
2387         break;
2388 
2389       case s_req_fragment:
2390         uf = UF_FRAGMENT;
2391         break;
2392 
2393       default:
2394         assert(!"Unexpected state");
2395         return 1;
2396     }
2397 
2398     /* Nothing's changed; soldier on */
2399     if (uf == old_uf) {
2400       u->field_data[uf].len++;
2401       continue;
2402     }
2403 
2404     u->field_data[uf].off = p - buf;
2405     u->field_data[uf].len = 1;
2406 
2407     u->field_set |= (1 << uf);
2408     old_uf = uf;
2409   }
2410 
2411   /* host must be present if there is a schema */
2412   /* parsing http:///toto will fail */
2413   if ((u->field_set & (1 << UF_SCHEMA)) &&
2414       (u->field_set & (1 << UF_HOST)) == 0) {
2415     return 1;
2416   }
2417 
2418   if (u->field_set & (1 << UF_HOST)) {
2419     if (http_parse_host(buf, u, found_at) != 0) {
2420       return 1;
2421     }
2422   }
2423 
2424   /* CONNECT requests can only contain "hostname:port" */
2425   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2426     return 1;
2427   }
2428 
2429   if (u->field_set & (1 << UF_PORT)) {
2430     /* Don't bother with endp; we've already validated the string */
2431     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2432 
2433     /* Ports have a max value of 2^16 */
2434     if (v > 0xffff) {
2435       return 1;
2436     }
2437 
2438     u->port = (uint16_t) v;
2439   }
2440 
2441   return 0;
2442 }
2443 
2444 void
http_parser_pause(http_parser * parser,int paused)2445 http_parser_pause(http_parser *parser, int paused) {
2446   /* Users should only be pausing/unpausing a parser that is not in an error
2447    * state. In non-debug builds, there's not much that we can do about this
2448    * other than ignore it.
2449    */
2450   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2451       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2452     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2453   } else {
2454     assert(0 && "Attempting to pause parser in error state");
2455   }
2456 }
2457 
2458 int
http_body_is_final(const struct http_parser * parser)2459 http_body_is_final(const struct http_parser *parser) {
2460     return parser->state == s_message_done;
2461 }
2462 
2463 unsigned long
http_parser_version(void)2464 http_parser_version(void) {
2465   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2466          HTTP_PARSER_VERSION_MINOR * 0x00100 |
2467          HTTP_PARSER_VERSION_PATCH * 0x00001;
2468 }
2469