1 /* Copyright Joyent, Inc. and other Node contributors.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21 #include "http_parser.h"
22 #include <assert.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include <limits.h>
27
/* Upper bound on the number of header bytes (start line included) accepted
 * for one message. COUNT_HEADER_SIZE() compares the running byte count
 * against it, and the header-field scan loop uses it to cap its look-ahead.
 * Initialized from HTTP_MAX_HEADER_SIZE.
 */
static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;

/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of a and b. NOTE: each argument may be evaluated twice. */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Element count of a true array (gives a wrong answer for pointers). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Test bit i of the byte-array bitmap a: byte (i >> 3), bit (i & 7).
 * Evaluates to 0 or 1.
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* a[i] when i is a valid index of array a, otherwise the fallback v. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
51
/* Record error code e on the parser together with the running header byte
 * count. Expects `parser` and `nread` to be in scope where it is expanded.
 */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on a local copy of the state (`p_state`);
 * RETURN() and the callback macros copy it back into parser->state.
 *
 * NOTE(review): UPDATE_STATE, RETURN and REEXECUTE already end in ';', so
 * writing `MACRO();` produces an additional empty statement. Keep that in
 * mind before using them as the body of an unbraced if/else.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) p_state = (enum state) (V);
/* Flush nread and the local state into the parser, then return V. */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0);
/* Jump to the `reexecute` label so the current byte is dispatched again
 * under the (possibly updated) state.
 */
#define REEXECUTE()                                                  \
  goto reexecute;                                                    \


/* Branch-prediction hints; plain pass-through when __GNUC__ is not defined. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
77
78
/* Run the notify callback FOR, returning ER if it fails.
 *
 * The local state is stored in parser->state before the call and read back
 * afterwards, so the callback sees the current state and may change it.
 * A non-zero callback result is recorded as HPE_CB_<FOR>. If no callback is
 * installed nothing happens. Must be expanded where `parser`, `settings`,
 * `p_state` and `nread` are in scope, and the parser must not already be in
 * an error state (asserted).
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* We either errored above or got paused; get out */             \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Run the notify callback FOR and consume the current byte */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Does nothing unless FOR##_mark is set. The callback receives the span
 * starting at FOR##_mark; state hand-over and error recording work as in
 * CALLBACK_NOTIFY_. The mark is reset to NULL once the callback has
 * succeeded or when no callback is installed; on the early `return (ER)`
 * path it is left as is.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* We either errored above or got paused; get out */           \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Run the data callback FOR and consume the current byte */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR and don't consume the current byte */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR to the current position p; non-destructive if the mark
 * is already set.
 */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
142
/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed max_header_size.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  max_header_size is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Mechanics: adds V to the local counter `nread`; once the counter exceeds
 * max_header_size the error HPE_HEADER_OVERFLOW is recorded and control
 * jumps to the `error` label of the enclosing function.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  nread += (uint32_t)(V);                                            \
  if (UNLIKELY(nread > max_header_size)) {                           \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
162
163
/* Header names and header-value tokens the parser recognizes.
 * They are spelled in lower case: while a header field is being matched,
 * each input byte is first folded through TOKEN() (which lowercases
 * letters) and then compared against these strings by index.
 */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"
172
173
/* Method names generated from HTTP_METHOD_MAP: the XX expansion keeps only
 * the third field of each map entry, stringified. The request-method
 * matcher indexes this table with parser->method.
 */
static const char *method_strings[] =
  {
#define XX(num, name, string) #string,
  HTTP_METHOD_MAP(XX)
#undef XX
  };
180
181
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by (unsigned char) byte value. A zero entry means "not a token
 * character"; a non-zero entry is the character itself, with 'A'-'Z' folded
 * to lower case. Space (32) maps to ' ' here although SP is a separator;
 * STRICT_TOKEN() filters it out again. Only the first 128 entries have
 * initializers, so bytes 128-255 are implicitly 0 (not token characters).
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
222
223
/* Hex digit lookup, indexed by (unsigned char) byte value: 0..15 for
 * '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are spelled out on purpose. Elements without an
 * initializer are zero-initialized, which would make every byte >= 0x80
 * look like the valid hex digit 0 instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xf0 */
  };
234
235
/* T(v) marks bitmap entries that are accepted only when the parser is NOT
 * built in strict mode: with HTTP_PARSER_STRICT non-zero it contributes 0,
 * otherwise it contributes v. It is undefined again right after the table.
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of characters accepted inside a URL, read through BIT_AT():
 * each initializer is one byte covering eight consecutive character codes,
 * with bit k (value 1 << k) standing for the k-th character named in the
 * comment above it. Cleared bits in the initialized rows: all control
 * characters, space, '#', '?' and DEL; tab (9) and form feed (12) are set
 * only through T(), i.e. in non-strict builds. Only 16 of the 32 bytes have
 * initializers, so the bits for codes 128-255 are implicitly 0.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T
278
/* States of the main message parser. The numeric order matters:
 * PARSING_HEADER() relies on every header-phase state sorting at or before
 * s_headers_done, so new enumerators must be added on the correct side.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* start line of a message whose type is not known yet / of a response */
  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_http_major,
  s_res_http_dot,
  s_res_http_minor,
  s_res_http_end,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_http_I,
  s_req_http_IC,
  s_req_http_major,
  s_req_http_dot,
  s_req_http_minor,
  s_req_http_end,
  s_req_line_almost_done,

  /* header fields and values */
  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};
360
361
/* Non-zero while `state` is a header-phase state, i.e. it sorts at or
 * before s_headers_done in enum state. The argument is parenthesized so
 * that a compound argument (for example a conditional expression) is
 * compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
363
364
/* Sub-state kept in parser->header_state while a header line is parsed.
 * It tracks whether the header is one the parser cares about.
 */
enum header_states {
  h_general = 0, /* any header without special handling */

  /* shared "c", "co", "con" prefix of connection / content-length */
  h_C,
  h_CO,
  h_CON,

  /* header name is still being compared against a known name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* set once a known header name has matched in full */
  h_connection,
  h_content_length,
  h_content_length_num,
  h_content_length_ws,
  h_transfer_encoding,
  h_upgrade,

  /* NOTE(review): the groups below look like header-value states; their
   * handling is in the value-parsing code, so confirm there.
   */
  h_matching_transfer_encoding_token_start,
  h_matching_transfer_encoding_chunked,
  h_matching_transfer_encoding_token,

  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
399
/* States for walking the userinfo / host / port part of a URL.
 * NOTE(review): the code that drives these states is not next to this
 * definition; the grouping below is inferred from the names.
 */
enum http_host_state {
  s_http_host_dead = 1,

  s_http_userinfo_start,
  s_http_userinfo,

  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,

  s_http_host_port_start,
  s_http_host_port
};
415
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parameter is parenthesized so that compound arguments keep
 * their meaning. NOTE: several of these evaluate their argument more than
 * once, so do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that rejects the space character, which the tokens[] table
 * itself lets through.
 */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict builds additionally accept ' ' in tokens, any byte with the
 * high bit set in URLs, and '_' in host names.
 */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is acceptable inside a header: CR, LF, horizontal
 * tab (9), a visible (printable) US-ASCII character or space, or
 * %x80-FF. Other control characters and DEL (127) are rejected.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* Initial state for the next message, chosen by the parser type. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
453
454
/* Strict-mode helpers.
 *
 * Strict build (HTTP_PARSER_STRICT non-zero):
 *   STRICT_CHECK(cond) records HPE_STRICT and jumps to the `error` label
 *   when cond is true; NEW_MESSAGE() yields the start state only if
 *   http_should_keep_alive(parser) says so, and s_dead otherwise.
 * Non-strict build:
 *   STRICT_CHECK(cond) expands to nothing, so cond is not even evaluated;
 *   NEW_MESSAGE() always yields the start state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
468
469
/* Map errno values to strings for human-readable output.
 *
 * The table is generated from HTTP_ERRNO_MAP: each (n, s) pair of the map
 * becomes one entry whose name is the literal "HPE_" followed by n and
 * whose description is s. The generator macro is undefined again right
 * after the table.
 */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static struct {
  const char *name;        /* symbolic name, e.g. "HPE_OK" */
  const char *description; /* human-readable explanation */
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN
479
480 int http_message_needs_eof(const http_parser *parser);
481
482 /* Our URL parser.
483 *
484 * This is designed to be shared by http_parser_execute() for URL validation,
485 * hence it has a state transition + byte-for-byte interface. In addition, it
486 * is meant to be embedded in http_parser_parse_url(), which does the dirty
487 * work of turning state transitions URL components for its API.
488 *
489 * This function should only be invoked with non-space characters. It is
490 * assumed that the caller cares about (and can detect) the transition between
491 * URL and non-URL states by looking for these.
492 */
493 static enum state
parse_url_char(enum state s,const char ch)494 parse_url_char(enum state s, const char ch)
495 {
496 if (ch == ' ' || ch == '\r' || ch == '\n') {
497 return s_dead;
498 }
499
500 #if HTTP_PARSER_STRICT
501 if (ch == '\t' || ch == '\f') {
502 return s_dead;
503 }
504 #endif
505
506 switch (s) {
507 case s_req_spaces_before_url:
508 /* Proxied requests are followed by scheme of an absolute URI (alpha).
509 * All methods except CONNECT are followed by '/' or '*'.
510 */
511
512 if (ch == '/' || ch == '*') {
513 return s_req_path;
514 }
515
516 if (IS_ALPHA(ch)) {
517 return s_req_schema;
518 }
519
520 break;
521
522 case s_req_schema:
523 if (IS_ALPHA(ch)) {
524 return s;
525 }
526
527 if (ch == ':') {
528 return s_req_schema_slash;
529 }
530
531 break;
532
533 case s_req_schema_slash:
534 if (ch == '/') {
535 return s_req_schema_slash_slash;
536 }
537
538 break;
539
540 case s_req_schema_slash_slash:
541 if (ch == '/') {
542 return s_req_server_start;
543 }
544
545 break;
546
547 case s_req_server_with_at:
548 if (ch == '@') {
549 return s_dead;
550 }
551
552 /* fall through */
553 case s_req_server_start:
554 case s_req_server:
555 if (ch == '/') {
556 return s_req_path;
557 }
558
559 if (ch == '?') {
560 return s_req_query_string_start;
561 }
562
563 if (ch == '@') {
564 return s_req_server_with_at;
565 }
566
567 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
568 return s_req_server;
569 }
570
571 break;
572
573 case s_req_path:
574 if (IS_URL_CHAR(ch)) {
575 return s;
576 }
577
578 switch (ch) {
579 case '?':
580 return s_req_query_string_start;
581
582 case '#':
583 return s_req_fragment_start;
584 }
585
586 break;
587
588 case s_req_query_string_start:
589 case s_req_query_string:
590 if (IS_URL_CHAR(ch)) {
591 return s_req_query_string;
592 }
593
594 switch (ch) {
595 case '?':
596 /* allow extra '?' in query string */
597 return s_req_query_string;
598
599 case '#':
600 return s_req_fragment_start;
601 }
602
603 break;
604
605 case s_req_fragment_start:
606 if (IS_URL_CHAR(ch)) {
607 return s_req_fragment;
608 }
609
610 switch (ch) {
611 case '?':
612 return s_req_fragment;
613
614 case '#':
615 return s;
616 }
617
618 break;
619
620 case s_req_fragment:
621 if (IS_URL_CHAR(ch)) {
622 return s;
623 }
624
625 switch (ch) {
626 case '?':
627 case '#':
628 return s;
629 }
630
631 break;
632
633 default:
634 break;
635 }
636
637 /* We should never fall out of the switch above unless there's an error */
638 return s_dead;
639 }
640
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)641 size_t http_parser_execute (http_parser *parser,
642 const http_parser_settings *settings,
643 const char *data,
644 size_t len)
645 {
646 char c, ch;
647 int8_t unhex_val;
648 const char *p = data;
649 const char *header_field_mark = 0;
650 const char *header_value_mark = 0;
651 const char *url_mark = 0;
652 const char *body_mark = 0;
653 const char *status_mark = 0;
654 enum state p_state = (enum state) parser->state;
655 const unsigned int lenient = parser->lenient_http_headers;
656 const unsigned int allow_chunked_length = parser->allow_chunked_length;
657
658 uint32_t nread = parser->nread;
659
660 /* We're in an error state. Don't bother doing anything. */
661 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
662 return 0;
663 }
664
665 if (len == 0) {
666 switch (CURRENT_STATE()) {
667 case s_body_identity_eof:
668 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
669 * we got paused.
670 */
671 CALLBACK_NOTIFY_NOADVANCE(message_complete);
672 return 0;
673
674 case s_dead:
675 case s_start_req_or_res:
676 case s_start_res:
677 case s_start_req:
678 return 0;
679
680 default:
681 SET_ERRNO(HPE_INVALID_EOF_STATE);
682 return 1;
683 }
684 }
685
686
687 if (CURRENT_STATE() == s_header_field)
688 header_field_mark = data;
689 if (CURRENT_STATE() == s_header_value)
690 header_value_mark = data;
691 switch (CURRENT_STATE()) {
692 case s_req_path:
693 case s_req_schema:
694 case s_req_schema_slash:
695 case s_req_schema_slash_slash:
696 case s_req_server_start:
697 case s_req_server:
698 case s_req_server_with_at:
699 case s_req_query_string_start:
700 case s_req_query_string:
701 case s_req_fragment_start:
702 case s_req_fragment:
703 url_mark = data;
704 break;
705 case s_res_status:
706 status_mark = data;
707 break;
708 default:
709 break;
710 }
711
712 for (p=data; p != data + len; p++) {
713 ch = *p;
714
715 if (PARSING_HEADER(CURRENT_STATE()))
716 COUNT_HEADER_SIZE(1);
717
718 reexecute:
719 switch (CURRENT_STATE()) {
720
721 case s_dead:
722 /* this state is used after a 'Connection: close' message
723 * the parser will error out if it reads another message
724 */
725 if (LIKELY(ch == CR || ch == LF))
726 break;
727
728 SET_ERRNO(HPE_CLOSED_CONNECTION);
729 goto error;
730
731 case s_start_req_or_res:
732 {
733 if (ch == CR || ch == LF)
734 break;
735 parser->flags = 0;
736 parser->uses_transfer_encoding = 0;
737 parser->content_length = ULLONG_MAX;
738
739 if (ch == 'H') {
740 UPDATE_STATE(s_res_or_resp_H);
741
742 CALLBACK_NOTIFY(message_begin);
743 } else {
744 parser->type = HTTP_REQUEST;
745 UPDATE_STATE(s_start_req);
746 REEXECUTE();
747 }
748
749 break;
750 }
751
752 case s_res_or_resp_H:
753 if (ch == 'T') {
754 parser->type = HTTP_RESPONSE;
755 UPDATE_STATE(s_res_HT);
756 } else {
757 if (UNLIKELY(ch != 'E')) {
758 SET_ERRNO(HPE_INVALID_CONSTANT);
759 goto error;
760 }
761
762 parser->type = HTTP_REQUEST;
763 parser->method = HTTP_HEAD;
764 parser->index = 2;
765 UPDATE_STATE(s_req_method);
766 }
767 break;
768
769 case s_start_res:
770 {
771 if (ch == CR || ch == LF)
772 break;
773 parser->flags = 0;
774 parser->uses_transfer_encoding = 0;
775 parser->content_length = ULLONG_MAX;
776
777 if (ch == 'H') {
778 UPDATE_STATE(s_res_H);
779 } else {
780 SET_ERRNO(HPE_INVALID_CONSTANT);
781 goto error;
782 }
783
784 CALLBACK_NOTIFY(message_begin);
785 break;
786 }
787
788 case s_res_H:
789 STRICT_CHECK(ch != 'T');
790 UPDATE_STATE(s_res_HT);
791 break;
792
793 case s_res_HT:
794 STRICT_CHECK(ch != 'T');
795 UPDATE_STATE(s_res_HTT);
796 break;
797
798 case s_res_HTT:
799 STRICT_CHECK(ch != 'P');
800 UPDATE_STATE(s_res_HTTP);
801 break;
802
803 case s_res_HTTP:
804 STRICT_CHECK(ch != '/');
805 UPDATE_STATE(s_res_http_major);
806 break;
807
808 case s_res_http_major:
809 if (UNLIKELY(!IS_NUM(ch))) {
810 SET_ERRNO(HPE_INVALID_VERSION);
811 goto error;
812 }
813
814 parser->http_major = ch - '0';
815 UPDATE_STATE(s_res_http_dot);
816 break;
817
818 case s_res_http_dot:
819 {
820 if (UNLIKELY(ch != '.')) {
821 SET_ERRNO(HPE_INVALID_VERSION);
822 goto error;
823 }
824
825 UPDATE_STATE(s_res_http_minor);
826 break;
827 }
828
829 case s_res_http_minor:
830 if (UNLIKELY(!IS_NUM(ch))) {
831 SET_ERRNO(HPE_INVALID_VERSION);
832 goto error;
833 }
834
835 parser->http_minor = ch - '0';
836 UPDATE_STATE(s_res_http_end);
837 break;
838
839 case s_res_http_end:
840 {
841 if (UNLIKELY(ch != ' ')) {
842 SET_ERRNO(HPE_INVALID_VERSION);
843 goto error;
844 }
845
846 UPDATE_STATE(s_res_first_status_code);
847 break;
848 }
849
850 case s_res_first_status_code:
851 {
852 if (!IS_NUM(ch)) {
853 if (ch == ' ') {
854 break;
855 }
856
857 SET_ERRNO(HPE_INVALID_STATUS);
858 goto error;
859 }
860 parser->status_code = ch - '0';
861 UPDATE_STATE(s_res_status_code);
862 break;
863 }
864
865 case s_res_status_code:
866 {
867 if (!IS_NUM(ch)) {
868 switch (ch) {
869 case ' ':
870 UPDATE_STATE(s_res_status_start);
871 break;
872 case CR:
873 case LF:
874 UPDATE_STATE(s_res_status_start);
875 REEXECUTE();
876 break;
877 default:
878 SET_ERRNO(HPE_INVALID_STATUS);
879 goto error;
880 }
881 break;
882 }
883
884 parser->status_code *= 10;
885 parser->status_code += ch - '0';
886
887 if (UNLIKELY(parser->status_code > 999)) {
888 SET_ERRNO(HPE_INVALID_STATUS);
889 goto error;
890 }
891
892 break;
893 }
894
895 case s_res_status_start:
896 {
897 MARK(status);
898 UPDATE_STATE(s_res_status);
899 parser->index = 0;
900
901 if (ch == CR || ch == LF)
902 REEXECUTE();
903
904 break;
905 }
906
907 case s_res_status:
908 if (ch == CR) {
909 UPDATE_STATE(s_res_line_almost_done);
910 CALLBACK_DATA(status);
911 break;
912 }
913
914 if (ch == LF) {
915 UPDATE_STATE(s_header_field_start);
916 CALLBACK_DATA(status);
917 break;
918 }
919
920 break;
921
922 case s_res_line_almost_done:
923 STRICT_CHECK(ch != LF);
924 UPDATE_STATE(s_header_field_start);
925 break;
926
927 case s_start_req:
928 {
929 if (ch == CR || ch == LF)
930 break;
931 parser->flags = 0;
932 parser->uses_transfer_encoding = 0;
933 parser->content_length = ULLONG_MAX;
934
935 if (UNLIKELY(!IS_ALPHA(ch))) {
936 SET_ERRNO(HPE_INVALID_METHOD);
937 goto error;
938 }
939
940 parser->method = (enum http_method) 0;
941 parser->index = 1;
942 switch (ch) {
943 case 'A': parser->method = HTTP_ACL; break;
944 case 'B': parser->method = HTTP_BIND; break;
945 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
946 case 'D': parser->method = HTTP_DELETE; break;
947 case 'G': parser->method = HTTP_GET; break;
948 case 'H': parser->method = HTTP_HEAD; break;
949 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
950 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
951 case 'N': parser->method = HTTP_NOTIFY; break;
952 case 'O': parser->method = HTTP_OPTIONS; break;
953 case 'P': parser->method = HTTP_POST;
954 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
955 break;
956 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
957 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
958 case 'T': parser->method = HTTP_TRACE; break;
959 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
960 default:
961 SET_ERRNO(HPE_INVALID_METHOD);
962 goto error;
963 }
964 UPDATE_STATE(s_req_method);
965
966 CALLBACK_NOTIFY(message_begin);
967
968 break;
969 }
970
971 case s_req_method:
972 {
973 const char *matcher;
974 if (UNLIKELY(ch == '\0')) {
975 SET_ERRNO(HPE_INVALID_METHOD);
976 goto error;
977 }
978
979 matcher = method_strings[parser->method];
980 if (ch == ' ' && matcher[parser->index] == '\0') {
981 UPDATE_STATE(s_req_spaces_before_url);
982 } else if (ch == matcher[parser->index]) {
983 ; /* nada */
984 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
985
986 switch (parser->method << 16 | parser->index << 8 | ch) {
987 #define XX(meth, pos, ch, new_meth) \
988 case (HTTP_##meth << 16 | pos << 8 | ch): \
989 parser->method = HTTP_##new_meth; break;
990
991 XX(POST, 1, 'U', PUT)
992 XX(POST, 1, 'A', PATCH)
993 XX(POST, 1, 'R', PROPFIND)
994 XX(PUT, 2, 'R', PURGE)
995 XX(CONNECT, 1, 'H', CHECKOUT)
996 XX(CONNECT, 2, 'P', COPY)
997 XX(MKCOL, 1, 'O', MOVE)
998 XX(MKCOL, 1, 'E', MERGE)
999 XX(MKCOL, 1, '-', MSEARCH)
1000 XX(MKCOL, 2, 'A', MKACTIVITY)
1001 XX(MKCOL, 3, 'A', MKCALENDAR)
1002 XX(SUBSCRIBE, 1, 'E', SEARCH)
1003 XX(SUBSCRIBE, 1, 'O', SOURCE)
1004 XX(REPORT, 2, 'B', REBIND)
1005 XX(PROPFIND, 4, 'P', PROPPATCH)
1006 XX(LOCK, 1, 'I', LINK)
1007 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1008 XX(UNLOCK, 2, 'B', UNBIND)
1009 XX(UNLOCK, 3, 'I', UNLINK)
1010 #undef XX
1011 default:
1012 SET_ERRNO(HPE_INVALID_METHOD);
1013 goto error;
1014 }
1015 } else {
1016 SET_ERRNO(HPE_INVALID_METHOD);
1017 goto error;
1018 }
1019
1020 ++parser->index;
1021 break;
1022 }
1023
1024 case s_req_spaces_before_url:
1025 {
1026 if (ch == ' ') break;
1027
1028 MARK(url);
1029 if (parser->method == HTTP_CONNECT) {
1030 UPDATE_STATE(s_req_server_start);
1031 }
1032
1033 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1034 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1035 SET_ERRNO(HPE_INVALID_URL);
1036 goto error;
1037 }
1038
1039 break;
1040 }
1041
1042 case s_req_schema:
1043 case s_req_schema_slash:
1044 case s_req_schema_slash_slash:
1045 case s_req_server_start:
1046 {
1047 switch (ch) {
1048 /* No whitespace allowed here */
1049 case ' ':
1050 case CR:
1051 case LF:
1052 SET_ERRNO(HPE_INVALID_URL);
1053 goto error;
1054 default:
1055 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1056 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1057 SET_ERRNO(HPE_INVALID_URL);
1058 goto error;
1059 }
1060 }
1061
1062 break;
1063 }
1064
1065 case s_req_server:
1066 case s_req_server_with_at:
1067 case s_req_path:
1068 case s_req_query_string_start:
1069 case s_req_query_string:
1070 case s_req_fragment_start:
1071 case s_req_fragment:
1072 {
1073 switch (ch) {
1074 case ' ':
1075 UPDATE_STATE(s_req_http_start);
1076 CALLBACK_DATA(url);
1077 break;
1078 case CR:
1079 case LF:
1080 parser->http_major = 0;
1081 parser->http_minor = 9;
1082 UPDATE_STATE((ch == CR) ?
1083 s_req_line_almost_done :
1084 s_header_field_start);
1085 CALLBACK_DATA(url);
1086 break;
1087 default:
1088 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1090 SET_ERRNO(HPE_INVALID_URL);
1091 goto error;
1092 }
1093 }
1094 break;
1095 }
1096
1097 case s_req_http_start:
1098 switch (ch) {
1099 case ' ':
1100 break;
1101 case 'H':
1102 UPDATE_STATE(s_req_http_H);
1103 break;
1104 case 'I':
1105 if (parser->method == HTTP_SOURCE) {
1106 UPDATE_STATE(s_req_http_I);
1107 break;
1108 }
1109 /* fall through */
1110 default:
1111 SET_ERRNO(HPE_INVALID_CONSTANT);
1112 goto error;
1113 }
1114 break;
1115
1116 case s_req_http_H:
1117 STRICT_CHECK(ch != 'T');
1118 UPDATE_STATE(s_req_http_HT);
1119 break;
1120
1121 case s_req_http_HT:
1122 STRICT_CHECK(ch != 'T');
1123 UPDATE_STATE(s_req_http_HTT);
1124 break;
1125
1126 case s_req_http_HTT:
1127 STRICT_CHECK(ch != 'P');
1128 UPDATE_STATE(s_req_http_HTTP);
1129 break;
1130
1131 case s_req_http_I:
1132 STRICT_CHECK(ch != 'C');
1133 UPDATE_STATE(s_req_http_IC);
1134 break;
1135
1136 case s_req_http_IC:
1137 STRICT_CHECK(ch != 'E');
1138 UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1139 break;
1140
1141 case s_req_http_HTTP:
1142 STRICT_CHECK(ch != '/');
1143 UPDATE_STATE(s_req_http_major);
1144 break;
1145
1146 case s_req_http_major:
1147 if (UNLIKELY(!IS_NUM(ch))) {
1148 SET_ERRNO(HPE_INVALID_VERSION);
1149 goto error;
1150 }
1151
1152 parser->http_major = ch - '0';
1153 UPDATE_STATE(s_req_http_dot);
1154 break;
1155
1156 case s_req_http_dot:
1157 {
1158 if (UNLIKELY(ch != '.')) {
1159 SET_ERRNO(HPE_INVALID_VERSION);
1160 goto error;
1161 }
1162
1163 UPDATE_STATE(s_req_http_minor);
1164 break;
1165 }
1166
1167 case s_req_http_minor:
1168 if (UNLIKELY(!IS_NUM(ch))) {
1169 SET_ERRNO(HPE_INVALID_VERSION);
1170 goto error;
1171 }
1172
1173 parser->http_minor = ch - '0';
1174 UPDATE_STATE(s_req_http_end);
1175 break;
1176
1177 case s_req_http_end:
1178 {
1179 if (ch == CR) {
1180 UPDATE_STATE(s_req_line_almost_done);
1181 break;
1182 }
1183
1184 if (ch == LF) {
1185 UPDATE_STATE(s_header_field_start);
1186 break;
1187 }
1188
1189 SET_ERRNO(HPE_INVALID_VERSION);
1190 goto error;
1191 break;
1192 }
1193
1194 /* end of request line */
1195 case s_req_line_almost_done:
1196 {
1197 if (UNLIKELY(ch != LF)) {
1198 SET_ERRNO(HPE_LF_EXPECTED);
1199 goto error;
1200 }
1201
1202 UPDATE_STATE(s_header_field_start);
1203 break;
1204 }
1205
1206 case s_header_field_start:
1207 {
1208 if (ch == CR) {
1209 UPDATE_STATE(s_headers_almost_done);
1210 break;
1211 }
1212
1213 if (ch == LF) {
1214 /* they might be just sending \n instead of \r\n so this would be
1215 * the second \n to denote the end of headers*/
1216 UPDATE_STATE(s_headers_almost_done);
1217 REEXECUTE();
1218 }
1219
1220 c = TOKEN(ch);
1221
1222 if (UNLIKELY(!c)) {
1223 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1224 goto error;
1225 }
1226
1227 MARK(header_field);
1228
1229 parser->index = 0;
1230 UPDATE_STATE(s_header_field);
1231
1232 switch (c) {
1233 case 'c':
1234 parser->header_state = h_C;
1235 break;
1236
1237 case 'p':
1238 parser->header_state = h_matching_proxy_connection;
1239 break;
1240
1241 case 't':
1242 parser->header_state = h_matching_transfer_encoding;
1243 break;
1244
1245 case 'u':
1246 parser->header_state = h_matching_upgrade;
1247 break;
1248
1249 default:
1250 parser->header_state = h_general;
1251 break;
1252 }
1253 break;
1254 }
1255
1256 case s_header_field:
1257 {
1258 const char* start = p;
1259 for (; p != data + len; p++) {
1260 ch = *p;
1261 c = TOKEN(ch);
1262
1263 if (!c)
1264 break;
1265
1266 switch (parser->header_state) {
1267 case h_general: {
1268 size_t left = data + len - p;
1269 const char* pe = p + MIN(left, max_header_size);
1270 while (p+1 < pe && TOKEN(p[1])) {
1271 p++;
1272 }
1273 break;
1274 }
1275
1276 case h_C:
1277 parser->index++;
1278 parser->header_state = (c == 'o' ? h_CO : h_general);
1279 break;
1280
1281 case h_CO:
1282 parser->index++;
1283 parser->header_state = (c == 'n' ? h_CON : h_general);
1284 break;
1285
1286 case h_CON:
1287 parser->index++;
1288 switch (c) {
1289 case 'n':
1290 parser->header_state = h_matching_connection;
1291 break;
1292 case 't':
1293 parser->header_state = h_matching_content_length;
1294 break;
1295 default:
1296 parser->header_state = h_general;
1297 break;
1298 }
1299 break;
1300
1301 /* connection */
1302
1303 case h_matching_connection:
1304 parser->index++;
1305 if (parser->index > sizeof(CONNECTION)-1
1306 || c != CONNECTION[parser->index]) {
1307 parser->header_state = h_general;
1308 } else if (parser->index == sizeof(CONNECTION)-2) {
1309 parser->header_state = h_connection;
1310 }
1311 break;
1312
1313 /* proxy-connection */
1314
1315 case h_matching_proxy_connection:
1316 parser->index++;
1317 if (parser->index > sizeof(PROXY_CONNECTION)-1
1318 || c != PROXY_CONNECTION[parser->index]) {
1319 parser->header_state = h_general;
1320 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1321 parser->header_state = h_connection;
1322 }
1323 break;
1324
1325 /* content-length */
1326
1327 case h_matching_content_length:
1328 parser->index++;
1329 if (parser->index > sizeof(CONTENT_LENGTH)-1
1330 || c != CONTENT_LENGTH[parser->index]) {
1331 parser->header_state = h_general;
1332 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1333 parser->header_state = h_content_length;
1334 }
1335 break;
1336
1337 /* transfer-encoding */
1338
1339 case h_matching_transfer_encoding:
1340 parser->index++;
1341 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1342 || c != TRANSFER_ENCODING[parser->index]) {
1343 parser->header_state = h_general;
1344 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1345 parser->header_state = h_transfer_encoding;
1346 parser->uses_transfer_encoding = 1;
1347
1348 /* Multiple `Transfer-Encoding` headers should be treated as
1349 * one, but with values separate by a comma.
1350 *
1351 * See: https://tools.ietf.org/html/rfc7230#section-3.2.2
1352 */
1353 parser->flags &= ~F_CHUNKED;
1354 }
1355 break;
1356
1357 /* upgrade */
1358
1359 case h_matching_upgrade:
1360 parser->index++;
1361 if (parser->index > sizeof(UPGRADE)-1
1362 || c != UPGRADE[parser->index]) {
1363 parser->header_state = h_general;
1364 } else if (parser->index == sizeof(UPGRADE)-2) {
1365 parser->header_state = h_upgrade;
1366 }
1367 break;
1368
1369 case h_connection:
1370 case h_content_length:
1371 case h_transfer_encoding:
1372 case h_upgrade:
1373 if (ch != ' ') parser->header_state = h_general;
1374 break;
1375
1376 default:
1377 assert(0 && "Unknown header_state");
1378 break;
1379 }
1380 }
1381
1382 if (p == data + len) {
1383 --p;
1384 COUNT_HEADER_SIZE(p - start);
1385 break;
1386 }
1387
1388 COUNT_HEADER_SIZE(p - start);
1389
1390 if (ch == ':') {
1391 UPDATE_STATE(s_header_value_discard_ws);
1392 CALLBACK_DATA(header_field);
1393 break;
1394 }
1395
1396 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1397 goto error;
1398 }
1399
1400 case s_header_value_discard_ws:
1401 if (ch == ' ' || ch == '\t') break;
1402
1403 if (ch == CR) {
1404 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1405 break;
1406 }
1407
1408 if (ch == LF) {
1409 UPDATE_STATE(s_header_value_discard_lws);
1410 break;
1411 }
1412
1413 /* fall through */
1414
1415 case s_header_value_start:
1416 {
1417 MARK(header_value);
1418
1419 UPDATE_STATE(s_header_value);
1420 parser->index = 0;
1421
1422 c = LOWER(ch);
1423
1424 switch (parser->header_state) {
1425 case h_upgrade:
1426 parser->flags |= F_UPGRADE;
1427 parser->header_state = h_general;
1428 break;
1429
1430 case h_transfer_encoding:
1431 /* looking for 'Transfer-Encoding: chunked' */
1432 if ('c' == c) {
1433 parser->header_state = h_matching_transfer_encoding_chunked;
1434 } else {
1435 parser->header_state = h_matching_transfer_encoding_token;
1436 }
1437 break;
1438
1439 /* Multi-value `Transfer-Encoding` header */
1440 case h_matching_transfer_encoding_token_start:
1441 break;
1442
1443 case h_content_length:
1444 if (UNLIKELY(!IS_NUM(ch))) {
1445 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446 goto error;
1447 }
1448
1449 if (parser->flags & F_CONTENTLENGTH) {
1450 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1451 goto error;
1452 }
1453
1454 parser->flags |= F_CONTENTLENGTH;
1455 parser->content_length = ch - '0';
1456 parser->header_state = h_content_length_num;
1457 break;
1458
1459 /* when obsolete line folding is encountered for content length
1460 * continue to the s_header_value state */
1461 case h_content_length_ws:
1462 break;
1463
1464 case h_connection:
1465 /* looking for 'Connection: keep-alive' */
1466 if (c == 'k') {
1467 parser->header_state = h_matching_connection_keep_alive;
1468 /* looking for 'Connection: close' */
1469 } else if (c == 'c') {
1470 parser->header_state = h_matching_connection_close;
1471 } else if (c == 'u') {
1472 parser->header_state = h_matching_connection_upgrade;
1473 } else {
1474 parser->header_state = h_matching_connection_token;
1475 }
1476 break;
1477
1478 /* Multi-value `Connection` header */
1479 case h_matching_connection_token_start:
1480 break;
1481
1482 default:
1483 parser->header_state = h_general;
1484 break;
1485 }
1486 break;
1487 }
1488
1489 case s_header_value:
1490 {
1491 const char* start = p;
1492 enum header_states h_state = (enum header_states) parser->header_state;
1493 for (; p != data + len; p++) {
1494 ch = *p;
1495 if (ch == CR) {
1496 UPDATE_STATE(s_header_almost_done);
1497 parser->header_state = h_state;
1498 CALLBACK_DATA(header_value);
1499 break;
1500 }
1501
1502 if (ch == LF) {
1503 UPDATE_STATE(s_header_almost_done);
1504 COUNT_HEADER_SIZE(p - start);
1505 parser->header_state = h_state;
1506 CALLBACK_DATA_NOADVANCE(header_value);
1507 REEXECUTE();
1508 }
1509
1510 if (!lenient && !IS_HEADER_CHAR(ch)) {
1511 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1512 goto error;
1513 }
1514
1515 c = LOWER(ch);
1516
1517 switch (h_state) {
1518 case h_general:
1519 {
1520 size_t left = data + len - p;
1521 const char* pe = p + MIN(left, max_header_size);
1522
1523 for (; p != pe; p++) {
1524 ch = *p;
1525 if (ch == CR || ch == LF) {
1526 --p;
1527 break;
1528 }
1529 if (!lenient && !IS_HEADER_CHAR(ch)) {
1530 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1531 goto error;
1532 }
1533 }
1534 if (p == data + len)
1535 --p;
1536 break;
1537 }
1538
1539 case h_connection:
1540 case h_transfer_encoding:
1541 assert(0 && "Shouldn't get here.");
1542 break;
1543
1544 case h_content_length:
1545 if (ch == ' ') break;
1546 h_state = h_content_length_num;
1547 /* fall through */
1548
1549 case h_content_length_num:
1550 {
1551 uint64_t t;
1552
1553 if (ch == ' ') {
1554 h_state = h_content_length_ws;
1555 break;
1556 }
1557
1558 if (UNLIKELY(!IS_NUM(ch))) {
1559 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1560 parser->header_state = h_state;
1561 goto error;
1562 }
1563
1564 t = parser->content_length;
1565 t *= 10;
1566 t += ch - '0';
1567
1568 /* Overflow? Test against a conservative limit for simplicity. */
1569 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1570 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1571 parser->header_state = h_state;
1572 goto error;
1573 }
1574
1575 parser->content_length = t;
1576 break;
1577 }
1578
1579 case h_content_length_ws:
1580 if (ch == ' ') break;
1581 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1582 parser->header_state = h_state;
1583 goto error;
1584
1585 /* Transfer-Encoding: chunked */
1586 case h_matching_transfer_encoding_token_start:
1587 /* looking for 'Transfer-Encoding: chunked' */
1588 if ('c' == c) {
1589 h_state = h_matching_transfer_encoding_chunked;
1590 } else if (STRICT_TOKEN(c)) {
1591 /* TODO(indutny): similar code below does this, but why?
1592 * At the very least it seems to be inconsistent given that
1593 * h_matching_transfer_encoding_token does not check for
1594 * `STRICT_TOKEN`
1595 */
1596 h_state = h_matching_transfer_encoding_token;
1597 } else if (c == ' ' || c == '\t') {
1598 /* Skip lws */
1599 } else {
1600 h_state = h_general;
1601 }
1602 break;
1603
1604 case h_matching_transfer_encoding_chunked:
1605 parser->index++;
1606 if (parser->index > sizeof(CHUNKED)-1
1607 || c != CHUNKED[parser->index]) {
1608 h_state = h_matching_transfer_encoding_token;
1609 } else if (parser->index == sizeof(CHUNKED)-2) {
1610 h_state = h_transfer_encoding_chunked;
1611 }
1612 break;
1613
1614 case h_matching_transfer_encoding_token:
1615 if (ch == ',') {
1616 h_state = h_matching_transfer_encoding_token_start;
1617 parser->index = 0;
1618 }
1619 break;
1620
1621 case h_matching_connection_token_start:
1622 /* looking for 'Connection: keep-alive' */
1623 if (c == 'k') {
1624 h_state = h_matching_connection_keep_alive;
1625 /* looking for 'Connection: close' */
1626 } else if (c == 'c') {
1627 h_state = h_matching_connection_close;
1628 } else if (c == 'u') {
1629 h_state = h_matching_connection_upgrade;
1630 } else if (STRICT_TOKEN(c)) {
1631 h_state = h_matching_connection_token;
1632 } else if (c == ' ' || c == '\t') {
1633 /* Skip lws */
1634 } else {
1635 h_state = h_general;
1636 }
1637 break;
1638
1639 /* looking for 'Connection: keep-alive' */
1640 case h_matching_connection_keep_alive:
1641 parser->index++;
1642 if (parser->index > sizeof(KEEP_ALIVE)-1
1643 || c != KEEP_ALIVE[parser->index]) {
1644 h_state = h_matching_connection_token;
1645 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1646 h_state = h_connection_keep_alive;
1647 }
1648 break;
1649
1650 /* looking for 'Connection: close' */
1651 case h_matching_connection_close:
1652 parser->index++;
1653 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1654 h_state = h_matching_connection_token;
1655 } else if (parser->index == sizeof(CLOSE)-2) {
1656 h_state = h_connection_close;
1657 }
1658 break;
1659
1660 /* looking for 'Connection: upgrade' */
1661 case h_matching_connection_upgrade:
1662 parser->index++;
1663 if (parser->index > sizeof(UPGRADE) - 1 ||
1664 c != UPGRADE[parser->index]) {
1665 h_state = h_matching_connection_token;
1666 } else if (parser->index == sizeof(UPGRADE)-2) {
1667 h_state = h_connection_upgrade;
1668 }
1669 break;
1670
1671 case h_matching_connection_token:
1672 if (ch == ',') {
1673 h_state = h_matching_connection_token_start;
1674 parser->index = 0;
1675 }
1676 break;
1677
1678 case h_transfer_encoding_chunked:
1679 if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1680 break;
1681
1682 case h_connection_keep_alive:
1683 case h_connection_close:
1684 case h_connection_upgrade:
1685 if (ch == ',') {
1686 if (h_state == h_connection_keep_alive) {
1687 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1688 } else if (h_state == h_connection_close) {
1689 parser->flags |= F_CONNECTION_CLOSE;
1690 } else if (h_state == h_connection_upgrade) {
1691 parser->flags |= F_CONNECTION_UPGRADE;
1692 }
1693 h_state = h_matching_connection_token_start;
1694 parser->index = 0;
1695 } else if (ch != ' ') {
1696 h_state = h_matching_connection_token;
1697 }
1698 break;
1699
1700 default:
1701 UPDATE_STATE(s_header_value);
1702 h_state = h_general;
1703 break;
1704 }
1705 }
1706 parser->header_state = h_state;
1707
1708 if (p == data + len)
1709 --p;
1710
1711 COUNT_HEADER_SIZE(p - start);
1712 break;
1713 }
1714
1715 case s_header_almost_done:
1716 {
1717 if (UNLIKELY(ch != LF)) {
1718 SET_ERRNO(HPE_LF_EXPECTED);
1719 goto error;
1720 }
1721
1722 UPDATE_STATE(s_header_value_lws);
1723 break;
1724 }
1725
1726 case s_header_value_lws:
1727 {
1728 if (ch == ' ' || ch == '\t') {
1729 if (parser->header_state == h_content_length_num) {
1730 /* treat obsolete line folding as space */
1731 parser->header_state = h_content_length_ws;
1732 }
1733 UPDATE_STATE(s_header_value_start);
1734 REEXECUTE();
1735 }
1736
1737 /* finished the header */
1738 switch (parser->header_state) {
1739 case h_connection_keep_alive:
1740 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1741 break;
1742 case h_connection_close:
1743 parser->flags |= F_CONNECTION_CLOSE;
1744 break;
1745 case h_transfer_encoding_chunked:
1746 parser->flags |= F_CHUNKED;
1747 break;
1748 case h_connection_upgrade:
1749 parser->flags |= F_CONNECTION_UPGRADE;
1750 break;
1751 default:
1752 break;
1753 }
1754
1755 UPDATE_STATE(s_header_field_start);
1756 REEXECUTE();
1757 }
1758
1759 case s_header_value_discard_ws_almost_done:
1760 {
1761 STRICT_CHECK(ch != LF);
1762 UPDATE_STATE(s_header_value_discard_lws);
1763 break;
1764 }
1765
1766 case s_header_value_discard_lws:
1767 {
1768 if (ch == ' ' || ch == '\t') {
1769 UPDATE_STATE(s_header_value_discard_ws);
1770 break;
1771 } else {
1772 switch (parser->header_state) {
1773 case h_connection_keep_alive:
1774 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1775 break;
1776 case h_connection_close:
1777 parser->flags |= F_CONNECTION_CLOSE;
1778 break;
1779 case h_connection_upgrade:
1780 parser->flags |= F_CONNECTION_UPGRADE;
1781 break;
1782 case h_transfer_encoding_chunked:
1783 parser->flags |= F_CHUNKED;
1784 break;
1785 case h_content_length:
1786 /* do not allow empty content length */
1787 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788 goto error;
1789 break;
1790 default:
1791 break;
1792 }
1793
1794 /* header value was empty */
1795 MARK(header_value);
1796 UPDATE_STATE(s_header_field_start);
1797 CALLBACK_DATA_NOADVANCE(header_value);
1798 REEXECUTE();
1799 }
1800 }
1801
1802 case s_headers_almost_done:
1803 {
1804 STRICT_CHECK(ch != LF);
1805
1806 if (parser->flags & F_TRAILING) {
1807 /* End of a chunked request */
1808 UPDATE_STATE(s_message_done);
1809 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1810 REEXECUTE();
1811 }
1812
1813 /* Cannot use transfer-encoding and a content-length header together
1814 per the HTTP specification. (RFC 7230 Section 3.3.3) */
1815 if ((parser->uses_transfer_encoding == 1) &&
1816 (parser->flags & F_CONTENTLENGTH)) {
1817 /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1818 * not `chunked` or allow_length_with_encoding is set
1819 */
1820 if (parser->flags & F_CHUNKED) {
1821 if (!allow_chunked_length) {
1822 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1823 goto error;
1824 }
1825 } else if (!lenient) {
1826 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1827 goto error;
1828 }
1829 }
1830
1831 UPDATE_STATE(s_headers_done);
1832
1833 /* Set this here so that on_headers_complete() callbacks can see it */
1834 if ((parser->flags & F_UPGRADE) &&
1835 (parser->flags & F_CONNECTION_UPGRADE)) {
1836 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1837 * mandatory only when it is a 101 Switching Protocols response,
1838 * otherwise it is purely informational, to announce support.
1839 */
1840 parser->upgrade =
1841 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1842 } else {
1843 parser->upgrade = (parser->method == HTTP_CONNECT);
1844 }
1845
1846 /* Here we call the headers_complete callback. This is somewhat
1847 * different than other callbacks because if the user returns 1, we
1848 * will interpret that as saying that this message has no body. This
1849 * is needed for the annoying case of recieving a response to a HEAD
1850 * request.
1851 *
1852 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1853 * we have to simulate it by handling a change in errno below.
1854 */
1855 if (settings->on_headers_complete) {
1856 switch (settings->on_headers_complete(parser)) {
1857 case 0:
1858 break;
1859
1860 case 2:
1861 parser->upgrade = 1;
1862
1863 /* fall through */
1864 case 1:
1865 parser->flags |= F_SKIPBODY;
1866 break;
1867
1868 default:
1869 SET_ERRNO(HPE_CB_headers_complete);
1870 RETURN(p - data); /* Error */
1871 }
1872 }
1873
1874 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1875 RETURN(p - data);
1876 }
1877
1878 REEXECUTE();
1879 }
1880
1881 case s_headers_done:
1882 {
1883 int hasBody;
1884 STRICT_CHECK(ch != LF);
1885
1886 parser->nread = 0;
1887 nread = 0;
1888
1889 hasBody = parser->flags & F_CHUNKED ||
1890 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1891 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1892 (parser->flags & F_SKIPBODY) || !hasBody)) {
1893 /* Exit, the rest of the message is in a different protocol. */
1894 UPDATE_STATE(NEW_MESSAGE());
1895 CALLBACK_NOTIFY(message_complete);
1896 RETURN((p - data) + 1);
1897 }
1898
1899 if (parser->flags & F_SKIPBODY) {
1900 UPDATE_STATE(NEW_MESSAGE());
1901 CALLBACK_NOTIFY(message_complete);
1902 } else if (parser->flags & F_CHUNKED) {
1903 /* chunked encoding - ignore Content-Length header,
1904 * prepare for a chunk */
1905 UPDATE_STATE(s_chunk_size_start);
1906 } else if (parser->uses_transfer_encoding == 1) {
1907 if (parser->type == HTTP_REQUEST && !lenient) {
1908 /* RFC 7230 3.3.3 */
1909
1910 /* If a Transfer-Encoding header field
1911 * is present in a request and the chunked transfer coding is not
1912 * the final encoding, the message body length cannot be determined
1913 * reliably; the server MUST respond with the 400 (Bad Request)
1914 * status code and then close the connection.
1915 */
1916 SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1917 RETURN(p - data); /* Error */
1918 } else {
1919 /* RFC 7230 3.3.3 */
1920
1921 /* If a Transfer-Encoding header field is present in a response and
1922 * the chunked transfer coding is not the final encoding, the
1923 * message body length is determined by reading the connection until
1924 * it is closed by the server.
1925 */
1926 UPDATE_STATE(s_body_identity_eof);
1927 }
1928 } else {
1929 if (parser->content_length == 0) {
1930 /* Content-Length header given but zero: Content-Length: 0\r\n */
1931 UPDATE_STATE(NEW_MESSAGE());
1932 CALLBACK_NOTIFY(message_complete);
1933 } else if (parser->content_length != ULLONG_MAX) {
1934 /* Content-Length header given and non-zero */
1935 UPDATE_STATE(s_body_identity);
1936 } else {
1937 if (!http_message_needs_eof(parser)) {
1938 /* Assume content-length 0 - read the next */
1939 UPDATE_STATE(NEW_MESSAGE());
1940 CALLBACK_NOTIFY(message_complete);
1941 } else {
1942 /* Read body until EOF */
1943 UPDATE_STATE(s_body_identity_eof);
1944 }
1945 }
1946 }
1947
1948 break;
1949 }
1950
1951 case s_body_identity:
1952 {
1953 uint64_t to_read = MIN(parser->content_length,
1954 (uint64_t) ((data + len) - p));
1955
1956 assert(parser->content_length != 0
1957 && parser->content_length != ULLONG_MAX);
1958
1959 /* The difference between advancing content_length and p is because
1960 * the latter will automaticaly advance on the next loop iteration.
1961 * Further, if content_length ends up at 0, we want to see the last
1962 * byte again for our message complete callback.
1963 */
1964 MARK(body);
1965 parser->content_length -= to_read;
1966 p += to_read - 1;
1967
1968 if (parser->content_length == 0) {
1969 UPDATE_STATE(s_message_done);
1970
1971 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1972 *
1973 * The alternative to doing this is to wait for the next byte to
1974 * trigger the data callback, just as in every other case. The
1975 * problem with this is that this makes it difficult for the test
1976 * harness to distinguish between complete-on-EOF and
1977 * complete-on-length. It's not clear that this distinction is
1978 * important for applications, but let's keep it for now.
1979 */
1980 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1981 REEXECUTE();
1982 }
1983
1984 break;
1985 }
1986
1987 /* read until EOF */
1988 case s_body_identity_eof:
1989 MARK(body);
1990 p = data + len - 1;
1991
1992 break;
1993
1994 case s_message_done:
1995 UPDATE_STATE(NEW_MESSAGE());
1996 CALLBACK_NOTIFY(message_complete);
1997 if (parser->upgrade) {
1998 /* Exit, the rest of the message is in a different protocol. */
1999 RETURN((p - data) + 1);
2000 }
2001 break;
2002
2003 case s_chunk_size_start:
2004 {
2005 assert(nread == 1);
2006 assert(parser->flags & F_CHUNKED);
2007
2008 unhex_val = unhex[(unsigned char)ch];
2009 if (UNLIKELY(unhex_val == -1)) {
2010 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2011 goto error;
2012 }
2013
2014 parser->content_length = unhex_val;
2015 UPDATE_STATE(s_chunk_size);
2016 break;
2017 }
2018
2019 case s_chunk_size:
2020 {
2021 uint64_t t;
2022
2023 assert(parser->flags & F_CHUNKED);
2024
2025 if (ch == CR) {
2026 UPDATE_STATE(s_chunk_size_almost_done);
2027 break;
2028 }
2029
2030 unhex_val = unhex[(unsigned char)ch];
2031
2032 if (unhex_val == -1) {
2033 if (ch == ';' || ch == ' ') {
2034 UPDATE_STATE(s_chunk_parameters);
2035 break;
2036 }
2037
2038 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2039 goto error;
2040 }
2041
2042 t = parser->content_length;
2043 t *= 16;
2044 t += unhex_val;
2045
2046 /* Overflow? Test against a conservative limit for simplicity. */
2047 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2048 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2049 goto error;
2050 }
2051
2052 parser->content_length = t;
2053 break;
2054 }
2055
2056 case s_chunk_parameters:
2057 {
2058 assert(parser->flags & F_CHUNKED);
2059 /* just ignore this shit. TODO check for overflow */
2060 if (ch == CR) {
2061 UPDATE_STATE(s_chunk_size_almost_done);
2062 break;
2063 }
2064 break;
2065 }
2066
2067 case s_chunk_size_almost_done:
2068 {
2069 assert(parser->flags & F_CHUNKED);
2070 STRICT_CHECK(ch != LF);
2071
2072 parser->nread = 0;
2073 nread = 0;
2074
2075 if (parser->content_length == 0) {
2076 parser->flags |= F_TRAILING;
2077 UPDATE_STATE(s_header_field_start);
2078 } else {
2079 UPDATE_STATE(s_chunk_data);
2080 }
2081 CALLBACK_NOTIFY(chunk_header);
2082 break;
2083 }
2084
2085 case s_chunk_data:
2086 {
2087 uint64_t to_read = MIN(parser->content_length,
2088 (uint64_t) ((data + len) - p));
2089
2090 assert(parser->flags & F_CHUNKED);
2091 assert(parser->content_length != 0
2092 && parser->content_length != ULLONG_MAX);
2093
2094 /* See the explanation in s_body_identity for why the content
2095 * length and data pointers are managed this way.
2096 */
2097 MARK(body);
2098 parser->content_length -= to_read;
2099 p += to_read - 1;
2100
2101 if (parser->content_length == 0) {
2102 UPDATE_STATE(s_chunk_data_almost_done);
2103 }
2104
2105 break;
2106 }
2107
2108 case s_chunk_data_almost_done:
2109 assert(parser->flags & F_CHUNKED);
2110 assert(parser->content_length == 0);
2111 STRICT_CHECK(ch != CR);
2112 UPDATE_STATE(s_chunk_data_done);
2113 CALLBACK_DATA(body);
2114 break;
2115
2116 case s_chunk_data_done:
2117 assert(parser->flags & F_CHUNKED);
2118 STRICT_CHECK(ch != LF);
2119 parser->nread = 0;
2120 nread = 0;
2121 UPDATE_STATE(s_chunk_size_start);
2122 CALLBACK_NOTIFY(chunk_complete);
2123 break;
2124
2125 default:
2126 assert(0 && "unhandled state");
2127 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2128 goto error;
2129 }
2130 }
2131
2132 /* Run callbacks for any marks that we have leftover after we ran out of
2133 * bytes. There should be at most one of these set, so it's OK to invoke
2134 * them in series (unset marks will not result in callbacks).
2135 *
2136 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2137 * overflowed 'data' and this allows us to correct for the off-by-one that
2138 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2139 * value that's in-bounds).
2140 */
2141
2142 assert(((header_field_mark ? 1 : 0) +
2143 (header_value_mark ? 1 : 0) +
2144 (url_mark ? 1 : 0) +
2145 (body_mark ? 1 : 0) +
2146 (status_mark ? 1 : 0)) <= 1);
2147
2148 CALLBACK_DATA_NOADVANCE(header_field);
2149 CALLBACK_DATA_NOADVANCE(header_value);
2150 CALLBACK_DATA_NOADVANCE(url);
2151 CALLBACK_DATA_NOADVANCE(body);
2152 CALLBACK_DATA_NOADVANCE(status);
2153
2154 RETURN(len);
2155
2156 error:
2157 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2158 SET_ERRNO(HPE_UNKNOWN);
2159 }
2160
2161 RETURN(p - data);
2162 }
2163
2164
2165 /* Does the parser need to see an EOF to find the end of the message? */
2166 int
http_message_needs_eof(const http_parser * parser)2167 http_message_needs_eof (const http_parser *parser)
2168 {
2169 if (parser->type == HTTP_REQUEST) {
2170 return 0;
2171 }
2172
2173 /* See RFC 2616 section 4.4 */
2174 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2175 parser->status_code == 204 || /* No Content */
2176 parser->status_code == 304 || /* Not Modified */
2177 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2178 return 0;
2179 }
2180
2181 /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2182 if ((parser->uses_transfer_encoding == 1) &&
2183 (parser->flags & F_CHUNKED) == 0) {
2184 return 1;
2185 }
2186
2187 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2188 return 0;
2189 }
2190
2191 return 1;
2192 }
2193
2194
2195 int
http_should_keep_alive(const http_parser * parser)2196 http_should_keep_alive (const http_parser *parser)
2197 {
2198 if (parser->http_major > 0 && parser->http_minor > 0) {
2199 /* HTTP/1.1 */
2200 if (parser->flags & F_CONNECTION_CLOSE) {
2201 return 0;
2202 }
2203 } else {
2204 /* HTTP/1.0 or earlier */
2205 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2206 return 0;
2207 }
2208 }
2209
2210 return !http_message_needs_eof(parser);
2211 }
2212
2213
2214 const char *
http_method_str(enum http_method m)2215 http_method_str (enum http_method m)
2216 {
2217 return ELEM_AT(method_strings, m, "<unknown>");
2218 }
2219
2220 const char *
http_status_str(enum http_status s)2221 http_status_str (enum http_status s)
2222 {
2223 switch (s) {
2224 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2225 HTTP_STATUS_MAP(XX)
2226 #undef XX
2227 default: return "<unknown>";
2228 }
2229 }
2230
2231 void
http_parser_init(http_parser * parser,enum http_parser_type t)2232 http_parser_init (http_parser *parser, enum http_parser_type t)
2233 {
2234 void *data = parser->data; /* preserve application data */
2235 memset(parser, 0, sizeof(*parser));
2236 parser->data = data;
2237 parser->type = t;
2238 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2239 parser->http_errno = HPE_OK;
2240 }
2241
2242 void
http_parser_settings_init(http_parser_settings * settings)2243 http_parser_settings_init(http_parser_settings *settings)
2244 {
2245 memset(settings, 0, sizeof(*settings));
2246 }
2247
2248 const char *
http_errno_name(enum http_errno err)2249 http_errno_name(enum http_errno err) {
2250 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2251 return http_strerror_tab[err].name;
2252 }
2253
2254 const char *
http_errno_description(enum http_errno err)2255 http_errno_description(enum http_errno err) {
2256 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2257 return http_strerror_tab[err].description;
2258 }
2259
2260 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2261 http_parse_host_char(enum http_host_state s, const char ch) {
2262 switch(s) {
2263 case s_http_userinfo:
2264 case s_http_userinfo_start:
2265 if (ch == '@') {
2266 return s_http_host_start;
2267 }
2268
2269 if (IS_USERINFO_CHAR(ch)) {
2270 return s_http_userinfo;
2271 }
2272 break;
2273
2274 case s_http_host_start:
2275 if (ch == '[') {
2276 return s_http_host_v6_start;
2277 }
2278
2279 if (IS_HOST_CHAR(ch)) {
2280 return s_http_host;
2281 }
2282
2283 break;
2284
2285 case s_http_host:
2286 if (IS_HOST_CHAR(ch)) {
2287 return s_http_host;
2288 }
2289
2290 /* fall through */
2291 case s_http_host_v6_end:
2292 if (ch == ':') {
2293 return s_http_host_port_start;
2294 }
2295
2296 break;
2297
2298 case s_http_host_v6:
2299 if (ch == ']') {
2300 return s_http_host_v6_end;
2301 }
2302
2303 /* fall through */
2304 case s_http_host_v6_start:
2305 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2306 return s_http_host_v6;
2307 }
2308
2309 if (s == s_http_host_v6 && ch == '%') {
2310 return s_http_host_v6_zone_start;
2311 }
2312 break;
2313
2314 case s_http_host_v6_zone:
2315 if (ch == ']') {
2316 return s_http_host_v6_end;
2317 }
2318
2319 /* fall through */
2320 case s_http_host_v6_zone_start:
2321 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2322 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2323 ch == '~') {
2324 return s_http_host_v6_zone;
2325 }
2326 break;
2327
2328 case s_http_host_port:
2329 case s_http_host_port_start:
2330 if (IS_NUM(ch)) {
2331 return s_http_host_port;
2332 }
2333
2334 break;
2335
2336 default:
2337 break;
2338 }
2339 return s_http_host_dead;
2340 }
2341
2342 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2343 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2344 enum http_host_state s;
2345
2346 const char *p;
2347 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2348
2349 assert(u->field_set & (1 << UF_HOST));
2350
2351 u->field_data[UF_HOST].len = 0;
2352
2353 s = found_at ? s_http_userinfo_start : s_http_host_start;
2354
2355 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2356 enum http_host_state new_s = http_parse_host_char(s, *p);
2357
2358 if (new_s == s_http_host_dead) {
2359 return 1;
2360 }
2361
2362 switch(new_s) {
2363 case s_http_host:
2364 if (s != s_http_host) {
2365 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2366 }
2367 u->field_data[UF_HOST].len++;
2368 break;
2369
2370 case s_http_host_v6:
2371 if (s != s_http_host_v6) {
2372 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2373 }
2374 u->field_data[UF_HOST].len++;
2375 break;
2376
2377 case s_http_host_v6_zone_start:
2378 case s_http_host_v6_zone:
2379 u->field_data[UF_HOST].len++;
2380 break;
2381
2382 case s_http_host_port:
2383 if (s != s_http_host_port) {
2384 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2385 u->field_data[UF_PORT].len = 0;
2386 u->field_set |= (1 << UF_PORT);
2387 }
2388 u->field_data[UF_PORT].len++;
2389 break;
2390
2391 case s_http_userinfo:
2392 if (s != s_http_userinfo) {
2393 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2394 u->field_data[UF_USERINFO].len = 0;
2395 u->field_set |= (1 << UF_USERINFO);
2396 }
2397 u->field_data[UF_USERINFO].len++;
2398 break;
2399
2400 default:
2401 break;
2402 }
2403 s = new_s;
2404 }
2405
2406 /* Make sure we don't end somewhere unexpected */
2407 switch (s) {
2408 case s_http_host_start:
2409 case s_http_host_v6_start:
2410 case s_http_host_v6:
2411 case s_http_host_v6_zone_start:
2412 case s_http_host_v6_zone:
2413 case s_http_host_port_start:
2414 case s_http_userinfo:
2415 case s_http_userinfo_start:
2416 return 1;
2417 default:
2418 break;
2419 }
2420
2421 return 0;
2422 }
2423
2424 void
http_parser_url_init(struct http_parser_url * u)2425 http_parser_url_init(struct http_parser_url *u) {
2426 memset(u, 0, sizeof(*u));
2427 }
2428
2429 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2430 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2431 struct http_parser_url *u)
2432 {
2433 enum state s;
2434 const char *p;
2435 enum http_parser_url_fields uf, old_uf;
2436 int found_at = 0;
2437
2438 if (buflen == 0) {
2439 return 1;
2440 }
2441
2442 u->port = u->field_set = 0;
2443 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2444 old_uf = UF_MAX;
2445
2446 for (p = buf; p < buf + buflen; p++) {
2447 s = parse_url_char(s, *p);
2448
2449 /* Figure out the next field that we're operating on */
2450 switch (s) {
2451 case s_dead:
2452 return 1;
2453
2454 /* Skip delimeters */
2455 case s_req_schema_slash:
2456 case s_req_schema_slash_slash:
2457 case s_req_server_start:
2458 case s_req_query_string_start:
2459 case s_req_fragment_start:
2460 continue;
2461
2462 case s_req_schema:
2463 uf = UF_SCHEMA;
2464 break;
2465
2466 case s_req_server_with_at:
2467 found_at = 1;
2468
2469 /* fall through */
2470 case s_req_server:
2471 uf = UF_HOST;
2472 break;
2473
2474 case s_req_path:
2475 uf = UF_PATH;
2476 break;
2477
2478 case s_req_query_string:
2479 uf = UF_QUERY;
2480 break;
2481
2482 case s_req_fragment:
2483 uf = UF_FRAGMENT;
2484 break;
2485
2486 default:
2487 assert(!"Unexpected state");
2488 return 1;
2489 }
2490
2491 /* Nothing's changed; soldier on */
2492 if (uf == old_uf) {
2493 u->field_data[uf].len++;
2494 continue;
2495 }
2496
2497 u->field_data[uf].off = (uint16_t)(p - buf);
2498 u->field_data[uf].len = 1;
2499
2500 u->field_set |= (1 << uf);
2501 old_uf = uf;
2502 }
2503
2504 /* host must be present if there is a schema */
2505 /* parsing http:///toto will fail */
2506 if ((u->field_set & (1 << UF_SCHEMA)) &&
2507 (u->field_set & (1 << UF_HOST)) == 0) {
2508 return 1;
2509 }
2510
2511 if (u->field_set & (1 << UF_HOST)) {
2512 if (http_parse_host(buf, u, found_at) != 0) {
2513 return 1;
2514 }
2515 }
2516
2517 /* CONNECT requests can only contain "hostname:port" */
2518 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2519 return 1;
2520 }
2521
2522 if (u->field_set & (1 << UF_PORT)) {
2523 uint16_t off;
2524 uint16_t len;
2525 const char* p;
2526 const char* end;
2527 unsigned long v;
2528
2529 off = u->field_data[UF_PORT].off;
2530 len = u->field_data[UF_PORT].len;
2531 end = buf + off + len;
2532
2533 /* NOTE: The characters are already validated and are in the [0-9] range */
2534 assert((size_t) (off + len) <= buflen && "Port number overflow");
2535 v = 0;
2536 for (p = buf + off; p < end; p++) {
2537 v *= 10;
2538 v += *p - '0';
2539
2540 /* Ports have a max value of 2^16 */
2541 if (v > 0xffff) {
2542 return 1;
2543 }
2544 }
2545
2546 u->port = (uint16_t) v;
2547 }
2548
2549 return 0;
2550 }
2551
2552 void
http_parser_pause(http_parser * parser,int paused)2553 http_parser_pause(http_parser *parser, int paused) {
2554 /* Users should only be pausing/unpausing a parser that is not in an error
2555 * state. In non-debug builds, there's not much that we can do about this
2556 * other than ignore it.
2557 */
2558 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2559 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2560 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2561 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2562 } else {
2563 assert(0 && "Attempting to pause parser in error state");
2564 }
2565 }
2566
2567 int
http_body_is_final(const struct http_parser * parser)2568 http_body_is_final(const struct http_parser *parser) {
2569 return parser->state == s_message_done;
2570 }
2571
2572 unsigned long
http_parser_version(void)2573 http_parser_version(void) {
2574 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2575 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2576 HTTP_PARSER_VERSION_PATCH * 0x00001;
2577 }
2578
2579 void
http_parser_set_max_header_size(uint32_t size)2580 http_parser_set_max_header_size(uint32_t size) {
2581 max_header_size = size;
2582 }
2583