1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h>
31
/* Small generic helpers. Each one is wrapped in #ifndef, so a definition that
 * is already in scope when this file is compiled takes precedence.
 */
32 #ifndef ULLONG_MAX
33 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34 #endif
35
/* Smaller of a and b. The selected argument is evaluated twice. */
36 #ifndef MIN
37 # define MIN(a,b) ((a) < (b) ? (a) : (b))
38 #endif
39
/* Element count of a. Only meaningful when a is a real array, not a pointer. */
40 #ifndef ARRAY_SIZE
41 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42 #endif
43
/* 1 if bit i is set in the byte array a, 0 otherwise. Bit i lives in byte
 * i >> 3 at position i & 7; the !! collapses the masked value to 0 or 1.
 */
44 #ifndef BIT_AT
45 # define BIT_AT(a, i) \
46 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47 (1 << ((unsigned int) (i) & 7))))
48 #endif
49
/* a[i] when i (converted to unsigned int) is below ARRAY_SIZE(a), else v. */
50 #ifndef ELEM_AT
51 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52 #endif
53
/* Helpers for the parser's execute function. Their expansions refer to a
 * `parser` pointer, a local `p_state` variable and a `reexecute` label, so
 * they can only be used where those names exist.
 *
 * SET_ERRNO      stores e in parser->http_errno.
 * CURRENT_STATE  reads the local state copy.
 * UPDATE_STATE   overwrites the local state copy (cast to enum state).
 * RETURN         writes the local state back to parser->state and returns V.
 * REEXECUTE      jumps to the `reexecute` label.
 *
 * NOTE(review): UPDATE_STATE, RETURN and REEXECUTE already end in ';', so an
 * invocation written as `MACRO(...);` leaves an extra empty statement. That
 * is harmless inside braces but would break an unbraced if/else.
 */
54 #define SET_ERRNO(e) \
55 do { \
56 parser->http_errno = (e); \
57 } while(0)
58
59 #define CURRENT_STATE() p_state
60 #define UPDATE_STATE(V) p_state = (enum state) (V);
61 #define RETURN(V) \
62 do { \
63 parser->state = CURRENT_STATE(); \
64 return (V); \
65 } while (0);
66 #define REEXECUTE() \
67 goto reexecute; \
68
69
/* Branch hints: __builtin_expect when __GNUC__ is defined, otherwise the
 * plain expression.
 */
70 #ifdef __GNUC__
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
73 #else
74 # define LIKELY(X) (X)
75 # define UNLIKELY(X) (X)
76 #endif
77
78
/* Callback plumbing. These macros expand inside the execute function and use
 * its names directly: `parser`, `settings`, the cursor `p`, the buffer start
 * `data` and the per-callback `<name>_mark` pointers.
 *
 * Before a callback is invoked the local state is written to parser->state,
 * and it is read back afterwards, so a state change made by the callback is
 * kept. A non-zero callback result sets HPE_CB_<name>. If the error code is
 * not HPE_OK once the callback has returned, the macro returns ER from the
 * enclosing function.
 */
79 /* Run the notify callback FOR, returning ER if it fails */
80 #define CALLBACK_NOTIFY_(FOR, ER) \
81 do { \
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
83 \
84 if (LIKELY(settings->on_##FOR)) { \
85 parser->state = CURRENT_STATE(); \
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87 SET_ERRNO(HPE_CB_##FOR); \
88 } \
89 UPDATE_STATE(parser->state); \
90 \
91 /* We either errored above or got paused; get out */ \
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
93 return (ER); \
94 } \
95 } \
96 } while (0)
97
98 /* Run the notify callback FOR and consume the current byte */
99 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
100
101 /* Run the notify callback FOR and don't consume the current byte */
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
103
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
/* Nothing happens when FOR##_mark is unset. When it is set, the mark is reset
 * to NULL afterwards even if no callback is registered for FOR.
 */
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
106 do { \
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
108 \
109 if (FOR##_mark) { \
110 if (LIKELY(settings->on_##FOR)) { \
111 parser->state = CURRENT_STATE(); \
112 if (UNLIKELY(0 != \
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114 SET_ERRNO(HPE_CB_##FOR); \
115 } \
116 UPDATE_STATE(parser->state); \
117 \
118 /* We either errored above or got paused; get out */ \
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
120 return (ER); \
121 } \
122 } \
123 FOR##_mark = NULL; \
124 } \
125 } while (0)
126
127 /* Run the data callback FOR and consume the current byte */
/* The span handed to the callback runs from the mark up to, but excluding, p. */
128 #define CALLBACK_DATA(FOR) \
129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
130
131 /* Run the data callback FOR and don't consume the current byte */
132 #define CALLBACK_DATA_NOADVANCE(FOR) \
133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
134
135 /* Set the mark FOR; non-destructive if mark is already set */
136 #define MARK(FOR) \
137 do { \
138 if (!FOR##_mark) { \
139 FOR##_mark = p; \
140 } \
141 } while (0)
142
143 /* Don't allow the total size of the HTTP headers (including the status
144 * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
145 * embedders against denial-of-service attacks where the attacker feeds
146 * us a never-ending header that the embedder keeps buffering.
147 *
148 * This check is arguably the responsibility of embedders but we're doing
149 * it on the embedder's behalf because most won't bother and this way we
150 * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
151 * than any reasonable request or response so this should never affect
152 * day-to-day operation.
153 */
/* Adds V to parser->nread. Once the running total exceeds
 * HTTP_MAX_HEADER_SIZE it sets HPE_HEADER_OVERFLOW and jumps to the `error`
 * label, which must exist in the expanding function.
 */
154 #define COUNT_HEADER_SIZE(V) \
155 do { \
156 parser->nread += (V); \
157 if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
159 goto error; \
160 } \
161 } while (0)
162
163
/* Lower-case spellings of the header names and header values the parser
 * looks for. The header-field matching code compares them against input that
 * has already been lower-cased through the tokens table.
 * NOTE(review): only the CONNECTION / PROXY_CONNECTION comparisons are
 * visible from here; confirm the others are used the same way.
 */
164 #define PROXY_CONNECTION "proxy-connection"
165 #define CONNECTION "connection"
166 #define CONTENT_LENGTH "content-length"
167 #define TRANSFER_ENCODING "transfer-encoding"
168 #define UPGRADE "upgrade"
169 #define CHUNKED "chunked"
170 #define KEEP_ALIVE "keep-alive"
171 #define CLOSE "close"
172
173
/* Method name strings, generated by expanding HTTP_METHOD_MAP (declared
 * outside this file) with an XX that stringifies its third argument. The
 * request-method state indexes this array with parser->method.
 */
174 static const char *method_strings[] =
175 {
176 #define XX(num, name, string) #string,
177 HTTP_METHOD_MAP(XX)
178 #undef XX
179 };
180
181
182 /* Tokens as defined by rfc 2616. Also lowercases them.
183 * token = 1*<any CHAR except CTLs or separators>
184 * separators = "(" | ")" | "<" | ">" | "@"
185 * | "," | ";" | ":" | "\" | <">
186 * | "/" | "[" | "]" | "?" | "="
187 * | "{" | "}" | SP | HT
188 */
/* Indexed by byte value. A zero entry means "not a token character"; a
 * non-zero entry is the character itself, with 'A'-'Z' mapped to 'a'-'z'.
 * Only entries 0-127 have explicit initializers; entries 128-255 are
 * implicitly zero, so bytes with the high bit set are never tokens.
 */
189 static const char tokens[256] = {
190 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
191 0, 0, 0, 0, 0, 0, 0, 0,
192 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
193 0, 0, 0, 0, 0, 0, 0, 0,
194 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
195 0, 0, 0, 0, 0, 0, 0, 0,
196 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
197 0, 0, 0, 0, 0, 0, 0, 0,
198 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
199 0, '!', 0, '#', '$', '%', '&', '\'',
200 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
201 0, 0, '*', '+', 0, '-', '.', 0,
202 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
203 '0', '1', '2', '3', '4', '5', '6', '7',
204 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
205 '8', '9', 0, 0, 0, 0, 0, 0,
206 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
207 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
208 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
209 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
210 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
211 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
212 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
213 'x', 'y', 'z', 0, 0, 0, '^', '_',
214 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
215 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
216 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
217 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
218 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
219 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
220 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
221 'x', 'y', 'z', 0, '|', 0, '~', 0 };
222
223
/* Hex digit lookup, indexed by byte value: yields 0..15 for '0'-'9', 'A'-'F'
 * and 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are written out on purpose. With initializers for only the
 * first 128 entries, the entries for bytes 0x80-0xFF are implicitly zero and
 * therefore indistinguishable from the valid digit '0' for any caller that
 * tests the result against -1.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 - 0x0f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 - 0x1f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 - 0x2f */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* '0' - '9'   */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 'A' - 'F'   */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 - 0x5f */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 'a' - 'f'   */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 - 0x7f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 - 0x8f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 - 0x9f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 - 0xaf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 - 0xbf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 - 0xcf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 - 0xdf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 - 0xef */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 - 0xff */
  };
234
235
/* T(v) marks bits that are set only in non-strict builds: it expands to v
 * when HTTP_PARSER_STRICT is 0 and to 0 otherwise.
 */
236 #if HTTP_PARSER_STRICT
237 # define T(v) 0
238 #else
239 # define T(v) v
240 #endif
241
242
/* Bitmap with one bit per 7-bit character code: the bit for code i is bit
 * (i & 7) of byte (i >> 3), which is the layout BIT_AT reads. Each row below
 * is one byte covering eight consecutive codes; a 0 in a column clears that
 * character's bit. Cleared here: all control characters (except HT and NP/FF
 * in non-strict builds, via T), SP, '#', '?' and DEL.
 */
243 static const uint8_t normal_url_char[32] = {
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
276
277 #undef T
278
/* States of the main parser state machine. The numeric order is significant:
 * everything up to and including s_headers_done counts as "parsing headers"
 * (see the note before s_chunk_data), so new states must be inserted on the
 * correct side of that boundary.
 */
279 enum state
280 { s_dead = 1 /* important that this is > 0 */
281
/* Response status line (or not yet known whether request or response). */
282 , s_start_req_or_res
283 , s_res_or_resp_H
284 , s_start_res
285 , s_res_H
286 , s_res_HT
287 , s_res_HTT
288 , s_res_HTTP
289 , s_res_first_http_major
290 , s_res_http_major
291 , s_res_first_http_minor
292 , s_res_http_minor
293 , s_res_first_status_code
294 , s_res_status_code
295 , s_res_status_start
296 , s_res_status
297 , s_res_line_almost_done
298
/* Request line: method, URL pieces, HTTP version. */
299 , s_start_req
300
301 , s_req_method
302 , s_req_spaces_before_url
303 , s_req_schema
304 , s_req_schema_slash
305 , s_req_schema_slash_slash
306 , s_req_server_start
307 , s_req_server
308 , s_req_server_with_at
309 , s_req_path
310 , s_req_query_string_start
311 , s_req_query_string
312 , s_req_fragment_start
313 , s_req_fragment
314 , s_req_http_start
315 , s_req_http_H
316 , s_req_http_HT
317 , s_req_http_HTT
318 , s_req_http_HTTP
319 , s_req_first_http_major
320 , s_req_http_major
321 , s_req_first_http_minor
322 , s_req_http_minor
323 , s_req_line_almost_done
324
/* Header fields and values. */
325 , s_header_field_start
326 , s_header_field
327 , s_header_value_discard_ws
328 , s_header_value_discard_ws_almost_done
329 , s_header_value_discard_lws
330 , s_header_value_start
331 , s_header_value
332 , s_header_value_lws
333
334 , s_header_almost_done
335
/* Chunk-size line; deliberately placed before s_headers_done. */
336 , s_chunk_size_start
337 , s_chunk_size
338 , s_chunk_parameters
339 , s_chunk_size_almost_done
340
341 , s_headers_almost_done
342 , s_headers_done
343
344 /* Important: 's_headers_done' must be the last 'header' state. All
345 * states beyond this must be 'body' states. It is used for overflow
346 * checking. See the PARSING_HEADER() macro.
347 */
348
349 , s_chunk_data
350 , s_chunk_data_almost_done
351 , s_chunk_data_done
352
353 , s_body_identity
354 , s_body_identity_eof
355
356 , s_message_done
357 };
358
359
/* True while `state` is one of the header-section states, i.e. not past
 * s_headers_done in enum order. The argument is parenthesized so that a
 * compound expression (e.g. `a | b`) is compared as a whole instead of being
 * split by operator precedence.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
361
362
/* Sub-states kept in parser->header_state while a header line is scanned.
 * They let the parser recognize a handful of special headers without
 * buffering the field name.
 */
363 enum header_states
364 { h_general = 0
/* Shared prefix "c", "co", "con" of "connection" and "content-length". */
365 , h_C
366 , h_CO
367 , h_CON
368
/* Field name is still being compared against one specific header name. */
369 , h_matching_connection
370 , h_matching_proxy_connection
371 , h_matching_content_length
372 , h_matching_transfer_encoding
373 , h_matching_upgrade
374
/* Field name matched completely. */
375 , h_connection
376 , h_content_length
377 , h_transfer_encoding
378 , h_upgrade
379
/* NOTE(review): the remaining states are not used in the code visible here;
 * by name they track matching of header *values* (chunked, keep-alive, close,
 * upgrade) and the final outcome of that matching - confirm against the
 * header-value handling.
 */
380 , h_matching_transfer_encoding_chunked
381 , h_matching_connection_token_start
382 , h_matching_connection_keep_alive
383 , h_matching_connection_close
384 , h_matching_connection_upgrade
385 , h_matching_connection_token
386
387 , h_transfer_encoding_chunked
388 , h_connection_keep_alive
389 , h_connection_close
390 , h_connection_upgrade
391 };
392
/* NOTE(review): states for parsing the authority part of a URL (userinfo,
 * host, bracketed IPv6 literal with optional zone, port), judging by the
 * names; no user of this enum is visible here - confirm against the host
 * parsing code.
 */
393 enum http_host_state
394 {
395 s_http_host_dead = 1
396 , s_http_userinfo_start
397 , s_http_userinfo
398 , s_http_host_start
399 , s_http_host_v6_start
400 , s_http_host
401 , s_http_host_v6
402 , s_http_host_v6_end
403 , s_http_host_v6_zone_start
404 , s_http_host_v6_zone
405 , s_http_host_port_start
406 , s_http_host_port
407 };
408
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesizes its argument, so compound expressions such as
 * `cond ? a : b` or `c + n` are classified as a whole and the
 * `(unsigned char)` casts apply to the complete argument (keeping table
 * indexes within 0..255). Most of these macros evaluate their argument more
 * than once: do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never accepts SP, regardless of build mode. */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

/* Strict builds: plain table lookups. Non-strict builds additionally accept
 * SP as a token character, any byte with the high bit set as a URL character
 * and '_' as a host character.
 */
#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is acceptable inside a header: CR, LF, HT (9), or any
 * byte above 31 other than DEL (127). This covers SP, the visible US-ASCII
 * characters and %x80-FF.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* Initial state for a new message, chosen from the parser type. Expands to an
 * expression that reads `parser->type`, so a `parser` pointer must be in
 * scope.
 */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
446
447
/* Strict-mode hooks.
 *
 * STRICT_CHECK(cond): in strict builds, sets HPE_STRICT and jumps to the
 * `error` label when cond is true; in non-strict builds it expands to
 * nothing, so cond is not evaluated at all.
 *
 * NEW_MESSAGE(): state to enter for the next message. Strict builds fall to
 * s_dead unless http_should_keep_alive(parser) is true; non-strict builds
 * always restart at start_state.
 */
448 #if HTTP_PARSER_STRICT
449 # define STRICT_CHECK(cond) \
450 do { \
451 if (cond) { \
452 SET_ERRNO(HPE_STRICT); \
453 goto error; \
454 } \
455 } while (0)
456 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
457 #else
458 # define STRICT_CHECK(cond)
459 # define NEW_MESSAGE() start_state
460 #endif
461
462
463 /* Map errno values to strings for human-readable output */
/* Each entry pairs the symbolic name ("HPE_" followed by the stringified
 * first generator argument) with a description string. The entries come from
 * expanding HTTP_ERRNO_MAP, which is declared outside this file.
 * NOTE(review): presumably indexed by the http_errno value - confirm at the
 * lookup site.
 */
464 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
465 static struct {
466 const char *name;
467 const char *description;
468 } http_strerror_tab[] = {
469 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
470 };
471 #undef HTTP_STRERROR_GEN
472
/* Forward declaration; the definition is not in the part of the file visible
 * here.
 */
473 int http_message_needs_eof(const http_parser *parser);
474
475 /* Our URL parser.
476 *
477 * This is designed to be shared by http_parser_execute() for URL validation,
478 * hence it has a state transition + byte-for-byte interface. In addition, it
479 * is meant to be embedded in http_parser_parse_url(), which does the dirty
480 * work of turning state transitions URL components for its API.
481 *
482 * This function should only be invoked with non-space characters. It is
483 * assumed that the caller cares about (and can detect) the transition between
484 * URL and non-URL states by looking for these.
485 */
486 static enum state
parse_url_char(enum state s,const char ch)487 parse_url_char(enum state s, const char ch)
488 {
489 if (ch == ' ' || ch == '\r' || ch == '\n') {
490 return s_dead;
491 }
492
493 #if HTTP_PARSER_STRICT
494 if (ch == '\t' || ch == '\f') {
495 return s_dead;
496 }
497 #endif
498
499 switch (s) {
500 case s_req_spaces_before_url:
501 /* Proxied requests are followed by scheme of an absolute URI (alpha).
502 * All methods except CONNECT are followed by '/' or '*'.
503 */
504
505 if (ch == '/' || ch == '*') {
506 return s_req_path;
507 }
508
509 if (IS_ALPHA(ch)) {
510 return s_req_schema;
511 }
512
513 break;
514
515 case s_req_schema:
516 if (IS_ALPHA(ch)) {
517 return s;
518 }
519
520 if (ch == ':') {
521 return s_req_schema_slash;
522 }
523
524 break;
525
526 case s_req_schema_slash:
527 if (ch == '/') {
528 return s_req_schema_slash_slash;
529 }
530
531 break;
532
533 case s_req_schema_slash_slash:
534 if (ch == '/') {
535 return s_req_server_start;
536 }
537
538 break;
539
540 case s_req_server_with_at:
541 if (ch == '@') {
542 return s_dead;
543 }
544
545 /* FALLTHROUGH */
546 case s_req_server_start:
547 case s_req_server:
548 if (ch == '/') {
549 return s_req_path;
550 }
551
552 if (ch == '?') {
553 return s_req_query_string_start;
554 }
555
556 if (ch == '@') {
557 return s_req_server_with_at;
558 }
559
560 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
561 return s_req_server;
562 }
563
564 break;
565
566 case s_req_path:
567 if (IS_URL_CHAR(ch)) {
568 return s;
569 }
570
571 switch (ch) {
572 case '?':
573 return s_req_query_string_start;
574
575 case '#':
576 return s_req_fragment_start;
577 }
578
579 break;
580
581 case s_req_query_string_start:
582 case s_req_query_string:
583 if (IS_URL_CHAR(ch)) {
584 return s_req_query_string;
585 }
586
587 switch (ch) {
588 case '?':
589 /* allow extra '?' in query string */
590 return s_req_query_string;
591
592 case '#':
593 return s_req_fragment_start;
594 }
595
596 break;
597
598 case s_req_fragment_start:
599 if (IS_URL_CHAR(ch)) {
600 return s_req_fragment;
601 }
602
603 switch (ch) {
604 case '?':
605 return s_req_fragment;
606
607 case '#':
608 return s;
609 }
610
611 break;
612
613 case s_req_fragment:
614 if (IS_URL_CHAR(ch)) {
615 return s;
616 }
617
618 switch (ch) {
619 case '?':
620 case '#':
621 return s;
622 }
623
624 break;
625
626 default:
627 break;
628 }
629
630 /* We should never fall out of the switch above unless there's an error */
631 return s_dead;
632 }
633
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)634 size_t http_parser_execute (http_parser *parser,
635 const http_parser_settings *settings,
636 const char *data,
637 size_t len)
638 {
639 char c, ch;
640 int8_t unhex_val;
641 const char *p = data;
642 const char *header_field_mark = 0;
643 const char *header_value_mark = 0;
644 const char *url_mark = 0;
645 const char *body_mark = 0;
646 const char *status_mark = 0;
647 enum state p_state = (enum state) parser->state;
648 const unsigned int lenient = parser->lenient_http_headers;
649
650 /* We're in an error state. Don't bother doing anything. */
651 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
652 return 0;
653 }
654
655 if (len == 0) {
656 switch (CURRENT_STATE()) {
657 case s_body_identity_eof:
658 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
659 * we got paused.
660 */
661 CALLBACK_NOTIFY_NOADVANCE(message_complete);
662 return 0;
663
664 case s_dead:
665 case s_start_req_or_res:
666 case s_start_res:
667 case s_start_req:
668 return 0;
669
670 default:
671 SET_ERRNO(HPE_INVALID_EOF_STATE);
672 return 1;
673 }
674 }
675
676
677 if (CURRENT_STATE() == s_header_field)
678 header_field_mark = data;
679 if (CURRENT_STATE() == s_header_value)
680 header_value_mark = data;
681 switch (CURRENT_STATE()) {
682 case s_req_path:
683 case s_req_schema:
684 case s_req_schema_slash:
685 case s_req_schema_slash_slash:
686 case s_req_server_start:
687 case s_req_server:
688 case s_req_server_with_at:
689 case s_req_query_string_start:
690 case s_req_query_string:
691 case s_req_fragment_start:
692 case s_req_fragment:
693 url_mark = data;
694 break;
695 case s_res_status:
696 status_mark = data;
697 break;
698 default:
699 break;
700 }
701
702 for (p=data; p != data + len; p++) {
703 ch = *p;
704
705 if (PARSING_HEADER(CURRENT_STATE()))
706 COUNT_HEADER_SIZE(1);
707
708 reexecute:
709 switch (CURRENT_STATE()) {
710
711 case s_dead:
712 /* this state is used after a 'Connection: close' message
713 * the parser will error out if it reads another message
714 */
715 if (LIKELY(ch == CR || ch == LF))
716 break;
717
718 SET_ERRNO(HPE_CLOSED_CONNECTION);
719 goto error;
720
721 case s_start_req_or_res:
722 {
723 if (ch == CR || ch == LF)
724 break;
725 parser->flags = 0;
726 parser->content_length = ULLONG_MAX;
727
728 if (ch == 'H') {
729 UPDATE_STATE(s_res_or_resp_H);
730
731 CALLBACK_NOTIFY(message_begin);
732 } else {
733 parser->type = HTTP_REQUEST;
734 UPDATE_STATE(s_start_req);
735 REEXECUTE();
736 }
737
738 break;
739 }
740
741 case s_res_or_resp_H:
742 if (ch == 'T') {
743 parser->type = HTTP_RESPONSE;
744 UPDATE_STATE(s_res_HT);
745 } else {
746 if (UNLIKELY(ch != 'E')) {
747 SET_ERRNO(HPE_INVALID_CONSTANT);
748 goto error;
749 }
750
751 parser->type = HTTP_REQUEST;
752 parser->method = HTTP_HEAD;
753 parser->index = 2;
754 UPDATE_STATE(s_req_method);
755 }
756 break;
757
758 case s_start_res:
759 {
760 parser->flags = 0;
761 parser->content_length = ULLONG_MAX;
762
763 switch (ch) {
764 case 'H':
765 UPDATE_STATE(s_res_H);
766 break;
767
768 case CR:
769 case LF:
770 break;
771
772 default:
773 SET_ERRNO(HPE_INVALID_CONSTANT);
774 goto error;
775 }
776
777 CALLBACK_NOTIFY(message_begin);
778 break;
779 }
780
781 case s_res_H:
782 STRICT_CHECK(ch != 'T');
783 UPDATE_STATE(s_res_HT);
784 break;
785
786 case s_res_HT:
787 STRICT_CHECK(ch != 'T');
788 UPDATE_STATE(s_res_HTT);
789 break;
790
791 case s_res_HTT:
792 STRICT_CHECK(ch != 'P');
793 UPDATE_STATE(s_res_HTTP);
794 break;
795
796 case s_res_HTTP:
797 STRICT_CHECK(ch != '/');
798 UPDATE_STATE(s_res_first_http_major);
799 break;
800
801 case s_res_first_http_major:
802 if (UNLIKELY(ch < '0' || ch > '9')) {
803 SET_ERRNO(HPE_INVALID_VERSION);
804 goto error;
805 }
806
807 parser->http_major = ch - '0';
808 UPDATE_STATE(s_res_http_major);
809 break;
810
811 /* major HTTP version or dot */
812 case s_res_http_major:
813 {
814 if (ch == '.') {
815 UPDATE_STATE(s_res_first_http_minor);
816 break;
817 }
818
819 if (!IS_NUM(ch)) {
820 SET_ERRNO(HPE_INVALID_VERSION);
821 goto error;
822 }
823
824 parser->http_major *= 10;
825 parser->http_major += ch - '0';
826
827 if (UNLIKELY(parser->http_major > 999)) {
828 SET_ERRNO(HPE_INVALID_VERSION);
829 goto error;
830 }
831
832 break;
833 }
834
835 /* first digit of minor HTTP version */
836 case s_res_first_http_minor:
837 if (UNLIKELY(!IS_NUM(ch))) {
838 SET_ERRNO(HPE_INVALID_VERSION);
839 goto error;
840 }
841
842 parser->http_minor = ch - '0';
843 UPDATE_STATE(s_res_http_minor);
844 break;
845
846 /* minor HTTP version or end of request line */
847 case s_res_http_minor:
848 {
849 if (ch == ' ') {
850 UPDATE_STATE(s_res_first_status_code);
851 break;
852 }
853
854 if (UNLIKELY(!IS_NUM(ch))) {
855 SET_ERRNO(HPE_INVALID_VERSION);
856 goto error;
857 }
858
859 parser->http_minor *= 10;
860 parser->http_minor += ch - '0';
861
862 if (UNLIKELY(parser->http_minor > 999)) {
863 SET_ERRNO(HPE_INVALID_VERSION);
864 goto error;
865 }
866
867 break;
868 }
869
870 case s_res_first_status_code:
871 {
872 if (!IS_NUM(ch)) {
873 if (ch == ' ') {
874 break;
875 }
876
877 SET_ERRNO(HPE_INVALID_STATUS);
878 goto error;
879 }
880 parser->status_code = ch - '0';
881 UPDATE_STATE(s_res_status_code);
882 break;
883 }
884
885 case s_res_status_code:
886 {
887 if (!IS_NUM(ch)) {
888 switch (ch) {
889 case ' ':
890 UPDATE_STATE(s_res_status_start);
891 break;
892 case CR:
893 UPDATE_STATE(s_res_line_almost_done);
894 break;
895 case LF:
896 UPDATE_STATE(s_header_field_start);
897 break;
898 default:
899 SET_ERRNO(HPE_INVALID_STATUS);
900 goto error;
901 }
902 break;
903 }
904
905 parser->status_code *= 10;
906 parser->status_code += ch - '0';
907
908 if (UNLIKELY(parser->status_code > 999)) {
909 SET_ERRNO(HPE_INVALID_STATUS);
910 goto error;
911 }
912
913 break;
914 }
915
916 case s_res_status_start:
917 {
918 if (ch == CR) {
919 UPDATE_STATE(s_res_line_almost_done);
920 break;
921 }
922
923 if (ch == LF) {
924 UPDATE_STATE(s_header_field_start);
925 break;
926 }
927
928 MARK(status);
929 UPDATE_STATE(s_res_status);
930 parser->index = 0;
931 break;
932 }
933
934 case s_res_status:
935 if (ch == CR) {
936 UPDATE_STATE(s_res_line_almost_done);
937 CALLBACK_DATA(status);
938 break;
939 }
940
941 if (ch == LF) {
942 UPDATE_STATE(s_header_field_start);
943 CALLBACK_DATA(status);
944 break;
945 }
946
947 break;
948
949 case s_res_line_almost_done:
950 STRICT_CHECK(ch != LF);
951 UPDATE_STATE(s_header_field_start);
952 break;
953
954 case s_start_req:
955 {
956 if (ch == CR || ch == LF)
957 break;
958 parser->flags = 0;
959 parser->content_length = ULLONG_MAX;
960
961 if (UNLIKELY(!IS_ALPHA(ch))) {
962 SET_ERRNO(HPE_INVALID_METHOD);
963 goto error;
964 }
965
966 parser->method = (enum http_method) 0;
967 parser->index = 1;
968 switch (ch) {
969 case 'A': parser->method = HTTP_ACL; break;
970 case 'B': parser->method = HTTP_BIND; break;
971 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
972 case 'D': parser->method = HTTP_DELETE; break;
973 case 'G': parser->method = HTTP_GET; break;
974 case 'H': parser->method = HTTP_HEAD; break;
975 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
976 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
977 case 'N': parser->method = HTTP_NOTIFY; break;
978 case 'O': parser->method = HTTP_OPTIONS; break;
979 case 'P': parser->method = HTTP_POST;
980 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
981 break;
982 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
983 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
984 case 'T': parser->method = HTTP_TRACE; break;
985 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
986 default:
987 SET_ERRNO(HPE_INVALID_METHOD);
988 goto error;
989 }
990 UPDATE_STATE(s_req_method);
991
992 CALLBACK_NOTIFY(message_begin);
993
994 break;
995 }
996
997 case s_req_method:
998 {
999 const char *matcher;
1000 if (UNLIKELY(ch == '\0')) {
1001 SET_ERRNO(HPE_INVALID_METHOD);
1002 goto error;
1003 }
1004
1005 matcher = method_strings[parser->method];
1006 if (ch == ' ' && matcher[parser->index] == '\0') {
1007 UPDATE_STATE(s_req_spaces_before_url);
1008 } else if (ch == matcher[parser->index]) {
1009 ; /* nada */
1010 } else if (IS_ALPHA(ch)) {
1011
1012 switch (parser->method << 16 | parser->index << 8 | ch) {
1013 #define XX(meth, pos, ch, new_meth) \
1014 case (HTTP_##meth << 16 | pos << 8 | ch): \
1015 parser->method = HTTP_##new_meth; break;
1016
1017 XX(POST, 1, 'U', PUT)
1018 XX(POST, 1, 'A', PATCH)
1019 XX(CONNECT, 1, 'H', CHECKOUT)
1020 XX(CONNECT, 2, 'P', COPY)
1021 XX(MKCOL, 1, 'O', MOVE)
1022 XX(MKCOL, 1, 'E', MERGE)
1023 XX(MKCOL, 2, 'A', MKACTIVITY)
1024 XX(MKCOL, 3, 'A', MKCALENDAR)
1025 XX(SUBSCRIBE, 1, 'E', SEARCH)
1026 XX(REPORT, 2, 'B', REBIND)
1027 XX(POST, 1, 'R', PROPFIND)
1028 XX(PROPFIND, 4, 'P', PROPPATCH)
1029 XX(PUT, 2, 'R', PURGE)
1030 XX(LOCK, 1, 'I', LINK)
1031 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1032 XX(UNLOCK, 2, 'B', UNBIND)
1033 XX(UNLOCK, 3, 'I', UNLINK)
1034 #undef XX
1035
1036 default:
1037 SET_ERRNO(HPE_INVALID_METHOD);
1038 goto error;
1039 }
1040 } else if (ch == '-' &&
1041 parser->index == 1 &&
1042 parser->method == HTTP_MKCOL) {
1043 parser->method = HTTP_MSEARCH;
1044 } else {
1045 SET_ERRNO(HPE_INVALID_METHOD);
1046 goto error;
1047 }
1048
1049 ++parser->index;
1050 break;
1051 }
1052
1053 case s_req_spaces_before_url:
1054 {
1055 if (ch == ' ') break;
1056
1057 MARK(url);
1058 if (parser->method == HTTP_CONNECT) {
1059 UPDATE_STATE(s_req_server_start);
1060 }
1061
1062 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1063 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1064 SET_ERRNO(HPE_INVALID_URL);
1065 goto error;
1066 }
1067
1068 break;
1069 }
1070
1071 case s_req_schema:
1072 case s_req_schema_slash:
1073 case s_req_schema_slash_slash:
1074 case s_req_server_start:
1075 {
1076 switch (ch) {
1077 /* No whitespace allowed here */
1078 case ' ':
1079 case CR:
1080 case LF:
1081 SET_ERRNO(HPE_INVALID_URL);
1082 goto error;
1083 default:
1084 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1085 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1086 SET_ERRNO(HPE_INVALID_URL);
1087 goto error;
1088 }
1089 }
1090
1091 break;
1092 }
1093
1094 case s_req_server:
1095 case s_req_server_with_at:
1096 case s_req_path:
1097 case s_req_query_string_start:
1098 case s_req_query_string:
1099 case s_req_fragment_start:
1100 case s_req_fragment:
1101 {
1102 switch (ch) {
1103 case ' ':
1104 UPDATE_STATE(s_req_http_start);
1105 CALLBACK_DATA(url);
1106 break;
1107 case CR:
1108 case LF:
1109 parser->http_major = 0;
1110 parser->http_minor = 9;
1111 UPDATE_STATE((ch == CR) ?
1112 s_req_line_almost_done :
1113 s_header_field_start);
1114 CALLBACK_DATA(url);
1115 break;
1116 default:
1117 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1118 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1119 SET_ERRNO(HPE_INVALID_URL);
1120 goto error;
1121 }
1122 }
1123 break;
1124 }
1125
1126 case s_req_http_start:
1127 switch (ch) {
1128 case 'H':
1129 UPDATE_STATE(s_req_http_H);
1130 break;
1131 case ' ':
1132 break;
1133 default:
1134 SET_ERRNO(HPE_INVALID_CONSTANT);
1135 goto error;
1136 }
1137 break;
1138
1139 case s_req_http_H:
1140 STRICT_CHECK(ch != 'T');
1141 UPDATE_STATE(s_req_http_HT);
1142 break;
1143
1144 case s_req_http_HT:
1145 STRICT_CHECK(ch != 'T');
1146 UPDATE_STATE(s_req_http_HTT);
1147 break;
1148
1149 case s_req_http_HTT:
1150 STRICT_CHECK(ch != 'P');
1151 UPDATE_STATE(s_req_http_HTTP);
1152 break;
1153
1154 case s_req_http_HTTP:
1155 STRICT_CHECK(ch != '/');
1156 UPDATE_STATE(s_req_first_http_major);
1157 break;
1158
1159 /* first digit of major HTTP version */
1160 case s_req_first_http_major:
1161 if (UNLIKELY(ch < '1' || ch > '9')) {
1162 SET_ERRNO(HPE_INVALID_VERSION);
1163 goto error;
1164 }
1165
1166 parser->http_major = ch - '0';
1167 UPDATE_STATE(s_req_http_major);
1168 break;
1169
1170 /* major HTTP version or dot */
1171 case s_req_http_major:
1172 {
1173 if (ch == '.') {
1174 UPDATE_STATE(s_req_first_http_minor);
1175 break;
1176 }
1177
1178 if (UNLIKELY(!IS_NUM(ch))) {
1179 SET_ERRNO(HPE_INVALID_VERSION);
1180 goto error;
1181 }
1182
1183 parser->http_major *= 10;
1184 parser->http_major += ch - '0';
1185
1186 if (UNLIKELY(parser->http_major > 999)) {
1187 SET_ERRNO(HPE_INVALID_VERSION);
1188 goto error;
1189 }
1190
1191 break;
1192 }
1193
1194 /* first digit of minor HTTP version */
1195 case s_req_first_http_minor:
1196 if (UNLIKELY(!IS_NUM(ch))) {
1197 SET_ERRNO(HPE_INVALID_VERSION);
1198 goto error;
1199 }
1200
1201 parser->http_minor = ch - '0';
1202 UPDATE_STATE(s_req_http_minor);
1203 break;
1204
1205 /* minor HTTP version or end of request line */
1206 case s_req_http_minor:
1207 {
1208 if (ch == CR) {
1209 UPDATE_STATE(s_req_line_almost_done);
1210 break;
1211 }
1212
1213 if (ch == LF) {
1214 UPDATE_STATE(s_header_field_start);
1215 break;
1216 }
1217
1218 /* XXX allow spaces after digit? */
1219
1220 if (UNLIKELY(!IS_NUM(ch))) {
1221 SET_ERRNO(HPE_INVALID_VERSION);
1222 goto error;
1223 }
1224
1225 parser->http_minor *= 10;
1226 parser->http_minor += ch - '0';
1227
1228 if (UNLIKELY(parser->http_minor > 999)) {
1229 SET_ERRNO(HPE_INVALID_VERSION);
1230 goto error;
1231 }
1232
1233 break;
1234 }
1235
1236 /* end of request line */
1237 case s_req_line_almost_done:
1238 {
1239 if (UNLIKELY(ch != LF)) {
1240 SET_ERRNO(HPE_LF_EXPECTED);
1241 goto error;
1242 }
1243
1244 UPDATE_STATE(s_header_field_start);
1245 break;
1246 }
1247
1248 case s_header_field_start:
1249 {
1250 if (ch == CR) {
1251 UPDATE_STATE(s_headers_almost_done);
1252 break;
1253 }
1254
1255 if (ch == LF) {
1256 /* they might be just sending \n instead of \r\n so this would be
1257 * the second \n to denote the end of headers*/
1258 UPDATE_STATE(s_headers_almost_done);
1259 REEXECUTE();
1260 }
1261
1262 c = TOKEN(ch);
1263
1264 if (UNLIKELY(!c)) {
1265 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1266 goto error;
1267 }
1268
1269 MARK(header_field);
1270
1271 parser->index = 0;
1272 UPDATE_STATE(s_header_field);
1273
1274 switch (c) {
1275 case 'c':
1276 parser->header_state = h_C;
1277 break;
1278
1279 case 'p':
1280 parser->header_state = h_matching_proxy_connection;
1281 break;
1282
1283 case 't':
1284 parser->header_state = h_matching_transfer_encoding;
1285 break;
1286
1287 case 'u':
1288 parser->header_state = h_matching_upgrade;
1289 break;
1290
1291 default:
1292 parser->header_state = h_general;
1293 break;
1294 }
1295 break;
1296 }
1297
1298 case s_header_field:
1299 {
1300 const char* start = p;
1301 for (; p != data + len; p++) {
1302 ch = *p;
1303 c = TOKEN(ch);
1304
1305 if (!c)
1306 break;
1307
1308 switch (parser->header_state) {
1309 case h_general:
1310 break;
1311
1312 case h_C:
1313 parser->index++;
1314 parser->header_state = (c == 'o' ? h_CO : h_general);
1315 break;
1316
1317 case h_CO:
1318 parser->index++;
1319 parser->header_state = (c == 'n' ? h_CON : h_general);
1320 break;
1321
1322 case h_CON:
1323 parser->index++;
1324 switch (c) {
1325 case 'n':
1326 parser->header_state = h_matching_connection;
1327 break;
1328 case 't':
1329 parser->header_state = h_matching_content_length;
1330 break;
1331 default:
1332 parser->header_state = h_general;
1333 break;
1334 }
1335 break;
1336
1337 /* connection */
1338
1339 case h_matching_connection:
1340 parser->index++;
1341 if (parser->index > sizeof(CONNECTION)-1
1342 || c != CONNECTION[parser->index]) {
1343 parser->header_state = h_general;
1344 } else if (parser->index == sizeof(CONNECTION)-2) {
1345 parser->header_state = h_connection;
1346 }
1347 break;
1348
1349 /* proxy-connection */
1350
1351 case h_matching_proxy_connection:
1352 parser->index++;
1353 if (parser->index > sizeof(PROXY_CONNECTION)-1
1354 || c != PROXY_CONNECTION[parser->index]) {
1355 parser->header_state = h_general;
1356 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1357 parser->header_state = h_connection;
1358 }
1359 break;
1360
1361 /* content-length */
1362
1363 case h_matching_content_length:
1364 parser->index++;
1365 if (parser->index > sizeof(CONTENT_LENGTH)-1
1366 || c != CONTENT_LENGTH[parser->index]) {
1367 parser->header_state = h_general;
1368 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1369 if (parser->flags & F_CONTENTLENGTH) {
1370 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1371 goto error;
1372 }
1373 parser->header_state = h_content_length;
1374 parser->flags |= F_CONTENTLENGTH;
1375 }
1376 break;
1377
1378 /* transfer-encoding */
1379
1380 case h_matching_transfer_encoding:
1381 parser->index++;
1382 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1383 || c != TRANSFER_ENCODING[parser->index]) {
1384 parser->header_state = h_general;
1385 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1386 parser->header_state = h_transfer_encoding;
1387 }
1388 break;
1389
1390 /* upgrade */
1391
1392 case h_matching_upgrade:
1393 parser->index++;
1394 if (parser->index > sizeof(UPGRADE)-1
1395 || c != UPGRADE[parser->index]) {
1396 parser->header_state = h_general;
1397 } else if (parser->index == sizeof(UPGRADE)-2) {
1398 parser->header_state = h_upgrade;
1399 }
1400 break;
1401
1402 case h_connection:
1403 case h_content_length:
1404 case h_transfer_encoding:
1405 case h_upgrade:
1406 if (ch != ' ') parser->header_state = h_general;
1407 break;
1408
1409 default:
1410 assert(0 && "Unknown header_state");
1411 break;
1412 }
1413 }
1414
1415 COUNT_HEADER_SIZE(p - start);
1416
1417 if (p == data + len) {
1418 --p;
1419 break;
1420 }
1421
1422 if (ch == ':') {
1423 UPDATE_STATE(s_header_value_discard_ws);
1424 CALLBACK_DATA(header_field);
1425 break;
1426 }
1427
1428 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1429 goto error;
1430 }
1431
1432 case s_header_value_discard_ws:
1433 if (ch == ' ' || ch == '\t') break;
1434
1435 if (ch == CR) {
1436 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1437 break;
1438 }
1439
1440 if (ch == LF) {
1441 UPDATE_STATE(s_header_value_discard_lws);
1442 break;
1443 }
1444
1445 /* FALLTHROUGH */
1446
1447 case s_header_value_start:
1448 {
1449 MARK(header_value);
1450
1451 UPDATE_STATE(s_header_value);
1452 parser->index = 0;
1453
1454 c = LOWER(ch);
1455
1456 switch (parser->header_state) {
1457 case h_upgrade:
1458 parser->flags |= F_UPGRADE;
1459 parser->header_state = h_general;
1460 break;
1461
1462 case h_transfer_encoding:
1463 /* looking for 'Transfer-Encoding: chunked' */
1464 if ('c' == c) {
1465 parser->header_state = h_matching_transfer_encoding_chunked;
1466 } else {
1467 parser->header_state = h_general;
1468 }
1469 break;
1470
1471 case h_content_length:
1472 if (UNLIKELY(!IS_NUM(ch))) {
1473 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1474 goto error;
1475 }
1476
1477 parser->content_length = ch - '0';
1478 break;
1479
1480 case h_connection:
1481 /* looking for 'Connection: keep-alive' */
1482 if (c == 'k') {
1483 parser->header_state = h_matching_connection_keep_alive;
1484 /* looking for 'Connection: close' */
1485 } else if (c == 'c') {
1486 parser->header_state = h_matching_connection_close;
1487 } else if (c == 'u') {
1488 parser->header_state = h_matching_connection_upgrade;
1489 } else {
1490 parser->header_state = h_matching_connection_token;
1491 }
1492 break;
1493
1494 /* Multi-value `Connection` header */
1495 case h_matching_connection_token_start:
1496 break;
1497
1498 default:
1499 parser->header_state = h_general;
1500 break;
1501 }
1502 break;
1503 }
1504
1505 case s_header_value:
1506 {
1507 const char* start = p;
1508 enum header_states h_state = (enum header_states) parser->header_state;
1509 for (; p != data + len; p++) {
1510 ch = *p;
1511 if (ch == CR) {
1512 UPDATE_STATE(s_header_almost_done);
1513 parser->header_state = h_state;
1514 CALLBACK_DATA(header_value);
1515 break;
1516 }
1517
1518 if (ch == LF) {
1519 UPDATE_STATE(s_header_almost_done);
1520 COUNT_HEADER_SIZE(p - start);
1521 parser->header_state = h_state;
1522 CALLBACK_DATA_NOADVANCE(header_value);
1523 REEXECUTE();
1524 }
1525
1526 if (!lenient && !IS_HEADER_CHAR(ch)) {
1527 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1528 goto error;
1529 }
1530
1531 c = LOWER(ch);
1532
1533 switch (h_state) {
1534 case h_general:
1535 {
1536 const char* p_cr;
1537 const char* p_lf;
1538 size_t limit = data + len - p;
1539
1540 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1541
1542 p_cr = (const char*) memchr(p, CR, limit);
1543 p_lf = (const char*) memchr(p, LF, limit);
1544 if (p_cr != NULL) {
1545 if (p_lf != NULL && p_cr >= p_lf)
1546 p = p_lf;
1547 else
1548 p = p_cr;
1549 } else if (UNLIKELY(p_lf != NULL)) {
1550 p = p_lf;
1551 } else {
1552 p = data + len;
1553 }
1554 --p;
1555
1556 break;
1557 }
1558
1559 case h_connection:
1560 case h_transfer_encoding:
1561 assert(0 && "Shouldn't get here.");
1562 break;
1563
1564 case h_content_length:
1565 {
1566 uint64_t t;
1567
1568 if (ch == ' ') break;
1569
1570 if (UNLIKELY(!IS_NUM(ch))) {
1571 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1572 parser->header_state = h_state;
1573 goto error;
1574 }
1575
1576 t = parser->content_length;
1577 t *= 10;
1578 t += ch - '0';
1579
1580 /* Overflow? Test against a conservative limit for simplicity. */
1581 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1582 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1583 parser->header_state = h_state;
1584 goto error;
1585 }
1586
1587 parser->content_length = t;
1588 break;
1589 }
1590
1591 /* Transfer-Encoding: chunked */
1592 case h_matching_transfer_encoding_chunked:
1593 parser->index++;
1594 if (parser->index > sizeof(CHUNKED)-1
1595 || c != CHUNKED[parser->index]) {
1596 h_state = h_general;
1597 } else if (parser->index == sizeof(CHUNKED)-2) {
1598 h_state = h_transfer_encoding_chunked;
1599 }
1600 break;
1601
1602 case h_matching_connection_token_start:
1603 /* looking for 'Connection: keep-alive' */
1604 if (c == 'k') {
1605 h_state = h_matching_connection_keep_alive;
1606 /* looking for 'Connection: close' */
1607 } else if (c == 'c') {
1608 h_state = h_matching_connection_close;
1609 } else if (c == 'u') {
1610 h_state = h_matching_connection_upgrade;
1611 } else if (STRICT_TOKEN(c)) {
1612 h_state = h_matching_connection_token;
1613 } else if (c == ' ' || c == '\t') {
1614 /* Skip lws */
1615 } else {
1616 h_state = h_general;
1617 }
1618 break;
1619
1620 /* looking for 'Connection: keep-alive' */
1621 case h_matching_connection_keep_alive:
1622 parser->index++;
1623 if (parser->index > sizeof(KEEP_ALIVE)-1
1624 || c != KEEP_ALIVE[parser->index]) {
1625 h_state = h_matching_connection_token;
1626 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1627 h_state = h_connection_keep_alive;
1628 }
1629 break;
1630
1631 /* looking for 'Connection: close' */
1632 case h_matching_connection_close:
1633 parser->index++;
1634 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1635 h_state = h_matching_connection_token;
1636 } else if (parser->index == sizeof(CLOSE)-2) {
1637 h_state = h_connection_close;
1638 }
1639 break;
1640
1641 /* looking for 'Connection: upgrade' */
1642 case h_matching_connection_upgrade:
1643 parser->index++;
1644 if (parser->index > sizeof(UPGRADE) - 1 ||
1645 c != UPGRADE[parser->index]) {
1646 h_state = h_matching_connection_token;
1647 } else if (parser->index == sizeof(UPGRADE)-2) {
1648 h_state = h_connection_upgrade;
1649 }
1650 break;
1651
1652 case h_matching_connection_token:
1653 if (ch == ',') {
1654 h_state = h_matching_connection_token_start;
1655 parser->index = 0;
1656 }
1657 break;
1658
1659 case h_transfer_encoding_chunked:
1660 if (ch != ' ') h_state = h_general;
1661 break;
1662
1663 case h_connection_keep_alive:
1664 case h_connection_close:
1665 case h_connection_upgrade:
1666 if (ch == ',') {
1667 if (h_state == h_connection_keep_alive) {
1668 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1669 } else if (h_state == h_connection_close) {
1670 parser->flags |= F_CONNECTION_CLOSE;
1671 } else if (h_state == h_connection_upgrade) {
1672 parser->flags |= F_CONNECTION_UPGRADE;
1673 }
1674 h_state = h_matching_connection_token_start;
1675 parser->index = 0;
1676 } else if (ch != ' ') {
1677 h_state = h_matching_connection_token;
1678 }
1679 break;
1680
1681 default:
1682 UPDATE_STATE(s_header_value);
1683 h_state = h_general;
1684 break;
1685 }
1686 }
1687 parser->header_state = h_state;
1688
1689 COUNT_HEADER_SIZE(p - start);
1690
1691 if (p == data + len)
1692 --p;
1693 break;
1694 }
1695
1696 case s_header_almost_done:
1697 {
1698 if (UNLIKELY(ch != LF)) {
1699 SET_ERRNO(HPE_LF_EXPECTED);
1700 goto error;
1701 }
1702
1703 UPDATE_STATE(s_header_value_lws);
1704 break;
1705 }
1706
1707 case s_header_value_lws:
1708 {
1709 if (ch == ' ' || ch == '\t') {
1710 UPDATE_STATE(s_header_value_start);
1711 REEXECUTE();
1712 }
1713
1714 /* finished the header */
1715 switch (parser->header_state) {
1716 case h_connection_keep_alive:
1717 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1718 break;
1719 case h_connection_close:
1720 parser->flags |= F_CONNECTION_CLOSE;
1721 break;
1722 case h_transfer_encoding_chunked:
1723 parser->flags |= F_CHUNKED;
1724 break;
1725 case h_connection_upgrade:
1726 parser->flags |= F_CONNECTION_UPGRADE;
1727 break;
1728 default:
1729 break;
1730 }
1731
1732 UPDATE_STATE(s_header_field_start);
1733 REEXECUTE();
1734 }
1735
1736 case s_header_value_discard_ws_almost_done:
1737 {
1738 STRICT_CHECK(ch != LF);
1739 UPDATE_STATE(s_header_value_discard_lws);
1740 break;
1741 }
1742
1743 case s_header_value_discard_lws:
1744 {
1745 if (ch == ' ' || ch == '\t') {
1746 UPDATE_STATE(s_header_value_discard_ws);
1747 break;
1748 } else {
1749 switch (parser->header_state) {
1750 case h_connection_keep_alive:
1751 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1752 break;
1753 case h_connection_close:
1754 parser->flags |= F_CONNECTION_CLOSE;
1755 break;
1756 case h_connection_upgrade:
1757 parser->flags |= F_CONNECTION_UPGRADE;
1758 break;
1759 case h_transfer_encoding_chunked:
1760 parser->flags |= F_CHUNKED;
1761 break;
1762 default:
1763 break;
1764 }
1765
1766 /* header value was empty */
1767 MARK(header_value);
1768 UPDATE_STATE(s_header_field_start);
1769 CALLBACK_DATA_NOADVANCE(header_value);
1770 REEXECUTE();
1771 }
1772 }
1773
1774 case s_headers_almost_done:
1775 {
1776 STRICT_CHECK(ch != LF);
1777
1778 if (parser->flags & F_TRAILING) {
1779 /* End of a chunked request */
1780 UPDATE_STATE(s_message_done);
1781 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1782 REEXECUTE();
1783 }
1784
1785 /* Cannot use chunked encoding and a content-length header together
1786 per the HTTP specification. */
1787 if ((parser->flags & F_CHUNKED) &&
1788 (parser->flags & F_CONTENTLENGTH)) {
1789 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1790 goto error;
1791 }
1792
1793 UPDATE_STATE(s_headers_done);
1794
1795 /* Set this here so that on_headers_complete() callbacks can see it */
1796 parser->upgrade =
1797 ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1798 (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1799 parser->method == HTTP_CONNECT);
1800
1801 /* Here we call the headers_complete callback. This is somewhat
1802 * different than other callbacks because if the user returns 1, we
1803 * will interpret that as saying that this message has no body. This
1804 * is needed for the annoying case of recieving a response to a HEAD
1805 * request.
1806 *
1807 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1808 * we have to simulate it by handling a change in errno below.
1809 */
1810 if (settings->on_headers_complete) {
1811 switch (settings->on_headers_complete(parser)) {
1812 case 0:
1813 break;
1814
1815 case 2:
1816 parser->upgrade = 1;
1817
1818 case 1:
1819 parser->flags |= F_SKIPBODY;
1820 break;
1821
1822 default:
1823 SET_ERRNO(HPE_CB_headers_complete);
1824 RETURN(p - data); /* Error */
1825 }
1826 }
1827
1828 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1829 RETURN(p - data);
1830 }
1831
1832 REEXECUTE();
1833 }
1834
1835 case s_headers_done:
1836 {
1837 int hasBody;
1838 STRICT_CHECK(ch != LF);
1839
1840 parser->nread = 0;
1841
1842 hasBody = parser->flags & F_CHUNKED ||
1843 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1844 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1845 (parser->flags & F_SKIPBODY) || !hasBody)) {
1846 /* Exit, the rest of the message is in a different protocol. */
1847 UPDATE_STATE(NEW_MESSAGE());
1848 CALLBACK_NOTIFY(message_complete);
1849 RETURN((p - data) + 1);
1850 }
1851
1852 if (parser->flags & F_SKIPBODY) {
1853 UPDATE_STATE(NEW_MESSAGE());
1854 CALLBACK_NOTIFY(message_complete);
1855 } else if (parser->flags & F_CHUNKED) {
1856 /* chunked encoding - ignore Content-Length header */
1857 UPDATE_STATE(s_chunk_size_start);
1858 } else {
1859 if (parser->content_length == 0) {
1860 /* Content-Length header given but zero: Content-Length: 0\r\n */
1861 UPDATE_STATE(NEW_MESSAGE());
1862 CALLBACK_NOTIFY(message_complete);
1863 } else if (parser->content_length != ULLONG_MAX) {
1864 /* Content-Length header given and non-zero */
1865 UPDATE_STATE(s_body_identity);
1866 } else {
1867 if (!http_message_needs_eof(parser)) {
1868 /* Assume content-length 0 - read the next */
1869 UPDATE_STATE(NEW_MESSAGE());
1870 CALLBACK_NOTIFY(message_complete);
1871 } else {
1872 /* Read body until EOF */
1873 UPDATE_STATE(s_body_identity_eof);
1874 }
1875 }
1876 }
1877
1878 break;
1879 }
1880
1881 case s_body_identity:
1882 {
1883 uint64_t to_read = MIN(parser->content_length,
1884 (uint64_t) ((data + len) - p));
1885
1886 assert(parser->content_length != 0
1887 && parser->content_length != ULLONG_MAX);
1888
1889 /* The difference between advancing content_length and p is because
1890 * the latter will automaticaly advance on the next loop iteration.
1891 * Further, if content_length ends up at 0, we want to see the last
1892 * byte again for our message complete callback.
1893 */
1894 MARK(body);
1895 parser->content_length -= to_read;
1896 p += to_read - 1;
1897
1898 if (parser->content_length == 0) {
1899 UPDATE_STATE(s_message_done);
1900
1901 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1902 *
1903 * The alternative to doing this is to wait for the next byte to
1904 * trigger the data callback, just as in every other case. The
1905 * problem with this is that this makes it difficult for the test
1906 * harness to distinguish between complete-on-EOF and
1907 * complete-on-length. It's not clear that this distinction is
1908 * important for applications, but let's keep it for now.
1909 */
1910 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1911 REEXECUTE();
1912 }
1913
1914 break;
1915 }
1916
1917 /* read until EOF */
1918 case s_body_identity_eof:
1919 MARK(body);
1920 p = data + len - 1;
1921
1922 break;
1923
1924 case s_message_done:
1925 UPDATE_STATE(NEW_MESSAGE());
1926 CALLBACK_NOTIFY(message_complete);
1927 if (parser->upgrade) {
1928 /* Exit, the rest of the message is in a different protocol. */
1929 RETURN((p - data) + 1);
1930 }
1931 break;
1932
1933 case s_chunk_size_start:
1934 {
1935 assert(parser->nread == 1);
1936 assert(parser->flags & F_CHUNKED);
1937
1938 unhex_val = unhex[(unsigned char)ch];
1939 if (UNLIKELY(unhex_val == -1)) {
1940 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1941 goto error;
1942 }
1943
1944 parser->content_length = unhex_val;
1945 UPDATE_STATE(s_chunk_size);
1946 break;
1947 }
1948
1949 case s_chunk_size:
1950 {
1951 uint64_t t;
1952
1953 assert(parser->flags & F_CHUNKED);
1954
1955 if (ch == CR) {
1956 UPDATE_STATE(s_chunk_size_almost_done);
1957 break;
1958 }
1959
1960 unhex_val = unhex[(unsigned char)ch];
1961
1962 if (unhex_val == -1) {
1963 if (ch == ';' || ch == ' ') {
1964 UPDATE_STATE(s_chunk_parameters);
1965 break;
1966 }
1967
1968 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1969 goto error;
1970 }
1971
1972 t = parser->content_length;
1973 t *= 16;
1974 t += unhex_val;
1975
1976 /* Overflow? Test against a conservative limit for simplicity. */
1977 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1978 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1979 goto error;
1980 }
1981
1982 parser->content_length = t;
1983 break;
1984 }
1985
1986 case s_chunk_parameters:
1987 {
1988 assert(parser->flags & F_CHUNKED);
1989 /* just ignore this shit. TODO check for overflow */
1990 if (ch == CR) {
1991 UPDATE_STATE(s_chunk_size_almost_done);
1992 break;
1993 }
1994 break;
1995 }
1996
1997 case s_chunk_size_almost_done:
1998 {
1999 assert(parser->flags & F_CHUNKED);
2000 STRICT_CHECK(ch != LF);
2001
2002 parser->nread = 0;
2003
2004 if (parser->content_length == 0) {
2005 parser->flags |= F_TRAILING;
2006 UPDATE_STATE(s_header_field_start);
2007 } else {
2008 UPDATE_STATE(s_chunk_data);
2009 }
2010 CALLBACK_NOTIFY(chunk_header);
2011 break;
2012 }
2013
2014 case s_chunk_data:
2015 {
2016 uint64_t to_read = MIN(parser->content_length,
2017 (uint64_t) ((data + len) - p));
2018
2019 assert(parser->flags & F_CHUNKED);
2020 assert(parser->content_length != 0
2021 && parser->content_length != ULLONG_MAX);
2022
2023 /* See the explanation in s_body_identity for why the content
2024 * length and data pointers are managed this way.
2025 */
2026 MARK(body);
2027 parser->content_length -= to_read;
2028 p += to_read - 1;
2029
2030 if (parser->content_length == 0) {
2031 UPDATE_STATE(s_chunk_data_almost_done);
2032 }
2033
2034 break;
2035 }
2036
2037 case s_chunk_data_almost_done:
2038 assert(parser->flags & F_CHUNKED);
2039 assert(parser->content_length == 0);
2040 STRICT_CHECK(ch != CR);
2041 UPDATE_STATE(s_chunk_data_done);
2042 CALLBACK_DATA(body);
2043 break;
2044
2045 case s_chunk_data_done:
2046 assert(parser->flags & F_CHUNKED);
2047 STRICT_CHECK(ch != LF);
2048 parser->nread = 0;
2049 UPDATE_STATE(s_chunk_size_start);
2050 CALLBACK_NOTIFY(chunk_complete);
2051 break;
2052
2053 default:
2054 assert(0 && "unhandled state");
2055 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2056 goto error;
2057 }
2058 }
2059
2060 /* Run callbacks for any marks that we have leftover after we ran our of
2061 * bytes. There should be at most one of these set, so it's OK to invoke
2062 * them in series (unset marks will not result in callbacks).
2063 *
2064 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2065 * overflowed 'data' and this allows us to correct for the off-by-one that
2066 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2067 * value that's in-bounds).
2068 */
2069
2070 assert(((header_field_mark ? 1 : 0) +
2071 (header_value_mark ? 1 : 0) +
2072 (url_mark ? 1 : 0) +
2073 (body_mark ? 1 : 0) +
2074 (status_mark ? 1 : 0)) <= 1);
2075
2076 CALLBACK_DATA_NOADVANCE(header_field);
2077 CALLBACK_DATA_NOADVANCE(header_value);
2078 CALLBACK_DATA_NOADVANCE(url);
2079 CALLBACK_DATA_NOADVANCE(body);
2080 CALLBACK_DATA_NOADVANCE(status);
2081
2082 RETURN(len);
2083
2084 error:
2085 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2086 SET_ERRNO(HPE_UNKNOWN);
2087 }
2088
2089 RETURN(p - data);
2090 }
2091
2092
2093 /* Does the parser need to see an EOF to find the end of the message? */
2094 int
http_message_needs_eof(const http_parser * parser)2095 http_message_needs_eof (const http_parser *parser)
2096 {
2097 if (parser->type == HTTP_REQUEST) {
2098 return 0;
2099 }
2100
2101 /* See RFC 2616 section 4.4 */
2102 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2103 parser->status_code == 204 || /* No Content */
2104 parser->status_code == 304 || /* Not Modified */
2105 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2106 return 0;
2107 }
2108
2109 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2110 return 0;
2111 }
2112
2113 return 1;
2114 }
2115
2116
2117 int
http_should_keep_alive(const http_parser * parser)2118 http_should_keep_alive (const http_parser *parser)
2119 {
2120 if (parser->http_major > 0 && parser->http_minor > 0) {
2121 /* HTTP/1.1 */
2122 if (parser->flags & F_CONNECTION_CLOSE) {
2123 return 0;
2124 }
2125 } else {
2126 /* HTTP/1.0 or earlier */
2127 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2128 return 0;
2129 }
2130 }
2131
2132 return !http_message_needs_eof(parser);
2133 }
2134
2135
2136 const char *
http_method_str(enum http_method m)2137 http_method_str (enum http_method m)
2138 {
2139 return ELEM_AT(method_strings, m, "<unknown>");
2140 }
2141
2142
2143 void
http_parser_init(http_parser * parser,enum http_parser_type t)2144 http_parser_init (http_parser *parser, enum http_parser_type t)
2145 {
2146 void *data = parser->data; /* preserve application data */
2147 memset(parser, 0, sizeof(*parser));
2148 parser->data = data;
2149 parser->type = t;
2150 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2151 parser->http_errno = HPE_OK;
2152 }
2153
2154 void
http_parser_settings_init(http_parser_settings * settings)2155 http_parser_settings_init(http_parser_settings *settings)
2156 {
2157 memset(settings, 0, sizeof(*settings));
2158 }
2159
2160 const char *
http_errno_name(enum http_errno err)2161 http_errno_name(enum http_errno err) {
2162 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2163 return http_strerror_tab[err].name;
2164 }
2165
2166 const char *
http_errno_description(enum http_errno err)2167 http_errno_description(enum http_errno err) {
2168 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2169 return http_strerror_tab[err].description;
2170 }
2171
2172 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2173 http_parse_host_char(enum http_host_state s, const char ch) {
2174 switch(s) {
2175 case s_http_userinfo:
2176 case s_http_userinfo_start:
2177 if (ch == '@') {
2178 return s_http_host_start;
2179 }
2180
2181 if (IS_USERINFO_CHAR(ch)) {
2182 return s_http_userinfo;
2183 }
2184 break;
2185
2186 case s_http_host_start:
2187 if (ch == '[') {
2188 return s_http_host_v6_start;
2189 }
2190
2191 if (IS_HOST_CHAR(ch)) {
2192 return s_http_host;
2193 }
2194
2195 break;
2196
2197 case s_http_host:
2198 if (IS_HOST_CHAR(ch)) {
2199 return s_http_host;
2200 }
2201
2202 /* FALLTHROUGH */
2203 case s_http_host_v6_end:
2204 if (ch == ':') {
2205 return s_http_host_port_start;
2206 }
2207
2208 break;
2209
2210 case s_http_host_v6:
2211 if (ch == ']') {
2212 return s_http_host_v6_end;
2213 }
2214
2215 /* FALLTHROUGH */
2216 case s_http_host_v6_start:
2217 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2218 return s_http_host_v6;
2219 }
2220
2221 if (s == s_http_host_v6 && ch == '%') {
2222 return s_http_host_v6_zone_start;
2223 }
2224 break;
2225
2226 case s_http_host_v6_zone:
2227 if (ch == ']') {
2228 return s_http_host_v6_end;
2229 }
2230
2231 /* FALLTHROUGH */
2232 case s_http_host_v6_zone_start:
2233 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2234 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2235 ch == '~') {
2236 return s_http_host_v6_zone;
2237 }
2238 break;
2239
2240 case s_http_host_port:
2241 case s_http_host_port_start:
2242 if (IS_NUM(ch)) {
2243 return s_http_host_port;
2244 }
2245
2246 break;
2247
2248 default:
2249 break;
2250 }
2251 return s_http_host_dead;
2252 }
2253
2254 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2255 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2256 assert(u->field_set & (1 << UF_HOST));
2257 enum http_host_state s;
2258
2259 const char *p;
2260 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2261
2262 u->field_data[UF_HOST].len = 0;
2263
2264 s = found_at ? s_http_userinfo_start : s_http_host_start;
2265
2266 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2267 enum http_host_state new_s = http_parse_host_char(s, *p);
2268
2269 if (new_s == s_http_host_dead) {
2270 return 1;
2271 }
2272
2273 switch(new_s) {
2274 case s_http_host:
2275 if (s != s_http_host) {
2276 u->field_data[UF_HOST].off = p - buf;
2277 }
2278 u->field_data[UF_HOST].len++;
2279 break;
2280
2281 case s_http_host_v6:
2282 if (s != s_http_host_v6) {
2283 u->field_data[UF_HOST].off = p - buf;
2284 }
2285 u->field_data[UF_HOST].len++;
2286 break;
2287
2288 case s_http_host_v6_zone_start:
2289 case s_http_host_v6_zone:
2290 u->field_data[UF_HOST].len++;
2291 break;
2292
2293 case s_http_host_port:
2294 if (s != s_http_host_port) {
2295 u->field_data[UF_PORT].off = p - buf;
2296 u->field_data[UF_PORT].len = 0;
2297 u->field_set |= (1 << UF_PORT);
2298 }
2299 u->field_data[UF_PORT].len++;
2300 break;
2301
2302 case s_http_userinfo:
2303 if (s != s_http_userinfo) {
2304 u->field_data[UF_USERINFO].off = p - buf ;
2305 u->field_data[UF_USERINFO].len = 0;
2306 u->field_set |= (1 << UF_USERINFO);
2307 }
2308 u->field_data[UF_USERINFO].len++;
2309 break;
2310
2311 default:
2312 break;
2313 }
2314 s = new_s;
2315 }
2316
2317 /* Make sure we don't end somewhere unexpected */
2318 switch (s) {
2319 case s_http_host_start:
2320 case s_http_host_v6_start:
2321 case s_http_host_v6:
2322 case s_http_host_v6_zone_start:
2323 case s_http_host_v6_zone:
2324 case s_http_host_port_start:
2325 case s_http_userinfo:
2326 case s_http_userinfo_start:
2327 return 1;
2328 default:
2329 break;
2330 }
2331
2332 return 0;
2333 }
2334
2335 void
http_parser_url_init(struct http_parser_url * u)2336 http_parser_url_init(struct http_parser_url *u) {
2337 memset(u, 0, sizeof(*u));
2338 }
2339
2340 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2341 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2342 struct http_parser_url *u)
2343 {
2344 enum state s;
2345 const char *p;
2346 enum http_parser_url_fields uf, old_uf;
2347 int found_at = 0;
2348
2349 u->port = u->field_set = 0;
2350 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2351 old_uf = UF_MAX;
2352
2353 for (p = buf; p < buf + buflen; p++) {
2354 s = parse_url_char(s, *p);
2355
2356 /* Figure out the next field that we're operating on */
2357 switch (s) {
2358 case s_dead:
2359 return 1;
2360
2361 /* Skip delimeters */
2362 case s_req_schema_slash:
2363 case s_req_schema_slash_slash:
2364 case s_req_server_start:
2365 case s_req_query_string_start:
2366 case s_req_fragment_start:
2367 continue;
2368
2369 case s_req_schema:
2370 uf = UF_SCHEMA;
2371 break;
2372
2373 case s_req_server_with_at:
2374 found_at = 1;
2375
2376 /* FALLTROUGH */
2377 case s_req_server:
2378 uf = UF_HOST;
2379 break;
2380
2381 case s_req_path:
2382 uf = UF_PATH;
2383 break;
2384
2385 case s_req_query_string:
2386 uf = UF_QUERY;
2387 break;
2388
2389 case s_req_fragment:
2390 uf = UF_FRAGMENT;
2391 break;
2392
2393 default:
2394 assert(!"Unexpected state");
2395 return 1;
2396 }
2397
2398 /* Nothing's changed; soldier on */
2399 if (uf == old_uf) {
2400 u->field_data[uf].len++;
2401 continue;
2402 }
2403
2404 u->field_data[uf].off = p - buf;
2405 u->field_data[uf].len = 1;
2406
2407 u->field_set |= (1 << uf);
2408 old_uf = uf;
2409 }
2410
2411 /* host must be present if there is a schema */
2412 /* parsing http:///toto will fail */
2413 if ((u->field_set & (1 << UF_SCHEMA)) &&
2414 (u->field_set & (1 << UF_HOST)) == 0) {
2415 return 1;
2416 }
2417
2418 if (u->field_set & (1 << UF_HOST)) {
2419 if (http_parse_host(buf, u, found_at) != 0) {
2420 return 1;
2421 }
2422 }
2423
2424 /* CONNECT requests can only contain "hostname:port" */
2425 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2426 return 1;
2427 }
2428
2429 if (u->field_set & (1 << UF_PORT)) {
2430 /* Don't bother with endp; we've already validated the string */
2431 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2432
2433 /* Ports have a max value of 2^16 */
2434 if (v > 0xffff) {
2435 return 1;
2436 }
2437
2438 u->port = (uint16_t) v;
2439 }
2440
2441 return 0;
2442 }
2443
2444 void
http_parser_pause(http_parser * parser,int paused)2445 http_parser_pause(http_parser *parser, int paused) {
2446 /* Users should only be pausing/unpausing a parser that is not in an error
2447 * state. In non-debug builds, there's not much that we can do about this
2448 * other than ignore it.
2449 */
2450 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2451 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2452 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2453 } else {
2454 assert(0 && "Attempting to pause parser in error state");
2455 }
2456 }
2457
2458 int
http_body_is_final(const struct http_parser * parser)2459 http_body_is_final(const struct http_parser *parser) {
2460 return parser->state == s_message_done;
2461 }
2462
2463 unsigned long
http_parser_version(void)2464 http_parser_version(void) {
2465 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2466 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2467 HTTP_PARSER_VERSION_PATCH * 0x00001;
2468 }
2469