1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "quiche/balsa/balsa_frame.h"
6
7 #include <algorithm>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstring>
11 #include <limits>
12 #include <memory>
13 #include <string>
14 #include <utility>
15
16 #include "absl/strings/match.h"
17 #include "absl/strings/numbers.h"
18 #include "absl/strings/string_view.h"
19 #include "quiche/balsa/balsa_enums.h"
20 #include "quiche/balsa/balsa_headers.h"
21 #include "quiche/balsa/balsa_visitor_interface.h"
22 #include "quiche/balsa/header_properties.h"
23 #include "quiche/common/platform/api/quiche_logging.h"
24
25 // When comparing characters (other than == and !=), cast to unsigned char
26 // to make sure values above 127 rank as expected, even on platforms where char
27 // is signed and thus such values are represented as negative numbers before the
28 // cast.
// True when a < b after both operands are converted to unsigned char.
#define CHAR_LT(a, b) \
  (static_cast<unsigned char>(a) < static_cast<unsigned char>(b))
// True when a <= b after both operands are converted to unsigned char.
#define CHAR_LE(a, b) \
  (static_cast<unsigned char>(a) <= static_cast<unsigned char>(b))
// True when a > b after both operands are converted to unsigned char.
#define CHAR_GT(a, b) \
  (static_cast<unsigned char>(a) > static_cast<unsigned char>(b))
// True when a >= b after both operands are converted to unsigned char.
#define CHAR_GE(a, b) \
  (static_cast<unsigned char>(a) >= static_cast<unsigned char>(b))
// QUICHE_DCHECK_GE applied to the unsigned-char values of a and b. Like
// QUICHE_DCHECK_GE, a message may be streamed into it with <<.
#define QUICHE_DCHECK_CHAR_GE(a, b) \
  QUICHE_DCHECK_GE(static_cast<unsigned char>(a), static_cast<unsigned char>(b))
39
40 namespace quiche {
41
42 namespace {
43
// Numeric HTTP status codes 100 and 101 (named after "Continue" and
// "Switching Protocols"). They are not referenced by the functions in this
// part of the file.
constexpr size_t kContinueStatusCode = 100;
constexpr size_t kSwitchingProtocolsStatusCode = 101;

// Lower-case header names and Transfer-Encoding values. The header-processing
// code below compares against them with absl::EqualsIgnoreCase, so the match
// is case-insensitive.
constexpr absl::string_view kChunked = "chunked";
constexpr absl::string_view kContentLength = "content-length";
constexpr absl::string_view kIdentity = "identity";
constexpr absl::string_view kTransferEncoding = "transfer-encoding";
51
// Reports whether `response_code` falls in the 1xx range, i.e. [100, 200).
bool IsInterimResponse(size_t response_code) {
  if (response_code < 100) {
    return false;
  }
  return response_code <= 199;
}
55
56 } // namespace
57
Reset()58 void BalsaFrame::Reset() {
59 last_char_was_slash_r_ = false;
60 saw_non_newline_char_ = false;
61 start_was_space_ = true;
62 chunk_length_character_extracted_ = false;
63 // is_request_ = true; // not reset between messages.
64 allow_reading_until_close_for_request_ = false;
65 // request_was_head_ = false; // not reset between messages.
66 // max_header_length_ = 16 * 1024; // not reset between messages.
67 // visitor_ = &do_nothing_visitor_; // not reset between messages.
68 chunk_length_remaining_ = 0;
69 content_length_remaining_ = 0;
70 last_slash_n_idx_ = 0;
71 term_chars_ = 0;
72 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
73 last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
74 invalid_chars_.clear();
75 lines_.clear();
76 if (continue_headers_ != nullptr) {
77 continue_headers_->Clear();
78 }
79 if (headers_ != nullptr) {
80 headers_->Clear();
81 }
82 trailer_lines_.clear();
83 start_of_trailer_line_ = 0;
84 trailer_length_ = 0;
85 if (trailer_ != nullptr) {
86 trailer_->Clear();
87 }
88 if (trailers_ != nullptr) {
89 trailers_->Clear();
90 }
91 }
92
93 namespace {
94
95 // Within the line bounded by [current, end), parses a single "island",
96 // comprising a (possibly empty) span of whitespace followed by a (possibly
97 // empty) span of non-whitespace.
98 //
// Returns a pointer to the first whitespace character beyond this island, or
// returns end if no additional whitespace characters are present after this
// island.  (I.e., returnvalue == end || *returnvalue <= ' ')
//
// Upon return, the whitespace span consists of the characters
// whose indices fall in [*first_whitespace, *first_nonwhite), while the
// non-whitespace span consists of the characters whose indices fall in
// [*first_nonwhite, returnvalue - begin).
ParseOneIsland(const char * current,const char * begin,const char * end,size_t * first_whitespace,size_t * first_nonwhite)107 inline const char* ParseOneIsland(const char* current, const char* begin,
108 const char* end, size_t* first_whitespace,
109 size_t* first_nonwhite) {
110 *first_whitespace = current - begin;
111 while (current < end && CHAR_LE(*current, ' ')) {
112 ++current;
113 }
114 *first_nonwhite = current - begin;
115 while (current < end && CHAR_GT(*current, ' ')) {
116 ++current;
117 }
118 return current;
119 }
120
121 } // namespace
122
123 // Summary:
124 // Parses the first line of either a request or response.
125 // Note that in the case of a detected warning, error_code will be set
126 // but the function will not return false.
127 // Exactly zero or one warning or error (but not both) may be detected
128 // by this function.
129 // Note that this function will not write the data of the first-line
130 // into the header's buffer (that should already have been done elsewhere).
131 //
132 // Pre-conditions:
133 // begin != end
134 // *begin should be a character which is > ' '. This implies that there
135 // is at least one non-whitespace characters between [begin, end).
136 // headers is a valid pointer to a BalsaHeaders class.
137 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
138 // Entire first line must exist between [begin, end)
139 // Exactly zero or one newlines -may- exist between [begin, end)
140 // [begin, end) should exist in the header's buffer.
141 //
142 // Side-effects:
143 // headers will be modified
144 // error_code may be modified if either a warning or error is detected
145 //
146 // Returns:
147 // True if no error (as opposed to warning) is detected.
148 // False if an error (as opposed to warning) is detected.
149
150 //
// If there is indeed non-whitespace in the line, then the following
// will take care of this for you:
//  while (*begin <= ' ') ++begin;
//  ParseHTTPFirstLine(begin, end, is_request, &headers, &error_code);
155 //
156
ParseHTTPFirstLine(const char * begin,const char * end,bool is_request,BalsaHeaders * headers,BalsaFrameEnums::ErrorCode * error_code)157 bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
158 BalsaHeaders* headers,
159 BalsaFrameEnums::ErrorCode* error_code) {
160 while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
161 --end;
162 }
163
164 const char* current =
165 ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
166 &headers->non_whitespace_1_idx_);
167 current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
168 &headers->non_whitespace_2_idx_);
169 current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
170 &headers->non_whitespace_3_idx_);
171
172 // Clean up any trailing whitespace that comes after the third island
173 const char* last = end;
174 while (current <= last && CHAR_LE(*last, ' ')) {
175 --last;
176 }
177 headers->whitespace_4_idx_ = last - begin + 1;
178
179 // Either the passed-in line is empty, or it starts with a non-whitespace
180 // character.
181 QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
182
183 QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
184 QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
185
186 // If the line isn't empty, it has at least one non-whitespace character (see
187 // first QUICHE_DCHECK), which will have been identified as a non-empty
188 // [non_whitespace_1_idx_, whitespace_2_idx_).
189 QUICHE_DCHECK(begin == end ||
190 headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
191
192 if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
193 // This error may be triggered if the second token is empty, OR there's no
194 // WS after the first token; we don't bother to distinguish exactly which.
195 // (I'm not sure why we distinguish different kinds of parse error at all,
196 // actually.)
197 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
198 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
199 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
200 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
201 static_cast<int>(is_request));
202 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
203 return false;
204 }
205 }
206 if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
207 if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
208 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
209 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
210 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
211 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
212 static_cast<int>(is_request));
213 }
214 }
215
216 if (!is_request) {
217 headers->parsed_response_code_ = 0;
218 // If the response code is non-empty:
219 if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
220 if (!absl::SimpleAtoi(
221 absl::string_view(begin + headers->non_whitespace_2_idx_,
222 headers->non_whitespace_3_idx_ -
223 headers->non_whitespace_2_idx_),
224 &headers->parsed_response_code_)) {
225 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
226 return false;
227 }
228 }
229 }
230
231 return true;
232 }
233
234 // begin - beginning of the firstline
235 // end - end of the firstline
236 //
237 // A precondition for this function is that there is non-whitespace between
238 // [begin, end). If this precondition is not met, the function will not perform
239 // as expected (and bad things may happen, and it will eat your first, second,
240 // and third unborn children!).
241 //
242 // Another precondition for this function is that [begin, end) includes
243 // at most one newline, which must be at the end of the line.
ProcessFirstLine(const char * begin,const char * end)244 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
245 BalsaFrameEnums::ErrorCode previous_error = last_error_;
246 if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
247 parse_state_ = BalsaFrameEnums::ERROR;
248 HandleError(last_error_);
249 return;
250 }
251 if (previous_error != last_error_) {
252 HandleWarning(last_error_);
253 }
254
255 const absl::string_view line_input(
256 begin + headers_->non_whitespace_1_idx_,
257 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
258 const absl::string_view part1(
259 begin + headers_->non_whitespace_1_idx_,
260 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
261 const absl::string_view part2(
262 begin + headers_->non_whitespace_2_idx_,
263 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
264 const absl::string_view part3(
265 begin + headers_->non_whitespace_3_idx_,
266 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
267
268 if (is_request_) {
269 visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
270 if (part3.empty()) {
271 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
272 }
273 return;
274 }
275
276 visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
277 }
278
// 'stream_begin' points to the first character of the headers buffer.
// 'line_begin' points to the first character of the line.
// 'current' points to a char which is ':'.
// 'line_end' points one past the last non-whitespace character of the line
// (the caller has already trimmed the trailing whitespace and the newline).
// Given the position of the ':' in a header line, trims the whitespace that
// surrounds it and stores the results in `current_header_line`:
//   key_end_idx     - offset one past the last key character kept
//   value_begin_idx - offset of the first value character (or of line_end
//                     when the value is empty)
// Both offsets are relative to `stream_begin`. Nothing else in
// `current_header_line` is modified.
void BalsaFrame::CleanUpKeyValueWhitespace(
    const char* stream_begin, const char* line_begin, const char* current,
    const char* line_end, HeaderLineDescription* current_header_line) {
  const char* colon_loc = current;
  QUICHE_DCHECK_LT(colon_loc, line_end);
  QUICHE_DCHECK_EQ(':', *colon_loc);
  QUICHE_DCHECK_EQ(':', *current);
  // line_end must point at a byte <= ' ', i.e. at the trimmed-off tail.
  QUICHE_DCHECK_CHAR_GE(' ', *line_end)
      << "\"" << std::string(line_begin, line_end) << "\"";

  // Walk backwards from the byte before the colon over any bytes <= ' '.
  // The walk never goes below line_begin, so a key consisting solely of
  // whitespace stops on its first byte.
  --current;
  while (current > line_begin && CHAR_LE(*current, ' ')) {
    --current;
  }
  // `current` has been decremented at least once, so it differs from
  // colon_loc here and this advances it by one: to one past the byte the
  // backward walk stopped on.
  current += static_cast<int>(current != colon_loc);
  current_header_line->key_end_idx = current - stream_begin;

  // Walk forwards from the byte after the colon over any bytes <= ' ' to find
  // where the value starts.
  current = colon_loc;
  QUICHE_DCHECK_EQ(':', *current);
  ++current;
  while (current < line_end && CHAR_LE(*current, ' ')) {
    ++current;
  }
  current_header_line->value_begin_idx = current - stream_begin;

  // Sanity: first_char_idx <= key_end_idx <= value_begin_idx <= last_char_idx.
  QUICHE_DCHECK_GE(current_header_line->key_end_idx,
                   current_header_line->first_char_idx);
  QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
                   current_header_line->key_end_idx);
  QUICHE_DCHECK_GE(current_header_line->last_char_idx,
                   current_header_line->value_begin_idx);
}
316
FindColonsAndParseIntoKeyValue(const Lines & lines,bool is_trailer,BalsaHeaders * headers)317 bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
318 bool is_trailer,
319 BalsaHeaders* headers) {
320 QUICHE_DCHECK(!lines.empty());
321 const char* stream_begin = headers->OriginalHeaderStreamBegin();
322 // The last line is always just a newline (and is uninteresting).
323 const Lines::size_type lines_size_m1 = lines.size() - 1;
324 // For a trailer, there is no first line, so lines[0] is the first header.
325 // For real headers, the first line takes lines[0], so real header starts
326 // at index 1.
327 int first_header_idx = (is_trailer ? 0 : 1);
328 const char* current = stream_begin + lines[first_header_idx].first;
329 // This code is a bit more subtle than it may appear at first glance.
330 // This code looks for a colon in the current line... but it also looks
331 // beyond the current line. If there is no colon in the current line, then
332 // for each subsequent line (until the colon which -has- been found is
333 // associated with a line), no searching for a colon will be performed. In
334 // this way, we minimize the amount of bytes we have scanned for a colon.
335 for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
336 const char* line_begin = stream_begin + lines[i].first;
337
338 // Here we handle possible continuations. Note that we do not replace
339 // the '\n' in the line before a continuation (at least, as of now),
340 // which implies that any code which looks for a value must deal with
341 // "\r\n", etc -within- the line (and not just at the end of it).
342 for (++i; i < lines_size_m1; ++i) {
343 const char c = *(stream_begin + lines[i].first);
344 if (CHAR_GT(c, ' ')) {
345 // Not a continuation, so stop. Note that if the 'original' i = 1,
346 // and the next line is not a continuation, we'll end up with i = 2
347 // when we break. This handles the incrementing of i for the outer
348 // loop.
349 break;
350 }
351
352 // Space and tab are valid starts to continuation lines.
353 // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
354 // can choose to reject or normalize continuation lines.
355 if ((c != ' ' && c != '\t') ||
356 http_validation_policy().disallow_header_continuation_lines) {
357 HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
358 : BalsaFrameEnums::INVALID_HEADER_FORMAT);
359 return false;
360 }
361
362 // If disallow_header_continuation_lines() is false, we neither reject nor
363 // normalize continuation lines, in violation of RFC7230.
364 }
365 const char* line_end = stream_begin + lines[i - 1].second;
366 QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
367
368 // We cleanup the whitespace at the end of the line before doing anything
369 // else of interest as it allows us to do nothing when irregularly formatted
370 // headers are parsed (e.g. those with only keys, only values, or no colon).
371 //
372 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
373 --line_end;
374 QUICHE_DCHECK_EQ('\n', *line_end)
375 << "\"" << std::string(line_begin, line_end) << "\"";
376 while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
377 --line_end;
378 }
379 ++line_end;
380 QUICHE_DCHECK_CHAR_GE(' ', *line_end);
381 QUICHE_DCHECK_LT(line_begin, line_end);
382
383 // We use '0' for the block idx, because we're always writing to the first
384 // block from the framer (we do this because the framer requires that the
385 // entire header sequence be in a contiguous buffer).
386 headers->header_lines_.push_back(HeaderLineDescription(
387 line_begin - stream_begin, line_end - stream_begin,
388 line_end - stream_begin, line_end - stream_begin, 0));
389 if (current >= line_end) {
390 if (http_validation_policy().require_header_colon) {
391 HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
392 : BalsaFrameEnums::HEADER_MISSING_COLON);
393 return false;
394 }
395 HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
396 : BalsaFrameEnums::HEADER_MISSING_COLON);
397 // Then the next colon will not be found within this header line-- time
398 // to try again with another header-line.
399 continue;
400 }
401 if (current < line_begin) {
402 // When this condition is true, the last detected colon was part of a
403 // previous line. We reset to the beginning of the line as we don't care
404 // about the presence of any colon before the beginning of the current
405 // line.
406 current = line_begin;
407 }
408 for (; current < line_end; ++current) {
409 if (*current == ':') {
410 break;
411 }
412
413 // Generally invalid characters were found earlier.
414 if (http_validation_policy().disallow_double_quote_in_header_name) {
415 if (header_properties::IsInvalidHeaderKeyChar(*current)) {
416 HandleError(is_trailer
417 ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
418 : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
419 return false;
420 }
421 } else if (header_properties::IsInvalidHeaderKeyCharAllowDoubleQuote(
422 *current)) {
423 HandleError(is_trailer
424 ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
425 : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
426 return false;
427 }
428 }
429
430 if (current == line_end) {
431 // There was no colon in the line. The arguments we passed into the
432 // construction for the HeaderLineDescription object should be OK-- it
433 // assumes that the entire content is 'key' by default (which is true, as
434 // there was no colon, there can be no value). Note that this is a
435 // construct which is technically not allowed by the spec.
436
437 // In strict mode, we do treat this invalid value-less key as an error.
438 if (http_validation_policy().require_header_colon) {
439 HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
440 : BalsaFrameEnums::HEADER_MISSING_COLON);
441 return false;
442 }
443 HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
444 : BalsaFrameEnums::HEADER_MISSING_COLON);
445 continue;
446 }
447
448 QUICHE_DCHECK_EQ(*current, ':');
449 QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
450 QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
451
452 HeaderLineDescription& current_header_line = headers->header_lines_.back();
453 current_header_line.key_end_idx = current - stream_begin;
454 current_header_line.value_begin_idx = current_header_line.key_end_idx;
455 if (current < line_end) {
456 ++current_header_line.key_end_idx;
457
458 CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
459 ¤t_header_line);
460 }
461
462 const absl::string_view key(
463 stream_begin + current_header_line.first_char_idx,
464 current_header_line.key_end_idx - current_header_line.first_char_idx);
465 const absl::string_view value(
466 stream_begin + current_header_line.value_begin_idx,
467 current_header_line.last_char_idx -
468 current_header_line.value_begin_idx);
469 visitor_->OnHeader(key, value);
470 }
471
472 return true;
473 }
474
HandleWarning(BalsaFrameEnums::ErrorCode error_code)475 void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
476 last_error_ = error_code;
477 visitor_->HandleWarning(last_error_);
478 }
479
HandleError(BalsaFrameEnums::ErrorCode error_code)480 void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
481 last_error_ = error_code;
482 parse_state_ = BalsaFrameEnums::ERROR;
483 visitor_->HandleError(last_error_);
484 }
485
ProcessContentLengthLine(HeaderLines::size_type line_idx,size_t * length)486 BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
487 HeaderLines::size_type line_idx, size_t* length) {
488 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
489 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
490 const char* line_end = stream_begin + header_line.last_char_idx;
491 const char* value_begin = (stream_begin + header_line.value_begin_idx);
492
493 if (value_begin >= line_end) {
494 // There is no non-whitespace value data.
495 QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
496 return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
497 }
498
499 *length = 0;
500 while (value_begin < line_end) {
501 if (*value_begin < '0' || *value_begin > '9') {
502 // bad! content-length found, and couldn't parse all of it!
503 QUICHE_DVLOG(1)
504 << "invalid content-length - non numeric character detected";
505 return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
506 }
507 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
508 size_t length_x_10 = *length * 10;
509 const size_t c = *value_begin - '0';
510 if (*length > kMaxDiv10 ||
511 (std::numeric_limits<size_t>::max() - length_x_10) < c) {
512 QUICHE_DVLOG(1) << "content-length overflow";
513 return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
514 }
515 *length = length_x_10 + c;
516 ++value_begin;
517 }
518 QUICHE_DVLOG(1) << "content_length parsed: " << *length;
519 return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
520 }
521
ProcessTransferEncodingLine(HeaderLines::size_type line_idx)522 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
523 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
524 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
525 const absl::string_view transfer_encoding(
526 stream_begin + header_line.value_begin_idx,
527 header_line.last_char_idx - header_line.value_begin_idx);
528
529 if (absl::EqualsIgnoreCase(transfer_encoding, kChunked)) {
530 headers_->transfer_encoding_is_chunked_ = true;
531 return;
532 }
533
534 if (absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
535 headers_->transfer_encoding_is_chunked_ = false;
536 return;
537 }
538
539 if (http_validation_policy().validate_transfer_encoding) {
540 HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
541 }
542 }
543
CheckHeaderLinesForInvalidChars(const Lines & lines,const BalsaHeaders * headers)544 bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
545 const BalsaHeaders* headers) {
546 // Read from the beginning of the first line to the end of the last line.
547 // Note we need to add the first line's offset as in the case of a trailer
548 // it's non-zero.
549 const char* stream_begin =
550 headers->OriginalHeaderStreamBegin() + lines.front().first;
551 const char* stream_end =
552 headers->OriginalHeaderStreamBegin() + lines.back().second;
553 bool found_invalid = false;
554
555 for (const char* c = stream_begin; c < stream_end; c++) {
556 if (header_properties::IsInvalidHeaderChar(*c)) {
557 found_invalid = true;
558 invalid_chars_[*c]++;
559 }
560 }
561
562 return found_invalid;
563 }
564
// Processes a complete block of header lines (or trailer lines when
// `is_trailer` is true) described by `lines`, storing results in `headers`.
//
// Steps, in order:
//   1. Optionally scan for invalid header characters (error or warning,
//      depending on invalid_chars_error_enabled()).
//   2. Split the lines into key/value pairs via FindColonsAndParseIntoKeyValue.
//   3. Reject lines whose key is empty or begins with a space.
//   4. For headers only: interpret Content-Length and Transfer-Encoding, which
//      determine how the body is framed.
// Any failure is reported through HandleError() and ends processing early.
void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
                                    BalsaHeaders* headers) {
  QUICHE_DCHECK(!lines.empty());
  QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";

  // Character validation always applies to requests; for responses it applies
  // only when the policy asks for it. Either way it runs only while invalid
  // character tracking is switched on.
  if ((is_request() || http_validation_policy()
                           .disallow_invalid_header_characters_in_response) &&
      track_invalid_chars()) {
    if (CheckHeaderLinesForInvalidChars(lines, headers)) {
      if (invalid_chars_error_enabled()) {
        HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
        return;
      }

      HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
    }
  }

  // There is no need to attempt to process headers (resp. trailers)
  // if no header (resp. trailer) lines exist.
  //
  // The last line of the message, which is an empty line, is never a header
  // (resp. trailer) line.  Furthermore, the first line of the message is not
  // a header line.  Therefore there are at least two (resp. one) lines in the
  // message which are not header (resp. trailer) lines.
  //
  // Thus, we test to see if we have more than two (resp. one) lines total
  // before attempting to parse any header (resp. trailer) lines.
  if (lines.size() <= (is_trailer ? 1 : 2)) {
    return;
  }

  // Both indices are stored as (index + 1) so that 0 can mean "not seen".
  HeaderLines::size_type content_length_idx = 0;
  HeaderLines::size_type transfer_encoding_idx = 0;
  const char* stream_begin = headers->OriginalHeaderStreamBegin();
  // Parse the rest of the header or trailer data into key-value pairs.
  if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
    return;
  }
  // At this point, we've parsed all of the headers/trailers.  Time to look
  // for those headers which we require for framing or for format errors.
  const HeaderLines::size_type lines_size = headers->header_lines_.size();
  for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
    const HeaderLineDescription& line = headers->header_lines_[i];
    const absl::string_view key(stream_begin + line.first_char_idx,
                                line.key_end_idx - line.first_char_idx);
    QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();

    // If a header begins with either lowercase or uppercase 'c' or 't', then
    // the header may be one of content-length, connection, content-encoding
    // or transfer-encoding. These headers are special, as they change the way
    // that the message is framed, and so the framer is required to search
    // for them.  However, first check for a formatting error, and skip
    // special header treatment on trailer lines (when is_trailer is true).
    if (key.empty() || key[0] == ' ') {
      parse_state_ = BalsaFrameEnums::ERROR;
      HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
                             : BalsaFrameEnums::INVALID_HEADER_FORMAT);
      return;
    }
    if (is_trailer) {
      continue;
    }
    if (absl::EqualsIgnoreCase(key, kContentLength)) {
      size_t length = 0;
      BalsaHeadersEnums::ContentLengthStatus content_length_status =
          ProcessContentLengthLine(i, &length);
      // First Content-Length line: remember its outcome and move on.
      if (content_length_idx == 0) {
        content_length_idx = i + 1;
        headers->content_length_status_ = content_length_status;
        headers->content_length_ = length;
        content_length_remaining_ = length;
        continue;
      }
      // Repeated Content-Length line: it is an error if its parse status
      // differs from the first one, or if both are valid and either the
      // policy forbids repeats or the values differ.
      if ((headers->content_length_status_ != content_length_status) ||
          ((headers->content_length_status_ ==
            BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
           (http_validation_policy().disallow_multiple_content_length ||
            length != headers->content_length_))) {
        HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
        return;
      }
      continue;
    }
    if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
      if (http_validation_policy().validate_transfer_encoding &&
          transfer_encoding_idx != 0) {
        HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
        return;
      }
      // Without validation, the last Transfer-Encoding line wins.
      transfer_encoding_idx = i + 1;
    }
  }

  if (!is_trailer) {
    if (http_validation_policy().validate_transfer_encoding &&
        http_validation_policy()
            .disallow_transfer_encoding_with_content_length &&
        content_length_idx != 0 && transfer_encoding_idx != 0) {
      HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
      return;
    }
    // NOTE(review): this check runs before ProcessTransferEncodingLine()
    // below, which is the call that sets transfer_encoding_is_chunked_ from
    // this message's Transfer-Encoding line. As written, the content-length
    // fields are therefore only cleared if the flag was already set on
    // `headers` beforehand -- confirm that this ordering is intended.
    if (headers->transfer_encoding_is_chunked_) {
      headers->content_length_ = 0;
      headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
      content_length_remaining_ = 0;
    }
    if (transfer_encoding_idx != 0) {
      ProcessTransferEncodingLine(transfer_encoding_idx - 1);
    }
  }
}
677
AssignParseStateAfterHeadersHaveBeenParsed()678 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
679 // For responses, can't have a body if the request was a HEAD, or if it is
680 // one of these response-codes. rfc2616 section 4.3
681 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
682 int response_code = headers_->parsed_response_code_;
683 if (!is_request_ && (request_was_head_ ||
684 !BalsaHeaders::ResponseCanHaveBody(response_code))) {
685 // There is no body.
686 return;
687 }
688
689 if (headers_->transfer_encoding_is_chunked_) {
690 // Note that
691 // if ( Transfer-Encoding: chunked && Content-length: )
692 // then Transfer-Encoding: chunked trumps.
693 // This is as specified in the spec.
694 // rfc2616 section 4.4.3
695 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
696 return;
697 }
698
699 // Errors parsing content-length definitely can cause
700 // protocol errors/warnings
701 switch (headers_->content_length_status_) {
702 // If we have a content-length, and it is parsed
703 // properly, there are two options.
704 // 1) zero content, in which case the message is done, and
705 // 2) nonzero content, in which case we have to
706 // consume the body.
707 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
708 if (headers_->content_length_ == 0) {
709 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
710 } else {
711 parse_state_ = BalsaFrameEnums::READING_CONTENT;
712 }
713 break;
714 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
715 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
716 // If there were characters left-over after parsing the
717 // content length, we should flag an error and stop.
718 HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
719 break;
720 // We can have: no transfer-encoding, no content length, and no
721 // connection: close...
722 // Unfortunately, this case doesn't seem to be covered in the spec.
723 // We'll assume that the safest thing to do here is what the google
724 // binaries before 2008 already do, which is to assume that
725 // everything until the connection is closed is body.
726 case BalsaHeadersEnums::NO_CONTENT_LENGTH:
727 if (is_request_) {
728 const absl::string_view method = headers_->request_method();
729 // POSTs and PUTs should have a detectable body length. If they
730 // do not we consider it an error.
731 if ((method != "POST" && method != "PUT") ||
732 !http_validation_policy().require_content_length_if_body_required) {
733 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
734 break;
735 } else if (!allow_reading_until_close_for_request_) {
736 HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
737 break;
738 }
739 }
740 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
741 HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
742 break;
743 // The COV_NF_... statements here provide hints to the apparatus
744 // which computes coverage reports/ratios that this code is never
745 // intended to be executed, and should technically be impossible.
746 // COV_NF_START
747 default:
748 QUICHE_LOG(FATAL) << "Saw a content_length_status: "
749 << headers_->content_length_status_
750 << " which is unknown.";
751 // COV_NF_END
752 }
753 }
754
ProcessHeaders(const char * message_start,size_t message_length)755 size_t BalsaFrame::ProcessHeaders(const char* message_start,
756 size_t message_length) {
757 const char* const original_message_start = message_start;
758 const char* const message_end = message_start + message_length;
759 const char* message_current = message_start;
760 const char* checkpoint = message_start;
761
762 if (message_length == 0) {
763 return message_current - original_message_start;
764 }
765
766 while (message_current < message_end) {
767 size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
768
769 // Yes, we could use strchr (assuming null termination), or
770 // memchr, but as it turns out that is slower than this tight loop
771 // for the input that we see.
772 if (!saw_non_newline_char_) {
773 do {
774 const char c = *message_current;
775 if (c != '\r' && c != '\n') {
776 if (CHAR_LE(c, ' ')) {
777 HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
778 return message_current - original_message_start;
779 }
780 break;
781 }
782 ++message_current;
783 if (message_current == message_end) {
784 return message_current - original_message_start;
785 }
786 } while (true);
787 saw_non_newline_char_ = true;
788 message_start = message_current;
789 checkpoint = message_current;
790 }
791 while (message_current < message_end) {
792 if (*message_current != '\n') {
793 ++message_current;
794 continue;
795 }
796 const size_t relative_idx = message_current - message_start;
797 const size_t message_current_idx = 1 + base_idx + relative_idx;
798 lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
799 if (lines_.size() == 1) {
800 headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
801 checkpoint = message_current + 1;
802 const char* begin = headers_->OriginalHeaderStreamBegin();
803
804 QUICHE_DVLOG(1) << "First line "
805 << std::string(begin, lines_[0].second);
806 QUICHE_DVLOG(1) << "is_request_: " << is_request_;
807 ProcessFirstLine(begin, begin + lines_[0].second);
808 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
809 break;
810 }
811
812 if (parse_state_ == BalsaFrameEnums::ERROR) {
813 return message_current - original_message_start;
814 }
815 }
816 const size_t chars_since_last_slash_n =
817 (message_current_idx - last_slash_n_idx_);
818 last_slash_n_idx_ = message_current_idx;
819 if (chars_since_last_slash_n > 2) {
820 // false positive.
821 ++message_current;
822 continue;
823 }
824 if ((chars_since_last_slash_n == 1) ||
825 (((message_current > message_start) &&
826 (*(message_current - 1) == '\r')) ||
827 (last_char_was_slash_r_))) {
828 break;
829 }
830 ++message_current;
831 }
832
833 if (message_current == message_end) {
834 continue;
835 }
836
837 ++message_current;
838 QUICHE_DCHECK(message_current >= message_start);
839 if (message_current > message_start) {
840 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
841 }
842
843 // Check if we have exceeded maximum headers length
844 // Although we check for this limit before and after we call this function
845 // we check it here as well to make sure that in case the visitor changed
846 // the max_header_length_ (for example after processing the first line)
847 // we handle it gracefully.
848 if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
849 HandleHeadersTooLongError();
850 return message_current - original_message_start;
851 }
852
853 // Since we know that we won't be writing any more bytes of the header,
854 // we tell that to the headers object. The headers object may make
855 // more efficient allocation decisions when this is signaled.
856 headers_->DoneWritingFromFramer();
857 visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());
858
859 // Ok, now that we've written everything into our header buffer, it is
860 // time to process the header lines (extract proper values for headers
861 // which are important for framing).
862 ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
863 if (parse_state_ == BalsaFrameEnums::ERROR) {
864 return message_current - original_message_start;
865 }
866
867 if (use_interim_headers_callback_ &&
868 IsInterimResponse(headers_->parsed_response_code()) &&
869 headers_->parsed_response_code() != kSwitchingProtocolsStatusCode) {
870 // Deliver headers from this interim response but reset everything else to
871 // prepare for the next set of headers. Skip 101 Switching Protocols
872 // because these are considered final headers for the current protocol.
873 visitor_->OnInterimHeaders(
874 std::make_unique<BalsaHeaders>(std::move(*headers_)));
875 Reset();
876 checkpoint = message_start = message_current;
877 continue;
878 }
879 if (continue_headers_ != nullptr &&
880 headers_->parsed_response_code_ == kContinueStatusCode) {
881 // Save the headers from this 100 Continue response but reset everything
882 // else to prepare for the next set of headers.
883 BalsaHeaders saved_continue_headers = std::move(*headers_);
884 Reset();
885 *continue_headers_ = std::move(saved_continue_headers);
886 visitor_->ContinueHeaderDone();
887 checkpoint = message_start = message_current;
888 continue;
889 }
890 AssignParseStateAfterHeadersHaveBeenParsed();
891 if (parse_state_ == BalsaFrameEnums::ERROR) {
892 return message_current - original_message_start;
893 }
894 visitor_->ProcessHeaders(*headers_);
895 visitor_->HeaderDone();
896 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
897 visitor_->MessageDone();
898 }
899 return message_current - original_message_start;
900 }
901 // If we've gotten to here, it means that we've consumed all of the
902 // available input. We need to record whether or not the last character we
903 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
904 // a header framing that is split across the two calls.
905 last_char_was_slash_r_ = (*(message_end - 1) == '\r');
906 QUICHE_DCHECK(message_current >= message_start);
907 if (message_current > message_start) {
908 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
909 }
910 return message_current - original_message_start;
911 }
912
BytesSafeToSplice() const913 size_t BalsaFrame::BytesSafeToSplice() const {
914 switch (parse_state_) {
915 case BalsaFrameEnums::READING_CHUNK_DATA:
916 return chunk_length_remaining_;
917 case BalsaFrameEnums::READING_UNTIL_CLOSE:
918 return std::numeric_limits<size_t>::max();
919 case BalsaFrameEnums::READING_CONTENT:
920 return content_length_remaining_;
921 default:
922 return 0;
923 }
924 }
925
BytesSpliced(size_t bytes_spliced)926 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
927 switch (parse_state_) {
928 case BalsaFrameEnums::READING_CHUNK_DATA:
929 if (chunk_length_remaining_ < bytes_spliced) {
930 HandleError(BalsaFrameEnums::
931 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
932 return;
933 }
934 chunk_length_remaining_ -= bytes_spliced;
935 if (chunk_length_remaining_ == 0) {
936 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
937 }
938 return;
939
940 case BalsaFrameEnums::READING_UNTIL_CLOSE:
941 return;
942
943 case BalsaFrameEnums::READING_CONTENT:
944 if (content_length_remaining_ < bytes_spliced) {
945 HandleError(BalsaFrameEnums::
946 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
947 return;
948 }
949 content_length_remaining_ -= bytes_spliced;
950 if (content_length_remaining_ == 0) {
951 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
952 visitor_->MessageDone();
953 }
954 return;
955
956 default:
957 HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
958 return;
959 }
960 }
961
ProcessInput(const char * input,size_t size)962 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
963 const char* current = input;
964 const char* on_entry = current;
965 const char* end = current + size;
966
967 QUICHE_DCHECK(headers_ != nullptr);
968 if (headers_ == nullptr) {
969 return 0;
970 }
971
972 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
973 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
974 // Yes, we still have to check this here as the user can change the
975 // max_header_length amount!
976 // Also it is possible that we have reached the maximum allowed header size,
977 // and we have more to consume (remember we are still inside
978 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
979 if (header_length > max_header_length_ ||
980 (header_length == max_header_length_ && size > 0)) {
981 HandleHeadersTooLongError();
982 return current - input;
983 }
984 const size_t bytes_to_process =
985 std::min(max_header_length_ - header_length, size);
986 current += ProcessHeaders(input, bytes_to_process);
987 // If we are still reading headers check if we have crossed the headers
988 // limit. Note that we check for >= as opposed to >. This is because if
989 // header_length_after equals max_header_length_ and we are still in the
990 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
991 // sure that the headers limit will be crossed later on
992 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
993 // Note that headers_ is valid only if we are still reading headers.
994 const size_t header_length_after =
995 headers_->GetReadableBytesFromHeaderStream();
996 if (header_length_after >= max_header_length_) {
997 HandleHeadersTooLongError();
998 }
999 }
1000 return current - input;
1001 }
1002
1003 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
1004 parse_state_ == BalsaFrameEnums::ERROR) {
1005 // Can do nothing more 'till we're reset.
1006 return current - input;
1007 }
1008
1009 QUICHE_DCHECK_LE(current, end);
1010 if (current == end) {
1011 return current - input;
1012 }
1013
1014 while (true) {
1015 switch (parse_state_) {
1016 case BalsaFrameEnums::READING_CHUNK_LENGTH:
1017 // In this state we read the chunk length.
1018 // Note that once we hit a character which is not in:
1019 // [0-9;A-Fa-f\n], we transition to a different state.
1020 //
1021 QUICHE_DCHECK_LE(current, end);
1022 while (true) {
1023 if (current == end) {
1024 visitor_->OnRawBodyInput(
1025 absl::string_view(on_entry, current - on_entry));
1026 return current - input;
1027 }
1028
1029 const char c = *current;
1030 ++current;
1031
1032 static const signed char kBad = -1;
1033 static const signed char kDelimiter = -2;
1034
1035 // valid cases:
1036 // "09123\n" // -> 09123
1037 // "09123\r\n" // -> 09123
1038 // "09123 \n" // -> 09123
1039 // "09123 \r\n" // -> 09123
1040 // "09123 12312\n" // -> 09123
1041 // "09123 12312\r\n" // -> 09123
1042 // "09123; foo=bar\n" // -> 09123
1043 // "09123; foo=bar\r\n" // -> 09123
1044 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF
1045 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF
1046 // invalid cases:
1047 // "[ \t]+[^\n]*\n"
1048 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow)
1049 // "\r\n"
1050 // "\n"
1051 signed char addition = kBad;
1052 // clang-format off
1053 switch (c) {
1054 case '0': addition = 0; break;
1055 case '1': addition = 1; break;
1056 case '2': addition = 2; break;
1057 case '3': addition = 3; break;
1058 case '4': addition = 4; break;
1059 case '5': addition = 5; break;
1060 case '6': addition = 6; break;
1061 case '7': addition = 7; break;
1062 case '8': addition = 8; break;
1063 case '9': addition = 9; break;
1064 case 'a': addition = 0xA; break;
1065 case 'b': addition = 0xB; break;
1066 case 'c': addition = 0xC; break;
1067 case 'd': addition = 0xD; break;
1068 case 'e': addition = 0xE; break;
1069 case 'f': addition = 0xF; break;
1070 case 'A': addition = 0xA; break;
1071 case 'B': addition = 0xB; break;
1072 case 'C': addition = 0xC; break;
1073 case 'D': addition = 0xD; break;
1074 case 'E': addition = 0xE; break;
1075 case 'F': addition = 0xF; break;
1076 case '\t':
1077 case '\n':
1078 case '\r':
1079 case ' ':
1080 case ';':
1081 addition = kDelimiter;
1082 break;
1083 default:
1084 // Leave addition == kBad
1085 break;
1086 }
1087 // clang-format on
1088 if (addition >= 0) {
1089 chunk_length_character_extracted_ = true;
1090 size_t length_x_16 = chunk_length_remaining_ * 16;
1091 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1092 if ((chunk_length_remaining_ > kMaxDiv16) ||
1093 (std::numeric_limits<size_t>::max() - length_x_16) <
1094 static_cast<size_t>(addition)) {
1095 // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1096 visitor_->OnRawBodyInput(
1097 absl::string_view(on_entry, current - on_entry));
1098 HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
1099 return current - input;
1100 }
1101 chunk_length_remaining_ = length_x_16 + addition;
1102 continue;
1103 }
1104
1105 if (!chunk_length_character_extracted_ || addition == kBad) {
1106 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1107 // characters were converted, or an unexpected character was
1108 // seen.
1109 visitor_->OnRawBodyInput(
1110 absl::string_view(on_entry, current - on_entry));
1111 HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
1112 return current - input;
1113 }
1114
1115 break;
1116 }
1117
1118 --current;
1119 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1120 visitor_->OnChunkLength(chunk_length_remaining_);
1121 continue;
1122
1123 case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
1124 // TODO(phython): Convert this scanning to be 16 bytes at a time if
1125 // there is data to be read.
1126 const char* extensions_start = current;
1127 size_t extensions_length = 0;
1128 QUICHE_DCHECK_LE(current, end);
1129 while (true) {
1130 if (current == end) {
1131 visitor_->OnChunkExtensionInput(
1132 absl::string_view(extensions_start, extensions_length));
1133 visitor_->OnRawBodyInput(
1134 absl::string_view(on_entry, current - on_entry));
1135 return current - input;
1136 }
1137 const char c = *current;
1138 if (c == '\r' || c == '\n') {
1139 extensions_length = (extensions_start == current)
1140 ? 0
1141 : current - extensions_start - 1;
1142 }
1143
1144 ++current;
1145 if (c == '\n') {
1146 break;
1147 }
1148 }
1149
1150 chunk_length_character_extracted_ = false;
1151 visitor_->OnChunkExtensionInput(
1152 absl::string_view(extensions_start, extensions_length));
1153
1154 if (chunk_length_remaining_ != 0) {
1155 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1156 continue;
1157 }
1158
1159 HeaderFramingFound('\n');
1160 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1161 continue;
1162 }
1163
1164 case BalsaFrameEnums::READING_CHUNK_DATA:
1165 while (current < end) {
1166 if (chunk_length_remaining_ == 0) {
1167 break;
1168 }
1169 // read in the chunk
1170 size_t bytes_remaining = end - current;
1171 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
1172 ? chunk_length_remaining_
1173 : bytes_remaining;
1174 const char* tmp_current = current + consumed_bytes;
1175 visitor_->OnRawBodyInput(
1176 absl::string_view(on_entry, tmp_current - on_entry));
1177 visitor_->OnBodyChunkInput(
1178 absl::string_view(current, consumed_bytes));
1179 on_entry = current = tmp_current;
1180 chunk_length_remaining_ -= consumed_bytes;
1181 }
1182
1183 if (chunk_length_remaining_ == 0) {
1184 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1185 continue;
1186 }
1187
1188 visitor_->OnRawBodyInput(
1189 absl::string_view(on_entry, current - on_entry));
1190 return current - input;
1191
1192 case BalsaFrameEnums::READING_CHUNK_TERM:
1193 QUICHE_DCHECK_LE(current, end);
1194 while (true) {
1195 if (current == end) {
1196 visitor_->OnRawBodyInput(
1197 absl::string_view(on_entry, current - on_entry));
1198 return current - input;
1199 }
1200
1201 const char c = *current;
1202 ++current;
1203
1204 if (c == '\n') {
1205 break;
1206 }
1207 }
1208 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1209 continue;
1210
1211 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1212 QUICHE_DCHECK_LE(current, end);
1213 while (true) {
1214 if (current == end) {
1215 visitor_->OnRawBodyInput(
1216 absl::string_view(on_entry, current - on_entry));
1217 return current - input;
1218 }
1219
1220 const char c = *current;
1221 if (HeaderFramingFound(c) != 0) {
1222 // If we've found a "\r\n\r\n", then the message
1223 // is done.
1224 ++current;
1225 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1226 visitor_->OnRawBodyInput(
1227 absl::string_view(on_entry, current - on_entry));
1228 visitor_->MessageDone();
1229 return current - input;
1230 }
1231
1232 // If not, however, since the spec only suggests that the
1233 // client SHOULD indicate the presence of trailers, we get to
1234 // *test* that they did or didn't.
1235 // If all of the bytes we've seen since:
1236 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1237 // are either '\r', or '\n', then we can assume that we don't yet
1238 // know if we need to parse headers, or if the next byte will make
1239 // the HeaderFramingFound condition (above) true.
1240 if (!HeaderFramingMayBeFound()) {
1241 break;
1242 }
1243
1244 // If HeaderFramingMayBeFound(), then we have seen only characters
1245 // '\r' or '\n'.
1246 ++current;
1247
1248 // Lets try again! There is no state change here.
1249 }
1250
1251 // If (!HeaderFramingMayBeFound()), then we know that we must be
1252 // reading the first non CRLF character of a trailer.
1253 parse_state_ = BalsaFrameEnums::READING_TRAILER;
1254 visitor_->OnRawBodyInput(
1255 absl::string_view(on_entry, current - on_entry));
1256 on_entry = current;
1257 continue;
1258
1259 // TODO(yongfa): No leading whitespace is allowed before field-name per
1260 // RFC2616. Leading whitespace will cause header parsing error too.
1261 case BalsaFrameEnums::READING_TRAILER:
1262 while (current < end) {
1263 const char c = *current;
1264 ++current;
1265 ++trailer_length_;
1266 if (GetTrailers() != nullptr) {
1267 // Reuse the header length limit for trailer, which is just a bunch
1268 // of headers.
1269 if (trailer_length_ > max_header_length_) {
1270 --current;
1271 HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
1272 return current - input;
1273 }
1274 if (LineFramingFound(c)) {
1275 trailer_lines_.push_back(
1276 std::make_pair(start_of_trailer_line_, trailer_length_));
1277 start_of_trailer_line_ = trailer_length_;
1278 }
1279 }
1280 if (HeaderFramingFound(c) != 0) {
1281 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1282 if (BalsaHeaders* trailers = GetTrailers(); trailers != nullptr) {
1283 trailers->WriteFromFramer(on_entry, current - on_entry);
1284 trailers->DoneWritingFromFramer();
1285 ProcessHeaderLines(trailer_lines_, true /*is_trailer*/, trailers);
1286 if (parse_state_ == BalsaFrameEnums::ERROR) {
1287 return current - input;
1288 }
1289 if (trailers_ != nullptr) {
1290 visitor_->OnTrailers(std::move(trailers_));
1291
1292 // Allows trailers to be delivered without another call to
1293 // EnableTrailers() in case the framer is Reset().
1294 trailers_ = std::make_unique<BalsaHeaders>();
1295 } else {
1296 visitor_->ProcessTrailers(*trailer_);
1297 }
1298 }
1299 visitor_->OnTrailerInput(
1300 absl::string_view(on_entry, current - on_entry));
1301 visitor_->MessageDone();
1302 return current - input;
1303 }
1304 }
1305 if (BalsaHeaders* trailers = GetTrailers(); trailers != nullptr) {
1306 trailers->WriteFromFramer(on_entry, current - on_entry);
1307 }
1308 visitor_->OnTrailerInput(
1309 absl::string_view(on_entry, current - on_entry));
1310 return current - input;
1311
1312 case BalsaFrameEnums::READING_UNTIL_CLOSE: {
1313 const size_t bytes_remaining = end - current;
1314 if (bytes_remaining > 0) {
1315 visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
1316 visitor_->OnBodyChunkInput(
1317 absl::string_view(current, bytes_remaining));
1318 current += bytes_remaining;
1319 }
1320 return current - input;
1321 }
1322
1323 case BalsaFrameEnums::READING_CONTENT:
1324 while ((content_length_remaining_ != 0u) && current < end) {
1325 // read in the content
1326 const size_t bytes_remaining = end - current;
1327 const size_t consumed_bytes =
1328 (content_length_remaining_ < bytes_remaining)
1329 ? content_length_remaining_
1330 : bytes_remaining;
1331 visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
1332 visitor_->OnBodyChunkInput(
1333 absl::string_view(current, consumed_bytes));
1334 current += consumed_bytes;
1335 content_length_remaining_ -= consumed_bytes;
1336 }
1337 if (content_length_remaining_ == 0) {
1338 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1339 visitor_->MessageDone();
1340 }
1341 return current - input;
1342
1343 default:
1344 // The state-machine should never be in a state that isn't handled
1345 // above. This is a glaring logic error, and we should do something
1346 // drastic to ensure that this gets looked-at and fixed.
1347 QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE
1348 << " memory corruption?!"; // COV_NF_LINE
1349 }
1350 }
1351 }
1352
HandleHeadersTooLongError()1353 void BalsaFrame::HandleHeadersTooLongError() {
1354 if (parse_truncated_headers_even_when_headers_too_long_) {
1355 const size_t len = headers_->GetReadableBytesFromHeaderStream();
1356 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
1357
1358 if (last_slash_n_idx_ < len && stream_begin[last_slash_n_idx_] != '\r') {
1359 // We write an end to the truncated line, and a blank line to end the
1360 // headers, to end up with something that will parse.
1361 static const absl::string_view kTwoLineEnds = "\r\n\r\n";
1362 headers_->WriteFromFramer(kTwoLineEnds.data(), kTwoLineEnds.size());
1363
1364 // This is the last, truncated line.
1365 lines_.push_back(std::make_pair(last_slash_n_idx_, len + 2));
1366 // A blank line to end the headers.
1367 lines_.push_back(std::make_pair(len + 2, len + 4));
1368 }
1369
1370 ProcessHeaderLines(lines_, /*is_trailer=*/false, headers_);
1371 }
1372
1373 HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
1374 }
1375
GetTrailers() const1376 BalsaHeaders* BalsaFrame::GetTrailers() const {
1377 if (trailers_ != nullptr) {
1378 return trailers_.get();
1379 }
1380 return trailer_;
1381 }
1382
// Namespace-scope definitions of BalsaFrame's static const members. No
// initializer is given here, so the values presumably come from the in-class
// declarations, which are not visible in this file.
1383 const int32_t BalsaFrame::kValidTerm1;
1384 const int32_t BalsaFrame::kValidTerm1Mask;
1385 const int32_t BalsaFrame::kValidTerm2;
1386 const int32_t BalsaFrame::kValidTerm2Mask;
1387
1388 } // namespace quiche
1389
1390 #undef CHAR_LT
1391 #undef CHAR_LE
1392 #undef CHAR_GT
1393 #undef CHAR_GE
1394 #undef QUICHE_DCHECK_CHAR_GE
1395