1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "quiche/balsa/balsa_frame.h"
6
7 #include <algorithm>
8 #include <array>
9 #include <cstddef>
10 #include <cstdint>
11 #include <cstring>
12 #include <limits>
13 #include <memory>
14 #include <ostream>
15 #include <string>
16 #include <utility>
17
18 #include "absl/strings/match.h"
19 #include "absl/strings/numbers.h"
20 #include "absl/strings/string_view.h"
21 #include "quiche/balsa/balsa_enums.h"
22 #include "quiche/balsa/balsa_headers.h"
23 #include "quiche/balsa/balsa_visitor_interface.h"
24 #include "quiche/balsa/header_properties.h"
25 #include "quiche/common/platform/api/quiche_logging.h"
26
// Ordered character comparisons. Plain `char` may be signed, in which case
// bytes above 127 are negative and would rank below the ASCII range. Every
// operand is therefore converted to unsigned char before it is compared.
// (== and != do not need this treatment.)
#define CHAR_LT(lhs, rhs) \
  (static_cast<unsigned char>(lhs) < static_cast<unsigned char>(rhs))
#define CHAR_LE(lhs, rhs) \
  (static_cast<unsigned char>(lhs) <= static_cast<unsigned char>(rhs))
#define CHAR_GT(lhs, rhs) \
  (static_cast<unsigned char>(lhs) > static_cast<unsigned char>(rhs))
#define CHAR_GE(lhs, rhs) \
  (static_cast<unsigned char>(lhs) >= static_cast<unsigned char>(rhs))
// QUICHE_DCHECK_GE applied to the same unsigned-char view of both operands.
#define QUICHE_DCHECK_CHAR_GE(lhs, rhs)             \
  QUICHE_DCHECK_GE(static_cast<unsigned char>(lhs), \
                   static_cast<unsigned char>(rhs))
41
42 namespace quiche {
43
44 namespace {
45
46 const size_t kContinueStatusCode = 100;
47
48 constexpr absl::string_view kChunked = "chunked";
49 constexpr absl::string_view kContentLength = "content-length";
50 constexpr absl::string_view kIdentity = "identity";
51 constexpr absl::string_view kTransferEncoding = "transfer-encoding";
52
// Returns true when response_code lies in the 1xx range, i.e. [100, 200).
bool IsInterimResponse(size_t response_code) {
  // For an unsigned value, "hundreds digit is exactly 1" is the same range.
  return response_code / 100 == 1;
}
56
57 } // namespace
58
Reset()59 void BalsaFrame::Reset() {
60 last_char_was_slash_r_ = false;
61 saw_non_newline_char_ = false;
62 start_was_space_ = true;
63 chunk_length_character_extracted_ = false;
64 // is_request_ = true; // not reset between messages.
65 allow_reading_until_close_for_request_ = false;
66 // request_was_head_ = false; // not reset between messages.
67 // max_header_length_ = 16 * 1024; // not reset between messages.
68 // visitor_ = &do_nothing_visitor_; // not reset between messages.
69 chunk_length_remaining_ = 0;
70 content_length_remaining_ = 0;
71 last_slash_n_loc_ = nullptr;
72 last_recorded_slash_n_loc_ = nullptr;
73 last_slash_n_idx_ = 0;
74 term_chars_ = 0;
75 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
76 last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
77 invalid_chars_.clear();
78 lines_.clear();
79 if (continue_headers_ != nullptr) {
80 continue_headers_->Clear();
81 }
82 if (headers_ != nullptr) {
83 headers_->Clear();
84 }
85 trailer_lines_.clear();
86 start_of_trailer_line_ = 0;
87 trailer_length_ = 0;
88 if (trailer_ != nullptr) {
89 trailer_->Clear();
90 }
91 }
92
93 namespace {
94
// Scans a single "island" of the line [begin, end), starting at current. An
// island is a (possibly empty) run of whitespace followed by a (possibly
// empty) run of non-whitespace, where whitespace means any byte that is
// <= ' ' when viewed as an unsigned char.
//
// On return, the island is described by offsets relative to begin:
//   whitespace run:     [*first_whitespace, *first_nonwhite)
//   non-whitespace run: [*first_nonwhite, returnvalue - begin)
//
// Returns a pointer to the first whitespace character after the island, or
// end if the island runs to the end of the line.
// (I.e., returnvalue == end || *returnvalue <= ' ')
inline const char* ParseOneIsland(const char* current, const char* begin,
                                  const char* end, size_t* first_whitespace,
                                  size_t* first_nonwhite) {
  // Unsigned comparison so that bytes above 127 count as non-whitespace.
  const auto is_whitespace = [](char c) {
    return static_cast<unsigned char>(c) <= ' ';
  };

  *first_whitespace = current - begin;
  for (; current < end && is_whitespace(*current); ++current) {
  }

  *first_nonwhite = current - begin;
  for (; current < end && !is_whitespace(*current); ++current) {
  }

  return current;
}
120
121 } // namespace
122
123 // Summary:
124 // Parses the first line of either a request or response.
125 // Note that in the case of a detected warning, error_code will be set
126 // but the function will not return false.
127 // Exactly zero or one warning or error (but not both) may be detected
128 // by this function.
129 // Note that this function will not write the data of the first-line
130 // into the header's buffer (that should already have been done elsewhere).
131 //
132 // Pre-conditions:
133 // begin != end
134 // *begin should be a character which is > ' '. This implies that there
135 // is at least one non-whitespace characters between [begin, end).
136 // headers is a valid pointer to a BalsaHeaders class.
137 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
138 // Entire first line must exist between [begin, end)
139 // Exactly zero or one newlines -may- exist between [begin, end)
140 // [begin, end) should exist in the header's buffer.
141 //
142 // Side-effects:
143 // headers will be modified
144 // error_code may be modified if either a warning or error is detected
145 //
146 // Returns:
147 // True if no error (as opposed to warning) is detected.
148 // False if an error (as opposed to warning) is detected.
149
150 //
151 // If there is indeed non-whitespace in the line, then the following
152 // will take care of this for you:
153 // while (*begin <= ' ') ++begin;
154 // ProcessFirstLine(begin, end, is_request, &headers, &error_code);
155 //
156
ParseHTTPFirstLine(const char * begin,const char * end,bool is_request,BalsaHeaders * headers,BalsaFrameEnums::ErrorCode * error_code)157 bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
158 BalsaHeaders* headers,
159 BalsaFrameEnums::ErrorCode* error_code) {
160 while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
161 --end;
162 }
163
164 const char* current =
165 ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
166 &headers->non_whitespace_1_idx_);
167 current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
168 &headers->non_whitespace_2_idx_);
169 current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
170 &headers->non_whitespace_3_idx_);
171
172 // Clean up any trailing whitespace that comes after the third island
173 const char* last = end;
174 while (current <= last && CHAR_LE(*last, ' ')) {
175 --last;
176 }
177 headers->whitespace_4_idx_ = last - begin + 1;
178
179 // Either the passed-in line is empty, or it starts with a non-whitespace
180 // character.
181 QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
182
183 QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
184 QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
185
186 // If the line isn't empty, it has at least one non-whitespace character (see
187 // first QUICHE_DCHECK), which will have been identified as a non-empty
188 // [non_whitespace_1_idx_, whitespace_2_idx_).
189 QUICHE_DCHECK(begin == end ||
190 headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
191
192 if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
193 // This error may be triggered if the second token is empty, OR there's no
194 // WS after the first token; we don't bother to distinguish exactly which.
195 // (I'm not sure why we distinguish different kinds of parse error at all,
196 // actually.)
197 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
198 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
199 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
200 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
201 static_cast<int>(is_request));
202 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
203 return false;
204 }
205 }
206 if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
207 if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
208 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
209 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
210 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
211 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
212 static_cast<int>(is_request));
213 }
214 }
215
216 if (!is_request) {
217 headers->parsed_response_code_ = 0;
218 // If the response code is non-empty:
219 if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
220 if (!absl::SimpleAtoi(
221 absl::string_view(begin + headers->non_whitespace_2_idx_,
222 headers->non_whitespace_3_idx_ -
223 headers->non_whitespace_2_idx_),
224 &headers->parsed_response_code_)) {
225 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
226 return false;
227 }
228 }
229 }
230
231 return true;
232 }
233
234 // begin - beginning of the firstline
235 // end - end of the firstline
236 //
237 // A precondition for this function is that there is non-whitespace between
238 // [begin, end). If this precondition is not met, the function will not perform
239 // as expected (and bad things may happen, and it will eat your first, second,
240 // and third unborn children!).
241 //
242 // Another precondition for this function is that [begin, end) includes
243 // at most one newline, which must be at the end of the line.
ProcessFirstLine(const char * begin,const char * end)244 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
245 BalsaFrameEnums::ErrorCode previous_error = last_error_;
246 if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
247 parse_state_ = BalsaFrameEnums::ERROR;
248 HandleError(last_error_);
249 return;
250 }
251 if (previous_error != last_error_) {
252 HandleWarning(last_error_);
253 }
254
255 const absl::string_view line_input(
256 begin + headers_->non_whitespace_1_idx_,
257 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
258 const absl::string_view part1(
259 begin + headers_->non_whitespace_1_idx_,
260 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
261 const absl::string_view part2(
262 begin + headers_->non_whitespace_2_idx_,
263 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
264 const absl::string_view part3(
265 begin + headers_->non_whitespace_3_idx_,
266 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
267
268 if (is_request_) {
269 visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
270 if (part3.empty()) {
271 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
272 }
273 return;
274 }
275
276 visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
277 }
278
279 // 'stream_begin' points to the first character of the headers buffer.
280 // 'line_begin' points to the first character of the line.
281 // 'current' points to a char which is ':'.
282 // 'line_end' points to the position of '\n' + 1.
283 // 'line_begin' points to the position of first character of line.
CleanUpKeyValueWhitespace(const char * stream_begin,const char * line_begin,const char * current,const char * line_end,HeaderLineDescription * current_header_line)284 void BalsaFrame::CleanUpKeyValueWhitespace(
285 const char* stream_begin, const char* line_begin, const char* current,
286 const char* line_end, HeaderLineDescription* current_header_line) {
287 const char* colon_loc = current;
288 QUICHE_DCHECK_LT(colon_loc, line_end);
289 QUICHE_DCHECK_EQ(':', *colon_loc);
290 QUICHE_DCHECK_EQ(':', *current);
291 QUICHE_DCHECK_CHAR_GE(' ', *line_end)
292 << "\"" << std::string(line_begin, line_end) << "\"";
293
294 --current;
295 while (current > line_begin && CHAR_LE(*current, ' ')) {
296 --current;
297 }
298 current += static_cast<int>(current != colon_loc);
299 current_header_line->key_end_idx = current - stream_begin;
300
301 current = colon_loc;
302 QUICHE_DCHECK_EQ(':', *current);
303 ++current;
304 while (current < line_end && CHAR_LE(*current, ' ')) {
305 ++current;
306 }
307 current_header_line->value_begin_idx = current - stream_begin;
308
309 QUICHE_DCHECK_GE(current_header_line->key_end_idx,
310 current_header_line->first_char_idx);
311 QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
312 current_header_line->key_end_idx);
313 QUICHE_DCHECK_GE(current_header_line->last_char_idx,
314 current_header_line->value_begin_idx);
315 }
316
FindColonsAndParseIntoKeyValue(const Lines & lines,bool is_trailer,BalsaHeaders * headers)317 bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
318 bool is_trailer,
319 BalsaHeaders* headers) {
320 QUICHE_DCHECK(!lines.empty());
321 const char* stream_begin = headers->OriginalHeaderStreamBegin();
322 // The last line is always just a newline (and is uninteresting).
323 const Lines::size_type lines_size_m1 = lines.size() - 1;
324 // For a trailer, there is no first line, so lines[0] is the first header.
325 // For real headers, the first line takes lines[0], so real header starts
326 // at index 1.
327 int first_header_idx = (is_trailer ? 0 : 1);
328 const char* current = stream_begin + lines[first_header_idx].first;
329 // This code is a bit more subtle than it may appear at first glance.
330 // This code looks for a colon in the current line... but it also looks
331 // beyond the current line. If there is no colon in the current line, then
332 // for each subsequent line (until the colon which -has- been found is
333 // associated with a line), no searching for a colon will be performed. In
334 // this way, we minimize the amount of bytes we have scanned for a colon.
335 for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
336 const char* line_begin = stream_begin + lines[i].first;
337
338 // Here we handle possible continuations. Note that we do not replace
339 // the '\n' in the line before a continuation (at least, as of now),
340 // which implies that any code which looks for a value must deal with
341 // "\r\n", etc -within- the line (and not just at the end of it).
342 for (++i; i < lines_size_m1; ++i) {
343 const char c = *(stream_begin + lines[i].first);
344 if (CHAR_GT(c, ' ')) {
345 // Not a continuation, so stop. Note that if the 'original' i = 1,
346 // and the next line is not a continuation, we'll end up with i = 2
347 // when we break. This handles the incrementing of i for the outer
348 // loop.
349 break;
350 }
351
352 // Space and tab are valid starts to continuation lines.
353 // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
354 // can choose to reject or normalize continuation lines.
355 if ((c != ' ' && c != '\t') ||
356 http_validation_policy().disallow_header_continuation_lines) {
357 HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
358 : BalsaFrameEnums::INVALID_HEADER_FORMAT);
359 return false;
360 }
361
362 // If disallow_header_continuation_lines() is false, we neither reject nor
363 // normalize continuation lines, in violation of RFC7230.
364 }
365 const char* line_end = stream_begin + lines[i - 1].second;
366 QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
367
368 // We cleanup the whitespace at the end of the line before doing anything
369 // else of interest as it allows us to do nothing when irregularly formatted
370 // headers are parsed (e.g. those with only keys, only values, or no colon).
371 //
372 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
373 --line_end;
374 QUICHE_DCHECK_EQ('\n', *line_end)
375 << "\"" << std::string(line_begin, line_end) << "\"";
376 while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
377 --line_end;
378 }
379 ++line_end;
380 QUICHE_DCHECK_CHAR_GE(' ', *line_end);
381 QUICHE_DCHECK_LT(line_begin, line_end);
382
383 // We use '0' for the block idx, because we're always writing to the first
384 // block from the framer (we do this because the framer requires that the
385 // entire header sequence be in a contiguous buffer).
386 headers->header_lines_.push_back(HeaderLineDescription(
387 line_begin - stream_begin, line_end - stream_begin,
388 line_end - stream_begin, line_end - stream_begin, 0));
389 if (current >= line_end) {
390 if (http_validation_policy().require_header_colon) {
391 HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
392 : BalsaFrameEnums::HEADER_MISSING_COLON);
393 return false;
394 }
395 HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
396 : BalsaFrameEnums::HEADER_MISSING_COLON);
397 // Then the next colon will not be found within this header line-- time
398 // to try again with another header-line.
399 continue;
400 }
401 if (current < line_begin) {
402 // When this condition is true, the last detected colon was part of a
403 // previous line. We reset to the beginning of the line as we don't care
404 // about the presence of any colon before the beginning of the current
405 // line.
406 current = line_begin;
407 }
408 for (; current < line_end; ++current) {
409 if (*current == ':') {
410 break;
411 }
412
413 if (header_properties::IsInvalidHeaderKeyChar(*current)) {
414 // Generally invalid characters were found earlier.
415 HandleError(is_trailer
416 ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
417 : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
418 return false;
419 }
420 }
421
422 if (current == line_end) {
423 // There was no colon in the line. The arguments we passed into the
424 // construction for the HeaderLineDescription object should be OK-- it
425 // assumes that the entire content is 'key' by default (which is true, as
426 // there was no colon, there can be no value). Note that this is a
427 // construct which is technically not allowed by the spec.
428
429 // In strict mode, we do treat this invalid value-less key as an error.
430 if (http_validation_policy().require_header_colon) {
431 HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
432 : BalsaFrameEnums::HEADER_MISSING_COLON);
433 return false;
434 }
435 HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
436 : BalsaFrameEnums::HEADER_MISSING_COLON);
437 continue;
438 }
439
440 QUICHE_DCHECK_EQ(*current, ':');
441 QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
442 QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
443
444 HeaderLineDescription& current_header_line = headers->header_lines_.back();
445 current_header_line.key_end_idx = current - stream_begin;
446 current_header_line.value_begin_idx = current_header_line.key_end_idx;
447 if (current < line_end) {
448 ++current_header_line.key_end_idx;
449
450 CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
451 ¤t_header_line);
452 }
453
454 const absl::string_view key(
455 stream_begin + current_header_line.first_char_idx,
456 current_header_line.key_end_idx - current_header_line.first_char_idx);
457 const absl::string_view value(
458 stream_begin + current_header_line.value_begin_idx,
459 current_header_line.last_char_idx -
460 current_header_line.value_begin_idx);
461 visitor_->OnHeader(key, value);
462 }
463
464 return true;
465 }
466
HandleWarning(BalsaFrameEnums::ErrorCode error_code)467 void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
468 last_error_ = error_code;
469 visitor_->HandleWarning(last_error_);
470 }
471
HandleError(BalsaFrameEnums::ErrorCode error_code)472 void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
473 last_error_ = error_code;
474 parse_state_ = BalsaFrameEnums::ERROR;
475 visitor_->HandleError(last_error_);
476 }
477
ProcessContentLengthLine(HeaderLines::size_type line_idx,size_t * length)478 BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
479 HeaderLines::size_type line_idx, size_t* length) {
480 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
481 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
482 const char* line_end = stream_begin + header_line.last_char_idx;
483 const char* value_begin = (stream_begin + header_line.value_begin_idx);
484
485 if (value_begin >= line_end) {
486 // There is no non-whitespace value data.
487 QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
488 return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
489 }
490
491 *length = 0;
492 while (value_begin < line_end) {
493 if (*value_begin < '0' || *value_begin > '9') {
494 // bad! content-length found, and couldn't parse all of it!
495 QUICHE_DVLOG(1)
496 << "invalid content-length - non numeric character detected";
497 return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
498 }
499 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
500 size_t length_x_10 = *length * 10;
501 const size_t c = *value_begin - '0';
502 if (*length > kMaxDiv10 ||
503 (std::numeric_limits<size_t>::max() - length_x_10) < c) {
504 QUICHE_DVLOG(1) << "content-length overflow";
505 return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
506 }
507 *length = length_x_10 + c;
508 ++value_begin;
509 }
510 QUICHE_DVLOG(1) << "content_length parsed: " << *length;
511 return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
512 }
513
ProcessTransferEncodingLine(HeaderLines::size_type line_idx)514 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
515 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
516 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
517 const absl::string_view transfer_encoding(
518 stream_begin + header_line.value_begin_idx,
519 header_line.last_char_idx - header_line.value_begin_idx);
520
521 if (absl::EqualsIgnoreCase(transfer_encoding, kChunked)) {
522 headers_->transfer_encoding_is_chunked_ = true;
523 return;
524 }
525
526 if (absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
527 headers_->transfer_encoding_is_chunked_ = false;
528 return;
529 }
530
531 HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
532 }
533
CheckHeaderLinesForInvalidChars(const Lines & lines,const BalsaHeaders * headers)534 bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
535 const BalsaHeaders* headers) {
536 // Read from the beginning of the first line to the end of the last line.
537 // Note we need to add the first line's offset as in the case of a trailer
538 // it's non-zero.
539 const char* stream_begin =
540 headers->OriginalHeaderStreamBegin() + lines.front().first;
541 const char* stream_end =
542 headers->OriginalHeaderStreamBegin() + lines.back().second;
543 bool found_invalid = false;
544
545 for (const char* c = stream_begin; c < stream_end; c++) {
546 if (header_properties::IsInvalidHeaderChar(*c)) {
547 found_invalid = true;
548 invalid_chars_[*c]++;
549 }
550 }
551
552 return found_invalid;
553 }
554
ProcessHeaderLines(const Lines & lines,bool is_trailer,BalsaHeaders * headers)555 void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
556 BalsaHeaders* headers) {
557 QUICHE_DCHECK(!lines.empty());
558 QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
559
560 if (is_request() && track_invalid_chars()) {
561 if (CheckHeaderLinesForInvalidChars(lines, headers)) {
562 if (invalid_chars_error_enabled()) {
563 HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
564 return;
565 }
566
567 HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
568 }
569 }
570
571 // There is no need to attempt to process headers (resp. trailers)
572 // if no header (resp. trailer) lines exist.
573 //
574 // The last line of the message, which is an empty line, is never a header
575 // (resp. trailer) line. Furthermore, the first line of the message is not
576 // a header line. Therefore there are at least two (resp. one) lines in the
577 // message which are not header (resp. trailer) lines.
578 //
579 // Thus, we test to see if we have more than two (resp. one) lines total
580 // before attempting to parse any header (resp. trailer) lines.
581 if (lines.size() <= (is_trailer ? 1 : 2)) {
582 return;
583 }
584
585 HeaderLines::size_type content_length_idx = 0;
586 HeaderLines::size_type transfer_encoding_idx = 0;
587 const char* stream_begin = headers->OriginalHeaderStreamBegin();
588 // Parse the rest of the header or trailer data into key-value pairs.
589 if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
590 return;
591 }
592 // At this point, we've parsed all of the headers/trailers. Time to look
593 // for those headers which we require for framing or for format errors.
594 const HeaderLines::size_type lines_size = headers->header_lines_.size();
595 for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
596 const HeaderLineDescription& line = headers->header_lines_[i];
597 const absl::string_view key(stream_begin + line.first_char_idx,
598 line.key_end_idx - line.first_char_idx);
599 QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();
600
601 // If a header begins with either lowercase or uppercase 'c' or 't', then
602 // the header may be one of content-length, connection, content-encoding
603 // or transfer-encoding. These headers are special, as they change the way
604 // that the message is framed, and so the framer is required to search
605 // for them. However, first check for a formatting error, and skip
606 // special header treatment on trailer lines (when is_trailer is true).
607 if (key.empty() || key[0] == ' ') {
608 parse_state_ = BalsaFrameEnums::ERROR;
609 HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
610 : BalsaFrameEnums::INVALID_HEADER_FORMAT);
611 return;
612 }
613 if (is_trailer) {
614 continue;
615 }
616 if (absl::EqualsIgnoreCase(key, kContentLength)) {
617 size_t length = 0;
618 BalsaHeadersEnums::ContentLengthStatus content_length_status =
619 ProcessContentLengthLine(i, &length);
620 if (content_length_idx == 0) {
621 content_length_idx = i + 1;
622 headers->content_length_status_ = content_length_status;
623 headers->content_length_ = length;
624 content_length_remaining_ = length;
625 continue;
626 }
627 if ((headers->content_length_status_ != content_length_status) ||
628 ((headers->content_length_status_ ==
629 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
630 (http_validation_policy().disallow_multiple_content_length ||
631 length != headers->content_length_))) {
632 HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
633 return;
634 }
635 continue;
636 }
637 if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
638 if (transfer_encoding_idx != 0) {
639 HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
640 return;
641 }
642 transfer_encoding_idx = i + 1;
643 }
644 }
645
646 if (!is_trailer) {
647 if (http_validation_policy()
648 .disallow_transfer_encoding_with_content_length &&
649 content_length_idx != 0 && transfer_encoding_idx != 0) {
650 HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
651 return;
652 }
653 if (headers->transfer_encoding_is_chunked_) {
654 headers->content_length_ = 0;
655 headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
656 content_length_remaining_ = 0;
657 }
658 if (transfer_encoding_idx != 0) {
659 ProcessTransferEncodingLine(transfer_encoding_idx - 1);
660 }
661 }
662 }
663
AssignParseStateAfterHeadersHaveBeenParsed()664 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
665 // For responses, can't have a body if the request was a HEAD, or if it is
666 // one of these response-codes. rfc2616 section 4.3
667 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
668 int response_code = headers_->parsed_response_code_;
669 if (!is_request_ && (request_was_head_ ||
670 !BalsaHeaders::ResponseCanHaveBody(response_code))) {
671 // There is no body.
672 return;
673 }
674
675 if (headers_->transfer_encoding_is_chunked_) {
676 // Note that
677 // if ( Transfer-Encoding: chunked && Content-length: )
678 // then Transfer-Encoding: chunked trumps.
679 // This is as specified in the spec.
680 // rfc2616 section 4.4.3
681 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
682 return;
683 }
684
685 // Errors parsing content-length definitely can cause
686 // protocol errors/warnings
687 switch (headers_->content_length_status_) {
688 // If we have a content-length, and it is parsed
689 // properly, there are two options.
690 // 1) zero content, in which case the message is done, and
691 // 2) nonzero content, in which case we have to
692 // consume the body.
693 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
694 if (headers_->content_length_ == 0) {
695 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
696 } else {
697 parse_state_ = BalsaFrameEnums::READING_CONTENT;
698 }
699 break;
700 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
701 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
702 // If there were characters left-over after parsing the
703 // content length, we should flag an error and stop.
704 HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
705 break;
706 // We can have: no transfer-encoding, no content length, and no
707 // connection: close...
708 // Unfortunately, this case doesn't seem to be covered in the spec.
709 // We'll assume that the safest thing to do here is what the google
710 // binaries before 2008 already do, which is to assume that
711 // everything until the connection is closed is body.
712 case BalsaHeadersEnums::NO_CONTENT_LENGTH:
713 if (is_request_) {
714 const absl::string_view method = headers_->request_method();
715 // POSTs and PUTs should have a detectable body length. If they
716 // do not we consider it an error.
717 if (method != "POST" && method != "PUT") {
718 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
719 break;
720 } else if (!allow_reading_until_close_for_request_) {
721 HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
722 break;
723 }
724 }
725 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
726 HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
727 break;
728 // The COV_NF_... statements here provide hints to the apparatus
729 // which computes coverage reports/ratios that this code is never
730 // intended to be executed, and should technically be impossible.
731 // COV_NF_START
732 default:
733 QUICHE_LOG(FATAL) << "Saw a content_length_status: "
734 << headers_->content_length_status_
735 << " which is unknown.";
736 // COV_NF_END
737 }
738 }
739
// Scans [message_start, message_start + message_length) for the end of the
// header block, copying the scanned bytes into headers_ and recording one
// (begin, end) index pair per line in lines_.
//
// Returns the number of input bytes consumed, measured from the original
// message_start. Returns 0 immediately when message_length is 0.
//
// The scan state (saw_non_newline_char_, last_slash_n_idx_,
// last_char_was_slash_r_, lines_) lives in members, so a header block may be
// delivered across several calls.
//
// Once the terminating blank line is found, the header lines are processed and
// one of the following happens:
//   * an error is raised through HandleError() and the bytes consumed so far
//     are returned;
//   * an interim response (or a 100 Continue when continue_headers_ is set) is
//     handed off, the framer is Reset(), and scanning continues with the
//     remaining input;
//   * the parse state is advanced and the visitor receives ProcessHeaders(),
//     HeaderDone() and, if the message is already complete, MessageDone().
size_t BalsaFrame::ProcessHeaders(const char* message_start,
                                  size_t message_length) {
  const char* const original_message_start = message_start;
  const char* const message_end = message_start + message_length;
  const char* message_current = message_start;
  // Start of the bytes that have been scanned but not yet written to headers_.
  const char* checkpoint = message_start;

  if (message_length == 0) {
    return message_current - original_message_start;
  }

  while (message_current < message_end) {
    // Bytes already stored in the header stream; used to turn positions in
    // this buffer into indices within the whole header stream.
    size_t base_idx = headers_->GetReadableBytesFromHeaderStream();

    // Yes, we could use strchr (assuming null termination), or
    // memchr, but as it turns out that is slower than this tight loop
    // for the input that we see.
    if (!saw_non_newline_char_) {
      // Skip any '\r' / '\n' bytes that precede the first line. The first
      // other byte must be greater than ' ' (compared as unsigned char),
      // otherwise the input is rejected.
      do {
        const char c = *message_current;
        if (c != '\r' && c != '\n') {
          if (CHAR_LE(c, ' ')) {
            HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
            return message_current - original_message_start;
          }
          break;
        }
        ++message_current;
        if (message_current == message_end) {
          // Only newline characters were seen; all of them are consumed.
          return message_current - original_message_start;
        }
      } while (true);
      saw_non_newline_char_ = true;
      // The skipped bytes are not part of the header: restart both the
      // logical message start and the write checkpoint here.
      message_start = message_current;
      checkpoint = message_current;
    }
    while (message_current < message_end) {
      if (*message_current != '\n') {
        ++message_current;
        continue;
      }
      // Found a '\n'. message_current_idx is the header-stream index one past
      // this '\n'.
      const size_t relative_idx = message_current - message_start;
      const size_t message_current_idx = 1 + base_idx + relative_idx;
      lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
      if (lines_.size() == 1) {
        // This is the first line: flush it (including the '\n') into headers_
        // and parse it right away.
        headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
        checkpoint = message_current + 1;
        const char* begin = headers_->OriginalHeaderStreamBegin();

        QUICHE_DVLOG(1) << "First line "
                        << std::string(begin, lines_[0].second);
        QUICHE_DVLOG(1) << "is_request_: " << is_request_;
        ProcessFirstLine(begin, begin + lines_[0].second);
        if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
          // Leave the scan loop; message_current still points at the '\n'.
          break;
        }

        if (parse_state_ == BalsaFrameEnums::ERROR) {
          return message_current - original_message_start;
        }
      }
      // Length of the line that just ended, counting its '\n'.
      const size_t chars_since_last_slash_n =
          (message_current_idx - last_slash_n_idx_);
      last_slash_n_idx_ = message_current_idx;
      if (chars_since_last_slash_n > 2) {
        // false positive: the line holds more than an optional '\r' plus
        // '\n', so it is not the blank line that ends the headers.
        ++message_current;
        continue;
      }
      // The header block ends on a line that is just "\n", or on a '\n' that
      // is preceded by '\r' either in this buffer or (per
      // last_char_was_slash_r_) at the end of input seen by an earlier call.
      if ((chars_since_last_slash_n == 1) ||
          (((message_current > message_start) &&
            (*(message_current - 1) == '\r')) ||
           (last_char_was_slash_r_))) {
        break;
      }
      ++message_current;
    }

    if (message_current == message_end) {
      // Ran out of input without finding the end of the headers. The outer
      // loop condition is now false, so this falls through to the
      // bookkeeping after the loop.
      continue;
    }

    // Step past the terminating '\n' and flush everything not yet written.
    ++message_current;
    QUICHE_DCHECK(message_current >= message_start);
    if (message_current > message_start) {
      headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
    }

    // Check if we have exceeded maximum headers length
    // Although we check for this limit before and after we call this function
    // we check it here as well to make sure that in case the visitor changed
    // the max_header_length_ (for example after processing the first line)
    // we handle it gracefully.
    if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
      HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
      return message_current - original_message_start;
    }

    // Since we know that we won't be writing any more bytes of the header,
    // we tell that to the headers object. The headers object may make
    // more efficient allocation decisions when this is signaled.
    headers_->DoneWritingFromFramer();
    visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());

    // Ok, now that we've written everything into our header buffer, it is
    // time to process the header lines (extract proper values for headers
    // which are important for framing).
    ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
    if (parse_state_ == BalsaFrameEnums::ERROR) {
      return message_current - original_message_start;
    }

    if (use_interim_headers_callback_ &&
        IsInterimResponse(headers_->parsed_response_code())) {
      // Deliver headers from this interim response but reset everything else to
      // prepare for the next set of headers.
      visitor_->OnInterimHeaders(std::move(*headers_));
      Reset();
      // Resume scanning right after the interim response.
      checkpoint = message_start = message_current;
      continue;
    }
    if (continue_headers_ != nullptr &&
        headers_->parsed_response_code_ == kContinueStatusCode) {
      // Save the headers from this 100 Continue response but reset everything
      // else to prepare for the next set of headers.
      BalsaHeaders saved_continue_headers = std::move(*headers_);
      Reset();
      *continue_headers_ = std::move(saved_continue_headers);
      visitor_->ContinueHeaderDone();
      // Resume scanning right after the 100 Continue response.
      checkpoint = message_start = message_current;
      continue;
    }
    AssignParseStateAfterHeadersHaveBeenParsed();
    if (parse_state_ == BalsaFrameEnums::ERROR) {
      return message_current - original_message_start;
    }
    visitor_->ProcessHeaders(*headers_);
    visitor_->HeaderDone();
    if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
      visitor_->MessageDone();
    }
    return message_current - original_message_start;
  }
  // If we've gotten to here, it means that we've consumed all of the
  // available input. We need to record whether or not the last character we
  // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
  // a header framing that is split across the two calls.
  last_char_was_slash_r_ = (*(message_end - 1) == '\r');
  QUICHE_DCHECK(message_current >= message_start);
  if (message_current > message_start) {
    // Store the partial header bytes so the next call can continue from them.
    headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
  }
  return message_current - original_message_start;
}
894
BytesSafeToSplice() const895 size_t BalsaFrame::BytesSafeToSplice() const {
896 switch (parse_state_) {
897 case BalsaFrameEnums::READING_CHUNK_DATA:
898 return chunk_length_remaining_;
899 case BalsaFrameEnums::READING_UNTIL_CLOSE:
900 return std::numeric_limits<size_t>::max();
901 case BalsaFrameEnums::READING_CONTENT:
902 return content_length_remaining_;
903 default:
904 return 0;
905 }
906 }
907
BytesSpliced(size_t bytes_spliced)908 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
909 switch (parse_state_) {
910 case BalsaFrameEnums::READING_CHUNK_DATA:
911 if (chunk_length_remaining_ < bytes_spliced) {
912 HandleError(BalsaFrameEnums::
913 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
914 return;
915 }
916 chunk_length_remaining_ -= bytes_spliced;
917 if (chunk_length_remaining_ == 0) {
918 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
919 }
920 return;
921
922 case BalsaFrameEnums::READING_UNTIL_CLOSE:
923 return;
924
925 case BalsaFrameEnums::READING_CONTENT:
926 if (content_length_remaining_ < bytes_spliced) {
927 HandleError(BalsaFrameEnums::
928 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
929 return;
930 }
931 content_length_remaining_ -= bytes_spliced;
932 if (content_length_remaining_ == 0) {
933 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
934 visitor_->MessageDone();
935 }
936 return;
937
938 default:
939 HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
940 return;
941 }
942 }
943
// Feeds `size` bytes starting at `input` to the framer and returns how many of
// them were consumed.
//
//   * Returns 0 when headers_ is null.
//   * In READING_HEADER_AND_FIRSTLINE, hands at most
//     (max_header_length_ - bytes already buffered) bytes to ProcessHeaders()
//     and raises HEADERS_TOO_LONG when the limit is reached while headers are
//     still incomplete. It returns after this one step; body bytes that
//     follow the headers in the same buffer are not consumed by this call.
//   * In MESSAGE_FULLY_READ or ERROR, consumes nothing.
//   * Otherwise runs the body state machine (chunk length / extension / data /
//     terminator, trailer, read-until-close, content-length body) until the
//     input is exhausted, the message completes, or an error is raised.
//
// Within this function, `on_entry` marks the start of the bytes that have not
// yet been reported to the visitor through OnRawBodyInput()/OnTrailerInput().
size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
  const char* current = input;
  const char* on_entry = current;
  const char* end = current + size;

  QUICHE_DCHECK(headers_ != nullptr);
  if (headers_ == nullptr) {
    return 0;
  }

  if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
    const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
    // Yes, we still have to check this here as the user can change the
    // max_header_length amount!
    // Also it is possible that we have reached the maximum allowed header size,
    // and we have more to consume (remember we are still inside
    // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
    if (header_length > max_header_length_ ||
        (header_length == max_header_length_ && size > 0)) {
      HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
      return current - input;
    }
    // Never hand ProcessHeaders more bytes than the header limit still allows.
    const size_t bytes_to_process =
        std::min(max_header_length_ - header_length, size);
    current += ProcessHeaders(input, bytes_to_process);
    // If we are still reading headers check if we have crossed the headers
    // limit. Note that we check for >= as opposed to >. This is because if
    // header_length_after equals max_header_length_ and we are still in the
    // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
    // sure that the headers limit will be crossed later on
    if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
      // Note that headers_ is valid only if we are still reading headers.
      const size_t header_length_after =
          headers_->GetReadableBytesFromHeaderStream();
      if (header_length_after >= max_header_length_) {
        HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
      }
    }
    return current - input;
  }

  if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
      parse_state_ == BalsaFrameEnums::ERROR) {
    // Can do nothing more 'till we're reset.
    return current - input;
  }

  QUICHE_DCHECK_LE(current, end);
  if (current == end) {
    return current - input;
  }

  // Body state machine. Each case either returns (input exhausted, message
  // done, or error) or sets parse_state_ and `continue`s to dispatch again.
  while (true) {
    switch (parse_state_) {
      case BalsaFrameEnums::READING_CHUNK_LENGTH:
        // In this state we read the chunk length.
        // Note that once we hit a character which is not in:
        // [0-9;A-Fa-f\n], we transition to a different state.
        //
        QUICHE_DCHECK_LE(current, end);
        while (true) {
          if (current == end) {
            visitor_->OnRawBodyInput(
                absl::string_view(on_entry, current - on_entry));
            return current - input;
          }

          const char c = *current;
          ++current;

          // Sentinels for `addition` below; non-negative values are hex digit
          // values.
          static const signed char kBad = -1;
          static const signed char kDelimiter = -2;

          // valid cases:
          //  "09123\n"                      // -> 09123
          //  "09123\r\n"                    // -> 09123
          //  "09123  \n"                    // -> 09123
          //  "09123  \r\n"                  // -> 09123
          //  "09123  12312\n"               // -> 09123
          //  "09123  12312\r\n"             // -> 09123
          //  "09123; foo=bar\n"             // -> 09123
          //  "09123; foo=bar\r\n"           // -> 09123
          //  "FFFFFFFFFFFFFFFF\r\n"         // -> FFFFFFFFFFFFFFFF
          //  "FFFFFFFFFFFFFFFF 22\r\n"      // -> FFFFFFFFFFFFFFFF
          // invalid cases:
          // "[ \t]+[^\n]*\n"
          // "FFFFFFFFFFFFFFFFF\r\n"  (would overflow)
          // "\r\n"
          // "\n"
          signed char addition = kBad;
          // clang-format off
          switch (c) {
            case '0': addition = 0; break;
            case '1': addition = 1; break;
            case '2': addition = 2; break;
            case '3': addition = 3; break;
            case '4': addition = 4; break;
            case '5': addition = 5; break;
            case '6': addition = 6; break;
            case '7': addition = 7; break;
            case '8': addition = 8; break;
            case '9': addition = 9; break;
            case 'a': addition = 0xA; break;
            case 'b': addition = 0xB; break;
            case 'c': addition = 0xC; break;
            case 'd': addition = 0xD; break;
            case 'e': addition = 0xE; break;
            case 'f': addition = 0xF; break;
            case 'A': addition = 0xA; break;
            case 'B': addition = 0xB; break;
            case 'C': addition = 0xC; break;
            case 'D': addition = 0xD; break;
            case 'E': addition = 0xE; break;
            case 'F': addition = 0xF; break;
            case '\t':
            case '\n':
            case '\r':
            case ' ':
            case ';':
              addition = kDelimiter;
              break;
            default:
              // Leave addition == kBad
              break;
          }
          // clang-format on
          if (addition >= 0) {
            // A hex digit: fold it into the running chunk length, rejecting
            // any value that would not fit in size_t.
            chunk_length_character_extracted_ = true;
            size_t length_x_16 = chunk_length_remaining_ * 16;
            const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
            if ((chunk_length_remaining_ > kMaxDiv16) ||
                (std::numeric_limits<size_t>::max() - length_x_16) <
                    static_cast<size_t>(addition)) {
              // overflow -- asked for a chunk-length greater than 2^64 - 1!!
              visitor_->OnRawBodyInput(
                  absl::string_view(on_entry, current - on_entry));
              HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
              return current - input;
            }
            chunk_length_remaining_ = length_x_16 + addition;
            continue;
          }

          if (!chunk_length_character_extracted_ || addition == kBad) {
            // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
            // characters were converted, or an unexpected character was
            // seen.
            visitor_->OnRawBodyInput(
                absl::string_view(on_entry, current - on_entry));
            HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
            return current - input;
          }

          // A delimiter after at least one hex digit ends the length.
          break;
        }

        // Un-consume the delimiter so that READING_CHUNK_EXTENSION sees it.
        --current;
        parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
        visitor_->OnChunkLength(chunk_length_remaining_);
        continue;

      case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
        // TODO(phython): Convert this scanning to be 16 bytes at a time if
        // there is data to be read.
        const char* extensions_start = current;
        size_t extensions_length = 0;
        QUICHE_DCHECK_LE(current, end);
        // Consume bytes up to and including the next '\n'.
        while (true) {
          if (current == end) {
            visitor_->OnChunkExtensionInput(
                absl::string_view(extensions_start, extensions_length));
            visitor_->OnRawBodyInput(
                absl::string_view(on_entry, current - on_entry));
            return current - input;
          }
          const char c = *current;
          if (c == '\r' || c == '\n') {
            // Recomputed on every '\r' or '\n' seen, so the value from the
            // last such byte (the '\n' that ends the loop) is the one
            // reported.
            // NOTE(review): for a bare "\n" terminator this yields one byte
            // less than the text preceding the '\n' -- confirm this is the
            // intended contract for OnChunkExtensionInput.
            extensions_length = (extensions_start == current)
                                    ? 0
                                    : current - extensions_start - 1;
          }

          ++current;
          if (c == '\n') {
            break;
          }
        }

        // Reset for the next chunk-length parse.
        chunk_length_character_extracted_ = false;
        visitor_->OnChunkExtensionInput(
            absl::string_view(extensions_start, extensions_length));

        if (chunk_length_remaining_ != 0) {
          parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
          continue;
        }

        // A zero-length chunk is the last chunk. Feed the '\n' just consumed
        // to the header-framing detector before looking for the final
        // terminator or trailers.
        HeaderFramingFound('\n');
        parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
        continue;
      }

      case BalsaFrameEnums::READING_CHUNK_DATA:
        while (current < end) {
          if (chunk_length_remaining_ == 0) {
            break;
          }
          // read in the chunk
          size_t bytes_remaining = end - current;
          size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
                                      ? chunk_length_remaining_
                                      : bytes_remaining;
          const char* tmp_current = current + consumed_bytes;
          // The raw callback covers everything since on_entry (which may
          // include chunk framing bytes consumed earlier in this call); the
          // body callback covers only the chunk payload.
          visitor_->OnRawBodyInput(
              absl::string_view(on_entry, tmp_current - on_entry));
          visitor_->OnBodyChunkInput(
              absl::string_view(current, consumed_bytes));
          on_entry = current = tmp_current;
          chunk_length_remaining_ -= consumed_bytes;
        }

        if (chunk_length_remaining_ == 0) {
          parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
          continue;
        }

        visitor_->OnRawBodyInput(
            absl::string_view(on_entry, current - on_entry));
        return current - input;

      case BalsaFrameEnums::READING_CHUNK_TERM:
        QUICHE_DCHECK_LE(current, end);
        // Consume bytes up to and including the next '\n'. The bytes before
        // the '\n' are not inspected.
        while (true) {
          if (current == end) {
            visitor_->OnRawBodyInput(
                absl::string_view(on_entry, current - on_entry));
            return current - input;
          }

          const char c = *current;
          ++current;

          if (c == '\n') {
            break;
          }
        }
        parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
        continue;

      case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
        QUICHE_DCHECK_LE(current, end);
        while (true) {
          if (current == end) {
            visitor_->OnRawBodyInput(
                absl::string_view(on_entry, current - on_entry));
            return current - input;
          }

          const char c = *current;
          if (HeaderFramingFound(c) != 0) {
            // If we've found a "\r\n\r\n", then the message
            // is done.
            ++current;
            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
            visitor_->OnRawBodyInput(
                absl::string_view(on_entry, current - on_entry));
            visitor_->MessageDone();
            return current - input;
          }

          // If not, however, since the spec only suggests that the
          // client SHOULD indicate the presence of trailers, we get to
          // *test* that they did or didn't.
          // If all of the bytes we've seen since:
          //   OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
          // are either '\r', or '\n', then we can assume that we don't yet
          // know if we need to parse headers, or if the next byte will make
          // the HeaderFramingFound condition (above) true.
          if (!HeaderFramingMayBeFound()) {
            // `c` is left unconsumed here; it becomes the first trailer byte.
            break;
          }

          // If HeaderFramingMayBeFound(), then we have seen only characters
          // '\r' or '\n'.
          ++current;

          // Let's try again! There is no state change here.
        }

        // If (!HeaderFramingMayBeFound()), then we know that we must be
        // reading the first non CRLF character of a trailer.
        parse_state_ = BalsaFrameEnums::READING_TRAILER;
        visitor_->OnRawBodyInput(
            absl::string_view(on_entry, current - on_entry));
        // Everything from here on is reported as trailer input, not raw body.
        on_entry = current;
        continue;

      // TODO(yongfa): No leading whitespace is allowed before field-name per
      // RFC2616. Leading whitespace will cause header parsing error too.
      case BalsaFrameEnums::READING_TRAILER:
        while (current < end) {
          const char c = *current;
          ++current;
          ++trailer_length_;
          // Line bookkeeping and the length limit apply only when a trailer
          // object has been supplied to receive the parsed trailer.
          if (trailer_ != nullptr) {
            // Reuse the header length limit for trailer, which is just a bunch
            // of headers.
            if (trailer_length_ > max_header_length_) {
              // Do not count the byte that crossed the limit as consumed.
              --current;
              HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
              return current - input;
            }
            if (LineFramingFound(c)) {
              trailer_lines_.push_back(
                  std::make_pair(start_of_trailer_line_, trailer_length_));
              start_of_trailer_line_ = trailer_length_;
            }
          }
          if (HeaderFramingFound(c) != 0) {
            // End of the trailer block: the message is complete.
            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
            if (trailer_ != nullptr) {
              trailer_->WriteFromFramer(on_entry, current - on_entry);
              trailer_->DoneWritingFromFramer();
              ProcessHeaderLines(trailer_lines_, true /*is_trailer*/, trailer_);
              if (parse_state_ == BalsaFrameEnums::ERROR) {
                return current - input;
              }
              visitor_->ProcessTrailers(*trailer_);
            }
            visitor_->OnTrailerInput(
                absl::string_view(on_entry, current - on_entry));
            visitor_->MessageDone();
            return current - input;
          }
        }
        // Input exhausted mid-trailer: buffer and report what was seen so far.
        if (trailer_ != nullptr) {
          trailer_->WriteFromFramer(on_entry, current - on_entry);
        }
        visitor_->OnTrailerInput(
            absl::string_view(on_entry, current - on_entry));
        return current - input;

      case BalsaFrameEnums::READING_UNTIL_CLOSE: {
        // Everything available is body; there is no framing to look for.
        const size_t bytes_remaining = end - current;
        if (bytes_remaining > 0) {
          visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
          visitor_->OnBodyChunkInput(
              absl::string_view(current, bytes_remaining));
          current += bytes_remaining;
        }
        return current - input;
      }

      case BalsaFrameEnums::READING_CONTENT:
        while ((content_length_remaining_ != 0u) && current < end) {
          // read in the content
          const size_t bytes_remaining = end - current;
          const size_t consumed_bytes =
              (content_length_remaining_ < bytes_remaining)
                  ? content_length_remaining_
                  : bytes_remaining;
          visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
          visitor_->OnBodyChunkInput(
              absl::string_view(current, consumed_bytes));
          current += consumed_bytes;
          content_length_remaining_ -= consumed_bytes;
        }
        if (content_length_remaining_ == 0) {
          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
          visitor_->MessageDone();
        }
        return current - input;

      default:
        // The state-machine should never be in a state that isn't handled
        // above.  This is a glaring logic error, and we should do something
        // drastic to ensure that this gets looked-at and fixed.
        QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_  // COV_NF_LINE
                          << " memory corruption?!";            // COV_NF_LINE
    }
  }
}
1326
// Out-of-class definitions for BalsaFrame's static constant data members.
// No initializers appear here, so the values are presumably supplied by the
// in-class declarations in balsa_frame.h -- TODO confirm against the header.
const int32_t BalsaFrame::kValidTerm1;
const int32_t BalsaFrame::kValidTerm1Mask;
const int32_t BalsaFrame::kValidTerm2;
const int32_t BalsaFrame::kValidTerm2Mask;
1331
1332 } // namespace quiche
1333
// Remove the unsigned-char comparison helper macros defined near the top of
// this file so that they are no longer defined past this point.
#undef CHAR_LT
#undef CHAR_LE
#undef CHAR_GT
#undef CHAR_GE
#undef QUICHE_DCHECK_CHAR_GE
1339