• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "quiche/balsa/balsa_frame.h"
6 
7 #include <algorithm>
8 #include <array>
9 #include <cstddef>
10 #include <cstdint>
11 #include <cstring>
12 #include <limits>
13 #include <memory>
14 #include <ostream>
15 #include <string>
16 #include <utility>
17 
18 #include "absl/strings/match.h"
19 #include "absl/strings/numbers.h"
20 #include "absl/strings/string_view.h"
21 #include "quiche/balsa/balsa_enums.h"
22 #include "quiche/balsa/balsa_headers.h"
23 #include "quiche/balsa/balsa_visitor_interface.h"
24 #include "quiche/balsa/header_properties.h"
25 #include "quiche/common/platform/api/quiche_logging.h"
26 
27 // When comparing characters (other than == and !=), cast to unsigned char
28 // to make sure values above 127 rank as expected, even on platforms where char
29 // is signed and thus such values are represented as negative numbers before the
30 // cast.
31 #define CHAR_LT(a, b) \
32   (static_cast<unsigned char>(a) < static_cast<unsigned char>(b))
33 #define CHAR_LE(a, b) \
34   (static_cast<unsigned char>(a) <= static_cast<unsigned char>(b))
35 #define CHAR_GT(a, b) \
36   (static_cast<unsigned char>(a) > static_cast<unsigned char>(b))
37 #define CHAR_GE(a, b) \
38   (static_cast<unsigned char>(a) >= static_cast<unsigned char>(b))
39 #define QUICHE_DCHECK_CHAR_GE(a, b) \
40   QUICHE_DCHECK_GE(static_cast<unsigned char>(a), static_cast<unsigned char>(b))
41 
42 namespace quiche {
43 
44 namespace {
45 
46 const size_t kContinueStatusCode = 100;
47 
48 constexpr absl::string_view kChunked = "chunked";
49 constexpr absl::string_view kContentLength = "content-length";
50 constexpr absl::string_view kIdentity = "identity";
51 constexpr absl::string_view kTransferEncoding = "transfer-encoding";
52 
// Reports whether `response_code` is an informational (1xx) status code,
// i.e. lies in [100, 200).
bool IsInterimResponse(size_t response_code) {
  // Unsigned division maps exactly the values 100..199 onto 1.
  return response_code / 100 == 1;
}
56 
57 }  // namespace
58 
Reset()59 void BalsaFrame::Reset() {
60   last_char_was_slash_r_ = false;
61   saw_non_newline_char_ = false;
62   start_was_space_ = true;
63   chunk_length_character_extracted_ = false;
64   // is_request_ = true;               // not reset between messages.
65   allow_reading_until_close_for_request_ = false;
66   // request_was_head_ = false;        // not reset between messages.
67   // max_header_length_ = 16 * 1024;   // not reset between messages.
68   // visitor_ = &do_nothing_visitor_;  // not reset between messages.
69   chunk_length_remaining_ = 0;
70   content_length_remaining_ = 0;
71   last_slash_n_loc_ = nullptr;
72   last_recorded_slash_n_loc_ = nullptr;
73   last_slash_n_idx_ = 0;
74   term_chars_ = 0;
75   parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
76   last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
77   invalid_chars_.clear();
78   lines_.clear();
79   if (continue_headers_ != nullptr) {
80     continue_headers_->Clear();
81   }
82   if (headers_ != nullptr) {
83     headers_->Clear();
84   }
85   trailer_lines_.clear();
86   start_of_trailer_line_ = 0;
87   trailer_length_ = 0;
88   if (trailer_ != nullptr) {
89     trailer_->Clear();
90   }
91 }
92 
93 namespace {
94 
95 // Within the line bounded by [current, end), parses a single "island",
96 // comprising a (possibly empty) span of whitespace followed by a (possibly
97 // empty) span of non-whitespace.
98 //
// Returns a pointer to the first whitespace character beyond this island, or
// returns end if no additional whitespace characters are present after this
// island.  (I.e., returnvalue == end || *returnvalue <= ' ')
102 //
103 // Upon return, the whitespace span are the characters
104 // whose indices fall in [*first_whitespace, *first_nonwhite), while the
105 // non-whitespace span are the characters whose indices fall in
106 // [*first_nonwhite, returnvalue - begin).
inline const char* ParseOneIsland(const char* current, const char* begin,
                                  const char* end, size_t* first_whitespace,
                                  size_t* first_nonwhite) {
  // A byte is treated as whitespace when its value, read as unsigned char, is
  // at most ' '.  The cast keeps bytes above 127 on the non-whitespace side
  // even where plain char is signed.
  const auto is_space = [](char ch) {
    return static_cast<unsigned char>(ch) <= static_cast<unsigned char>(' ');
  };

  const char* cursor = current;

  // Leading (possibly empty) run of whitespace.
  *first_whitespace = cursor - begin;
  for (; cursor < end && is_space(*cursor); ++cursor) {
  }

  // Following (possibly empty) run of non-whitespace.
  *first_nonwhite = cursor - begin;
  for (; cursor < end && !is_space(*cursor); ++cursor) {
  }

  return cursor;
}
120 
121 }  // namespace
122 
123 // Summary:
124 //     Parses the first line of either a request or response.
125 //     Note that in the case of a detected warning, error_code will be set
126 //   but the function will not return false.
127 //     Exactly zero or one warning or error (but not both) may be detected
128 //   by this function.
129 //     Note that this function will not write the data of the first-line
130 //   into the header's buffer (that should already have been done elsewhere).
131 //
132 // Pre-conditions:
133 //     begin != end
134 //     *begin should be a character which is > ' '. This implies that there
135 //   is at least one non-whitespace characters between [begin, end).
136 //   headers is a valid pointer to a BalsaHeaders class.
137 //     error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
138 //     Entire first line must exist between [begin, end)
139 //     Exactly zero or one newlines -may- exist between [begin, end)
140 //     [begin, end) should exist in the header's buffer.
141 //
142 // Side-effects:
143 //   headers will be modified
144 //   error_code may be modified if either a warning or error is detected
145 //
146 // Returns:
147 //   True if no error (as opposed to warning) is detected.
148 //   False if an error (as opposed to warning) is detected.
149 
150 //
151 // If there is indeed non-whitespace in the line, then the following
152 // will take care of this for you:
153 //  while (*begin <= ' ') ++begin;
//  ParseHTTPFirstLine(begin, end, is_request, &headers, &error_code);
155 //
156 
ParseHTTPFirstLine(const char * begin,const char * end,bool is_request,BalsaHeaders * headers,BalsaFrameEnums::ErrorCode * error_code)157 bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
158                         BalsaHeaders* headers,
159                         BalsaFrameEnums::ErrorCode* error_code) {
160   while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
161     --end;
162   }
163 
164   const char* current =
165       ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
166                      &headers->non_whitespace_1_idx_);
167   current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
168                            &headers->non_whitespace_2_idx_);
169   current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
170                            &headers->non_whitespace_3_idx_);
171 
172   // Clean up any trailing whitespace that comes after the third island
173   const char* last = end;
174   while (current <= last && CHAR_LE(*last, ' ')) {
175     --last;
176   }
177   headers->whitespace_4_idx_ = last - begin + 1;
178 
179   // Either the passed-in line is empty, or it starts with a non-whitespace
180   // character.
181   QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
182 
183   QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
184   QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
185 
186   // If the line isn't empty, it has at least one non-whitespace character (see
187   // first QUICHE_DCHECK), which will have been identified as a non-empty
188   // [non_whitespace_1_idx_, whitespace_2_idx_).
189   QUICHE_DCHECK(begin == end ||
190                 headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
191 
192   if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
193     // This error may be triggered if the second token is empty, OR there's no
194     // WS after the first token; we don't bother to distinguish exactly which.
195     // (I'm not sure why we distinguish different kinds of parse error at all,
196     // actually.)
197     // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
198     // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
199     *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
200         BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
201         static_cast<int>(is_request));
202     if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
203       return false;
204     }
205   }
206   if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
207     if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
208       // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
209       // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
210       *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
211           BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
212           static_cast<int>(is_request));
213     }
214   }
215 
216   if (!is_request) {
217     headers->parsed_response_code_ = 0;
218     // If the response code is non-empty:
219     if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
220       if (!absl::SimpleAtoi(
221               absl::string_view(begin + headers->non_whitespace_2_idx_,
222                                 headers->non_whitespace_3_idx_ -
223                                     headers->non_whitespace_2_idx_),
224               &headers->parsed_response_code_)) {
225         *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
226         return false;
227       }
228     }
229   }
230 
231   return true;
232 }
233 
234 // begin - beginning of the firstline
235 // end - end of the firstline
236 //
237 // A precondition for this function is that there is non-whitespace between
238 // [begin, end). If this precondition is not met, the function will not perform
239 // as expected (and bad things may happen, and it will eat your first, second,
240 // and third unborn children!).
241 //
242 // Another precondition for this function is that [begin, end) includes
243 // at most one newline, which must be at the end of the line.
ProcessFirstLine(const char * begin,const char * end)244 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
245   BalsaFrameEnums::ErrorCode previous_error = last_error_;
246   if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
247     parse_state_ = BalsaFrameEnums::ERROR;
248     HandleError(last_error_);
249     return;
250   }
251   if (previous_error != last_error_) {
252     HandleWarning(last_error_);
253   }
254 
255   const absl::string_view line_input(
256       begin + headers_->non_whitespace_1_idx_,
257       headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
258   const absl::string_view part1(
259       begin + headers_->non_whitespace_1_idx_,
260       headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
261   const absl::string_view part2(
262       begin + headers_->non_whitespace_2_idx_,
263       headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
264   const absl::string_view part3(
265       begin + headers_->non_whitespace_3_idx_,
266       headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
267 
268   if (is_request_) {
269     visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
270     if (part3.empty()) {
271       parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
272     }
273     return;
274   }
275 
276   visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
277 }
278 
// 'stream_begin' points to the first character of the headers buffer.
// 'line_begin' points to the first character of the line.
// 'current' points to a char which is ':'.
// 'line_end' points just past the last non-whitespace character of the line,
// i.e. at a character which is <= ' ' (such as the terminating '\r' or '\n').
CleanUpKeyValueWhitespace(const char * stream_begin,const char * line_begin,const char * current,const char * line_end,HeaderLineDescription * current_header_line)284 void BalsaFrame::CleanUpKeyValueWhitespace(
285     const char* stream_begin, const char* line_begin, const char* current,
286     const char* line_end, HeaderLineDescription* current_header_line) {
287   const char* colon_loc = current;
288   QUICHE_DCHECK_LT(colon_loc, line_end);
289   QUICHE_DCHECK_EQ(':', *colon_loc);
290   QUICHE_DCHECK_EQ(':', *current);
291   QUICHE_DCHECK_CHAR_GE(' ', *line_end)
292       << "\"" << std::string(line_begin, line_end) << "\"";
293 
294   --current;
295   while (current > line_begin && CHAR_LE(*current, ' ')) {
296     --current;
297   }
298   current += static_cast<int>(current != colon_loc);
299   current_header_line->key_end_idx = current - stream_begin;
300 
301   current = colon_loc;
302   QUICHE_DCHECK_EQ(':', *current);
303   ++current;
304   while (current < line_end && CHAR_LE(*current, ' ')) {
305     ++current;
306   }
307   current_header_line->value_begin_idx = current - stream_begin;
308 
309   QUICHE_DCHECK_GE(current_header_line->key_end_idx,
310                    current_header_line->first_char_idx);
311   QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
312                    current_header_line->key_end_idx);
313   QUICHE_DCHECK_GE(current_header_line->last_char_idx,
314                    current_header_line->value_begin_idx);
315 }
316 
FindColonsAndParseIntoKeyValue(const Lines & lines,bool is_trailer,BalsaHeaders * headers)317 bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
318                                                 bool is_trailer,
319                                                 BalsaHeaders* headers) {
320   QUICHE_DCHECK(!lines.empty());
321   const char* stream_begin = headers->OriginalHeaderStreamBegin();
322   // The last line is always just a newline (and is uninteresting).
323   const Lines::size_type lines_size_m1 = lines.size() - 1;
324   // For a trailer, there is no first line, so lines[0] is the first header.
325   // For real headers, the first line takes lines[0], so real header starts
326   // at index 1.
327   int first_header_idx = (is_trailer ? 0 : 1);
328   const char* current = stream_begin + lines[first_header_idx].first;
329   // This code is a bit more subtle than it may appear at first glance.
330   // This code looks for a colon in the current line... but it also looks
331   // beyond the current line. If there is no colon in the current line, then
332   // for each subsequent line (until the colon which -has- been found is
333   // associated with a line), no searching for a colon will be performed. In
334   // this way, we minimize the amount of bytes we have scanned for a colon.
335   for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
336     const char* line_begin = stream_begin + lines[i].first;
337 
338     // Here we handle possible continuations.  Note that we do not replace
339     // the '\n' in the line before a continuation (at least, as of now),
340     // which implies that any code which looks for a value must deal with
341     // "\r\n", etc -within- the line (and not just at the end of it).
342     for (++i; i < lines_size_m1; ++i) {
343       const char c = *(stream_begin + lines[i].first);
344       if (CHAR_GT(c, ' ')) {
345         // Not a continuation, so stop.  Note that if the 'original' i = 1,
346         // and the next line is not a continuation, we'll end up with i = 2
347         // when we break. This handles the incrementing of i for the outer
348         // loop.
349         break;
350       }
351 
352       // Space and tab are valid starts to continuation lines.
353       // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
354       // can choose to reject or normalize continuation lines.
355       if ((c != ' ' && c != '\t') ||
356           http_validation_policy().disallow_header_continuation_lines) {
357         HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
358                                : BalsaFrameEnums::INVALID_HEADER_FORMAT);
359         return false;
360       }
361 
362       // If disallow_header_continuation_lines() is false, we neither reject nor
363       // normalize continuation lines, in violation of RFC7230.
364     }
365     const char* line_end = stream_begin + lines[i - 1].second;
366     QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
367 
368     // We cleanup the whitespace at the end of the line before doing anything
369     // else of interest as it allows us to do nothing when irregularly formatted
370     // headers are parsed (e.g. those with only keys, only values, or no colon).
371     //
372     // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
373     --line_end;
374     QUICHE_DCHECK_EQ('\n', *line_end)
375         << "\"" << std::string(line_begin, line_end) << "\"";
376     while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
377       --line_end;
378     }
379     ++line_end;
380     QUICHE_DCHECK_CHAR_GE(' ', *line_end);
381     QUICHE_DCHECK_LT(line_begin, line_end);
382 
383     // We use '0' for the block idx, because we're always writing to the first
384     // block from the framer (we do this because the framer requires that the
385     // entire header sequence be in a contiguous buffer).
386     headers->header_lines_.push_back(HeaderLineDescription(
387         line_begin - stream_begin, line_end - stream_begin,
388         line_end - stream_begin, line_end - stream_begin, 0));
389     if (current >= line_end) {
390       if (http_validation_policy().require_header_colon) {
391         HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
392                                : BalsaFrameEnums::HEADER_MISSING_COLON);
393         return false;
394       }
395       HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
396                                : BalsaFrameEnums::HEADER_MISSING_COLON);
397       // Then the next colon will not be found within this header line-- time
398       // to try again with another header-line.
399       continue;
400     }
401     if (current < line_begin) {
402       // When this condition is true, the last detected colon was part of a
403       // previous line.  We reset to the beginning of the line as we don't care
404       // about the presence of any colon before the beginning of the current
405       // line.
406       current = line_begin;
407     }
408     for (; current < line_end; ++current) {
409       if (*current == ':') {
410         break;
411       }
412 
413       if (header_properties::IsInvalidHeaderKeyChar(*current)) {
414         // Generally invalid characters were found earlier.
415         HandleError(is_trailer
416                         ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
417                         : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
418         return false;
419       }
420     }
421 
422     if (current == line_end) {
423       // There was no colon in the line. The arguments we passed into the
424       // construction for the HeaderLineDescription object should be OK-- it
425       // assumes that the entire content is 'key' by default (which is true, as
426       // there was no colon, there can be no value). Note that this is a
427       // construct which is technically not allowed by the spec.
428 
429       // In strict mode, we do treat this invalid value-less key as an error.
430       if (http_validation_policy().require_header_colon) {
431         HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
432                                : BalsaFrameEnums::HEADER_MISSING_COLON);
433         return false;
434       }
435       HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
436                                : BalsaFrameEnums::HEADER_MISSING_COLON);
437       continue;
438     }
439 
440     QUICHE_DCHECK_EQ(*current, ':');
441     QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
442     QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
443 
444     HeaderLineDescription& current_header_line = headers->header_lines_.back();
445     current_header_line.key_end_idx = current - stream_begin;
446     current_header_line.value_begin_idx = current_header_line.key_end_idx;
447     if (current < line_end) {
448       ++current_header_line.key_end_idx;
449 
450       CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
451                                 &current_header_line);
452     }
453 
454     const absl::string_view key(
455         stream_begin + current_header_line.first_char_idx,
456         current_header_line.key_end_idx - current_header_line.first_char_idx);
457     const absl::string_view value(
458         stream_begin + current_header_line.value_begin_idx,
459         current_header_line.last_char_idx -
460             current_header_line.value_begin_idx);
461     visitor_->OnHeader(key, value);
462   }
463 
464   return true;
465 }
466 
HandleWarning(BalsaFrameEnums::ErrorCode error_code)467 void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
468   last_error_ = error_code;
469   visitor_->HandleWarning(last_error_);
470 }
471 
HandleError(BalsaFrameEnums::ErrorCode error_code)472 void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
473   last_error_ = error_code;
474   parse_state_ = BalsaFrameEnums::ERROR;
475   visitor_->HandleError(last_error_);
476 }
477 
ProcessContentLengthLine(HeaderLines::size_type line_idx,size_t * length)478 BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
479     HeaderLines::size_type line_idx, size_t* length) {
480   const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
481   const char* stream_begin = headers_->OriginalHeaderStreamBegin();
482   const char* line_end = stream_begin + header_line.last_char_idx;
483   const char* value_begin = (stream_begin + header_line.value_begin_idx);
484 
485   if (value_begin >= line_end) {
486     // There is no non-whitespace value data.
487     QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
488     return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
489   }
490 
491   *length = 0;
492   while (value_begin < line_end) {
493     if (*value_begin < '0' || *value_begin > '9') {
494       // bad! content-length found, and couldn't parse all of it!
495       QUICHE_DVLOG(1)
496           << "invalid content-length - non numeric character detected";
497       return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
498     }
499     const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
500     size_t length_x_10 = *length * 10;
501     const size_t c = *value_begin - '0';
502     if (*length > kMaxDiv10 ||
503         (std::numeric_limits<size_t>::max() - length_x_10) < c) {
504       QUICHE_DVLOG(1) << "content-length overflow";
505       return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
506     }
507     *length = length_x_10 + c;
508     ++value_begin;
509   }
510   QUICHE_DVLOG(1) << "content_length parsed: " << *length;
511   return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
512 }
513 
ProcessTransferEncodingLine(HeaderLines::size_type line_idx)514 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
515   const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
516   const char* stream_begin = headers_->OriginalHeaderStreamBegin();
517   const absl::string_view transfer_encoding(
518       stream_begin + header_line.value_begin_idx,
519       header_line.last_char_idx - header_line.value_begin_idx);
520 
521   if (absl::EqualsIgnoreCase(transfer_encoding, kChunked)) {
522     headers_->transfer_encoding_is_chunked_ = true;
523     return;
524   }
525 
526   if (absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
527     headers_->transfer_encoding_is_chunked_ = false;
528     return;
529   }
530 
531   HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
532 }
533 
CheckHeaderLinesForInvalidChars(const Lines & lines,const BalsaHeaders * headers)534 bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
535                                                  const BalsaHeaders* headers) {
536   // Read from the beginning of the first line to the end of the last line.
537   // Note we need to add the first line's offset as in the case of a trailer
538   // it's non-zero.
539   const char* stream_begin =
540       headers->OriginalHeaderStreamBegin() + lines.front().first;
541   const char* stream_end =
542       headers->OriginalHeaderStreamBegin() + lines.back().second;
543   bool found_invalid = false;
544 
545   for (const char* c = stream_begin; c < stream_end; c++) {
546     if (header_properties::IsInvalidHeaderChar(*c)) {
547       found_invalid = true;
548       invalid_chars_[*c]++;
549     }
550   }
551 
552   return found_invalid;
553 }
554 
ProcessHeaderLines(const Lines & lines,bool is_trailer,BalsaHeaders * headers)555 void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
556                                     BalsaHeaders* headers) {
557   QUICHE_DCHECK(!lines.empty());
558   QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
559 
560   if (is_request() && track_invalid_chars()) {
561     if (CheckHeaderLinesForInvalidChars(lines, headers)) {
562       if (invalid_chars_error_enabled()) {
563         HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
564         return;
565       }
566 
567       HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
568     }
569   }
570 
571   // There is no need to attempt to process headers (resp. trailers)
572   // if no header (resp. trailer) lines exist.
573   //
574   // The last line of the message, which is an empty line, is never a header
575   // (resp. trailer) line.  Furthermore, the first line of the message is not
576   // a header line.  Therefore there are at least two (resp. one) lines in the
577   // message which are not header (resp. trailer) lines.
578   //
579   // Thus, we test to see if we have more than two (resp. one) lines total
580   // before attempting to parse any header (resp. trailer) lines.
581   if (lines.size() <= (is_trailer ? 1 : 2)) {
582     return;
583   }
584 
585   HeaderLines::size_type content_length_idx = 0;
586   HeaderLines::size_type transfer_encoding_idx = 0;
587   const char* stream_begin = headers->OriginalHeaderStreamBegin();
588   // Parse the rest of the header or trailer data into key-value pairs.
589   if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
590     return;
591   }
592   // At this point, we've parsed all of the headers/trailers.  Time to look
593   // for those headers which we require for framing or for format errors.
594   const HeaderLines::size_type lines_size = headers->header_lines_.size();
595   for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
596     const HeaderLineDescription& line = headers->header_lines_[i];
597     const absl::string_view key(stream_begin + line.first_char_idx,
598                                 line.key_end_idx - line.first_char_idx);
599     QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();
600 
601     // If a header begins with either lowercase or uppercase 'c' or 't', then
602     // the header may be one of content-length, connection, content-encoding
603     // or transfer-encoding. These headers are special, as they change the way
604     // that the message is framed, and so the framer is required to search
605     // for them.  However, first check for a formatting error, and skip
606     // special header treatment on trailer lines (when is_trailer is true).
607     if (key.empty() || key[0] == ' ') {
608       parse_state_ = BalsaFrameEnums::ERROR;
609       HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
610                              : BalsaFrameEnums::INVALID_HEADER_FORMAT);
611       return;
612     }
613     if (is_trailer) {
614       continue;
615     }
616     if (absl::EqualsIgnoreCase(key, kContentLength)) {
617       size_t length = 0;
618       BalsaHeadersEnums::ContentLengthStatus content_length_status =
619           ProcessContentLengthLine(i, &length);
620       if (content_length_idx == 0) {
621         content_length_idx = i + 1;
622         headers->content_length_status_ = content_length_status;
623         headers->content_length_ = length;
624         content_length_remaining_ = length;
625         continue;
626       }
627       if ((headers->content_length_status_ != content_length_status) ||
628           ((headers->content_length_status_ ==
629             BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
630            (http_validation_policy().disallow_multiple_content_length ||
631             length != headers->content_length_))) {
632         HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
633         return;
634       }
635       continue;
636     }
637     if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
638       if (transfer_encoding_idx != 0) {
639         HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
640         return;
641       }
642       transfer_encoding_idx = i + 1;
643     }
644   }
645 
646   if (!is_trailer) {
647     if (http_validation_policy()
648             .disallow_transfer_encoding_with_content_length &&
649         content_length_idx != 0 && transfer_encoding_idx != 0) {
650       HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
651       return;
652     }
653     if (headers->transfer_encoding_is_chunked_) {
654       headers->content_length_ = 0;
655       headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
656       content_length_remaining_ = 0;
657     }
658     if (transfer_encoding_idx != 0) {
659       ProcessTransferEncodingLine(transfer_encoding_idx - 1);
660     }
661   }
662 }
663 
AssignParseStateAfterHeadersHaveBeenParsed()664 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
665   // For responses, can't have a body if the request was a HEAD, or if it is
666   // one of these response-codes.  rfc2616 section 4.3
667   parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
668   int response_code = headers_->parsed_response_code_;
669   if (!is_request_ && (request_was_head_ ||
670                        !BalsaHeaders::ResponseCanHaveBody(response_code))) {
671     // There is no body.
672     return;
673   }
674 
675   if (headers_->transfer_encoding_is_chunked_) {
676     // Note that
677     // if ( Transfer-Encoding: chunked &&  Content-length: )
678     // then Transfer-Encoding: chunked trumps.
679     // This is as specified in the spec.
680     // rfc2616 section 4.4.3
681     parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
682     return;
683   }
684 
685   // Errors parsing content-length definitely can cause
686   // protocol errors/warnings
687   switch (headers_->content_length_status_) {
688     // If we have a content-length, and it is parsed
689     // properly, there are two options.
690     // 1) zero content, in which case the message is done, and
691     // 2) nonzero content, in which case we have to
692     //    consume the body.
693     case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
694       if (headers_->content_length_ == 0) {
695         parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
696       } else {
697         parse_state_ = BalsaFrameEnums::READING_CONTENT;
698       }
699       break;
700     case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
701     case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
702       // If there were characters left-over after parsing the
703       // content length, we should flag an error and stop.
704       HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
705       break;
706       // We can have: no transfer-encoding, no content length, and no
707       // connection: close...
708       // Unfortunately, this case doesn't seem to be covered in the spec.
709       // We'll assume that the safest thing to do here is what the google
710       // binaries before 2008 already do, which is to assume that
711       // everything until the connection is closed is body.
712     case BalsaHeadersEnums::NO_CONTENT_LENGTH:
713       if (is_request_) {
714         const absl::string_view method = headers_->request_method();
715         // POSTs and PUTs should have a detectable body length.  If they
716         // do not we consider it an error.
717         if (method != "POST" && method != "PUT") {
718           parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
719           break;
720         } else if (!allow_reading_until_close_for_request_) {
721           HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
722           break;
723         }
724       }
725       parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
726       HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
727       break;
728       // The COV_NF_... statements here provide hints to the apparatus
729       // which computes coverage reports/ratios that this code is never
730       // intended to be executed, and should technically be impossible.
731       // COV_NF_START
732     default:
733       QUICHE_LOG(FATAL) << "Saw a content_length_status: "
734                         << headers_->content_length_status_
735                         << " which is unknown.";
736       // COV_NF_END
737   }
738 }
739 
// Consumes header bytes from [message_start, message_start + message_length).
// Bytes are appended to headers_ via WriteFromFramer() and every '\n' seen is
// recorded as a line boundary in lines_. The first line is passed to
// ProcessFirstLine() as soon as it is complete. Once the blank line that ends
// the header block is found, the header lines are processed and the visitor is
// notified. Interim responses (and saved 100 Continue responses) reset the
// framer and scanning resumes with the next header block in the same buffer.
// Returns the number of input bytes consumed.
ProcessHeaders(const char * message_start,size_t message_length)740 size_t BalsaFrame::ProcessHeaders(const char* message_start,
741                                   size_t message_length) {
742   const char* const original_message_start = message_start;
743   const char* const message_end = message_start + message_length;
744   const char* message_current = message_start;
      // First input byte that has not yet been copied into headers_.
745   const char* checkpoint = message_start;
746 
      // Nothing to scan: zero bytes consumed.
747   if (message_length == 0) {
748     return message_current - original_message_start;
749   }
750 
751   while (message_current < message_end) {
        // Bytes already in the header stream at the start of this pass; used
        // to turn offsets within the input into header-stream indices.
752     size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
753 
754     // Yes, we could use strchr (assuming null termination), or
755     // memchr, but as it turns out that is slower than this tight loop
756     // for the input that we see.
757     if (!saw_non_newline_char_) {
          // Skip any CR/LF bytes that precede the first line.
758       do {
759         const char c = *message_current;
760         if (c != '\r' && c != '\n') {
              // Any other byte at or below ' ' (compared as unsigned char)
              // is rejected as the start of the first line.
761           if (CHAR_LE(c, ' ')) {
762             HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
763             return message_current - original_message_start;
764           }
765           break;
766         }
767         ++message_current;
768         if (message_current == message_end) {
769           return message_current - original_message_start;
770         }
771       } while (true);
772       saw_non_newline_char_ = true;
          // Restart the line origin and the copy checkpoint here so the
          // skipped CR/LF bytes are never written into headers_.
773       message_start = message_current;
774       checkpoint = message_current;
775     }
        // Scan for '\n' bytes; all other bytes are simply stepped over.
776     while (message_current < message_end) {
777       if (*message_current != '\n') {
778         ++message_current;
779         continue;
780       }
          // Header-stream index one past this '\n'.
781       const size_t relative_idx = message_current - message_start;
782       const size_t message_current_idx = 1 + base_idx + relative_idx;
783       lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
784       if (lines_.size() == 1) {
            // The first line just completed: copy it into headers_ right away
            // so it can be parsed before the remaining headers are scanned.
785         headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
786         checkpoint = message_current + 1;
787         const char* begin = headers_->OriginalHeaderStreamBegin();
788 
789         QUICHE_DVLOG(1) << "First line "
790                         << std::string(begin, lines_[0].second);
791         QUICHE_DVLOG(1) << "is_request_: " << is_request_;
792         ProcessFirstLine(begin, begin + lines_[0].second);
            // Leave the scan loop; message_current still points at this '\n',
            // so the header-completion code after the loop runs next.
793         if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
794           break;
795         }
796 
797         if (parse_state_ == BalsaFrameEnums::ERROR) {
798           return message_current - original_message_start;
799         }
800       }
801       const size_t chars_since_last_slash_n =
802           (message_current_idx - last_slash_n_idx_);
803       last_slash_n_idx_ = message_current_idx;
          // At least two bytes lie between this '\n' and the previous one, so
          // this cannot be the blank line that ends the header block.
804       if (chars_since_last_slash_n > 2) {
805         // false positive.
806         ++message_current;
807         continue;
808       }
          // End of headers: two adjacent '\n' bytes, or the byte before this
          // '\n' is '\r' (checked in this buffer, or via the flag recorded at
          // the end of an earlier call).
809       if ((chars_since_last_slash_n == 1) ||
810           (((message_current > message_start) &&
811             (*(message_current - 1) == '\r')) ||
812            (last_char_was_slash_r_))) {
813         break;
814       }
815       ++message_current;
816     }
817 
        // Input exhausted without finding the end of the headers: the outer
        // loop condition now fails and the bookkeeping after the loop runs.
818     if (message_current == message_end) {
819       continue;
820     }
821 
        // Step past the terminating '\n' and copy the remaining header bytes.
822     ++message_current;
823     QUICHE_DCHECK(message_current >= message_start);
824     if (message_current > message_start) {
825       headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
826     }
827 
828     // Check if we have exceeded maximum headers length
829     // Although we check for this limit before and after we call this function
830     // we check it here as well to make sure that in case the visitor changed
831     // the max_header_length_ (for example after processing the first line)
832     // we handle it gracefully.
833     if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
834       HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
835       return message_current - original_message_start;
836     }
837 
838     // Since we know that we won't be writing any more bytes of the header,
839     // we tell that to the headers object. The headers object may make
840     // more efficient allocation decisions when this is signaled.
841     headers_->DoneWritingFromFramer();
842     visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());
843 
844     // Ok, now that we've written everything into our header buffer, it is
845     // time to process the header lines (extract proper values for headers
846     // which are important for framing).
847     ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
848     if (parse_state_ == BalsaFrameEnums::ERROR) {
849       return message_current - original_message_start;
850     }
851 
852     if (use_interim_headers_callback_ &&
853         IsInterimResponse(headers_->parsed_response_code())) {
854       // Deliver headers from this interim response but reset everything else to
855       // prepare for the next set of headers.
856       visitor_->OnInterimHeaders(std::move(*headers_));
857       Reset();
          // Continue scanning the same buffer right after the interim headers.
858       checkpoint = message_start = message_current;
859       continue;
860     }
861     if (continue_headers_ != nullptr &&
862         headers_->parsed_response_code_ == kContinueStatusCode) {
863       // Save the headers from this 100 Continue response but reset everything
864       // else to prepare for the next set of headers.
865       BalsaHeaders saved_continue_headers = std::move(*headers_);
866       Reset();
867       *continue_headers_ = std::move(saved_continue_headers);
868       visitor_->ContinueHeaderDone();
869       checkpoint = message_start = message_current;
870       continue;
871     }
        // Final headers: choose the body-parsing state, then notify the
        // visitor (including MessageDone() when no body follows).
872     AssignParseStateAfterHeadersHaveBeenParsed();
873     if (parse_state_ == BalsaFrameEnums::ERROR) {
874       return message_current - original_message_start;
875     }
876     visitor_->ProcessHeaders(*headers_);
877     visitor_->HeaderDone();
878     if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
879       visitor_->MessageDone();
880     }
881     return message_current - original_message_start;
882   }
883   // If we've gotten to here, it means that we've consumed all of the
884   // available input. We need to record whether or not the last character we
885   // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
886   // a header framing that is split across the two calls.
887   last_char_was_slash_r_ = (*(message_end - 1) == '\r');
888   QUICHE_DCHECK(message_current >= message_start);
      // Buffer whatever was scanned but not yet copied into headers_.
889   if (message_current > message_start) {
890     headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
891   }
892   return message_current - original_message_start;
893 }
894 
BytesSafeToSplice() const895 size_t BalsaFrame::BytesSafeToSplice() const {
896   switch (parse_state_) {
897     case BalsaFrameEnums::READING_CHUNK_DATA:
898       return chunk_length_remaining_;
899     case BalsaFrameEnums::READING_UNTIL_CLOSE:
900       return std::numeric_limits<size_t>::max();
901     case BalsaFrameEnums::READING_CONTENT:
902       return content_length_remaining_;
903     default:
904       return 0;
905   }
906 }
907 
BytesSpliced(size_t bytes_spliced)908 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
909   switch (parse_state_) {
910     case BalsaFrameEnums::READING_CHUNK_DATA:
911       if (chunk_length_remaining_ < bytes_spliced) {
912         HandleError(BalsaFrameEnums::
913                         CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
914         return;
915       }
916       chunk_length_remaining_ -= bytes_spliced;
917       if (chunk_length_remaining_ == 0) {
918         parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
919       }
920       return;
921 
922     case BalsaFrameEnums::READING_UNTIL_CLOSE:
923       return;
924 
925     case BalsaFrameEnums::READING_CONTENT:
926       if (content_length_remaining_ < bytes_spliced) {
927         HandleError(BalsaFrameEnums::
928                         CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
929         return;
930       }
931       content_length_remaining_ -= bytes_spliced;
932       if (content_length_remaining_ == 0) {
933         parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
934         visitor_->MessageDone();
935       }
936       return;
937 
938     default:
939       HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
940       return;
941   }
942 }
943 
// Feeds up to `size` bytes starting at `input` to the framer and returns how
// many of them were consumed.
//
// While parse_state_ is READING_HEADER_AND_FIRSTLINE this enforces
// max_header_length_ and delegates to ProcessHeaders(); the call then returns,
// so any bytes after the headers are left unconsumed for a later call.
// In MESSAGE_FULLY_READ or ERROR nothing is consumed. In every other state the
// body state machine below runs (chunked, content-length, or read-until-close)
// and reports data through visitor_.
ProcessInput(const char * input,size_t size)944 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
945   const char* current = input;
      // Start of the bytes not yet reported via OnRawBodyInput() /
      // OnTrailerInput().
946   const char* on_entry = current;
947   const char* end = current + size;
948 
949   QUICHE_DCHECK(headers_ != nullptr);
      // Defensive: consume nothing rather than dereference a null headers_
      // when the DCHECK above does not stop execution.
950   if (headers_ == nullptr) {
951     return 0;
952   }
953 
954   if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
955     const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
956     // Yes, we still have to check this here as the user can change the
957     // max_header_length amount!
958     // Also it is possible that we have reached the maximum allowed header size,
959     // and we have more to consume (remember we are still inside
960     // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
961     if (header_length > max_header_length_ ||
962         (header_length == max_header_length_ && size > 0)) {
963       HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
964       return current - input;
965     }
        // Never hand ProcessHeaders() more bytes than the remaining header
        // budget.
966     const size_t bytes_to_process =
967         std::min(max_header_length_ - header_length, size);
968     current += ProcessHeaders(input, bytes_to_process);
969     // If we are still reading headers check if we have crossed the headers
970     // limit. Note that we check for >= as opposed to >. This is because if
971     // header_length_after equals max_header_length_ and we are still in the
972     // parse_state_  BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
973     // sure that the headers limit will be crossed later on
974     if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
975       // Note that headers_ is valid only if we are still reading headers.
976       const size_t header_length_after =
977           headers_->GetReadableBytesFromHeaderStream();
978       if (header_length_after >= max_header_length_) {
979         HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
980       }
981     }
982     return current - input;
983   }
984 
985   if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
986       parse_state_ == BalsaFrameEnums::ERROR) {
987     // Can do nothing more 'till we're reset.
988     return current - input;
989   }
990 
991   QUICHE_DCHECK_LE(current, end);
      // Empty input: nothing for the body state machine to do.
992   if (current == end) {
993     return current - input;
994   }
995 
      // Body state machine. Each case either returns (input exhausted, message
      // complete, or error) or sets parse_state_ and `continue`s so the next
      // state processes the remaining bytes.
996   while (true) {
997     switch (parse_state_) {
998       case BalsaFrameEnums::READING_CHUNK_LENGTH:
999         // In this state we read the chunk length.
1000         // Note that once we hit a character which is not in:
1001         // [0-9;A-Fa-f\n], we transition to a different state.
1002         //
1003         QUICHE_DCHECK_LE(current, end);
1004         while (true) {
1005           if (current == end) {
1006             visitor_->OnRawBodyInput(
1007                 absl::string_view(on_entry, current - on_entry));
1008             return current - input;
1009           }
1010 
1011           const char c = *current;
1012           ++current;
1013 
               // Sentinels for `addition`; values >= 0 are hex digit values.
1014           static const signed char kBad = -1;
1015           static const signed char kDelimiter = -2;
1016 
1017           // valid cases:
1018           //  "09123\n"                      // -> 09123
1019           //  "09123\r\n"                    // -> 09123
1020           //  "09123  \n"                    // -> 09123
1021           //  "09123  \r\n"                  // -> 09123
1022           //  "09123  12312\n"               // -> 09123
1023           //  "09123  12312\r\n"             // -> 09123
1024           //  "09123; foo=bar\n"             // -> 09123
1025           //  "09123; foo=bar\r\n"           // -> 09123
1026           //  "FFFFFFFFFFFFFFFF\r\n"         // -> FFFFFFFFFFFFFFFF
1027           //  "FFFFFFFFFFFFFFFF 22\r\n"      // -> FFFFFFFFFFFFFFFF
1028           // invalid cases:
1029           // "[ \t]+[^\n]*\n"
1030           // "FFFFFFFFFFFFFFFFF\r\n"  (would overflow)
1031           // "\r\n"
1032           // "\n"
1033           signed char addition = kBad;
1034           // clang-format off
1035           switch (c) {
1036             case '0': addition = 0; break;
1037             case '1': addition = 1; break;
1038             case '2': addition = 2; break;
1039             case '3': addition = 3; break;
1040             case '4': addition = 4; break;
1041             case '5': addition = 5; break;
1042             case '6': addition = 6; break;
1043             case '7': addition = 7; break;
1044             case '8': addition = 8; break;
1045             case '9': addition = 9; break;
1046             case 'a': addition = 0xA; break;
1047             case 'b': addition = 0xB; break;
1048             case 'c': addition = 0xC; break;
1049             case 'd': addition = 0xD; break;
1050             case 'e': addition = 0xE; break;
1051             case 'f': addition = 0xF; break;
1052             case 'A': addition = 0xA; break;
1053             case 'B': addition = 0xB; break;
1054             case 'C': addition = 0xC; break;
1055             case 'D': addition = 0xD; break;
1056             case 'E': addition = 0xE; break;
1057             case 'F': addition = 0xF; break;
1058             case '\t':
1059             case '\n':
1060             case '\r':
1061             case ' ':
1062             case ';':
1063               addition = kDelimiter;
1064               break;
1065             default:
1066               // Leave addition == kBad
1067               break;
1068           }
1069           // clang-format on
               // Hex digit: fold it into the running chunk length
               // (length * 16 + digit), rejecting values that would overflow
               // size_t.
1070           if (addition >= 0) {
1071             chunk_length_character_extracted_ = true;
1072             size_t length_x_16 = chunk_length_remaining_ * 16;
1073             const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1074             if ((chunk_length_remaining_ > kMaxDiv16) ||
1075                 (std::numeric_limits<size_t>::max() - length_x_16) <
1076                     static_cast<size_t>(addition)) {
1077               // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1078               visitor_->OnRawBodyInput(
1079                   absl::string_view(on_entry, current - on_entry));
1080               HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
1081               return current - input;
1082             }
1083             chunk_length_remaining_ = length_x_16 + addition;
1084             continue;
1085           }
1086 
1087           if (!chunk_length_character_extracted_ || addition == kBad) {
1088             // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1089             // characters were converted, or an unexpected character was
1090             // seen.
1091             visitor_->OnRawBodyInput(
1092                 absl::string_view(on_entry, current - on_entry));
1093             HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
1094             return current - input;
1095           }
1096 
               // A delimiter after at least one hex digit ends the length.
1097           break;
1098         }
1099 
             // The delimiter that ended the length was consumed above; back up
             // so the extension state sees it as its first byte.
1100         --current;
1101         parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1102         visitor_->OnChunkLength(chunk_length_remaining_);
1103         continue;
1104 
1105       case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
1106         // TODO(phython): Convert this scanning to be 16 bytes at a time if
1107         // there is data to be read.
1108         const char* extensions_start = current;
1109         size_t extensions_length = 0;
1110         QUICHE_DCHECK_LE(current, end);
             // Scan to the '\n' that ends the chunk-size line.
1111         while (true) {
1112           if (current == end) {
1113             visitor_->OnChunkExtensionInput(
1114                 absl::string_view(extensions_start, extensions_length));
1115             visitor_->OnRawBodyInput(
1116                 absl::string_view(on_entry, current - on_entry));
1117             return current - input;
1118           }
1119           const char c = *current;
               // On every CR or LF, recompute the length reported to
               // OnChunkExtensionInput(): zero if this is the first byte
               // scanned, otherwise one less than its offset from
               // extensions_start.
               // NOTE(review): for a line ended by a bare '\n' this leaves out
               // the byte just before the '\n' -- confirm this is intended.
1120           if (c == '\r' || c == '\n') {
1121             extensions_length = (extensions_start == current)
1122                                     ? 0
1123                                     : current - extensions_start - 1;
1124           }
1125 
1126           ++current;
1127           if (c == '\n') {
1128             break;
1129           }
1130         }
1131 
             // Clear the flag so the next chunk length starts fresh.
1132         chunk_length_character_extracted_ = false;
1133         visitor_->OnChunkExtensionInput(
1134             absl::string_view(extensions_start, extensions_length));
1135 
1136         if (chunk_length_remaining_ != 0) {
1137           parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1138           continue;
1139         }
1140 
             // A zero-length chunk is the last chunk. The call below presumably
             // seeds the terminator detection with the '\n' that ended the
             // chunk-size line (HeaderFramingFound is defined elsewhere).
1141         HeaderFramingFound('\n');
1142         parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1143         continue;
1144       }
1145 
1146       case BalsaFrameEnums::READING_CHUNK_DATA:
1147         while (current < end) {
1148           if (chunk_length_remaining_ == 0) {
1149             break;
1150           }
1151           // read in the chunk
1152           size_t bytes_remaining = end - current;
1153           size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
1154                                       ? chunk_length_remaining_
1155                                       : bytes_remaining;
1156           const char* tmp_current = current + consumed_bytes;
               // The raw callback gets everything since on_entry (which may
               // include chunk framing bytes); the body callback gets only the
               // chunk payload.
1157           visitor_->OnRawBodyInput(
1158               absl::string_view(on_entry, tmp_current - on_entry));
1159           visitor_->OnBodyChunkInput(
1160               absl::string_view(current, consumed_bytes));
1161           on_entry = current = tmp_current;
1162           chunk_length_remaining_ -= consumed_bytes;
1163         }
1164 
1165         if (chunk_length_remaining_ == 0) {
1166           parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1167           continue;
1168         }
1169 
1170         visitor_->OnRawBodyInput(
1171             absl::string_view(on_entry, current - on_entry));
1172         return current - input;
1173 
1174       case BalsaFrameEnums::READING_CHUNK_TERM:
1175         QUICHE_DCHECK_LE(current, end);
             // Discard bytes up to and including the '\n' that follows the
             // chunk data, then expect the next chunk length.
1176         while (true) {
1177           if (current == end) {
1178             visitor_->OnRawBodyInput(
1179                 absl::string_view(on_entry, current - on_entry));
1180             return current - input;
1181           }
1182 
1183           const char c = *current;
1184           ++current;
1185 
1186           if (c == '\n') {
1187             break;
1188           }
1189         }
1190         parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1191         continue;
1192 
1193       case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1194         QUICHE_DCHECK_LE(current, end);
1195         while (true) {
1196           if (current == end) {
1197             visitor_->OnRawBodyInput(
1198                 absl::string_view(on_entry, current - on_entry));
1199             return current - input;
1200           }
1201 
1202           const char c = *current;
1203           if (HeaderFramingFound(c) != 0) {
1204             // If we've found a "\r\n\r\n", then the message
1205             // is done.
1206             ++current;
1207             parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1208             visitor_->OnRawBodyInput(
1209                 absl::string_view(on_entry, current - on_entry));
1210             visitor_->MessageDone();
1211             return current - input;
1212           }
1213 
1214           // If not, however, since the spec only suggests that the
1215           // client SHOULD indicate the presence of trailers, we get to
1216           // *test* that they did or didn't.
1217           // If all of the bytes we've seen since:
1218           //   OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1219           // are either '\r', or '\n', then we can assume that we don't yet
1220           // know if we need to parse headers, or if the next byte will make
1221           // the HeaderFramingFound condition (above) true.
1222           if (!HeaderFramingMayBeFound()) {
1223             break;
1224           }
1225 
1226           // If HeaderFramingMayBeFound(), then we have seen only characters
1227           // '\r' or '\n'.
1228           ++current;
1229 
1230           // Lets try again! There is no state change here.
1231         }
1232 
1233         // If (!HeaderFramingMayBeFound()), then we know that we must be
1234         // reading the first non CRLF character of a trailer.
             // That character is not consumed here: `current` still points at
             // it, and on_entry is moved to it so the trailer starts there.
1235         parse_state_ = BalsaFrameEnums::READING_TRAILER;
1236         visitor_->OnRawBodyInput(
1237             absl::string_view(on_entry, current - on_entry));
1238         on_entry = current;
1239         continue;
1240 
1241       // TODO(yongfa): No leading whitespace is allowed before field-name per
1242       // RFC2616. Leading whitespace will cause header parsing error too.
1243       case BalsaFrameEnums::READING_TRAILER:
1244         while (current < end) {
1245           const char c = *current;
1246           ++current;
1247           ++trailer_length_;
               // Length limit and line bookkeeping apply only when a trailer_
               // object has been provided.
1248           if (trailer_ != nullptr) {
1249             // Reuse the header length limit for trailer, which is just a bunch
1250             // of headers.
1251             if (trailer_length_ > max_header_length_) {
                   // Un-consume the byte that pushed the trailer over the
                   // limit.
1252               --current;
1253               HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
1254               return current - input;
1255             }
1256             if (LineFramingFound(c)) {
1257               trailer_lines_.push_back(
1258                   std::make_pair(start_of_trailer_line_, trailer_length_));
1259               start_of_trailer_line_ = trailer_length_;
1260             }
1261           }
               // End of the trailer block: finish the message.
1262           if (HeaderFramingFound(c) != 0) {
1263             parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1264             if (trailer_ != nullptr) {
1265               trailer_->WriteFromFramer(on_entry, current - on_entry);
1266               trailer_->DoneWritingFromFramer();
1267               ProcessHeaderLines(trailer_lines_, true /*is_trailer*/, trailer_);
1268               if (parse_state_ == BalsaFrameEnums::ERROR) {
1269                 return current - input;
1270               }
1271               visitor_->ProcessTrailers(*trailer_);
1272             }
1273             visitor_->OnTrailerInput(
1274                 absl::string_view(on_entry, current - on_entry));
1275             visitor_->MessageDone();
1276             return current - input;
1277           }
1278         }
             // Input exhausted mid-trailer: buffer and report what was seen.
1279         if (trailer_ != nullptr) {
1280           trailer_->WriteFromFramer(on_entry, current - on_entry);
1281         }
1282         visitor_->OnTrailerInput(
1283             absl::string_view(on_entry, current - on_entry));
1284         return current - input;
1285 
1286       case BalsaFrameEnums::READING_UNTIL_CLOSE: {
             // Everything available is body; the state never changes here.
1287         const size_t bytes_remaining = end - current;
1288         if (bytes_remaining > 0) {
1289           visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
1290           visitor_->OnBodyChunkInput(
1291               absl::string_view(current, bytes_remaining));
1292           current += bytes_remaining;
1293         }
1294         return current - input;
1295       }
1296 
1297       case BalsaFrameEnums::READING_CONTENT:
1298         while ((content_length_remaining_ != 0u) && current < end) {
1299           // read in the content
1300           const size_t bytes_remaining = end - current;
1301           const size_t consumed_bytes =
1302               (content_length_remaining_ < bytes_remaining)
1303                   ? content_length_remaining_
1304                   : bytes_remaining;
1305           visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
1306           visitor_->OnBodyChunkInput(
1307               absl::string_view(current, consumed_bytes));
1308           current += consumed_bytes;
1309           content_length_remaining_ -= consumed_bytes;
1310         }
             // All content-length bytes delivered: the message is complete.
1311         if (content_length_remaining_ == 0) {
1312           parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1313           visitor_->MessageDone();
1314         }
1315         return current - input;
1316 
1317       default:
1318         // The state-machine should never be in a state that isn't handled
1319         // above.  This is a glaring logic error, and we should do something
1320         // drastic to ensure that this gets looked-at and fixed.
1321         QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_  // COV_NF_LINE
1322                           << " memory corruption?!";            // COV_NF_LINE
1323     }
1324   }
1325 }
1326 
// Out-of-class definitions for the BalsaFrame static constants named below.
// No initializers appear here, so the values are presumably supplied at the
// in-class declarations (not visible in this file) -- TODO confirm.
1327 const int32_t BalsaFrame::kValidTerm1;
1328 const int32_t BalsaFrame::kValidTerm1Mask;
1329 const int32_t BalsaFrame::kValidTerm2;
1330 const int32_t BalsaFrame::kValidTerm2Mask;
1331 
1332 }  // namespace quiche
1333 
// Remove the unsigned-char comparison helper macros defined near the top of
// this file so they are not visible past this point.
1334 #undef CHAR_LT
1335 #undef CHAR_LE
1336 #undef CHAR_GT
1337 #undef CHAR_GE
1338 #undef QUICHE_DCHECK_CHAR_GE
1339