• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "quiche/balsa/balsa_frame.h"
6 
7 #include <algorithm>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstring>
11 #include <limits>
12 #include <memory>
13 #include <string>
14 #include <utility>
15 
16 #include "absl/strings/match.h"
17 #include "absl/strings/numbers.h"
18 #include "absl/strings/string_view.h"
19 #include "quiche/balsa/balsa_enums.h"
20 #include "quiche/balsa/balsa_headers.h"
21 #include "quiche/balsa/balsa_visitor_interface.h"
22 #include "quiche/balsa/header_properties.h"
23 #include "quiche/common/platform/api/quiche_logging.h"
24 
// Relational character comparisons.
//
// When comparing characters (other than == and !=), cast to unsigned char
// to make sure values above 127 rank as expected, even on platforms where char
// is signed and thus such values are represented as negative numbers before the
// cast.  Each macro below casts both operands to unsigned char before applying
// the operator its name spells out (LT: <, LE: <=, GT: >, GE: >=).
#define CHAR_LT(a, b) \
  (static_cast<unsigned char>(a) < static_cast<unsigned char>(b))
#define CHAR_LE(a, b) \
  (static_cast<unsigned char>(a) <= static_cast<unsigned char>(b))
#define CHAR_GT(a, b) \
  (static_cast<unsigned char>(a) > static_cast<unsigned char>(b))
#define CHAR_GE(a, b) \
  (static_cast<unsigned char>(a) >= static_cast<unsigned char>(b))
// Debug-check counterpart of CHAR_GE: forwards the unsigned-char-cast operands
// to QUICHE_DCHECK_GE.  Callers in this file stream extra context into it with
// operator<<.
#define QUICHE_DCHECK_CHAR_GE(a, b) \
  QUICHE_DCHECK_GE(static_cast<unsigned char>(a), static_cast<unsigned char>(b))
39 
40 namespace quiche {
41 
42 namespace {
43 
// Numeric HTTP status codes that this file refers to by name.
constexpr size_t kContinueStatusCode = 100;
constexpr size_t kSwitchingProtocolsStatusCode = 101;

// Lower-case spellings of the header names and transfer-coding values that the
// framer looks for.  Within this file they are compared against message text
// with absl::EqualsIgnoreCase, i.e. case-insensitively.
constexpr absl::string_view kChunked = "chunked";
constexpr absl::string_view kContentLength = "content-length";
constexpr absl::string_view kIdentity = "identity";
constexpr absl::string_view kTransferEncoding = "transfer-encoding";
51 
// Returns true iff `response_code` lies in the 1xx range, i.e. in [100, 200).
bool IsInterimResponse(size_t response_code) {
  constexpr size_t kFirstInterimCode = 100;
  constexpr size_t kFirstNonInterimCode = 200;
  if (response_code < kFirstInterimCode) {
    return false;
  }
  return response_code < kFirstNonInterimCode;
}
55 
56 }  // namespace
57 
Reset()58 void BalsaFrame::Reset() {
59   last_char_was_slash_r_ = false;
60   saw_non_newline_char_ = false;
61   start_was_space_ = true;
62   chunk_length_character_extracted_ = false;
63   // is_request_ = true;               // not reset between messages.
64   allow_reading_until_close_for_request_ = false;
65   // request_was_head_ = false;        // not reset between messages.
66   // max_header_length_ = 16 * 1024;   // not reset between messages.
67   // visitor_ = &do_nothing_visitor_;  // not reset between messages.
68   chunk_length_remaining_ = 0;
69   content_length_remaining_ = 0;
70   last_slash_n_idx_ = 0;
71   term_chars_ = 0;
72   parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
73   last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
74   invalid_chars_.clear();
75   lines_.clear();
76   if (continue_headers_ != nullptr) {
77     continue_headers_->Clear();
78   }
79   if (headers_ != nullptr) {
80     headers_->Clear();
81   }
82   trailer_lines_.clear();
83   start_of_trailer_line_ = 0;
84   trailer_length_ = 0;
85   if (trailer_ != nullptr) {
86     trailer_->Clear();
87   }
88   if (trailers_ != nullptr) {
89     trailers_->Clear();
90   }
91 }
92 
93 namespace {
94 
// Scans a single "island" within the line [begin, end), starting at `current`.
// An island is a (possibly empty) run of whitespace followed by a (possibly
// empty) run of non-whitespace, where any byte <= ' ' (compared as unsigned
// char) counts as whitespace.
//
// Outputs, both expressed as offsets from `begin`:
//   *first_whitespace - start of the whitespace run (the offset of `current`).
//   *first_nonwhite   - start of the non-whitespace run.
//
// Returns a pointer just past the non-whitespace run, so that the
// non-whitespace run is [*first_nonwhite, returnvalue - begin).  The returned
// pointer is either `end` or points at the first whitespace character after
// the island.  (I.e., returnvalue == end || *returnvalue <= ' ')
inline const char* ParseOneIsland(const char* current, const char* begin,
                                  const char* end, size_t* first_whitespace,
                                  size_t* first_nonwhite) {
  const auto is_whitespace = [](char c) {
    return static_cast<unsigned char>(c) <= ' ';
  };

  const char* cursor = current;

  // Leading whitespace run.
  *first_whitespace = cursor - begin;
  for (; cursor < end; ++cursor) {
    if (!is_whitespace(*cursor)) {
      break;
    }
  }

  // Non-whitespace run.
  *first_nonwhite = cursor - begin;
  for (; cursor < end; ++cursor) {
    if (is_whitespace(*cursor)) {
      break;
    }
  }
  return cursor;
}
120 
121 }  // namespace
122 
123 // Summary:
124 //     Parses the first line of either a request or response.
125 //     Note that in the case of a detected warning, error_code will be set
126 //   but the function will not return false.
127 //     Exactly zero or one warning or error (but not both) may be detected
128 //   by this function.
129 //     Note that this function will not write the data of the first-line
130 //   into the header's buffer (that should already have been done elsewhere).
131 //
132 // Pre-conditions:
133 //     begin != end
134 //     *begin should be a character which is > ' '. This implies that there
135 //   is at least one non-whitespace characters between [begin, end).
136 //   headers is a valid pointer to a BalsaHeaders class.
137 //     error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
138 //     Entire first line must exist between [begin, end)
139 //     Exactly zero or one newlines -may- exist between [begin, end)
140 //     [begin, end) should exist in the header's buffer.
141 //
142 // Side-effects:
143 //   headers will be modified
144 //   error_code may be modified if either a warning or error is detected
145 //
146 // Returns:
147 //   True if no error (as opposed to warning) is detected.
148 //   False if an error (as opposed to warning) is detected.
149 
150 //
// If there is indeed non-whitespace in the line, then the following
// will take care of the leading-whitespace precondition for you:
//  while (*begin <= ' ') ++begin;
//  ParseHTTPFirstLine(begin, end, is_request, headers, &error_code);
155 //
156 
ParseHTTPFirstLine(const char * begin,const char * end,bool is_request,BalsaHeaders * headers,BalsaFrameEnums::ErrorCode * error_code)157 bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
158                         BalsaHeaders* headers,
159                         BalsaFrameEnums::ErrorCode* error_code) {
160   while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
161     --end;
162   }
163 
164   const char* current =
165       ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
166                      &headers->non_whitespace_1_idx_);
167   current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
168                            &headers->non_whitespace_2_idx_);
169   current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
170                            &headers->non_whitespace_3_idx_);
171 
172   // Clean up any trailing whitespace that comes after the third island
173   const char* last = end;
174   while (current <= last && CHAR_LE(*last, ' ')) {
175     --last;
176   }
177   headers->whitespace_4_idx_ = last - begin + 1;
178 
179   // Either the passed-in line is empty, or it starts with a non-whitespace
180   // character.
181   QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
182 
183   QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
184   QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
185 
186   // If the line isn't empty, it has at least one non-whitespace character (see
187   // first QUICHE_DCHECK), which will have been identified as a non-empty
188   // [non_whitespace_1_idx_, whitespace_2_idx_).
189   QUICHE_DCHECK(begin == end ||
190                 headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
191 
192   if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
193     // This error may be triggered if the second token is empty, OR there's no
194     // WS after the first token; we don't bother to distinguish exactly which.
195     // (I'm not sure why we distinguish different kinds of parse error at all,
196     // actually.)
197     // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
198     // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
199     *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
200         BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
201         static_cast<int>(is_request));
202     if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
203       return false;
204     }
205   }
206   if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
207     if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
208       // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
209       // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
210       *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
211           BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
212           static_cast<int>(is_request));
213     }
214   }
215 
216   if (!is_request) {
217     headers->parsed_response_code_ = 0;
218     // If the response code is non-empty:
219     if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
220       if (!absl::SimpleAtoi(
221               absl::string_view(begin + headers->non_whitespace_2_idx_,
222                                 headers->non_whitespace_3_idx_ -
223                                     headers->non_whitespace_2_idx_),
224               &headers->parsed_response_code_)) {
225         *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
226         return false;
227       }
228     }
229   }
230 
231   return true;
232 }
233 
234 // begin - beginning of the firstline
235 // end - end of the firstline
236 //
237 // A precondition for this function is that there is non-whitespace between
238 // [begin, end). If this precondition is not met, the function will not perform
239 // as expected (and bad things may happen, and it will eat your first, second,
240 // and third unborn children!).
241 //
242 // Another precondition for this function is that [begin, end) includes
243 // at most one newline, which must be at the end of the line.
ProcessFirstLine(const char * begin,const char * end)244 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
245   BalsaFrameEnums::ErrorCode previous_error = last_error_;
246   if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
247     parse_state_ = BalsaFrameEnums::ERROR;
248     HandleError(last_error_);
249     return;
250   }
251   if (previous_error != last_error_) {
252     HandleWarning(last_error_);
253   }
254 
255   const absl::string_view line_input(
256       begin + headers_->non_whitespace_1_idx_,
257       headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
258   const absl::string_view part1(
259       begin + headers_->non_whitespace_1_idx_,
260       headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
261   const absl::string_view part2(
262       begin + headers_->non_whitespace_2_idx_,
263       headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
264   const absl::string_view part3(
265       begin + headers_->non_whitespace_3_idx_,
266       headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
267 
268   if (is_request_) {
269     visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
270     if (part3.empty()) {
271       parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
272     }
273     return;
274   }
275 
276   visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
277 }
278 
279 // 'stream_begin' points to the first character of the headers buffer.
280 // 'line_begin' points to the first character of the line.
281 // 'current' points to a char which is ':'.
282 // 'line_end' points to the position of '\n' + 1.
283 // 'line_begin' points to the position of first character of line.
CleanUpKeyValueWhitespace(const char * stream_begin,const char * line_begin,const char * current,const char * line_end,HeaderLineDescription * current_header_line)284 void BalsaFrame::CleanUpKeyValueWhitespace(
285     const char* stream_begin, const char* line_begin, const char* current,
286     const char* line_end, HeaderLineDescription* current_header_line) {
287   const char* colon_loc = current;
288   QUICHE_DCHECK_LT(colon_loc, line_end);
289   QUICHE_DCHECK_EQ(':', *colon_loc);
290   QUICHE_DCHECK_EQ(':', *current);
291   QUICHE_DCHECK_CHAR_GE(' ', *line_end)
292       << "\"" << std::string(line_begin, line_end) << "\"";
293 
294   --current;
295   while (current > line_begin && CHAR_LE(*current, ' ')) {
296     --current;
297   }
298   current += static_cast<int>(current != colon_loc);
299   current_header_line->key_end_idx = current - stream_begin;
300 
301   current = colon_loc;
302   QUICHE_DCHECK_EQ(':', *current);
303   ++current;
304   while (current < line_end && CHAR_LE(*current, ' ')) {
305     ++current;
306   }
307   current_header_line->value_begin_idx = current - stream_begin;
308 
309   QUICHE_DCHECK_GE(current_header_line->key_end_idx,
310                    current_header_line->first_char_idx);
311   QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
312                    current_header_line->key_end_idx);
313   QUICHE_DCHECK_GE(current_header_line->last_char_idx,
314                    current_header_line->value_begin_idx);
315 }
316 
FindColonsAndParseIntoKeyValue(const Lines & lines,bool is_trailer,BalsaHeaders * headers)317 bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
318                                                 bool is_trailer,
319                                                 BalsaHeaders* headers) {
320   QUICHE_DCHECK(!lines.empty());
321   const char* stream_begin = headers->OriginalHeaderStreamBegin();
322   // The last line is always just a newline (and is uninteresting).
323   const Lines::size_type lines_size_m1 = lines.size() - 1;
324   // For a trailer, there is no first line, so lines[0] is the first header.
325   // For real headers, the first line takes lines[0], so real header starts
326   // at index 1.
327   int first_header_idx = (is_trailer ? 0 : 1);
328   const char* current = stream_begin + lines[first_header_idx].first;
329   // This code is a bit more subtle than it may appear at first glance.
330   // This code looks for a colon in the current line... but it also looks
331   // beyond the current line. If there is no colon in the current line, then
332   // for each subsequent line (until the colon which -has- been found is
333   // associated with a line), no searching for a colon will be performed. In
334   // this way, we minimize the amount of bytes we have scanned for a colon.
335   for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
336     const char* line_begin = stream_begin + lines[i].first;
337 
338     // Here we handle possible continuations.  Note that we do not replace
339     // the '\n' in the line before a continuation (at least, as of now),
340     // which implies that any code which looks for a value must deal with
341     // "\r\n", etc -within- the line (and not just at the end of it).
342     for (++i; i < lines_size_m1; ++i) {
343       const char c = *(stream_begin + lines[i].first);
344       if (CHAR_GT(c, ' ')) {
345         // Not a continuation, so stop.  Note that if the 'original' i = 1,
346         // and the next line is not a continuation, we'll end up with i = 2
347         // when we break. This handles the incrementing of i for the outer
348         // loop.
349         break;
350       }
351 
352       // Space and tab are valid starts to continuation lines.
353       // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
354       // can choose to reject or normalize continuation lines.
355       if ((c != ' ' && c != '\t') ||
356           http_validation_policy().disallow_header_continuation_lines) {
357         HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
358                                : BalsaFrameEnums::INVALID_HEADER_FORMAT);
359         return false;
360       }
361 
362       // If disallow_header_continuation_lines() is false, we neither reject nor
363       // normalize continuation lines, in violation of RFC7230.
364     }
365     const char* line_end = stream_begin + lines[i - 1].second;
366     QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
367 
368     // We cleanup the whitespace at the end of the line before doing anything
369     // else of interest as it allows us to do nothing when irregularly formatted
370     // headers are parsed (e.g. those with only keys, only values, or no colon).
371     //
372     // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
373     --line_end;
374     QUICHE_DCHECK_EQ('\n', *line_end)
375         << "\"" << std::string(line_begin, line_end) << "\"";
376     while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
377       --line_end;
378     }
379     ++line_end;
380     QUICHE_DCHECK_CHAR_GE(' ', *line_end);
381     QUICHE_DCHECK_LT(line_begin, line_end);
382 
383     // We use '0' for the block idx, because we're always writing to the first
384     // block from the framer (we do this because the framer requires that the
385     // entire header sequence be in a contiguous buffer).
386     headers->header_lines_.push_back(HeaderLineDescription(
387         line_begin - stream_begin, line_end - stream_begin,
388         line_end - stream_begin, line_end - stream_begin, 0));
389     if (current >= line_end) {
390       if (http_validation_policy().require_header_colon) {
391         HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
392                                : BalsaFrameEnums::HEADER_MISSING_COLON);
393         return false;
394       }
395       HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
396                                : BalsaFrameEnums::HEADER_MISSING_COLON);
397       // Then the next colon will not be found within this header line-- time
398       // to try again with another header-line.
399       continue;
400     }
401     if (current < line_begin) {
402       // When this condition is true, the last detected colon was part of a
403       // previous line.  We reset to the beginning of the line as we don't care
404       // about the presence of any colon before the beginning of the current
405       // line.
406       current = line_begin;
407     }
408     for (; current < line_end; ++current) {
409       if (*current == ':') {
410         break;
411       }
412 
413       // Generally invalid characters were found earlier.
414       if (http_validation_policy().disallow_double_quote_in_header_name) {
415         if (header_properties::IsInvalidHeaderKeyChar(*current)) {
416           HandleError(is_trailer
417                           ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
418                           : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
419           return false;
420         }
421       } else if (header_properties::IsInvalidHeaderKeyCharAllowDoubleQuote(
422                      *current)) {
423         HandleError(is_trailer
424                         ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
425                         : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
426         return false;
427       }
428     }
429 
430     if (current == line_end) {
431       // There was no colon in the line. The arguments we passed into the
432       // construction for the HeaderLineDescription object should be OK-- it
433       // assumes that the entire content is 'key' by default (which is true, as
434       // there was no colon, there can be no value). Note that this is a
435       // construct which is technically not allowed by the spec.
436 
437       // In strict mode, we do treat this invalid value-less key as an error.
438       if (http_validation_policy().require_header_colon) {
439         HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
440                                : BalsaFrameEnums::HEADER_MISSING_COLON);
441         return false;
442       }
443       HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
444                                : BalsaFrameEnums::HEADER_MISSING_COLON);
445       continue;
446     }
447 
448     QUICHE_DCHECK_EQ(*current, ':');
449     QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
450     QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
451 
452     HeaderLineDescription& current_header_line = headers->header_lines_.back();
453     current_header_line.key_end_idx = current - stream_begin;
454     current_header_line.value_begin_idx = current_header_line.key_end_idx;
455     if (current < line_end) {
456       ++current_header_line.key_end_idx;
457 
458       CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
459                                 &current_header_line);
460     }
461 
462     const absl::string_view key(
463         stream_begin + current_header_line.first_char_idx,
464         current_header_line.key_end_idx - current_header_line.first_char_idx);
465     const absl::string_view value(
466         stream_begin + current_header_line.value_begin_idx,
467         current_header_line.last_char_idx -
468             current_header_line.value_begin_idx);
469     visitor_->OnHeader(key, value);
470   }
471 
472   return true;
473 }
474 
// Records `error_code` as the most recent error and forwards it to the
// visitor's HandleWarning.  Unlike HandleError, parse_state_ is left untouched.
void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
  last_error_ = error_code;
  visitor_->HandleWarning(last_error_);
}
479 
// Records `error_code` as the most recent error, moves the framer into the
// ERROR parse state, and then forwards the code to the visitor's HandleError.
void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
  last_error_ = error_code;
  // The state is switched before the visitor is notified.
  parse_state_ = BalsaFrameEnums::ERROR;
  visitor_->HandleError(last_error_);
}
485 
ProcessContentLengthLine(HeaderLines::size_type line_idx,size_t * length)486 BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
487     HeaderLines::size_type line_idx, size_t* length) {
488   const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
489   const char* stream_begin = headers_->OriginalHeaderStreamBegin();
490   const char* line_end = stream_begin + header_line.last_char_idx;
491   const char* value_begin = (stream_begin + header_line.value_begin_idx);
492 
493   if (value_begin >= line_end) {
494     // There is no non-whitespace value data.
495     QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
496     return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
497   }
498 
499   *length = 0;
500   while (value_begin < line_end) {
501     if (*value_begin < '0' || *value_begin > '9') {
502       // bad! content-length found, and couldn't parse all of it!
503       QUICHE_DVLOG(1)
504           << "invalid content-length - non numeric character detected";
505       return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
506     }
507     const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
508     size_t length_x_10 = *length * 10;
509     const size_t c = *value_begin - '0';
510     if (*length > kMaxDiv10 ||
511         (std::numeric_limits<size_t>::max() - length_x_10) < c) {
512       QUICHE_DVLOG(1) << "content-length overflow";
513       return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
514     }
515     *length = length_x_10 + c;
516     ++value_begin;
517   }
518   QUICHE_DVLOG(1) << "content_length parsed: " << *length;
519   return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
520 }
521 
ProcessTransferEncodingLine(HeaderLines::size_type line_idx)522 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
523   const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
524   const char* stream_begin = headers_->OriginalHeaderStreamBegin();
525   const absl::string_view transfer_encoding(
526       stream_begin + header_line.value_begin_idx,
527       header_line.last_char_idx - header_line.value_begin_idx);
528 
529   if (absl::EqualsIgnoreCase(transfer_encoding, kChunked)) {
530     headers_->transfer_encoding_is_chunked_ = true;
531     return;
532   }
533 
534   if (absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
535     headers_->transfer_encoding_is_chunked_ = false;
536     return;
537   }
538 
539   if (http_validation_policy().validate_transfer_encoding) {
540     HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
541   }
542 }
543 
CheckHeaderLinesForInvalidChars(const Lines & lines,const BalsaHeaders * headers)544 bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
545                                                  const BalsaHeaders* headers) {
546   // Read from the beginning of the first line to the end of the last line.
547   // Note we need to add the first line's offset as in the case of a trailer
548   // it's non-zero.
549   const char* stream_begin =
550       headers->OriginalHeaderStreamBegin() + lines.front().first;
551   const char* stream_end =
552       headers->OriginalHeaderStreamBegin() + lines.back().second;
553   bool found_invalid = false;
554 
555   for (const char* c = stream_begin; c < stream_end; c++) {
556     if (header_properties::IsInvalidHeaderChar(*c)) {
557       found_invalid = true;
558       invalid_chars_[*c]++;
559     }
560   }
561 
562   return found_invalid;
563 }
564 
ProcessHeaderLines(const Lines & lines,bool is_trailer,BalsaHeaders * headers)565 void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
566                                     BalsaHeaders* headers) {
567   QUICHE_DCHECK(!lines.empty());
568   QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
569 
570   if ((is_request() || http_validation_policy()
571                            .disallow_invalid_header_characters_in_response) &&
572       track_invalid_chars()) {
573     if (CheckHeaderLinesForInvalidChars(lines, headers)) {
574       if (invalid_chars_error_enabled()) {
575         HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
576         return;
577       }
578 
579       HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
580     }
581   }
582 
583   // There is no need to attempt to process headers (resp. trailers)
584   // if no header (resp. trailer) lines exist.
585   //
586   // The last line of the message, which is an empty line, is never a header
587   // (resp. trailer) line.  Furthermore, the first line of the message is not
588   // a header line.  Therefore there are at least two (resp. one) lines in the
589   // message which are not header (resp. trailer) lines.
590   //
591   // Thus, we test to see if we have more than two (resp. one) lines total
592   // before attempting to parse any header (resp. trailer) lines.
593   if (lines.size() <= (is_trailer ? 1 : 2)) {
594     return;
595   }
596 
597   HeaderLines::size_type content_length_idx = 0;
598   HeaderLines::size_type transfer_encoding_idx = 0;
599   const char* stream_begin = headers->OriginalHeaderStreamBegin();
600   // Parse the rest of the header or trailer data into key-value pairs.
601   if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
602     return;
603   }
604   // At this point, we've parsed all of the headers/trailers.  Time to look
605   // for those headers which we require for framing or for format errors.
606   const HeaderLines::size_type lines_size = headers->header_lines_.size();
607   for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
608     const HeaderLineDescription& line = headers->header_lines_[i];
609     const absl::string_view key(stream_begin + line.first_char_idx,
610                                 line.key_end_idx - line.first_char_idx);
611     QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();
612 
613     // If a header begins with either lowercase or uppercase 'c' or 't', then
614     // the header may be one of content-length, connection, content-encoding
615     // or transfer-encoding. These headers are special, as they change the way
616     // that the message is framed, and so the framer is required to search
617     // for them.  However, first check for a formatting error, and skip
618     // special header treatment on trailer lines (when is_trailer is true).
619     if (key.empty() || key[0] == ' ') {
620       parse_state_ = BalsaFrameEnums::ERROR;
621       HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
622                              : BalsaFrameEnums::INVALID_HEADER_FORMAT);
623       return;
624     }
625     if (is_trailer) {
626       continue;
627     }
628     if (absl::EqualsIgnoreCase(key, kContentLength)) {
629       size_t length = 0;
630       BalsaHeadersEnums::ContentLengthStatus content_length_status =
631           ProcessContentLengthLine(i, &length);
632       if (content_length_idx == 0) {
633         content_length_idx = i + 1;
634         headers->content_length_status_ = content_length_status;
635         headers->content_length_ = length;
636         content_length_remaining_ = length;
637         continue;
638       }
639       if ((headers->content_length_status_ != content_length_status) ||
640           ((headers->content_length_status_ ==
641             BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
642            (http_validation_policy().disallow_multiple_content_length ||
643             length != headers->content_length_))) {
644         HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
645         return;
646       }
647       continue;
648     }
649     if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
650       if (http_validation_policy().validate_transfer_encoding &&
651           transfer_encoding_idx != 0) {
652         HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
653         return;
654       }
655       transfer_encoding_idx = i + 1;
656     }
657   }
658 
659   if (!is_trailer) {
660     if (http_validation_policy().validate_transfer_encoding &&
661         http_validation_policy()
662             .disallow_transfer_encoding_with_content_length &&
663         content_length_idx != 0 && transfer_encoding_idx != 0) {
664       HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
665       return;
666     }
667     if (headers->transfer_encoding_is_chunked_) {
668       headers->content_length_ = 0;
669       headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
670       content_length_remaining_ = 0;
671     }
672     if (transfer_encoding_idx != 0) {
673       ProcessTransferEncodingLine(transfer_encoding_idx - 1);
674     }
675   }
676 }
677 
AssignParseStateAfterHeadersHaveBeenParsed()678 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
679   // For responses, can't have a body if the request was a HEAD, or if it is
680   // one of these response-codes.  rfc2616 section 4.3
681   parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
682   int response_code = headers_->parsed_response_code_;
683   if (!is_request_ && (request_was_head_ ||
684                        !BalsaHeaders::ResponseCanHaveBody(response_code))) {
685     // There is no body.
686     return;
687   }
688 
689   if (headers_->transfer_encoding_is_chunked_) {
690     // Note that
691     // if ( Transfer-Encoding: chunked &&  Content-length: )
692     // then Transfer-Encoding: chunked trumps.
693     // This is as specified in the spec.
694     // rfc2616 section 4.4.3
695     parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
696     return;
697   }
698 
699   // Errors parsing content-length definitely can cause
700   // protocol errors/warnings
701   switch (headers_->content_length_status_) {
702     // If we have a content-length, and it is parsed
703     // properly, there are two options.
704     // 1) zero content, in which case the message is done, and
705     // 2) nonzero content, in which case we have to
706     //    consume the body.
707     case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
708       if (headers_->content_length_ == 0) {
709         parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
710       } else {
711         parse_state_ = BalsaFrameEnums::READING_CONTENT;
712       }
713       break;
714     case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
715     case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
716       // If there were characters left-over after parsing the
717       // content length, we should flag an error and stop.
718       HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
719       break;
720       // We can have: no transfer-encoding, no content length, and no
721       // connection: close...
722       // Unfortunately, this case doesn't seem to be covered in the spec.
723       // We'll assume that the safest thing to do here is what the google
724       // binaries before 2008 already do, which is to assume that
725       // everything until the connection is closed is body.
726     case BalsaHeadersEnums::NO_CONTENT_LENGTH:
727       if (is_request_) {
728         const absl::string_view method = headers_->request_method();
729         // POSTs and PUTs should have a detectable body length.  If they
730         // do not we consider it an error.
731         if ((method != "POST" && method != "PUT") ||
732             !http_validation_policy().require_content_length_if_body_required) {
733           parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
734           break;
735         } else if (!allow_reading_until_close_for_request_) {
736           HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
737           break;
738         }
739       }
740       parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
741       HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
742       break;
743       // The COV_NF_... statements here provide hints to the apparatus
744       // which computes coverage reports/ratios that this code is never
745       // intended to be executed, and should technically be impossible.
746       // COV_NF_START
747     default:
748       QUICHE_LOG(FATAL) << "Saw a content_length_status: "
749                         << headers_->content_length_status_
750                         << " which is unknown.";
751       // COV_NF_END
752   }
753 }
754 
ProcessHeaders(const char * message_start,size_t message_length)755 size_t BalsaFrame::ProcessHeaders(const char* message_start,
756                                   size_t message_length) {
757   const char* const original_message_start = message_start;
758   const char* const message_end = message_start + message_length;
759   const char* message_current = message_start;
760   const char* checkpoint = message_start;
761 
762   if (message_length == 0) {
763     return message_current - original_message_start;
764   }
765 
766   while (message_current < message_end) {
767     size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
768 
769     // Yes, we could use strchr (assuming null termination), or
770     // memchr, but as it turns out that is slower than this tight loop
771     // for the input that we see.
772     if (!saw_non_newline_char_) {
773       do {
774         const char c = *message_current;
775         if (c != '\r' && c != '\n') {
776           if (CHAR_LE(c, ' ')) {
777             HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
778             return message_current - original_message_start;
779           }
780           break;
781         }
782         ++message_current;
783         if (message_current == message_end) {
784           return message_current - original_message_start;
785         }
786       } while (true);
787       saw_non_newline_char_ = true;
788       message_start = message_current;
789       checkpoint = message_current;
790     }
791     while (message_current < message_end) {
792       if (*message_current != '\n') {
793         ++message_current;
794         continue;
795       }
796       const size_t relative_idx = message_current - message_start;
797       const size_t message_current_idx = 1 + base_idx + relative_idx;
798       lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
799       if (lines_.size() == 1) {
800         headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
801         checkpoint = message_current + 1;
802         const char* begin = headers_->OriginalHeaderStreamBegin();
803 
804         QUICHE_DVLOG(1) << "First line "
805                         << std::string(begin, lines_[0].second);
806         QUICHE_DVLOG(1) << "is_request_: " << is_request_;
807         ProcessFirstLine(begin, begin + lines_[0].second);
808         if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
809           break;
810         }
811 
812         if (parse_state_ == BalsaFrameEnums::ERROR) {
813           return message_current - original_message_start;
814         }
815       }
816       const size_t chars_since_last_slash_n =
817           (message_current_idx - last_slash_n_idx_);
818       last_slash_n_idx_ = message_current_idx;
819       if (chars_since_last_slash_n > 2) {
820         // false positive.
821         ++message_current;
822         continue;
823       }
824       if ((chars_since_last_slash_n == 1) ||
825           (((message_current > message_start) &&
826             (*(message_current - 1) == '\r')) ||
827            (last_char_was_slash_r_))) {
828         break;
829       }
830       ++message_current;
831     }
832 
833     if (message_current == message_end) {
834       continue;
835     }
836 
837     ++message_current;
838     QUICHE_DCHECK(message_current >= message_start);
839     if (message_current > message_start) {
840       headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
841     }
842 
843     // Check if we have exceeded maximum headers length
844     // Although we check for this limit before and after we call this function
845     // we check it here as well to make sure that in case the visitor changed
846     // the max_header_length_ (for example after processing the first line)
847     // we handle it gracefully.
848     if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
849       HandleHeadersTooLongError();
850       return message_current - original_message_start;
851     }
852 
853     // Since we know that we won't be writing any more bytes of the header,
854     // we tell that to the headers object. The headers object may make
855     // more efficient allocation decisions when this is signaled.
856     headers_->DoneWritingFromFramer();
857     visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());
858 
859     // Ok, now that we've written everything into our header buffer, it is
860     // time to process the header lines (extract proper values for headers
861     // which are important for framing).
862     ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
863     if (parse_state_ == BalsaFrameEnums::ERROR) {
864       return message_current - original_message_start;
865     }
866 
867     if (use_interim_headers_callback_ &&
868         IsInterimResponse(headers_->parsed_response_code()) &&
869         headers_->parsed_response_code() != kSwitchingProtocolsStatusCode) {
870       // Deliver headers from this interim response but reset everything else to
871       // prepare for the next set of headers. Skip 101 Switching Protocols
872       // because these are considered final headers for the current protocol.
873       visitor_->OnInterimHeaders(
874           std::make_unique<BalsaHeaders>(std::move(*headers_)));
875       Reset();
876       checkpoint = message_start = message_current;
877       continue;
878     }
879     if (continue_headers_ != nullptr &&
880         headers_->parsed_response_code_ == kContinueStatusCode) {
881       // Save the headers from this 100 Continue response but reset everything
882       // else to prepare for the next set of headers.
883       BalsaHeaders saved_continue_headers = std::move(*headers_);
884       Reset();
885       *continue_headers_ = std::move(saved_continue_headers);
886       visitor_->ContinueHeaderDone();
887       checkpoint = message_start = message_current;
888       continue;
889     }
890     AssignParseStateAfterHeadersHaveBeenParsed();
891     if (parse_state_ == BalsaFrameEnums::ERROR) {
892       return message_current - original_message_start;
893     }
894     visitor_->ProcessHeaders(*headers_);
895     visitor_->HeaderDone();
896     if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
897       visitor_->MessageDone();
898     }
899     return message_current - original_message_start;
900   }
901   // If we've gotten to here, it means that we've consumed all of the
902   // available input. We need to record whether or not the last character we
903   // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
904   // a header framing that is split across the two calls.
905   last_char_was_slash_r_ = (*(message_end - 1) == '\r');
906   QUICHE_DCHECK(message_current >= message_start);
907   if (message_current > message_start) {
908     headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
909   }
910   return message_current - original_message_start;
911 }
912 
BytesSafeToSplice() const913 size_t BalsaFrame::BytesSafeToSplice() const {
914   switch (parse_state_) {
915     case BalsaFrameEnums::READING_CHUNK_DATA:
916       return chunk_length_remaining_;
917     case BalsaFrameEnums::READING_UNTIL_CLOSE:
918       return std::numeric_limits<size_t>::max();
919     case BalsaFrameEnums::READING_CONTENT:
920       return content_length_remaining_;
921     default:
922       return 0;
923   }
924 }
925 
BytesSpliced(size_t bytes_spliced)926 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
927   switch (parse_state_) {
928     case BalsaFrameEnums::READING_CHUNK_DATA:
929       if (chunk_length_remaining_ < bytes_spliced) {
930         HandleError(BalsaFrameEnums::
931                         CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
932         return;
933       }
934       chunk_length_remaining_ -= bytes_spliced;
935       if (chunk_length_remaining_ == 0) {
936         parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
937       }
938       return;
939 
940     case BalsaFrameEnums::READING_UNTIL_CLOSE:
941       return;
942 
943     case BalsaFrameEnums::READING_CONTENT:
944       if (content_length_remaining_ < bytes_spliced) {
945         HandleError(BalsaFrameEnums::
946                         CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
947         return;
948       }
949       content_length_remaining_ -= bytes_spliced;
950       if (content_length_remaining_ == 0) {
951         parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
952         visitor_->MessageDone();
953       }
954       return;
955 
956     default:
957       HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
958       return;
959   }
960 }
961 
ProcessInput(const char * input,size_t size)962 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
963   const char* current = input;
964   const char* on_entry = current;
965   const char* end = current + size;
966 
967   QUICHE_DCHECK(headers_ != nullptr);
968   if (headers_ == nullptr) {
969     return 0;
970   }
971 
972   if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
973     const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
974     // Yes, we still have to check this here as the user can change the
975     // max_header_length amount!
976     // Also it is possible that we have reached the maximum allowed header size,
977     // and we have more to consume (remember we are still inside
978     // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
979     if (header_length > max_header_length_ ||
980         (header_length == max_header_length_ && size > 0)) {
981       HandleHeadersTooLongError();
982       return current - input;
983     }
984     const size_t bytes_to_process =
985         std::min(max_header_length_ - header_length, size);
986     current += ProcessHeaders(input, bytes_to_process);
987     // If we are still reading headers check if we have crossed the headers
988     // limit. Note that we check for >= as opposed to >. This is because if
989     // header_length_after equals max_header_length_ and we are still in the
990     // parse_state_  BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
991     // sure that the headers limit will be crossed later on
992     if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
993       // Note that headers_ is valid only if we are still reading headers.
994       const size_t header_length_after =
995           headers_->GetReadableBytesFromHeaderStream();
996       if (header_length_after >= max_header_length_) {
997         HandleHeadersTooLongError();
998       }
999     }
1000     return current - input;
1001   }
1002 
1003   if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
1004       parse_state_ == BalsaFrameEnums::ERROR) {
1005     // Can do nothing more 'till we're reset.
1006     return current - input;
1007   }
1008 
1009   QUICHE_DCHECK_LE(current, end);
1010   if (current == end) {
1011     return current - input;
1012   }
1013 
1014   while (true) {
1015     switch (parse_state_) {
1016       case BalsaFrameEnums::READING_CHUNK_LENGTH:
1017         // In this state we read the chunk length.
1018         // Note that once we hit a character which is not in:
1019         // [0-9;A-Fa-f\n], we transition to a different state.
1020         //
1021         QUICHE_DCHECK_LE(current, end);
1022         while (true) {
1023           if (current == end) {
1024             visitor_->OnRawBodyInput(
1025                 absl::string_view(on_entry, current - on_entry));
1026             return current - input;
1027           }
1028 
1029           const char c = *current;
1030           ++current;
1031 
1032           static const signed char kBad = -1;
1033           static const signed char kDelimiter = -2;
1034 
1035           // valid cases:
1036           //  "09123\n"                      // -> 09123
1037           //  "09123\r\n"                    // -> 09123
1038           //  "09123  \n"                    // -> 09123
1039           //  "09123  \r\n"                  // -> 09123
1040           //  "09123  12312\n"               // -> 09123
1041           //  "09123  12312\r\n"             // -> 09123
1042           //  "09123; foo=bar\n"             // -> 09123
1043           //  "09123; foo=bar\r\n"           // -> 09123
1044           //  "FFFFFFFFFFFFFFFF\r\n"         // -> FFFFFFFFFFFFFFFF
1045           //  "FFFFFFFFFFFFFFFF 22\r\n"      // -> FFFFFFFFFFFFFFFF
1046           // invalid cases:
1047           // "[ \t]+[^\n]*\n"
1048           // "FFFFFFFFFFFFFFFFF\r\n"  (would overflow)
1049           // "\r\n"
1050           // "\n"
1051           signed char addition = kBad;
1052           // clang-format off
1053           switch (c) {
1054             case '0': addition = 0; break;
1055             case '1': addition = 1; break;
1056             case '2': addition = 2; break;
1057             case '3': addition = 3; break;
1058             case '4': addition = 4; break;
1059             case '5': addition = 5; break;
1060             case '6': addition = 6; break;
1061             case '7': addition = 7; break;
1062             case '8': addition = 8; break;
1063             case '9': addition = 9; break;
1064             case 'a': addition = 0xA; break;
1065             case 'b': addition = 0xB; break;
1066             case 'c': addition = 0xC; break;
1067             case 'd': addition = 0xD; break;
1068             case 'e': addition = 0xE; break;
1069             case 'f': addition = 0xF; break;
1070             case 'A': addition = 0xA; break;
1071             case 'B': addition = 0xB; break;
1072             case 'C': addition = 0xC; break;
1073             case 'D': addition = 0xD; break;
1074             case 'E': addition = 0xE; break;
1075             case 'F': addition = 0xF; break;
1076             case '\t':
1077             case '\n':
1078             case '\r':
1079             case ' ':
1080             case ';':
1081               addition = kDelimiter;
1082               break;
1083             default:
1084               // Leave addition == kBad
1085               break;
1086           }
1087           // clang-format on
1088           if (addition >= 0) {
1089             chunk_length_character_extracted_ = true;
1090             size_t length_x_16 = chunk_length_remaining_ * 16;
1091             const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1092             if ((chunk_length_remaining_ > kMaxDiv16) ||
1093                 (std::numeric_limits<size_t>::max() - length_x_16) <
1094                     static_cast<size_t>(addition)) {
1095               // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1096               visitor_->OnRawBodyInput(
1097                   absl::string_view(on_entry, current - on_entry));
1098               HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
1099               return current - input;
1100             }
1101             chunk_length_remaining_ = length_x_16 + addition;
1102             continue;
1103           }
1104 
1105           if (!chunk_length_character_extracted_ || addition == kBad) {
1106             // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1107             // characters were converted, or an unexpected character was
1108             // seen.
1109             visitor_->OnRawBodyInput(
1110                 absl::string_view(on_entry, current - on_entry));
1111             HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
1112             return current - input;
1113           }
1114 
1115           break;
1116         }
1117 
1118         --current;
1119         parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1120         visitor_->OnChunkLength(chunk_length_remaining_);
1121         continue;
1122 
1123       case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
1124         // TODO(phython): Convert this scanning to be 16 bytes at a time if
1125         // there is data to be read.
1126         const char* extensions_start = current;
1127         size_t extensions_length = 0;
1128         QUICHE_DCHECK_LE(current, end);
1129         while (true) {
1130           if (current == end) {
1131             visitor_->OnChunkExtensionInput(
1132                 absl::string_view(extensions_start, extensions_length));
1133             visitor_->OnRawBodyInput(
1134                 absl::string_view(on_entry, current - on_entry));
1135             return current - input;
1136           }
1137           const char c = *current;
1138           if (c == '\r' || c == '\n') {
1139             extensions_length = (extensions_start == current)
1140                                     ? 0
1141                                     : current - extensions_start - 1;
1142           }
1143 
1144           ++current;
1145           if (c == '\n') {
1146             break;
1147           }
1148         }
1149 
1150         chunk_length_character_extracted_ = false;
1151         visitor_->OnChunkExtensionInput(
1152             absl::string_view(extensions_start, extensions_length));
1153 
1154         if (chunk_length_remaining_ != 0) {
1155           parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1156           continue;
1157         }
1158 
1159         HeaderFramingFound('\n');
1160         parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1161         continue;
1162       }
1163 
1164       case BalsaFrameEnums::READING_CHUNK_DATA:
1165         while (current < end) {
1166           if (chunk_length_remaining_ == 0) {
1167             break;
1168           }
1169           // read in the chunk
1170           size_t bytes_remaining = end - current;
1171           size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
1172                                       ? chunk_length_remaining_
1173                                       : bytes_remaining;
1174           const char* tmp_current = current + consumed_bytes;
1175           visitor_->OnRawBodyInput(
1176               absl::string_view(on_entry, tmp_current - on_entry));
1177           visitor_->OnBodyChunkInput(
1178               absl::string_view(current, consumed_bytes));
1179           on_entry = current = tmp_current;
1180           chunk_length_remaining_ -= consumed_bytes;
1181         }
1182 
1183         if (chunk_length_remaining_ == 0) {
1184           parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1185           continue;
1186         }
1187 
1188         visitor_->OnRawBodyInput(
1189             absl::string_view(on_entry, current - on_entry));
1190         return current - input;
1191 
1192       case BalsaFrameEnums::READING_CHUNK_TERM:
1193         QUICHE_DCHECK_LE(current, end);
1194         while (true) {
1195           if (current == end) {
1196             visitor_->OnRawBodyInput(
1197                 absl::string_view(on_entry, current - on_entry));
1198             return current - input;
1199           }
1200 
1201           const char c = *current;
1202           ++current;
1203 
1204           if (c == '\n') {
1205             break;
1206           }
1207         }
1208         parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1209         continue;
1210 
1211       case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1212         QUICHE_DCHECK_LE(current, end);
1213         while (true) {
1214           if (current == end) {
1215             visitor_->OnRawBodyInput(
1216                 absl::string_view(on_entry, current - on_entry));
1217             return current - input;
1218           }
1219 
1220           const char c = *current;
1221           if (HeaderFramingFound(c) != 0) {
1222             // If we've found a "\r\n\r\n", then the message
1223             // is done.
1224             ++current;
1225             parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1226             visitor_->OnRawBodyInput(
1227                 absl::string_view(on_entry, current - on_entry));
1228             visitor_->MessageDone();
1229             return current - input;
1230           }
1231 
1232           // If not, however, since the spec only suggests that the
1233           // client SHOULD indicate the presence of trailers, we get to
1234           // *test* that they did or didn't.
1235           // If all of the bytes we've seen since:
1236           //   OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1237           // are either '\r', or '\n', then we can assume that we don't yet
1238           // know if we need to parse headers, or if the next byte will make
1239           // the HeaderFramingFound condition (above) true.
1240           if (!HeaderFramingMayBeFound()) {
1241             break;
1242           }
1243 
1244           // If HeaderFramingMayBeFound(), then we have seen only characters
1245           // '\r' or '\n'.
1246           ++current;
1247 
1248           // Lets try again! There is no state change here.
1249         }
1250 
1251         // If (!HeaderFramingMayBeFound()), then we know that we must be
1252         // reading the first non CRLF character of a trailer.
1253         parse_state_ = BalsaFrameEnums::READING_TRAILER;
1254         visitor_->OnRawBodyInput(
1255             absl::string_view(on_entry, current - on_entry));
1256         on_entry = current;
1257         continue;
1258 
1259       // TODO(yongfa): No leading whitespace is allowed before field-name per
1260       // RFC2616. Leading whitespace will cause header parsing error too.
1261       case BalsaFrameEnums::READING_TRAILER:
1262         while (current < end) {
1263           const char c = *current;
1264           ++current;
1265           ++trailer_length_;
1266           if (GetTrailers() != nullptr) {
1267             // Reuse the header length limit for trailer, which is just a bunch
1268             // of headers.
1269             if (trailer_length_ > max_header_length_) {
1270               --current;
1271               HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
1272               return current - input;
1273             }
1274             if (LineFramingFound(c)) {
1275               trailer_lines_.push_back(
1276                   std::make_pair(start_of_trailer_line_, trailer_length_));
1277               start_of_trailer_line_ = trailer_length_;
1278             }
1279           }
1280           if (HeaderFramingFound(c) != 0) {
1281             parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1282             if (BalsaHeaders* trailers = GetTrailers(); trailers != nullptr) {
1283               trailers->WriteFromFramer(on_entry, current - on_entry);
1284               trailers->DoneWritingFromFramer();
1285               ProcessHeaderLines(trailer_lines_, true /*is_trailer*/, trailers);
1286               if (parse_state_ == BalsaFrameEnums::ERROR) {
1287                 return current - input;
1288               }
1289               if (trailers_ != nullptr) {
1290                 visitor_->OnTrailers(std::move(trailers_));
1291 
1292                 // Allows trailers to be delivered without another call to
1293                 // EnableTrailers() in case the framer is Reset().
1294                 trailers_ = std::make_unique<BalsaHeaders>();
1295               } else {
1296                 visitor_->ProcessTrailers(*trailer_);
1297               }
1298             }
1299             visitor_->OnTrailerInput(
1300                 absl::string_view(on_entry, current - on_entry));
1301             visitor_->MessageDone();
1302             return current - input;
1303           }
1304         }
1305         if (BalsaHeaders* trailers = GetTrailers(); trailers != nullptr) {
1306           trailers->WriteFromFramer(on_entry, current - on_entry);
1307         }
1308         visitor_->OnTrailerInput(
1309             absl::string_view(on_entry, current - on_entry));
1310         return current - input;
1311 
1312       case BalsaFrameEnums::READING_UNTIL_CLOSE: {
1313         const size_t bytes_remaining = end - current;
1314         if (bytes_remaining > 0) {
1315           visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
1316           visitor_->OnBodyChunkInput(
1317               absl::string_view(current, bytes_remaining));
1318           current += bytes_remaining;
1319         }
1320         return current - input;
1321       }
1322 
1323       case BalsaFrameEnums::READING_CONTENT:
1324         while ((content_length_remaining_ != 0u) && current < end) {
1325           // read in the content
1326           const size_t bytes_remaining = end - current;
1327           const size_t consumed_bytes =
1328               (content_length_remaining_ < bytes_remaining)
1329                   ? content_length_remaining_
1330                   : bytes_remaining;
1331           visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
1332           visitor_->OnBodyChunkInput(
1333               absl::string_view(current, consumed_bytes));
1334           current += consumed_bytes;
1335           content_length_remaining_ -= consumed_bytes;
1336         }
1337         if (content_length_remaining_ == 0) {
1338           parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1339           visitor_->MessageDone();
1340         }
1341         return current - input;
1342 
1343       default:
1344         // The state-machine should never be in a state that isn't handled
1345         // above.  This is a glaring logic error, and we should do something
1346         // drastic to ensure that this gets looked-at and fixed.
1347         QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_  // COV_NF_LINE
1348                           << " memory corruption?!";            // COV_NF_LINE
1349     }
1350   }
1351 }
1352 
HandleHeadersTooLongError()1353 void BalsaFrame::HandleHeadersTooLongError() {
1354   if (parse_truncated_headers_even_when_headers_too_long_) {
1355     const size_t len = headers_->GetReadableBytesFromHeaderStream();
1356     const char* stream_begin = headers_->OriginalHeaderStreamBegin();
1357 
1358     if (last_slash_n_idx_ < len && stream_begin[last_slash_n_idx_] != '\r') {
1359       // We write an end to the truncated line, and a blank line to end the
1360       // headers, to end up with something that will parse.
1361       static const absl::string_view kTwoLineEnds = "\r\n\r\n";
1362       headers_->WriteFromFramer(kTwoLineEnds.data(), kTwoLineEnds.size());
1363 
1364       // This is the last, truncated line.
1365       lines_.push_back(std::make_pair(last_slash_n_idx_, len + 2));
1366       // A blank line to end the headers.
1367       lines_.push_back(std::make_pair(len + 2, len + 4));
1368     }
1369 
1370     ProcessHeaderLines(lines_, /*is_trailer=*/false, headers_);
1371   }
1372 
1373   HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
1374 }
1375 
GetTrailers() const1376 BalsaHeaders* BalsaFrame::GetTrailers() const {
1377   if (trailers_ != nullptr) {
1378     return trailers_.get();
1379   }
1380   return trailer_;
1381 }
1382 
// Out-of-class definitions of BalsaFrame's static constant members.
// NOTE(review): presumably these are declared with their initializers in
// balsa_frame.h and are defined here so that they have storage when odr-used
// -- confirm against the header.
1383 const int32_t BalsaFrame::kValidTerm1;
1384 const int32_t BalsaFrame::kValidTerm1Mask;
1385 const int32_t BalsaFrame::kValidTerm2;
1386 const int32_t BalsaFrame::kValidTerm2Mask;
1387 
1388 }  // namespace quiche
1389 
// Remove the unsigned-char comparison helper macros that are #defined near the
// top of this file, so that they do not stay defined past this point.
1390 #undef CHAR_LT
1391 #undef CHAR_LE
1392 #undef CHAR_GT
1393 #undef CHAR_GE
1394 #undef QUICHE_DCHECK_CHAR_GE
1395