1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // A lightweight implementation for storing HTTP headers.
6
7 #ifndef QUICHE_BALSA_BALSA_HEADERS_H_
8 #define QUICHE_BALSA_BALSA_HEADERS_H_
9
10 #include <cstddef>
11 #include <cstring>
12 #include <functional>
13 #include <iterator>
14 #include <memory>
15 #include <ostream>
16 #include <string>
17 #include <utility>
18 #include <vector>
19
20 #include "absl/container/flat_hash_map.h"
21 #include "absl/container/flat_hash_set.h"
22 #include "absl/memory/memory.h"
23 #include "absl/strings/ascii.h"
24 #include "absl/strings/match.h"
25 #include "absl/strings/string_view.h"
26 #include "absl/types/optional.h"
27 #include "quiche/balsa/balsa_enums.h"
28 #include "quiche/balsa/header_api.h"
29 #include "quiche/balsa/standard_header_map.h"
30 #include "quiche/common/platform/api/quiche_bug_tracker.h"
31 #include "quiche/common/platform/api/quiche_export.h"
32 #include "quiche/common/platform/api/quiche_logging.h"
33
34 namespace gfe2 {
35 class Http2HeaderValidator;
36 } // namespace gfe2
37
38 namespace quiche {
39
40 namespace test {
41 class BalsaHeadersTestPeer;
42 } // namespace test
43
44 // WARNING:
45 // Note that -no- char* returned by any function in this
46 // file is null-terminated.
47
48 // This class exists to service the specific needs of BalsaHeaders.
49 //
50 // Functional goals:
// 1) provide a backing-store for all of the StringPieces that BalsaHeaders
// returns. Every StringPiece returned from BalsaHeaders should remain
// valid until the BalsaHeaders object is cleared, or the header-line is
// erased.
55 // 2) provide a backing-store for BalsaFrame, which requires contiguous memory
56 // for its fast-path parsing functions. Note that the cost of copying is
57 // less than the cost of requiring the parser to do slow-path parsing, as
58 // it would have to check for bounds every byte, instead of every 16 bytes.
59 //
60 // This class is optimized for the case where headers are stored in one of two
61 // buffers. It doesn't make a lot of effort to densely pack memory-- in fact,
62 // it -may- be somewhat memory inefficient. This possible inefficiency allows a
63 // certain simplicity of implementation and speed which makes it worthwhile.
64 // If, in the future, better memory density is required, it should be possible
65 // to reuse the abstraction presented by this object to achieve those goals.
66 //
67 // In the most common use-case, this memory inefficiency should be relatively
68 // small.
69 //
70 // Alternate implementations of BalsaBuffer may include:
71 // - vector of strings, one per header line (similar to HTTPHeaders)
72 // - densely packed strings:
73 // - keep a sorted array/map of free-space linked lists or numbers.
// - use the entry that most closely fits your needs.
75 // - at this point, perhaps just use a vector of strings, and let
76 // the allocator do the right thing.
77 //
78 class QUICHE_EXPORT BalsaBuffer {
79 public:
80 static constexpr size_t kDefaultBlocksize = 4096;
81
82 // The BufferBlock is a structure used internally by the
83 // BalsaBuffer class to store the base buffer pointers to
84 // each block, as well as the important metadata for buffer
85 // sizes and bytes free. It *may* be possible to replace this
86 // with a vector<char>, but it's unclear whether moving a vector
87 // can invalidate pointers into it. LWG issue 2321 proposes to fix this.
88 struct QUICHE_EXPORT BufferBlock {
89 public:
90 std::unique_ptr<char[]> buffer;
91 size_t buffer_size = 0;
92 size_t bytes_free = 0;
93
bytes_usedBufferBlock94 size_t bytes_used() const { return buffer_size - bytes_free; }
start_of_unused_bytesBufferBlock95 char* start_of_unused_bytes() const { return buffer.get() + bytes_used(); }
96
BufferBlockBufferBlock97 BufferBlock() {}
98
BufferBlockBufferBlock99 BufferBlock(std::unique_ptr<char[]> buf, size_t size, size_t free)
100 : buffer(std::move(buf)), buffer_size(size), bytes_free(free) {}
101
102 BufferBlock(const BufferBlock&) = delete;
103 BufferBlock& operator=(const BufferBlock&) = delete;
104 BufferBlock(BufferBlock&&) = default;
105 BufferBlock& operator=(BufferBlock&&) = default;
106
107 // Note: allocating a fresh buffer even if we could reuse an old one may let
108 // us shed memory, and invalidates old StringPieces (making them easier to
109 // catch with asan).
CopyFromBufferBlock110 void CopyFrom(const BufferBlock& rhs) {
111 QUICHE_DCHECK(this != &rhs);
112 buffer_size = rhs.buffer_size;
113 bytes_free = rhs.bytes_free;
114 if (rhs.buffer == nullptr) {
115 buffer = nullptr;
116 } else {
117 buffer = std::make_unique<char[]>(buffer_size);
118 memcpy(buffer.get(), rhs.buffer.get(), rhs.bytes_used());
119 }
120 }
121 };
122
123 typedef std::vector<BufferBlock> Blocks;
124
BalsaBuffer()125 BalsaBuffer()
126 : blocksize_(kDefaultBlocksize), can_write_to_contiguous_buffer_(true) {}
127
BalsaBuffer(size_t blocksize)128 explicit BalsaBuffer(size_t blocksize)
129 : blocksize_(blocksize), can_write_to_contiguous_buffer_(true) {}
130
131 BalsaBuffer(const BalsaBuffer&) = delete;
132 BalsaBuffer& operator=(const BalsaBuffer&) = delete;
133 BalsaBuffer(BalsaBuffer&&) = default;
134 BalsaBuffer& operator=(BalsaBuffer&&) = default;
135
136 // Returns the total amount of memory reserved by the buffer blocks.
GetTotalBufferBlockSize()137 size_t GetTotalBufferBlockSize() const {
138 size_t buffer_size = 0;
139 for (Blocks::const_iterator iter = blocks_.begin(); iter != blocks_.end();
140 ++iter) {
141 buffer_size += iter->buffer_size;
142 }
143 return buffer_size;
144 }
145
146 // Returns the total amount of memory used by the buffer blocks.
GetTotalBytesUsed()147 size_t GetTotalBytesUsed() const {
148 size_t bytes_used = 0;
149 for (const auto& b : blocks_) {
150 bytes_used += b.bytes_used();
151 }
152 return bytes_used;
153 }
154
GetPtr(Blocks::size_type block_idx)155 const char* GetPtr(Blocks::size_type block_idx) const {
156 QUICHE_DCHECK_LT(block_idx, blocks_.size())
157 << block_idx << ", " << blocks_.size();
158 return block_idx >= blocks_.size() ? nullptr
159 : blocks_[block_idx].buffer.get();
160 }
161
GetPtr(Blocks::size_type block_idx)162 char* GetPtr(Blocks::size_type block_idx) {
163 QUICHE_DCHECK_LT(block_idx, blocks_.size())
164 << block_idx << ", " << blocks_.size();
165 return block_idx >= blocks_.size() ? nullptr
166 : blocks_[block_idx].buffer.get();
167 }
168
169 // This function is different from Reserve(), as it ensures that the data
170 // stored via subsequent calls to this function are all contiguous (and in
171 // the order in which these writes happened). This is essentially the same
172 // as a string append.
173 //
174 // You may call this function at any time between object
175 // construction/Clear(), and the calling of the
176 // NoMoreWriteToContiguousBuffer() function.
177 //
178 // You must not call this function after the NoMoreWriteToContiguousBuffer()
179 // function is called, unless a Clear() has been called since.
180 // If you do, the program will abort().
181 //
182 // This condition is placed upon this code so that calls to Reserve() can
183 // append to the buffer in the first block safely, and without invaliding
184 // the StringPiece which it returns.
185 //
186 // This function's main intended user is the BalsaFrame class, which,
187 // for reasons of efficiency, requires that the buffer from which it parses
188 // the headers be contiguous.
189 //
WriteToContiguousBuffer(absl::string_view sp)190 void WriteToContiguousBuffer(absl::string_view sp) {
191 if (sp.empty()) {
192 return;
193 }
194 QUICHE_CHECK(can_write_to_contiguous_buffer_);
195
196 if (blocks_.empty()) {
197 blocks_.push_back(AllocBlock());
198 }
199
200 QUICHE_DCHECK_GE(blocks_.size(), 1u);
201 if (blocks_[0].buffer == nullptr && sp.size() <= blocksize_) {
202 blocks_[0] = AllocBlock();
203 memcpy(blocks_[0].start_of_unused_bytes(), sp.data(), sp.size());
204 } else if (blocks_[0].bytes_free < sp.size()) {
205 // the first block isn't big enough, resize it.
206 const size_t old_storage_size_used = blocks_[0].bytes_used();
207 // Increase to at least 2*old_storage_size_used; if sp.size() is larger,
208 // we'll increase by that amount.
209 const size_t new_storage_size =
210 old_storage_size_used + (old_storage_size_used < sp.size()
211 ? sp.size()
212 : old_storage_size_used);
213 std::unique_ptr<char[]> new_storage{new char[new_storage_size]};
214 char* old_storage = blocks_[0].buffer.get();
215 if (old_storage_size_used != 0u) {
216 memcpy(new_storage.get(), old_storage, old_storage_size_used);
217 }
218 memcpy(new_storage.get() + old_storage_size_used, sp.data(), sp.size());
219 blocks_[0].buffer = std::move(new_storage);
220 blocks_[0].bytes_free = new_storage_size - old_storage_size_used;
221 blocks_[0].buffer_size = new_storage_size;
222 } else {
223 memcpy(blocks_[0].start_of_unused_bytes(), sp.data(), sp.size());
224 }
225 blocks_[0].bytes_free -= sp.size();
226 }
227
NoMoreWriteToContiguousBuffer()228 void NoMoreWriteToContiguousBuffer() {
229 can_write_to_contiguous_buffer_ = false;
230 }
231
232 // Reserves "permanent" storage of the size indicated. Returns a pointer to
233 // the beginning of that storage, and assigns the index of the block used to
234 // block_buffer_idx. This function uses the first block IFF the
235 // NoMoreWriteToContiguousBuffer function has been called since the last
236 // Clear/Construction.
Reserve(size_t size,Blocks::size_type * block_buffer_idx)237 char* Reserve(size_t size, Blocks::size_type* block_buffer_idx) {
238 if (blocks_.empty()) {
239 blocks_.push_back(AllocBlock());
240 }
241
242 // There should always be a 'first_block', even if it
243 // contains nothing.
244 QUICHE_DCHECK_GE(blocks_.size(), 1u);
245 BufferBlock* block = nullptr;
246 Blocks::size_type block_idx = can_write_to_contiguous_buffer_ ? 1 : 0;
247 for (; block_idx < blocks_.size(); ++block_idx) {
248 if (blocks_[block_idx].bytes_free >= size) {
249 block = &blocks_[block_idx];
250 break;
251 }
252 }
253 if (block == nullptr) {
254 if (blocksize_ < size) {
255 blocks_.push_back(AllocCustomBlock(size));
256 } else {
257 blocks_.push_back(AllocBlock());
258 }
259 block = &blocks_.back();
260 }
261
262 char* storage = block->start_of_unused_bytes();
263 block->bytes_free -= size;
264 if (block_buffer_idx != nullptr) {
265 *block_buffer_idx = block_idx;
266 }
267 return storage;
268 }
269
Clear()270 void Clear() {
271 blocks_.clear();
272 blocks_.shrink_to_fit();
273 can_write_to_contiguous_buffer_ = true;
274 }
275
CopyFrom(const BalsaBuffer & b)276 void CopyFrom(const BalsaBuffer& b) {
277 blocks_.resize(b.blocks_.size());
278 for (Blocks::size_type i = 0; i < blocks_.size(); ++i) {
279 blocks_[i].CopyFrom(b.blocks_[i]);
280 }
281 blocksize_ = b.blocksize_;
282 can_write_to_contiguous_buffer_ = b.can_write_to_contiguous_buffer_;
283 }
284
StartOfFirstBlock()285 const char* StartOfFirstBlock() const {
286 QUICHE_BUG_IF(bug_if_1182_1, blocks_.empty())
287 << "First block not allocated yet!";
288 return blocks_.empty() ? nullptr : blocks_[0].buffer.get();
289 }
290
EndOfFirstBlock()291 const char* EndOfFirstBlock() const {
292 QUICHE_BUG_IF(bug_if_1182_2, blocks_.empty())
293 << "First block not allocated yet!";
294 return blocks_.empty() ? nullptr : blocks_[0].start_of_unused_bytes();
295 }
296
GetReadableBytesOfFirstBlock()297 size_t GetReadableBytesOfFirstBlock() const {
298 return blocks_.empty() ? 0 : blocks_[0].bytes_used();
299 }
300
can_write_to_contiguous_buffer()301 bool can_write_to_contiguous_buffer() const {
302 return can_write_to_contiguous_buffer_;
303 }
blocksize()304 size_t blocksize() const { return blocksize_; }
num_blocks()305 Blocks::size_type num_blocks() const { return blocks_.size(); }
buffer_size(size_t idx)306 size_t buffer_size(size_t idx) const { return blocks_[idx].buffer_size; }
bytes_used(size_t idx)307 size_t bytes_used(size_t idx) const { return blocks_[idx].bytes_used(); }
308
309 private:
AllocBlock()310 BufferBlock AllocBlock() { return AllocCustomBlock(blocksize_); }
311
AllocCustomBlock(size_t blocksize)312 BufferBlock AllocCustomBlock(size_t blocksize) {
313 return BufferBlock{std::make_unique<char[]>(blocksize), blocksize,
314 blocksize};
315 }
316
317 // A container of BufferBlocks
318 Blocks blocks_;
319
320 // The default allocation size for a block.
321 // In general, blocksize_ bytes will be allocated for
322 // each buffer.
323 size_t blocksize_;
324
325 // If set to true, then the first block cannot be used for Reserve() calls as
326 // the WriteToContiguous... function will modify the base pointer for this
327 // block, and the Reserve() calls need to be sure that the base pointer will
328 // not be changing in order to provide the user with StringPieces which
329 // continue to be valid.
330 bool can_write_to_contiguous_buffer_;
331 };
332
333 ////////////////////////////////////////////////////////////////////////////////
334
335 // All of the functions in the BalsaHeaders class use string pieces, by either
336 // using the StringPiece class, or giving an explicit size and char* (as these
337 // are the native representation for these string pieces).
338 // This is done for several reasons.
339 // 1) This minimizes copying/allocation/deallocation as compared to using
340 // string parameters
// 2) This reduces the number of strlen() calls done (as the length of any
// string passed in is relatively likely to be known at compile time), and for
// those strings passed back we obviate the need for a strlen() to determine
// the size of new storage allocations if a new allocation is required.
345 // 3) This class attempts to store all of its data in two linear buffers in
346 // order to enhance the speed of parsing and writing out to a buffer. As a
347 // result, many string pieces are -not- terminated by '\0', and are not
348 // c-strings. Since this is the case, we must delineate the length of the
349 // string explicitly via a length.
350 //
// WARNING: The side effect of using StringPiece is that if the underlying
// buffer changes (due to modifying the headers), the StringPieces which point
// to the data which was modified may now contain "garbage", and should not
// be dereferenced.
// For example, if you fetch some component of the first-line (request or
// response), and then you modify the first line, the StringPieces you
// originally received from the original first-line may no longer be valid.
358 //
359 // StringPieces pointing to pieces of header lines which have not been
360 // erased() or modified should be valid until the object is cleared or
361 // destroyed.
362 //
363 // Key comparisons are case-insensitive.
364
365 class QUICHE_EXPORT BalsaHeaders : public HeaderApi {
366 public:
367 // Each header line is parsed into a HeaderLineDescription, which maintains
368 // pointers into the BalsaBuffer.
369 //
370 // Succinctly describes one header line as indices into a buffer.
371 struct QUICHE_EXPORT HeaderLineDescription {
HeaderLineDescriptionHeaderLineDescription372 HeaderLineDescription(size_t first_character_index, size_t key_end_index,
373 size_t value_begin_index, size_t last_character_index,
374 size_t buffer_base_index)
375 : first_char_idx(first_character_index),
376 key_end_idx(key_end_index),
377 value_begin_idx(value_begin_index),
378 last_char_idx(last_character_index),
379 buffer_base_idx(buffer_base_index),
380 skip(false) {}
381
HeaderLineDescriptionHeaderLineDescription382 HeaderLineDescription()
383 : first_char_idx(0),
384 key_end_idx(0),
385 value_begin_idx(0),
386 last_char_idx(0),
387 buffer_base_idx(0),
388 skip(false) {}
389
KeyLengthHeaderLineDescription390 size_t KeyLength() const {
391 QUICHE_DCHECK_GE(key_end_idx, first_char_idx);
392 return key_end_idx - first_char_idx;
393 }
ValuesLengthHeaderLineDescription394 size_t ValuesLength() const {
395 QUICHE_DCHECK_GE(last_char_idx, value_begin_idx);
396 return last_char_idx - value_begin_idx;
397 }
398
399 size_t first_char_idx;
400 size_t key_end_idx;
401 size_t value_begin_idx;
402 size_t last_char_idx;
403 BalsaBuffer::Blocks::size_type buffer_base_idx;
404 bool skip;
405 };
406
407 using HeaderTokenList = std::vector<absl::string_view>;
408
409 // An iterator for walking through all the header lines.
410 class const_header_lines_iterator;
411
412 // An iterator that only stops at lines with a particular key
413 // (case-insensitive). See also GetIteratorForKey.
414 //
415 // Check against header_lines_key_end() to determine when iteration is
416 // finished. lines().end() will also work.
417 class const_header_lines_key_iterator;
418
419 // A simple class that can be used in a range-based for loop.
420 template <typename IteratorType>
421 class QUICHE_EXPORT iterator_range {
422 public:
423 using iterator = IteratorType;
424 using const_iterator = IteratorType;
425 using value_type = typename std::iterator_traits<IteratorType>::value_type;
426
iterator_range(IteratorType begin_iterator,IteratorType end_iterator)427 iterator_range(IteratorType begin_iterator, IteratorType end_iterator)
428 : begin_iterator_(std::move(begin_iterator)),
429 end_iterator_(std::move(end_iterator)) {}
430
begin()431 IteratorType begin() const { return begin_iterator_; }
end()432 IteratorType end() const { return end_iterator_; }
433
434 private:
435 IteratorType begin_iterator_, end_iterator_;
436 };
437
438 // Set of names of headers that might have multiple values.
439 // CoalesceOption::kCoalesce can be used to match Envoy behavior in
440 // WriteToBuffer().
441 using MultivaluedHeadersSet =
442 absl::flat_hash_set<absl::string_view, StringPieceCaseHash,
443 StringPieceCaseEqual>;
444
445 // Map of key => vector<value>, where vector contains ordered list of all
446 // values for |key| (ignoring the casing).
447 using MultivaluedHeadersValuesMap =
448 absl::flat_hash_map<absl::string_view, std::vector<absl::string_view>,
449 StringPieceCaseHash, StringPieceCaseEqual>;
450
BalsaHeaders()451 BalsaHeaders()
452 : balsa_buffer_(4096),
453 content_length_(0),
454 content_length_status_(BalsaHeadersEnums::NO_CONTENT_LENGTH),
455 parsed_response_code_(0),
456 firstline_buffer_base_idx_(0),
457 whitespace_1_idx_(0),
458 non_whitespace_1_idx_(0),
459 whitespace_2_idx_(0),
460 non_whitespace_2_idx_(0),
461 whitespace_3_idx_(0),
462 non_whitespace_3_idx_(0),
463 whitespace_4_idx_(0),
464 transfer_encoding_is_chunked_(false) {}
465
BalsaHeaders(size_t bufsize)466 explicit BalsaHeaders(size_t bufsize)
467 : balsa_buffer_(bufsize),
468 content_length_(0),
469 content_length_status_(BalsaHeadersEnums::NO_CONTENT_LENGTH),
470 parsed_response_code_(0),
471 firstline_buffer_base_idx_(0),
472 whitespace_1_idx_(0),
473 non_whitespace_1_idx_(0),
474 whitespace_2_idx_(0),
475 non_whitespace_2_idx_(0),
476 whitespace_3_idx_(0),
477 non_whitespace_3_idx_(0),
478 whitespace_4_idx_(0),
479 transfer_encoding_is_chunked_(false) {}
480
481 // Copying BalsaHeaders is expensive, so require that it be visible.
482 BalsaHeaders(const BalsaHeaders&) = delete;
483 BalsaHeaders& operator=(const BalsaHeaders&) = delete;
484 BalsaHeaders(BalsaHeaders&&) = default;
485 BalsaHeaders& operator=(BalsaHeaders&&) = default;
486
487 // Returns a range that represents all of the header lines.
488 iterator_range<const_header_lines_iterator> lines() const;
489
490 // Returns an iterator range consisting of the header lines matching key.
491 // String backing 'key' must remain valid for lifetime of range.
492 iterator_range<const_header_lines_key_iterator> lines(
493 absl::string_view key) const;
494
495 // Returns a forward-only iterator that only stops at lines matching key.
496 // String backing 'key' must remain valid for lifetime of iterator.
497 //
498 // Check returned iterator against header_lines_key_end() to determine when
499 // iteration is finished.
500 //
501 // Consider calling lines(key)--it may be more readable.
502 const_header_lines_key_iterator GetIteratorForKey(
503 absl::string_view key) const;
504
505 const_header_lines_key_iterator header_lines_key_end() const;
506
507 void erase(const const_header_lines_iterator& it);
508
509 void Clear();
510
511 // Explicit copy functions to avoid risk of accidental copies.
Copy()512 BalsaHeaders Copy() const {
513 BalsaHeaders copy;
514 copy.CopyFrom(*this);
515 return copy;
516 }
517 void CopyFrom(const BalsaHeaders& other);
518
519 // Replaces header entries with key 'key' if they exist, or appends
520 // a new header if none exist. See 'AppendHeader' below for additional
521 // comments about ContentLength and TransferEncoding headers. Note that this
522 // will allocate new storage every time that it is called.
523 void ReplaceOrAppendHeader(absl::string_view key,
524 absl::string_view value) override;
525
526 // Append a new header entry to the header object. Clients who wish to append
527 // Content-Length header should use SetContentLength() method instead of
528 // adding the content length header using AppendHeader (manually adding the
529 // content length header will not update the content_length_ and
530 // content_length_status_ values).
531 // Similarly, clients who wish to add or remove the transfer encoding header
532 // in order to apply or remove chunked encoding should use
533 // SetTransferEncodingToChunkedAndClearContentLength() or
534 // SetNoTransferEncoding() instead.
535 void AppendHeader(absl::string_view key, absl::string_view value) override;
536
537 // Appends ',value' to an existing header named 'key'. If no header with the
538 // correct key exists, it will call AppendHeader(key, value). Calling this
539 // function on a key which exists several times in the headers will produce
540 // unpredictable results.
541 void AppendToHeader(absl::string_view key, absl::string_view value) override;
542
543 // Appends ', value' to an existing header named 'key'. If no header with the
544 // correct key exists, it will call AppendHeader(key, value). Calling this
545 // function on a key which exists several times in the headers will produce
546 // unpredictable results.
547 void AppendToHeaderWithCommaAndSpace(absl::string_view key,
548 absl::string_view value) override;
549
550 // Returns the value corresponding to the given header key. Returns an empty
551 // string if the header key does not exist. For headers that may consist of
552 // multiple lines, use GetAllOfHeader() instead.
553 // Make the QuicheLowerCaseString overload visible,
554 // and only override the absl::string_view one.
555 using HeaderApi::GetHeader;
556 absl::string_view GetHeader(absl::string_view key) const override;
557
558 // Iterates over all currently valid header lines, appending their
559 // values into the vector 'out', in top-to-bottom order.
560 // Header-lines which have been erased are not currently valid, and
561 // will not have their values appended. Empty values will be
562 // represented as empty string. If 'key' doesn't exist in the headers at
563 // all, out will not be changed. We do not clear the vector out
564 // before adding new entries. If there are header lines with matching
565 // key but empty value then they are also added to the vector out.
566 // (Basically empty values are not treated in any special manner).
567 //
568 // Example:
569 // Input header:
570 // "GET / HTTP/1.0\r\n"
571 // "key1: v1\r\n"
572 // "key1: \r\n"
573 // "key1:\r\n"
574 // "key1: v1\r\n"
575 // "key1:v2\r\n"
576 //
577 // vector out is initially: ["foo"]
578 // vector out after GetAllOfHeader("key1", &out) is:
579 // ["foo", "v1", "", "", "v1", "v2"]
580 //
581 // See gfe::header_properties::IsMultivaluedHeader() for which headers
582 // GFE treats as being multivalued.
583
584 // Make the QuicheLowerCaseString overload visible,
585 // and only override the absl::string_view one.
586 using HeaderApi::GetAllOfHeader;
587 void GetAllOfHeader(absl::string_view key,
588 std::vector<absl::string_view>* out) const override;
589
590 // Same as above, but iterates over all header lines including removed ones.
591 // Appends their values into the vector 'out' in top-to-bottom order,
592 // first all valid headers then all that were removed.
593 void GetAllOfHeaderIncludeRemoved(absl::string_view key,
594 std::vector<absl::string_view>* out) const;
595
596 // Joins all values for `key` into a comma-separated string.
597 // Make the QuicheLowerCaseString overload visible,
598 // and only override the absl::string_view one.
599 using HeaderApi::GetAllOfHeaderAsString;
600 std::string GetAllOfHeaderAsString(absl::string_view key) const override;
601
602 // Determine if a given header is present. Case-insensitive.
HasHeader(absl::string_view key)603 inline bool HasHeader(absl::string_view key) const override {
604 return GetConstHeaderLinesIterator(key) != header_lines_.end();
605 }
606
607 // Goes through all headers with key 'key' and checks to see if one of the
608 // values is 'value'. Returns true if there are headers with the desired key
609 // and value, false otherwise. Case-insensitive for the key; case-sensitive
610 // for the value.
HeaderHasValue(absl::string_view key,absl::string_view value)611 bool HeaderHasValue(absl::string_view key,
612 absl::string_view value) const override {
613 return HeaderHasValueHelper(key, value, true);
614 }
615 // Same as above, but also case-insensitive for the value.
HeaderHasValueIgnoreCase(absl::string_view key,absl::string_view value)616 bool HeaderHasValueIgnoreCase(absl::string_view key,
617 absl::string_view value) const override {
618 return HeaderHasValueHelper(key, value, false);
619 }
620
621 // Returns true iff any header 'key' exists with non-empty value.
622 bool HasNonEmptyHeader(absl::string_view key) const override;
623
624 const_header_lines_iterator GetHeaderPosition(absl::string_view key) const;
625
626 // Removes all headers in given set |keys| at once efficiently. Keys
627 // are case insensitive.
628 //
629 // Alternatives considered:
630 //
631 // 1. Use string_hash_set<>, the caller (such as ClearHopByHopHeaders) lower
632 // cases the keys and RemoveAllOfHeaderInList just does lookup. This according
633 // to microbenchmark gives the best performance because it does not require
634 // an extra copy of the hash table. However, it is not taken because of the
635 // possible risk that caller could forget to lowercase the keys.
636 //
637 // 2. Use flat_hash_set<StringPiece, StringPieceCaseHash,StringPieceCaseEqual>
638 // or string_hash_set<StringPieceCaseHash, StringPieceCaseEqual>. Both appear
639 // to have (much) worse performance with WithoutDupToken and LongHeader case
640 // in microbenchmark.
641 void RemoveAllOfHeaderInList(const HeaderTokenList& keys) override;
642
643 void RemoveAllOfHeader(absl::string_view key) override;
644
// Removes all headers whose key starts with 'prefix' [case insensitive]
646 void RemoveAllHeadersWithPrefix(absl::string_view prefix) override;
647
648 // Returns true if we have at least one header with given prefix
649 // [case insensitive]. Currently for test use only.
650 bool HasHeadersWithPrefix(absl::string_view prefix) const override;
651
652 // Returns the key value pairs for all headers where the header key begins
653 // with the specified prefix.
654 void GetAllOfHeaderWithPrefix(
655 absl::string_view prefix,
656 std::vector<std::pair<absl::string_view, absl::string_view>>* out)
657 const override;
658
659 void GetAllHeadersWithLimit(
660 std::vector<std::pair<absl::string_view, absl::string_view>>* out,
661 int limit) const override;
662
663 // Removes all values equal to a given value from header lines with given key.
664 // All string operations done here are case-sensitive.
665 // If a header line has only values matching the given value, the entire
666 // line is removed.
667 // If the given value is found in a multi-value header line mixed with other
668 // values, the line is edited in-place to remove the values.
669 // Returns the number of occurrences of value that were removed.
670 // This method runs in linear time.
671 size_t RemoveValue(absl::string_view key, absl::string_view value);
672
// Returns the upper bound on the required buffer space to fully write out
// the header object (this includes the first line, all header lines, and the
// final line separator that marks the ending of the header).
676 size_t GetSizeForWriteBuffer() const override;
677
// Indicates how header keys are cased when the headers are serialized.
enum class CaseOption {
  kNoModification,  // Keys are written exactly as stored.
  kLowercase,       // Keys are written in ASCII lower case.
  kPropercase,      // Keys are written with the spelling from the standard
                    // header set, or via ToPropercase() when not found there.
};
680
// Indicates whether to coalesce headers with multiple values to match
// Envoy/GFE3.
enum class CoalesceOption {
  kNoCoalesce,  // Do not coalesce.
  kCoalesce,    // Coalesce headers that have multiple values.
};
683
684 // The following WriteHeader* methods are template member functions that
685 // place one requirement on the Buffer class: it must implement a Write
686 // method that takes a pointer and a length. The buffer passed in is not
687 // required to be stretchable. For non-stretchable buffers, the user must
688 // call GetSizeForWriteBuffer() to find out the upper bound on the output
689 // buffer space required to make sure that the entire header is serialized.
690 // BalsaHeaders will not check that there is adequate space in the buffer
691 // object during the write.
692
693 // Writes the entire header and the final line separator that marks the end
694 // of the HTTP header section to the buffer. After this method returns, no
695 // more header data should be written to the buffer.
696 template <typename Buffer>
WriteHeaderAndEndingToBuffer(Buffer * buffer,CaseOption case_option,CoalesceOption coalesce_option)697 void WriteHeaderAndEndingToBuffer(Buffer* buffer, CaseOption case_option,
698 CoalesceOption coalesce_option) const {
699 WriteToBuffer(buffer, case_option, coalesce_option);
700 WriteHeaderEndingToBuffer(buffer);
701 }
702
703 template <typename Buffer>
WriteHeaderAndEndingToBuffer(Buffer * buffer)704 void WriteHeaderAndEndingToBuffer(Buffer* buffer) const {
705 WriteHeaderAndEndingToBuffer(buffer, CaseOption::kNoModification,
706 CoalesceOption::kNoCoalesce);
707 }
708
709 // Writes the final line separator to the buffer to terminate the HTTP header
710 // section. After this method returns, no more header data should be written
711 // to the buffer.
template <typename Buffer>
static void WriteHeaderEndingToBuffer(Buffer* buffer) {
  // The header section is closed by a single CRLF.
  constexpr char kHeaderTerminator[] = "\r\n";
  buffer->WriteString(kHeaderTerminator);
}
716
717 // Writes the entire header to the buffer without the line separator that
718 // terminates the HTTP header. This lets users append additional header lines
719 // using WriteHeaderLineToBuffer and then terminate the header with
720 // WriteHeaderEndingToBuffer as the header is serialized to the buffer,
721 // without having to first copy the header.
722 template <typename Buffer>
723 void WriteToBuffer(Buffer* buffer, CaseOption case_option,
724 CoalesceOption coalesce_option) const;
725
726 template <typename Buffer>
WriteToBuffer(Buffer * buffer)727 void WriteToBuffer(Buffer* buffer) const {
728 WriteToBuffer(buffer, CaseOption::kNoModification,
729 CoalesceOption::kNoCoalesce);
730 }
731
732 // Used by WriteToBuffer to coalesce multiple values of headers listed in
733 // |multivalued_headers| into a single comma-separated value. Public for test.
734 template <typename Buffer>
735 void WriteToBufferCoalescingMultivaluedHeaders(
736 Buffer* buffer, const MultivaluedHeadersSet& multivalued_headers,
737 CaseOption case_option) const;
738
739 // Populates |multivalues| with values of |header_lines_| with keys present
740 // in |multivalued_headers| set.
741 void GetValuesOfMultivaluedHeaders(
742 const MultivaluedHeadersSet& multivalued_headers,
743 MultivaluedHeadersValuesMap* multivalues) const;
744
ToPropercase(absl::string_view header)745 static std::string ToPropercase(absl::string_view header) {
746 std::string copy = std::string(header);
747 bool should_uppercase = true;
748 for (char& c : copy) {
749 if (!absl::ascii_isalnum(c)) {
750 should_uppercase = true;
751 } else if (should_uppercase) {
752 c = absl::ascii_toupper(c);
753 should_uppercase = false;
754 } else {
755 c = absl::ascii_tolower(c);
756 }
757 }
758 return copy;
759 }
760
761 template <typename Buffer>
WriteHeaderKeyToBuffer(Buffer * buffer,absl::string_view key,CaseOption case_option)762 void WriteHeaderKeyToBuffer(Buffer* buffer, absl::string_view key,
763 CaseOption case_option) const {
764 if (case_option == CaseOption::kLowercase) {
765 buffer->WriteString(absl::AsciiStrToLower(key));
766 } else if (case_option == CaseOption::kPropercase) {
767 const auto& header_set = quiche::GetStandardHeaderSet();
768 auto it = header_set.find(key);
769 if (it != header_set.end()) {
770 buffer->WriteString(*it);
771 } else {
772 buffer->WriteString(ToPropercase(key));
773 }
774 } else {
775 buffer->WriteString(key);
776 }
777 }
778
779 // Takes a header line in the form of a key/value pair and append it to the
780 // buffer. This function should be called after WriteToBuffer to
781 // append additional header lines to the header without copying the header.
782 // When the user is done with appending to the buffer,
783 // WriteHeaderEndingToBuffer must be used to terminate the HTTP
784 // header in the buffer. This method is a no-op if key is empty.
785 template <typename Buffer>
WriteHeaderLineToBuffer(Buffer * buffer,absl::string_view key,absl::string_view value,CaseOption case_option)786 void WriteHeaderLineToBuffer(Buffer* buffer, absl::string_view key,
787 absl::string_view value,
788 CaseOption case_option) const {
789 // If the key is empty, we don't want to write the rest because it
790 // will not be a well-formed header line.
791 if (!key.empty()) {
792 WriteHeaderKeyToBuffer(buffer, key, case_option);
793 buffer->WriteString(": ");
794 buffer->WriteString(value);
795 buffer->WriteString("\r\n");
796 }
797 }
798
799 // Takes a header line in the form of a key and vector of values and appends
800 // it to the buffer. This function should be called after WriteToBuffer to
801 // append additional header lines to the header without copying the header.
802 // When the user is done with appending to the buffer,
803 // WriteHeaderEndingToBuffer must be used to terminate the HTTP
804 // header in the buffer. This method is a no-op if the |key| is empty.
805 template <typename Buffer>
WriteHeaderLineValuesToBuffer(Buffer * buffer,absl::string_view key,const std::vector<absl::string_view> & values,CaseOption case_option)806 void WriteHeaderLineValuesToBuffer(
807 Buffer* buffer, absl::string_view key,
808 const std::vector<absl::string_view>& values,
809 CaseOption case_option) const {
810 // If the key is empty, we don't want to write the rest because it
811 // will not be a well-formed header line.
812 if (!key.empty()) {
813 WriteHeaderKeyToBuffer(buffer, key, case_option);
814 buffer->WriteString(": ");
815 for (auto it = values.begin();;) {
816 buffer->WriteString(*it);
817 if (++it == values.end()) {
818 break;
819 }
820 buffer->WriteString(",");
821 }
822 buffer->WriteString("\r\n");
823 }
824 }
825
826 // Dump the textual representation of the header object to a string, which
827 // is suitable for writing out to logs. All CRLF will be printed out as \n.
828 // This function can be called on a header object in any state. Raw header
829 // data will be printed out if the header object is not completely parsed,
830 // e.g., when there was an error in the middle of parsing.
831 // The header content is appended to the string; the original content is not
832 // cleared.
833 // If used in test cases, WillNotWriteFromFramer() may be of interest.
834 void DumpToString(std::string* str) const;
835 std::string DebugString() const override;
836
837 bool ForEachHeader(std::function<bool(const absl::string_view key,
838 const absl::string_view value)>
839 fn) const override;
840
841 void DumpToPrefixedString(const char* spaces, std::string* str) const;
842
first_line()843 absl::string_view first_line() const {
844 QUICHE_DCHECK_GE(whitespace_4_idx_, non_whitespace_1_idx_);
845 return whitespace_4_idx_ == non_whitespace_1_idx_
846 ? ""
847 : absl::string_view(
848 BeginningOfFirstLine() + non_whitespace_1_idx_,
849 whitespace_4_idx_ - non_whitespace_1_idx_);
850 }
first_line_of_request()851 std::string first_line_of_request() const override {
852 return std::string(first_line());
853 }
854
855 // Returns the parsed value of the response code if it has been parsed.
856 // Guaranteed to return 0 when unparsed (though it is a much better idea to
857 // verify that the BalsaFrame had no errors while parsing).
858 // This may return response codes which are outside the normal bounds of
859 // HTTP response codes-- it is up to the user of this class to ensure that
860 // the response code is one which is interpretable.
parsed_response_code()861 size_t parsed_response_code() const override { return parsed_response_code_; }
862
request_method()863 absl::string_view request_method() const override {
864 QUICHE_DCHECK_GE(whitespace_2_idx_, non_whitespace_1_idx_);
865 return whitespace_2_idx_ == non_whitespace_1_idx_
866 ? ""
867 : absl::string_view(
868 BeginningOfFirstLine() + non_whitespace_1_idx_,
869 whitespace_2_idx_ - non_whitespace_1_idx_);
870 }
871
response_version()872 absl::string_view response_version() const override {
873 // Note: There is no difference between request_method() and
874 // response_version(). They both could be called
875 // GetFirstTokenFromFirstline()... but that wouldn't be anywhere near as
876 // descriptive.
877 return request_method();
878 }
879
request_uri()880 absl::string_view request_uri() const override {
881 QUICHE_DCHECK_GE(whitespace_3_idx_, non_whitespace_2_idx_);
882 return whitespace_3_idx_ == non_whitespace_2_idx_
883 ? ""
884 : absl::string_view(
885 BeginningOfFirstLine() + non_whitespace_2_idx_,
886 whitespace_3_idx_ - non_whitespace_2_idx_);
887 }
888
response_code()889 absl::string_view response_code() const override {
890 // Note: There is no difference between request_uri() and response_code().
891 // They both could be called GetSecondtTokenFromFirstline(), but, as noted
892 // in an earlier comment, that wouldn't be as descriptive.
893 return request_uri();
894 }
895
request_version()896 absl::string_view request_version() const override {
897 QUICHE_DCHECK_GE(whitespace_4_idx_, non_whitespace_3_idx_);
898 return whitespace_4_idx_ == non_whitespace_3_idx_
899 ? ""
900 : absl::string_view(
901 BeginningOfFirstLine() + non_whitespace_3_idx_,
902 whitespace_4_idx_ - non_whitespace_3_idx_);
903 }
904
response_reason_phrase()905 absl::string_view response_reason_phrase() const override {
906 // Note: There is no difference between request_version() and
907 // response_reason_phrase(). They both could be called
908 // GetThirdTokenFromFirstline(), but, as noted in an earlier comment, that
909 // wouldn't be as descriptive.
910 return request_version();
911 }
912
SetRequestFirstlineFromStringPieces(absl::string_view method,absl::string_view uri,absl::string_view version)913 void SetRequestFirstlineFromStringPieces(absl::string_view method,
914 absl::string_view uri,
915 absl::string_view version) {
916 SetFirstlineFromStringPieces(method, uri, version);
917 }
918
919 void SetResponseFirstline(absl::string_view version,
920 size_t parsed_response_code,
921 absl::string_view reason_phrase);
922
923 // These functions are exactly the same, except that their names are
924 // different. This is done so that the code using this class is more
925 // expressive.
926 void SetRequestMethod(absl::string_view method) override;
927 void SetResponseVersion(absl::string_view version) override;
928
929 void SetRequestUri(absl::string_view uri) override;
930 void SetResponseCode(absl::string_view code) override;
set_parsed_response_code(size_t parsed_response_code)931 void set_parsed_response_code(size_t parsed_response_code) {
932 parsed_response_code_ = parsed_response_code;
933 }
934 void SetParsedResponseCodeAndUpdateFirstline(
935 size_t parsed_response_code) override;
936
937 // These functions are exactly the same, except that their names are
938 // different. This is done so that the code using this class is more
939 // expressive.
940 void SetRequestVersion(absl::string_view version) override;
941 void SetResponseReasonPhrase(absl::string_view reason_phrase) override;
942
943 // Simple accessors to some of the internal state
transfer_encoding_is_chunked()944 bool transfer_encoding_is_chunked() const {
945 return transfer_encoding_is_chunked_;
946 }
947
// Returns true for response codes that must not carry a body per the HTTP
// spec: every 1xx response, 204 No Content and 304 Not Modified.
static bool ResponseCodeImpliesNoBody(size_t code) {
  switch (code) {
    case 204:  // No Content
    case 304:  // Not Modified
      return true;
    default:
      // Informational responses: 100 through 199 inclusive.
      return code >= 100 && code <= 199;
  }
}
953
954 // Note: never check this for requests. Nothing bad will happen if you do,
955 // but spec does not allow requests framed by connection close.
956 // TODO(vitaliyl): refactor.
is_framed_by_connection_close()957 bool is_framed_by_connection_close() const {
958 // We declare that response is framed by connection close if it has no
959 // content-length, no transfer encoding, and is allowed to have a body by
960 // the HTTP spec.
961 // parsed_response_code_ is 0 for requests, so ResponseCodeImpliesNoBody
962 // will return false.
963 return (content_length_status_ == BalsaHeadersEnums::NO_CONTENT_LENGTH) &&
964 !transfer_encoding_is_chunked_ &&
965 !ResponseCodeImpliesNoBody(parsed_response_code_);
966 }
967
content_length()968 size_t content_length() const override { return content_length_; }
content_length_status()969 BalsaHeadersEnums::ContentLengthStatus content_length_status() const {
970 return content_length_status_;
971 }
content_length_valid()972 bool content_length_valid() const override {
973 return content_length_status_ == BalsaHeadersEnums::VALID_CONTENT_LENGTH;
974 }
975
976 // SetContentLength, SetTransferEncodingToChunkedAndClearContentLength, and
977 // SetNoTransferEncoding modifies the header object to use
978 // content-length and transfer-encoding headers in a consistent
979 // manner. They set all internal flags and status so client can get
980 // a consistent view from various accessors.
981 void SetContentLength(size_t length) override;
982 // Sets transfer-encoding to chunked and updates internal state.
983 void SetTransferEncodingToChunkedAndClearContentLength() override;
984 // Removes transfer-encoding headers and updates internal state.
985 void SetNoTransferEncoding() override;
986
987 // If you have a response that needs framing by connection close, use this
988 // method instead of RemoveAllOfHeader("Content-Length"). Has no effect if
989 // transfer_encoding_is_chunked().
990 void ClearContentLength();
991
992 // This should be called if balsa headers are created entirely manually (not
993 // by any of the framer classes) to make sure that function calls like
994 // DumpToString will work correctly.
WillNotWriteFromFramer()995 void WillNotWriteFromFramer() {
996 balsa_buffer_.NoMoreWriteToContiguousBuffer();
997 }
998
999 // True if DoneWritingFromFramer or WillNotWriteFromFramer is called.
FramerIsDoneWriting()1000 bool FramerIsDoneWriting() const {
1001 return !balsa_buffer_.can_write_to_contiguous_buffer();
1002 }
1003
1004 bool IsEmpty() const override;
1005
1006 // From HeaderApi and ConstHeaderApi.
1007 absl::string_view Authority() const override;
1008 void ReplaceOrAppendAuthority(absl::string_view value) override;
1009 void RemoveAuthority() override;
1010 void ApplyToCookie(
1011 std::function<void(absl::string_view cookie)> f) const override;
1012
set_enforce_header_policy(bool enforce)1013 void set_enforce_header_policy(bool enforce) override {
1014 enforce_header_policy_ = enforce;
1015 }
1016
1017 // Removes the last token from the header value. In the presence of multiple
1018 // header lines with given key, will remove the last token of the last line.
1019 // Can be useful if the last encoding has to be removed.
1020 void RemoveLastTokenFromHeaderValue(absl::string_view key);
1021
1022 // Gets the list of names of headers that are multivalued in Envoy.
1023 static const MultivaluedHeadersSet& multivalued_envoy_headers();
1024
1025 // Returns true if HTTP responses with this response code have bodies.
1026 static bool ResponseCanHaveBody(int response_code);
1027
1028 // Given a pointer to the beginning and the end of the header value
1029 // in some buffer, populates tokens list with beginning and end indices
1030 // of all tokens present in the value string.
1031 static void ParseTokenList(absl::string_view header_value,
1032 HeaderTokenList* tokens);
1033
1034 private:
1035 typedef std::vector<HeaderLineDescription> HeaderLines;
1036
1037 class iterator_base;
1038
1039 friend class BalsaFrame;
1040 friend class gfe2::Http2HeaderValidator;
1041 friend class SpdyPayloadFramer;
1042 friend class HTTPMessage;
1043 friend class test::BalsaHeadersTestPeer;
1044
1045 friend bool ParseHTTPFirstLine(const char* begin, const char* end,
1046 bool is_request, BalsaHeaders* headers,
1047 BalsaFrameEnums::ErrorCode* error_code);
1048
1049 // Reverse iterators have been removed for lack of use, refer to
1050 // cl/30618773 in case they are needed.
1051
BeginningOfFirstLine()1052 const char* BeginningOfFirstLine() const {
1053 return GetPtr(firstline_buffer_base_idx_);
1054 }
1055
BeginningOfFirstLine()1056 char* BeginningOfFirstLine() { return GetPtr(firstline_buffer_base_idx_); }
1057
GetPtr(BalsaBuffer::Blocks::size_type block_idx)1058 char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) {
1059 return balsa_buffer_.GetPtr(block_idx);
1060 }
1061
GetPtr(BalsaBuffer::Blocks::size_type block_idx)1062 const char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) const {
1063 return balsa_buffer_.GetPtr(block_idx);
1064 }
1065
WriteFromFramer(const char * ptr,size_t size)1066 void WriteFromFramer(const char* ptr, size_t size) {
1067 balsa_buffer_.WriteToContiguousBuffer(absl::string_view(ptr, size));
1068 }
1069
DoneWritingFromFramer()1070 void DoneWritingFromFramer() {
1071 balsa_buffer_.NoMoreWriteToContiguousBuffer();
1072 }
1073
OriginalHeaderStreamBegin()1074 const char* OriginalHeaderStreamBegin() const {
1075 return balsa_buffer_.StartOfFirstBlock();
1076 }
1077
OriginalHeaderStreamEnd()1078 const char* OriginalHeaderStreamEnd() const {
1079 return balsa_buffer_.EndOfFirstBlock();
1080 }
1081
GetReadableBytesFromHeaderStream()1082 size_t GetReadableBytesFromHeaderStream() const {
1083 return balsa_buffer_.GetReadableBytesOfFirstBlock();
1084 }
1085
GetReadablePtrFromHeaderStream()1086 absl::string_view GetReadablePtrFromHeaderStream() {
1087 return {OriginalHeaderStreamBegin(), GetReadableBytesFromHeaderStream()};
1088 }
1089
1090 absl::string_view GetValueFromHeaderLineDescription(
1091 const HeaderLineDescription& line) const;
1092
1093 void AddAndMakeDescription(absl::string_view key, absl::string_view value,
1094 HeaderLineDescription* d);
1095
1096 void AppendAndMakeDescription(absl::string_view key, absl::string_view value,
1097 HeaderLineDescription* d);
1098
1099 // Removes all header lines with the given key starting at start.
1100 void RemoveAllOfHeaderStartingAt(absl::string_view key,
1101 HeaderLines::iterator start);
1102
1103 HeaderLines::const_iterator GetConstHeaderLinesIterator(
1104 absl::string_view key) const;
1105
1106 HeaderLines::iterator GetHeaderLinesIterator(absl::string_view key,
1107 HeaderLines::iterator start);
1108
1109 HeaderLines::iterator GetHeaderLinesIteratorForLastMultivaluedHeader(
1110 absl::string_view key);
1111
1112 template <typename IteratorType>
1113 const IteratorType HeaderLinesBeginHelper() const;
1114
1115 template <typename IteratorType>
1116 const IteratorType HeaderLinesEndHelper() const;
1117
1118 // Helper function for HeaderHasValue and HeaderHasValueIgnoreCase that
1119 // does most of the work.
1120 bool HeaderHasValueHelper(absl::string_view key, absl::string_view value,
1121 bool case_sensitive) const;
1122
1123 // Called by header removal methods to reset internal values for transfer
1124 // encoding or content length if we're removing the corresponding headers.
1125 void MaybeClearSpecialHeaderValues(absl::string_view key);
1126
1127 void SetFirstlineFromStringPieces(absl::string_view firstline_a,
1128 absl::string_view firstline_b,
1129 absl::string_view firstline_c);
1130 BalsaBuffer balsa_buffer_;
1131
1132 size_t content_length_;
1133 BalsaHeadersEnums::ContentLengthStatus content_length_status_;
1134 size_t parsed_response_code_;
1135 // HTTP firstlines all have the following structure:
1136 //  LWS        NONWS   LWS    NONWS   LWS    NONWS   NOTCRLF  CRLF
1137 //  [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
1138 //  ws1        nws1    ws2    nws2    ws3    nws3             ws4
1139 //  |          [-------)      [-------)      [----------------)
1140 //   REQ:      method         request_uri    version
1141 //  RESP:      version        statuscode     reason
1142 //
1143 // The first NONWS->LWS component we'll call firstline_a.
1144 // The second firstline_b, and the third firstline_c.
1145 //
1146 // firstline_a goes from nws1 to (but not including) ws2
1147 // firstline_b goes from nws2 to (but not including) ws3
1148 // firstline_c goes from nws3 to (but not including) ws4
1149 //
1150 // In the code:
1151 // ws1 == whitespace_1_idx_
1152 // nws1 == non_whitespace_1_idx_
1153 // ws2 == whitespace_2_idx_
1154 // nws2 == non_whitespace_2_idx_
1155 // ws3 == whitespace_3_idx_
1156 // nws3 == non_whitespace_3_idx_
1157 // ws4 == whitespace_4_idx_
1158 BalsaBuffer::Blocks::size_type firstline_buffer_base_idx_;
1159 size_t whitespace_1_idx_;
1160 size_t non_whitespace_1_idx_;
1161 size_t whitespace_2_idx_;
1162 size_t non_whitespace_2_idx_;
1163 size_t whitespace_3_idx_;
1164 size_t non_whitespace_3_idx_;
1165 size_t whitespace_4_idx_;
1166
1167 bool transfer_encoding_is_chunked_;
1168
1169 // If true, QUICHE_BUG if a header that starts with an invalid prefix is
1170 // explicitly set.
1171 bool enforce_header_policy_ = true;
1172
1173 HeaderLines header_lines_;
1174 };
1175
1176 // Base class for iterating the headers in a BalsaHeaders object, returning a
1177 // pair of string_view's for each header.
1178 class QUICHE_EXPORT BalsaHeaders::iterator_base
1179 : public std::iterator<std::forward_iterator_tag,
1180 std::pair<absl::string_view, absl::string_view>> {
1181 public:
iterator_base()1182 iterator_base() : headers_(nullptr), idx_(0) {}
1183
1184 std::pair<absl::string_view, absl::string_view>& operator*() const {
1185 return Lookup(idx_);
1186 }
1187
1188 std::pair<absl::string_view, absl::string_view>* operator->() const {
1189 return &(this->operator*());
1190 }
1191
1192 bool operator==(const BalsaHeaders::iterator_base& it) const {
1193 return idx_ == it.idx_;
1194 }
1195
1196 bool operator<(const BalsaHeaders::iterator_base& it) const {
1197 return idx_ < it.idx_;
1198 }
1199
1200 bool operator<=(const BalsaHeaders::iterator_base& it) const {
1201 return idx_ <= it.idx_;
1202 }
1203
1204 bool operator!=(const BalsaHeaders::iterator_base& it) const {
1205 return !(*this == it);
1206 }
1207
1208 bool operator>(const BalsaHeaders::iterator_base& it) const {
1209 return it < *this;
1210 }
1211
1212 bool operator>=(const BalsaHeaders::iterator_base& it) const {
1213 return it <= *this;
1214 }
1215
1216 // This mainly exists so that we can have interesting output for
1217 // unittesting. The EXPECT_EQ, EXPECT_NE functions require that
1218 // operator<< work for the classes it sees. It would be better if there
1219 // was an additional traits-like system for the gUnit output... but oh
1220 // well.
1221 friend QUICHE_EXPORT std::ostream& operator<<(std::ostream& os,
1222 const iterator_base& it) {
1223 os << "[" << it.headers_ << ", " << it.idx_ << "]";
1224 return os;
1225 }
1226
1227 private:
1228 friend class BalsaHeaders;
1229
iterator_base(const BalsaHeaders * headers,HeaderLines::size_type index)1230 iterator_base(const BalsaHeaders* headers, HeaderLines::size_type index)
1231 : headers_(headers), idx_(index) {}
1232
increment()1233 void increment() {
1234 value_.reset();
1235 const HeaderLines& header_lines = headers_->header_lines_;
1236 const HeaderLines::size_type header_lines_size = header_lines.size();
1237 const HeaderLines::size_type original_idx = idx_;
1238 do {
1239 ++idx_;
1240 } while (idx_ < header_lines_size && header_lines[idx_].skip == true);
1241 // The condition below exists so that ++(end() - 1) == end(), even
1242 // if there are only 'skip == true' elements between the end() iterator
1243 // and the end of the vector of HeaderLineDescriptions.
1244 if (idx_ == header_lines_size) {
1245 idx_ = original_idx + 1;
1246 }
1247 }
1248
Lookup(HeaderLines::size_type index)1249 std::pair<absl::string_view, absl::string_view>& Lookup(
1250 HeaderLines::size_type index) const {
1251 QUICHE_DCHECK_LT(index, headers_->header_lines_.size());
1252 if (!value_.has_value()) {
1253 const HeaderLineDescription& line = headers_->header_lines_[index];
1254 const char* stream_begin = headers_->GetPtr(line.buffer_base_idx);
1255 value_ =
1256 std::make_pair(absl::string_view(stream_begin + line.first_char_idx,
1257 line.KeyLength()),
1258 absl::string_view(stream_begin + line.value_begin_idx,
1259 line.ValuesLength()));
1260 }
1261 return value_.value();
1262 }
1263
1264 const BalsaHeaders* headers_;
1265 HeaderLines::size_type idx_;
1266 mutable absl::optional<std::pair<absl::string_view, absl::string_view>>
1267 value_;
1268 };
1269
1270 // A const iterator for all the header lines.
1271 class QUICHE_EXPORT BalsaHeaders::const_header_lines_iterator
1272 : public BalsaHeaders::iterator_base {
1273 public:
const_header_lines_iterator()1274 const_header_lines_iterator() : iterator_base() {}
1275
1276 const_header_lines_iterator& operator++() {
1277 iterator_base::increment();
1278 return *this;
1279 }
1280
1281 private:
1282 friend class BalsaHeaders;
1283
const_header_lines_iterator(const BalsaHeaders * headers,HeaderLines::size_type index)1284 const_header_lines_iterator(const BalsaHeaders* headers,
1285 HeaderLines::size_type index)
1286 : iterator_base(headers, index) {}
1287 };
1288
1289 // A const iterator that stops only on header lines for a particular key.
1290 class QUICHE_EXPORT BalsaHeaders::const_header_lines_key_iterator
1291 : public BalsaHeaders::iterator_base {
1292 public:
1293 const_header_lines_key_iterator& operator++() {
1294 do {
1295 iterator_base::increment();
1296 } while (!AtEnd() && !absl::EqualsIgnoreCase(key_, (**this).first));
1297 return *this;
1298 }
1299
1300 // Only forward-iteration makes sense, so no operator-- defined.
1301
1302 private:
1303 friend class BalsaHeaders;
1304
const_header_lines_key_iterator(const BalsaHeaders * headers,HeaderLines::size_type index,absl::string_view key)1305 const_header_lines_key_iterator(const BalsaHeaders* headers,
1306 HeaderLines::size_type index,
1307 absl::string_view key)
1308 : iterator_base(headers, index), key_(key) {}
1309
1310 // Should only be used for creating an end iterator.
const_header_lines_key_iterator(const BalsaHeaders * headers,HeaderLines::size_type index)1311 const_header_lines_key_iterator(const BalsaHeaders* headers,
1312 HeaderLines::size_type index)
1313 : iterator_base(headers, index) {}
1314
AtEnd()1315 bool AtEnd() const { return *this >= headers_->lines().end(); }
1316
1317 absl::string_view key_;
1318 };
1319
1320 inline BalsaHeaders::iterator_range<BalsaHeaders::const_header_lines_iterator>
lines()1321 BalsaHeaders::lines() const {
1322 return {HeaderLinesBeginHelper<const_header_lines_iterator>(),
1323 HeaderLinesEndHelper<const_header_lines_iterator>()};
1324 }
1325
1326 inline BalsaHeaders::iterator_range<
1327 BalsaHeaders::const_header_lines_key_iterator>
lines(absl::string_view key)1328 BalsaHeaders::lines(absl::string_view key) const {
1329 return {GetIteratorForKey(key), header_lines_key_end()};
1330 }
1331
1332 inline BalsaHeaders::const_header_lines_key_iterator
header_lines_key_end()1333 BalsaHeaders::header_lines_key_end() const {
1334 return HeaderLinesEndHelper<const_header_lines_key_iterator>();
1335 }
1336
erase(const const_header_lines_iterator & it)1337 inline void BalsaHeaders::erase(const const_header_lines_iterator& it) {
1338 QUICHE_DCHECK_EQ(it.headers_, this);
1339 QUICHE_DCHECK_LT(it.idx_, header_lines_.size());
1340 header_lines_[it.idx_].skip = true;
1341 }
1342
1343 template <typename Buffer>
WriteToBuffer(Buffer * buffer,CaseOption case_option,CoalesceOption coalesce_option)1344 void BalsaHeaders::WriteToBuffer(Buffer* buffer, CaseOption case_option,
1345 CoalesceOption coalesce_option) const {
1346 // write the first line.
1347 const absl::string_view firstline = first_line();
1348 if (!firstline.empty()) {
1349 buffer->WriteString(firstline);
1350 }
1351 buffer->WriteString("\r\n");
1352 if (coalesce_option != CoalesceOption::kCoalesce) {
1353 const HeaderLines::size_type end = header_lines_.size();
1354 for (HeaderLines::size_type i = 0; i < end; ++i) {
1355 const HeaderLineDescription& line = header_lines_[i];
1356 if (line.skip) {
1357 continue;
1358 }
1359 const char* line_ptr = GetPtr(line.buffer_base_idx);
1360 WriteHeaderLineToBuffer(
1361 buffer,
1362 absl::string_view(line_ptr + line.first_char_idx, line.KeyLength()),
1363 absl::string_view(line_ptr + line.value_begin_idx,
1364 line.ValuesLength()),
1365 case_option);
1366 }
1367 } else {
1368 WriteToBufferCoalescingMultivaluedHeaders(
1369 buffer, multivalued_envoy_headers(), case_option);
1370 }
1371 }
1372
GetValuesOfMultivaluedHeaders(const MultivaluedHeadersSet & multivalued_headers,MultivaluedHeadersValuesMap * multivalues)1373 inline void BalsaHeaders::GetValuesOfMultivaluedHeaders(
1374 const MultivaluedHeadersSet& multivalued_headers,
1375 MultivaluedHeadersValuesMap* multivalues) const {
1376 multivalues->reserve(header_lines_.capacity());
1377
1378 // Find lines that need to be coalesced and store them in |multivalues|.
1379 for (const auto& line : header_lines_) {
1380 if (line.skip) {
1381 continue;
1382 }
1383 const char* line_ptr = GetPtr(line.buffer_base_idx);
1384 absl::string_view header_key =
1385 absl::string_view(line_ptr + line.first_char_idx, line.KeyLength());
1386 // If this is multivalued header, it may need to be coalesced.
1387 if (multivalued_headers.contains(header_key)) {
1388 absl::string_view header_value = absl::string_view(
1389 line_ptr + line.value_begin_idx, line.ValuesLength());
1390 // Add |header_value| to the vector of values for this |header_key|,
1391 // therefore preserving the order of values for the same key.
1392 (*multivalues)[header_key].push_back(header_value);
1393 }
1394 }
1395 }
1396
1397 template <typename Buffer>
WriteToBufferCoalescingMultivaluedHeaders(Buffer * buffer,const MultivaluedHeadersSet & multivalued_headers,CaseOption case_option)1398 void BalsaHeaders::WriteToBufferCoalescingMultivaluedHeaders(
1399 Buffer* buffer, const MultivaluedHeadersSet& multivalued_headers,
1400 CaseOption case_option) const {
1401 MultivaluedHeadersValuesMap multivalues;
1402 GetValuesOfMultivaluedHeaders(multivalued_headers, &multivalues);
1403
1404 // Write out header lines while coalescing those that need to be coalesced.
1405 for (const auto& line : header_lines_) {
1406 if (line.skip) {
1407 continue;
1408 }
1409 const char* line_ptr = GetPtr(line.buffer_base_idx);
1410 absl::string_view header_key =
1411 absl::string_view(line_ptr + line.first_char_idx, line.KeyLength());
1412 auto header_multivalue = multivalues.find(header_key);
1413 // If current line doesn't need to be coalesced (as it is either not
1414 // multivalue, or has just a single value so it equals to current line),
1415 // then just write it out.
1416 if (header_multivalue == multivalues.end() ||
1417 header_multivalue->second.size() == 1) {
1418 WriteHeaderLineToBuffer(buffer, header_key,
1419 absl::string_view(line_ptr + line.value_begin_idx,
1420 line.ValuesLength()),
1421 case_option);
1422 } else {
1423 // If this line needs to be coalesced, then write all its values and clear
1424 // them, so the subsequent same header keys will not be written.
1425 if (!header_multivalue->second.empty()) {
1426 WriteHeaderLineValuesToBuffer(buffer, header_key,
1427 header_multivalue->second, case_option);
1428 // Clear the multivalue list as it is already written out, so subsequent
1429 // same header keys will not be written.
1430 header_multivalue->second.clear();
1431 }
1432 }
1433 }
1434 }
1435
1436 template <typename IteratorType>
HeaderLinesBeginHelper()1437 const IteratorType BalsaHeaders::HeaderLinesBeginHelper() const {
1438 if (header_lines_.empty()) {
1439 return IteratorType(this, 0);
1440 }
1441 const HeaderLines::size_type header_lines_size = header_lines_.size();
1442 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
1443 if (header_lines_[i].skip == false) {
1444 return IteratorType(this, i);
1445 }
1446 }
1447 return IteratorType(this, 0);
1448 }
1449
1450 template <typename IteratorType>
HeaderLinesEndHelper()1451 const IteratorType BalsaHeaders::HeaderLinesEndHelper() const {
1452 if (header_lines_.empty()) {
1453 return IteratorType(this, 0);
1454 }
1455 const HeaderLines::size_type header_lines_size = header_lines_.size();
1456 HeaderLines::size_type i = header_lines_size;
1457 do {
1458 --i;
1459 if (header_lines_[i].skip == false) {
1460 return IteratorType(this, i + 1);
1461 }
1462 } while (i != 0);
1463 return IteratorType(this, 0);
1464 }
1465
1466 } // namespace quiche
1467
1468 #endif // QUICHE_BALSA_BALSA_HEADERS_H_
1469