1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // A lightweight implementation for storing HTTP headers.
6 
7 #ifndef QUICHE_BALSA_BALSA_HEADERS_H_
8 #define QUICHE_BALSA_BALSA_HEADERS_H_
9 
10 #include <cstddef>
11 #include <cstring>
12 #include <functional>
13 #include <iterator>
14 #include <memory>
15 #include <ostream>
16 #include <string>
17 #include <utility>
18 #include <vector>
19 
20 #include "absl/container/flat_hash_map.h"
21 #include "absl/container/flat_hash_set.h"
22 #include "absl/memory/memory.h"
23 #include "absl/strings/ascii.h"
24 #include "absl/strings/match.h"
25 #include "absl/strings/string_view.h"
26 #include "absl/types/optional.h"
27 #include "quiche/balsa/balsa_enums.h"
28 #include "quiche/balsa/header_api.h"
29 #include "quiche/balsa/standard_header_map.h"
30 #include "quiche/common/platform/api/quiche_bug_tracker.h"
31 #include "quiche/common/platform/api/quiche_export.h"
32 #include "quiche/common/platform/api/quiche_logging.h"
33 
34 namespace gfe2 {
35 class Http2HeaderValidator;
36 }  // namespace gfe2
37 
38 namespace quiche {
39 
40 namespace test {
41 class BalsaHeadersTestPeer;
42 }  // namespace test
43 
44 // WARNING:
45 // Note that -no- char* returned by any function in this
46 // file is null-terminated.
47 
48 // This class exists to service the specific needs of BalsaHeaders.
49 //
50 // Functional goals:
51 //   1) provide a backing-store for all of the StringPieces that BalsaHeaders
52 //      returns. Every StringPiece returned from BalsaHeaders should remain
//      valid until the BalsaHeaders object is cleared, or the header-line is
54 //      erased.
55 //   2) provide a backing-store for BalsaFrame, which requires contiguous memory
56 //      for its fast-path parsing functions. Note that the cost of copying is
57 //      less than the cost of requiring the parser to do slow-path parsing, as
58 //      it would have to check for bounds every byte, instead of every 16 bytes.
59 //
60 // This class is optimized for the case where headers are stored in one of two
61 // buffers. It doesn't make a lot of effort to densely pack memory-- in fact,
62 // it -may- be somewhat memory inefficient. This possible inefficiency allows a
63 // certain simplicity of implementation and speed which makes it worthwhile.
64 // If, in the future, better memory density is required, it should be possible
65 // to reuse the abstraction presented by this object to achieve those goals.
66 //
67 // In the most common use-case, this memory inefficiency should be relatively
68 // small.
69 //
70 // Alternate implementations of BalsaBuffer may include:
71 //  - vector of strings, one per header line (similar to HTTPHeaders)
72 //  - densely packed strings:
73 //    - keep a sorted array/map of free-space linked lists or numbers.
//      - use the entry that most closely fits your needs.
75 //    - at this point, perhaps just use a vector of strings, and let
76 //      the allocator do the right thing.
77 //
78 class QUICHE_EXPORT BalsaBuffer {
79  public:
80   static constexpr size_t kDefaultBlocksize = 4096;
81 
82   // The BufferBlock is a structure used internally by the
83   // BalsaBuffer class to store the base buffer pointers to
84   // each block, as well as the important metadata for buffer
85   // sizes and bytes free. It *may* be possible to replace this
86   // with a vector<char>, but it's unclear whether moving a vector
87   // can invalidate pointers into it. LWG issue 2321 proposes to fix this.
88   struct QUICHE_EXPORT BufferBlock {
89    public:
90     std::unique_ptr<char[]> buffer;
91     size_t buffer_size = 0;
92     size_t bytes_free = 0;
93 
bytes_usedBufferBlock94     size_t bytes_used() const { return buffer_size - bytes_free; }
start_of_unused_bytesBufferBlock95     char* start_of_unused_bytes() const { return buffer.get() + bytes_used(); }
96 
BufferBlockBufferBlock97     BufferBlock() {}
98 
BufferBlockBufferBlock99     BufferBlock(std::unique_ptr<char[]> buf, size_t size, size_t free)
100         : buffer(std::move(buf)), buffer_size(size), bytes_free(free) {}
101 
102     BufferBlock(const BufferBlock&) = delete;
103     BufferBlock& operator=(const BufferBlock&) = delete;
104     BufferBlock(BufferBlock&&) = default;
105     BufferBlock& operator=(BufferBlock&&) = default;
106 
107     // Note: allocating a fresh buffer even if we could reuse an old one may let
108     // us shed memory, and invalidates old StringPieces (making them easier to
109     // catch with asan).
CopyFromBufferBlock110     void CopyFrom(const BufferBlock& rhs) {
111       QUICHE_DCHECK(this != &rhs);
112       buffer_size = rhs.buffer_size;
113       bytes_free = rhs.bytes_free;
114       if (rhs.buffer == nullptr) {
115         buffer = nullptr;
116       } else {
117         buffer = std::make_unique<char[]>(buffer_size);
118         memcpy(buffer.get(), rhs.buffer.get(), rhs.bytes_used());
119       }
120     }
121   };
122 
123   typedef std::vector<BufferBlock> Blocks;
124 
BalsaBuffer()125   BalsaBuffer()
126       : blocksize_(kDefaultBlocksize), can_write_to_contiguous_buffer_(true) {}
127 
BalsaBuffer(size_t blocksize)128   explicit BalsaBuffer(size_t blocksize)
129       : blocksize_(blocksize), can_write_to_contiguous_buffer_(true) {}
130 
131   BalsaBuffer(const BalsaBuffer&) = delete;
132   BalsaBuffer& operator=(const BalsaBuffer&) = delete;
133   BalsaBuffer(BalsaBuffer&&) = default;
134   BalsaBuffer& operator=(BalsaBuffer&&) = default;
135 
136   // Returns the total amount of memory reserved by the buffer blocks.
GetTotalBufferBlockSize()137   size_t GetTotalBufferBlockSize() const {
138     size_t buffer_size = 0;
139     for (Blocks::const_iterator iter = blocks_.begin(); iter != blocks_.end();
140          ++iter) {
141       buffer_size += iter->buffer_size;
142     }
143     return buffer_size;
144   }
145 
146   // Returns the total amount of memory used by the buffer blocks.
GetTotalBytesUsed()147   size_t GetTotalBytesUsed() const {
148     size_t bytes_used = 0;
149     for (const auto& b : blocks_) {
150       bytes_used += b.bytes_used();
151     }
152     return bytes_used;
153   }
154 
GetPtr(Blocks::size_type block_idx)155   const char* GetPtr(Blocks::size_type block_idx) const {
156     QUICHE_DCHECK_LT(block_idx, blocks_.size())
157         << block_idx << ", " << blocks_.size();
158     return block_idx >= blocks_.size() ? nullptr
159                                        : blocks_[block_idx].buffer.get();
160   }
161 
GetPtr(Blocks::size_type block_idx)162   char* GetPtr(Blocks::size_type block_idx) {
163     QUICHE_DCHECK_LT(block_idx, blocks_.size())
164         << block_idx << ", " << blocks_.size();
165     return block_idx >= blocks_.size() ? nullptr
166                                        : blocks_[block_idx].buffer.get();
167   }
168 
169   // This function is different from Reserve(), as it ensures that the data
170   // stored via subsequent calls to this function are all contiguous (and in
171   // the order in which these writes happened). This is essentially the same
172   // as a string append.
173   //
174   // You may call this function at any time between object
175   // construction/Clear(), and the calling of the
176   // NoMoreWriteToContiguousBuffer() function.
177   //
178   // You must not call this function after the NoMoreWriteToContiguousBuffer()
179   // function is called, unless a Clear() has been called since.
180   // If you do, the program will abort().
181   //
182   // This condition is placed upon this code so that calls to Reserve() can
183   // append to the buffer in the first block safely, and without invaliding
184   // the StringPiece which it returns.
185   //
186   // This function's main intended user is the BalsaFrame class, which,
187   // for reasons of efficiency, requires that the buffer from which it parses
188   // the headers be contiguous.
189   //
WriteToContiguousBuffer(absl::string_view sp)190   void WriteToContiguousBuffer(absl::string_view sp) {
191     if (sp.empty()) {
192       return;
193     }
194     QUICHE_CHECK(can_write_to_contiguous_buffer_);
195 
196     if (blocks_.empty()) {
197       blocks_.push_back(AllocBlock());
198     }
199 
200     QUICHE_DCHECK_GE(blocks_.size(), 1u);
201     if (blocks_[0].buffer == nullptr && sp.size() <= blocksize_) {
202       blocks_[0] = AllocBlock();
203       memcpy(blocks_[0].start_of_unused_bytes(), sp.data(), sp.size());
204     } else if (blocks_[0].bytes_free < sp.size()) {
205       // the first block isn't big enough, resize it.
206       const size_t old_storage_size_used = blocks_[0].bytes_used();
207       // Increase to at least 2*old_storage_size_used; if sp.size() is larger,
208       // we'll increase by that amount.
209       const size_t new_storage_size =
210           old_storage_size_used + (old_storage_size_used < sp.size()
211                                        ? sp.size()
212                                        : old_storage_size_used);
213       std::unique_ptr<char[]> new_storage{new char[new_storage_size]};
214       char* old_storage = blocks_[0].buffer.get();
215       if (old_storage_size_used != 0u) {
216         memcpy(new_storage.get(), old_storage, old_storage_size_used);
217       }
218       memcpy(new_storage.get() + old_storage_size_used, sp.data(), sp.size());
219       blocks_[0].buffer = std::move(new_storage);
220       blocks_[0].bytes_free = new_storage_size - old_storage_size_used;
221       blocks_[0].buffer_size = new_storage_size;
222     } else {
223       memcpy(blocks_[0].start_of_unused_bytes(), sp.data(), sp.size());
224     }
225     blocks_[0].bytes_free -= sp.size();
226   }
227 
NoMoreWriteToContiguousBuffer()228   void NoMoreWriteToContiguousBuffer() {
229     can_write_to_contiguous_buffer_ = false;
230   }
231 
232   // Reserves "permanent" storage of the size indicated. Returns a pointer to
233   // the beginning of that storage, and assigns the index of the block used to
234   // block_buffer_idx. This function uses the first block IFF the
235   // NoMoreWriteToContiguousBuffer function has been called since the last
236   // Clear/Construction.
Reserve(size_t size,Blocks::size_type * block_buffer_idx)237   char* Reserve(size_t size, Blocks::size_type* block_buffer_idx) {
238     if (blocks_.empty()) {
239       blocks_.push_back(AllocBlock());
240     }
241 
242     // There should always be a 'first_block', even if it
243     // contains nothing.
244     QUICHE_DCHECK_GE(blocks_.size(), 1u);
245     BufferBlock* block = nullptr;
246     Blocks::size_type block_idx = can_write_to_contiguous_buffer_ ? 1 : 0;
247     for (; block_idx < blocks_.size(); ++block_idx) {
248       if (blocks_[block_idx].bytes_free >= size) {
249         block = &blocks_[block_idx];
250         break;
251       }
252     }
253     if (block == nullptr) {
254       if (blocksize_ < size) {
255         blocks_.push_back(AllocCustomBlock(size));
256       } else {
257         blocks_.push_back(AllocBlock());
258       }
259       block = &blocks_.back();
260     }
261 
262     char* storage = block->start_of_unused_bytes();
263     block->bytes_free -= size;
264     if (block_buffer_idx != nullptr) {
265       *block_buffer_idx = block_idx;
266     }
267     return storage;
268   }
269 
Clear()270   void Clear() {
271     blocks_.clear();
272     blocks_.shrink_to_fit();
273     can_write_to_contiguous_buffer_ = true;
274   }
275 
CopyFrom(const BalsaBuffer & b)276   void CopyFrom(const BalsaBuffer& b) {
277     blocks_.resize(b.blocks_.size());
278     for (Blocks::size_type i = 0; i < blocks_.size(); ++i) {
279       blocks_[i].CopyFrom(b.blocks_[i]);
280     }
281     blocksize_ = b.blocksize_;
282     can_write_to_contiguous_buffer_ = b.can_write_to_contiguous_buffer_;
283   }
284 
StartOfFirstBlock()285   const char* StartOfFirstBlock() const {
286     QUICHE_BUG_IF(bug_if_1182_1, blocks_.empty())
287         << "First block not allocated yet!";
288     return blocks_.empty() ? nullptr : blocks_[0].buffer.get();
289   }
290 
EndOfFirstBlock()291   const char* EndOfFirstBlock() const {
292     QUICHE_BUG_IF(bug_if_1182_2, blocks_.empty())
293         << "First block not allocated yet!";
294     return blocks_.empty() ? nullptr : blocks_[0].start_of_unused_bytes();
295   }
296 
GetReadableBytesOfFirstBlock()297   size_t GetReadableBytesOfFirstBlock() const {
298     return blocks_.empty() ? 0 : blocks_[0].bytes_used();
299   }
300 
can_write_to_contiguous_buffer()301   bool can_write_to_contiguous_buffer() const {
302     return can_write_to_contiguous_buffer_;
303   }
blocksize()304   size_t blocksize() const { return blocksize_; }
num_blocks()305   Blocks::size_type num_blocks() const { return blocks_.size(); }
buffer_size(size_t idx)306   size_t buffer_size(size_t idx) const { return blocks_[idx].buffer_size; }
bytes_used(size_t idx)307   size_t bytes_used(size_t idx) const { return blocks_[idx].bytes_used(); }
308 
309  private:
AllocBlock()310   BufferBlock AllocBlock() { return AllocCustomBlock(blocksize_); }
311 
AllocCustomBlock(size_t blocksize)312   BufferBlock AllocCustomBlock(size_t blocksize) {
313     return BufferBlock{std::make_unique<char[]>(blocksize), blocksize,
314                        blocksize};
315   }
316 
317   // A container of BufferBlocks
318   Blocks blocks_;
319 
320   // The default allocation size for a block.
321   // In general, blocksize_ bytes will be allocated for
322   // each buffer.
323   size_t blocksize_;
324 
325   // If set to true, then the first block cannot be used for Reserve() calls as
326   // the WriteToContiguous... function will modify the base pointer for this
327   // block, and the Reserve() calls need to be sure that the base pointer will
328   // not be changing in order to provide the user with StringPieces which
329   // continue to be valid.
330   bool can_write_to_contiguous_buffer_;
331 };
332 
333 ////////////////////////////////////////////////////////////////////////////////
334 
335 // All of the functions in the BalsaHeaders class use string pieces, by either
336 // using the StringPiece class, or giving an explicit size and char* (as these
337 // are the native representation for these string pieces).
338 // This is done for several reasons.
339 //  1) This minimizes copying/allocation/deallocation as compared to using
340 //  string parameters
341 //  2) This reduces the number of strlen() calls done (as the length of any
342 //  string passed in is relatively likely to be known at compile time, and for
343 //  those strings passed back we obviate the need for a strlen() to determine
//  the size of new storage allocations if a new allocation is required).
345 //  3) This class attempts to store all of its data in two linear buffers in
346 //  order to enhance the speed of parsing and writing out to a buffer. As a
347 //  result, many string pieces are -not- terminated by '\0', and are not
348 //  c-strings.  Since this is the case, we must delineate the length of the
349 //  string explicitly via a length.
350 //
351 //  WARNING:  The side effect of using StringPiece is that if the underlying
352 //  buffer changes (due to modifying the headers) the StringPieces which point
353 //  to the data which was modified, may now contain "garbage", and should not
354 //  be dereferenced.
//  For example, if you fetch some component of the first-line (request or
//  response), and then you modify the first line, the StringPieces you
//  originally received from the original first-line may no longer be valid.
358 //
359 //  StringPieces pointing to pieces of header lines which have not been
360 //  erased() or modified should be valid until the object is cleared or
361 //  destroyed.
362 //
363 //  Key comparisons are case-insensitive.
364 
365 class QUICHE_EXPORT BalsaHeaders : public HeaderApi {
366  public:
367   // Each header line is parsed into a HeaderLineDescription, which maintains
368   // pointers into the BalsaBuffer.
369   //
370   // Succinctly describes one header line as indices into a buffer.
371   struct QUICHE_EXPORT HeaderLineDescription {
HeaderLineDescriptionHeaderLineDescription372     HeaderLineDescription(size_t first_character_index, size_t key_end_index,
373                           size_t value_begin_index, size_t last_character_index,
374                           size_t buffer_base_index)
375         : first_char_idx(first_character_index),
376           key_end_idx(key_end_index),
377           value_begin_idx(value_begin_index),
378           last_char_idx(last_character_index),
379           buffer_base_idx(buffer_base_index),
380           skip(false) {}
381 
HeaderLineDescriptionHeaderLineDescription382     HeaderLineDescription()
383         : first_char_idx(0),
384           key_end_idx(0),
385           value_begin_idx(0),
386           last_char_idx(0),
387           buffer_base_idx(0),
388           skip(false) {}
389 
KeyLengthHeaderLineDescription390     size_t KeyLength() const {
391       QUICHE_DCHECK_GE(key_end_idx, first_char_idx);
392       return key_end_idx - first_char_idx;
393     }
ValuesLengthHeaderLineDescription394     size_t ValuesLength() const {
395       QUICHE_DCHECK_GE(last_char_idx, value_begin_idx);
396       return last_char_idx - value_begin_idx;
397     }
398 
399     size_t first_char_idx;
400     size_t key_end_idx;
401     size_t value_begin_idx;
402     size_t last_char_idx;
403     BalsaBuffer::Blocks::size_type buffer_base_idx;
404     bool skip;
405   };
406 
407   using HeaderTokenList = std::vector<absl::string_view>;
408 
409   // An iterator for walking through all the header lines.
410   class const_header_lines_iterator;
411 
412   // An iterator that only stops at lines with a particular key
413   // (case-insensitive).  See also GetIteratorForKey.
414   //
415   // Check against header_lines_key_end() to determine when iteration is
416   // finished. lines().end() will also work.
417   class const_header_lines_key_iterator;
418 
419   // A simple class that can be used in a range-based for loop.
420   template <typename IteratorType>
421   class QUICHE_EXPORT iterator_range {
422    public:
423     using iterator = IteratorType;
424     using const_iterator = IteratorType;
425     using value_type = typename std::iterator_traits<IteratorType>::value_type;
426 
iterator_range(IteratorType begin_iterator,IteratorType end_iterator)427     iterator_range(IteratorType begin_iterator, IteratorType end_iterator)
428         : begin_iterator_(std::move(begin_iterator)),
429           end_iterator_(std::move(end_iterator)) {}
430 
begin()431     IteratorType begin() const { return begin_iterator_; }
end()432     IteratorType end() const { return end_iterator_; }
433 
434    private:
435     IteratorType begin_iterator_, end_iterator_;
436   };
437 
438   // Set of names of headers that might have multiple values.
439   // CoalesceOption::kCoalesce can be used to match Envoy behavior in
440   // WriteToBuffer().
441   using MultivaluedHeadersSet =
442       absl::flat_hash_set<absl::string_view, StringPieceCaseHash,
443                           StringPieceCaseEqual>;
444 
445   // Map of key => vector<value>, where vector contains ordered list of all
446   // values for |key| (ignoring the casing).
447   using MultivaluedHeadersValuesMap =
448       absl::flat_hash_map<absl::string_view, std::vector<absl::string_view>,
449                           StringPieceCaseHash, StringPieceCaseEqual>;
450 
BalsaHeaders()451   BalsaHeaders()
452       : balsa_buffer_(4096),
453         content_length_(0),
454         content_length_status_(BalsaHeadersEnums::NO_CONTENT_LENGTH),
455         parsed_response_code_(0),
456         firstline_buffer_base_idx_(0),
457         whitespace_1_idx_(0),
458         non_whitespace_1_idx_(0),
459         whitespace_2_idx_(0),
460         non_whitespace_2_idx_(0),
461         whitespace_3_idx_(0),
462         non_whitespace_3_idx_(0),
463         whitespace_4_idx_(0),
464         transfer_encoding_is_chunked_(false) {}
465 
BalsaHeaders(size_t bufsize)466   explicit BalsaHeaders(size_t bufsize)
467       : balsa_buffer_(bufsize),
468         content_length_(0),
469         content_length_status_(BalsaHeadersEnums::NO_CONTENT_LENGTH),
470         parsed_response_code_(0),
471         firstline_buffer_base_idx_(0),
472         whitespace_1_idx_(0),
473         non_whitespace_1_idx_(0),
474         whitespace_2_idx_(0),
475         non_whitespace_2_idx_(0),
476         whitespace_3_idx_(0),
477         non_whitespace_3_idx_(0),
478         whitespace_4_idx_(0),
479         transfer_encoding_is_chunked_(false) {}
480 
481   // Copying BalsaHeaders is expensive, so require that it be visible.
482   BalsaHeaders(const BalsaHeaders&) = delete;
483   BalsaHeaders& operator=(const BalsaHeaders&) = delete;
484   BalsaHeaders(BalsaHeaders&&) = default;
485   BalsaHeaders& operator=(BalsaHeaders&&) = default;
486 
487   // Returns a range that represents all of the header lines.
488   iterator_range<const_header_lines_iterator> lines() const;
489 
490   // Returns an iterator range consisting of the header lines matching key.
491   // String backing 'key' must remain valid for lifetime of range.
492   iterator_range<const_header_lines_key_iterator> lines(
493       absl::string_view key) const;
494 
495   // Returns a forward-only iterator that only stops at lines matching key.
496   // String backing 'key' must remain valid for lifetime of iterator.
497   //
498   // Check returned iterator against header_lines_key_end() to determine when
499   // iteration is finished.
500   //
501   // Consider calling lines(key)--it may be more readable.
502   const_header_lines_key_iterator GetIteratorForKey(
503       absl::string_view key) const;
504 
505   const_header_lines_key_iterator header_lines_key_end() const;
506 
507   void erase(const const_header_lines_iterator& it);
508 
509   void Clear();
510 
511   // Explicit copy functions to avoid risk of accidental copies.
Copy()512   BalsaHeaders Copy() const {
513     BalsaHeaders copy;
514     copy.CopyFrom(*this);
515     return copy;
516   }
517   void CopyFrom(const BalsaHeaders& other);
518 
519   // Replaces header entries with key 'key' if they exist, or appends
520   // a new header if none exist.  See 'AppendHeader' below for additional
521   // comments about ContentLength and TransferEncoding headers. Note that this
522   // will allocate new storage every time that it is called.
523   void ReplaceOrAppendHeader(absl::string_view key,
524                              absl::string_view value) override;
525 
526   // Append a new header entry to the header object. Clients who wish to append
527   // Content-Length header should use SetContentLength() method instead of
528   // adding the content length header using AppendHeader (manually adding the
529   // content length header will not update the content_length_ and
530   // content_length_status_ values).
531   // Similarly, clients who wish to add or remove the transfer encoding header
532   // in order to apply or remove chunked encoding should use
533   // SetTransferEncodingToChunkedAndClearContentLength() or
534   // SetNoTransferEncoding() instead.
535   void AppendHeader(absl::string_view key, absl::string_view value) override;
536 
537   // Appends ',value' to an existing header named 'key'.  If no header with the
538   // correct key exists, it will call AppendHeader(key, value).  Calling this
539   // function on a key which exists several times in the headers will produce
540   // unpredictable results.
541   void AppendToHeader(absl::string_view key, absl::string_view value) override;
542 
543   // Appends ', value' to an existing header named 'key'.  If no header with the
544   // correct key exists, it will call AppendHeader(key, value).  Calling this
545   // function on a key which exists several times in the headers will produce
546   // unpredictable results.
547   void AppendToHeaderWithCommaAndSpace(absl::string_view key,
548                                        absl::string_view value) override;
549 
550   // Returns the value corresponding to the given header key. Returns an empty
551   // string if the header key does not exist. For headers that may consist of
552   // multiple lines, use GetAllOfHeader() instead.
553   // Make the QuicheLowerCaseString overload visible,
554   // and only override the absl::string_view one.
555   using HeaderApi::GetHeader;
556   absl::string_view GetHeader(absl::string_view key) const override;
557 
558   // Iterates over all currently valid header lines, appending their
559   // values into the vector 'out', in top-to-bottom order.
560   // Header-lines which have been erased are not currently valid, and
561   // will not have their values appended. Empty values will be
562   // represented as empty string. If 'key' doesn't exist in the headers at
563   // all, out will not be changed. We do not clear the vector out
564   // before adding new entries. If there are header lines with matching
565   // key but empty value then they are also added to the vector out.
566   // (Basically empty values are not treated in any special manner).
567   //
568   // Example:
569   // Input header:
570   // "GET / HTTP/1.0\r\n"
571   //    "key1: v1\r\n"
572   //    "key1: \r\n"
573   //    "key1:\r\n"
574   //    "key1:  v1\r\n"
575   //    "key1:v2\r\n"
576   //
577   //  vector out is initially: ["foo"]
578   //  vector out after GetAllOfHeader("key1", &out) is:
579   // ["foo", "v1", "", "", "v1", "v2"]
580   //
581   // See gfe::header_properties::IsMultivaluedHeader() for which headers
582   // GFE treats as being multivalued.
583 
584   // Make the QuicheLowerCaseString overload visible,
585   // and only override the absl::string_view one.
586   using HeaderApi::GetAllOfHeader;
587   void GetAllOfHeader(absl::string_view key,
588                       std::vector<absl::string_view>* out) const override;
589 
590   // Same as above, but iterates over all header lines including removed ones.
591   // Appends their values into the vector 'out' in top-to-bottom order,
592   // first all valid headers then all that were removed.
593   void GetAllOfHeaderIncludeRemoved(absl::string_view key,
594                                     std::vector<absl::string_view>* out) const;
595 
596   // Joins all values for `key` into a comma-separated string.
597   // Make the QuicheLowerCaseString overload visible,
598   // and only override the absl::string_view one.
599   using HeaderApi::GetAllOfHeaderAsString;
600   std::string GetAllOfHeaderAsString(absl::string_view key) const override;
601 
602   // Determine if a given header is present.  Case-insensitive.
HasHeader(absl::string_view key)603   inline bool HasHeader(absl::string_view key) const override {
604     return GetConstHeaderLinesIterator(key) != header_lines_.end();
605   }
606 
607   // Goes through all headers with key 'key' and checks to see if one of the
608   // values is 'value'.  Returns true if there are headers with the desired key
609   // and value, false otherwise.  Case-insensitive for the key; case-sensitive
610   // for the value.
HeaderHasValue(absl::string_view key,absl::string_view value)611   bool HeaderHasValue(absl::string_view key,
612                       absl::string_view value) const override {
613     return HeaderHasValueHelper(key, value, true);
614   }
615   // Same as above, but also case-insensitive for the value.
HeaderHasValueIgnoreCase(absl::string_view key,absl::string_view value)616   bool HeaderHasValueIgnoreCase(absl::string_view key,
617                                 absl::string_view value) const override {
618     return HeaderHasValueHelper(key, value, false);
619   }
620 
621   // Returns true iff any header 'key' exists with non-empty value.
622   bool HasNonEmptyHeader(absl::string_view key) const override;
623 
624   const_header_lines_iterator GetHeaderPosition(absl::string_view key) const;
625 
626   // Removes all headers in given set |keys| at once efficiently. Keys
627   // are case insensitive.
628   //
629   // Alternatives considered:
630   //
631   // 1. Use string_hash_set<>, the caller (such as ClearHopByHopHeaders) lower
632   // cases the keys and RemoveAllOfHeaderInList just does lookup. This according
633   // to microbenchmark gives the best performance because it does not require
634   // an extra copy of the hash table. However, it is not taken because of the
635   // possible risk that caller could forget to lowercase the keys.
636   //
637   // 2. Use flat_hash_set<StringPiece, StringPieceCaseHash,StringPieceCaseEqual>
638   // or string_hash_set<StringPieceCaseHash, StringPieceCaseEqual>. Both appear
639   // to have (much) worse performance with WithoutDupToken and LongHeader case
640   // in microbenchmark.
641   void RemoveAllOfHeaderInList(const HeaderTokenList& keys) override;
642 
643   void RemoveAllOfHeader(absl::string_view key) override;
644 
645   // Removes all headers starting with 'key' [case insensitive]
646   void RemoveAllHeadersWithPrefix(absl::string_view prefix) override;
647 
648   // Returns true if we have at least one header with given prefix
649   // [case insensitive]. Currently for test use only.
650   bool HasHeadersWithPrefix(absl::string_view prefix) const override;
651 
652   // Returns the key value pairs for all headers where the header key begins
653   // with the specified prefix.
654   void GetAllOfHeaderWithPrefix(
655       absl::string_view prefix,
656       std::vector<std::pair<absl::string_view, absl::string_view>>* out)
657       const override;
658 
659   void GetAllHeadersWithLimit(
660       std::vector<std::pair<absl::string_view, absl::string_view>>* out,
661       int limit) const override;
662 
663   // Removes all values equal to a given value from header lines with given key.
664   // All string operations done here are case-sensitive.
665   // If a header line has only values matching the given value, the entire
666   // line is removed.
667   // If the given value is found in a multi-value header line mixed with other
668   // values, the line is edited in-place to remove the values.
669   // Returns the number of occurrences of value that were removed.
670   // This method runs in linear time.
671   size_t RemoveValue(absl::string_view key, absl::string_view value);
672 
673   // Returns the upper bound on the required buffer space to fully write out
674   // the header object (this include the first line, all header lines, and the
675   // final line separator that marks the ending of the header).
676   size_t GetSizeForWriteBuffer() const override;
677 
  // Indicates how header keys are cased when the headers are serialized.
679   enum class CaseOption { kNoModification, kLowercase, kPropercase };
680 
  // Indicates whether to coalesce headers with multiple values to match
  // Envoy/GFE3.
682   enum class CoalesceOption { kNoCoalesce, kCoalesce };
683 
684   // The following WriteHeader* methods are template member functions that
685   // place one requirement on the Buffer class: it must implement a Write
686   // method that takes a pointer and a length. The buffer passed in is not
687   // required to be stretchable. For non-stretchable buffers, the user must
688   // call GetSizeForWriteBuffer() to find out the upper bound on the output
689   // buffer space required to make sure that the entire header is serialized.
690   // BalsaHeaders will not check that there is adequate space in the buffer
691   // object during the write.
692 
  // Writes the entire header and the final line separator that marks the end
  // of the HTTP header section to the buffer. After this method returns, no
  // more header data should be written to the buffer.
  // |case_option| and |coalesce_option| are forwarded unchanged to
  // WriteToBuffer(); the terminator is then emitted by
  // WriteHeaderEndingToBuffer().
  template <typename Buffer>
  void WriteHeaderAndEndingToBuffer(Buffer* buffer, CaseOption case_option,
                                    CoalesceOption coalesce_option) const {
    WriteToBuffer(buffer, case_option, coalesce_option);
    WriteHeaderEndingToBuffer(buffer);
  }
702 
  // Convenience overload: writes the header and its terminator with header
  // keys left unmodified and without coalescing multivalued headers.
  template <typename Buffer>
  void WriteHeaderAndEndingToBuffer(Buffer* buffer) const {
    WriteHeaderAndEndingToBuffer(buffer, CaseOption::kNoModification,
                                 CoalesceOption::kNoCoalesce);
  }
708 
  // Writes the final line separator (a bare "\r\n") to the buffer to
  // terminate the HTTP header section.  After this method returns, no more
  // header data should be written to the buffer.
  template <typename Buffer>
  static void WriteHeaderEndingToBuffer(Buffer* buffer) {
    buffer->WriteString("\r\n");
  }
716 
717   // Writes the entire header to the buffer without the line separator that
718   // terminates the HTTP header. This lets users append additional header lines
719   // using WriteHeaderLineToBuffer and then terminate the header with
720   // WriteHeaderEndingToBuffer as the header is serialized to the buffer,
721   // without having to first copy the header.
722   template <typename Buffer>
723   void WriteToBuffer(Buffer* buffer, CaseOption case_option,
724                      CoalesceOption coalesce_option) const;
725 
  // Convenience overload of WriteToBuffer: header keys are written unmodified
  // and multivalued headers are not coalesced.
  template <typename Buffer>
  void WriteToBuffer(Buffer* buffer) const {
    WriteToBuffer(buffer, CaseOption::kNoModification,
                  CoalesceOption::kNoCoalesce);
  }
731 
732   // Used by WriteToBuffer to coalesce multiple values of headers listed in
733   // |multivalued_headers| into a single comma-separated value. Public for test.
734   template <typename Buffer>
735   void WriteToBufferCoalescingMultivaluedHeaders(
736       Buffer* buffer, const MultivaluedHeadersSet& multivalued_headers,
737       CaseOption case_option) const;
738 
739   // Populates |multivalues| with values of |header_lines_| with keys present
740   // in |multivalued_headers| set.
741   void GetValuesOfMultivaluedHeaders(
742       const MultivaluedHeadersSet& multivalued_headers,
743       MultivaluedHeadersValuesMap* multivalues) const;
744 
ToPropercase(absl::string_view header)745   static std::string ToPropercase(absl::string_view header) {
746     std::string copy = std::string(header);
747     bool should_uppercase = true;
748     for (char& c : copy) {
749       if (!absl::ascii_isalnum(c)) {
750         should_uppercase = true;
751       } else if (should_uppercase) {
752         c = absl::ascii_toupper(c);
753         should_uppercase = false;
754       } else {
755         c = absl::ascii_tolower(c);
756       }
757     }
758     return copy;
759   }
760 
761   template <typename Buffer>
WriteHeaderKeyToBuffer(Buffer * buffer,absl::string_view key,CaseOption case_option)762   void WriteHeaderKeyToBuffer(Buffer* buffer, absl::string_view key,
763                               CaseOption case_option) const {
764     if (case_option == CaseOption::kLowercase) {
765       buffer->WriteString(absl::AsciiStrToLower(key));
766     } else if (case_option == CaseOption::kPropercase) {
767       const auto& header_set = quiche::GetStandardHeaderSet();
768       auto it = header_set.find(key);
769       if (it != header_set.end()) {
770         buffer->WriteString(*it);
771       } else {
772         buffer->WriteString(ToPropercase(key));
773       }
774     } else {
775       buffer->WriteString(key);
776     }
777   }
778 
779   // Takes a header line in the form of a key/value pair and append it to the
780   // buffer. This function should be called after WriteToBuffer to
781   // append additional header lines to the header without copying the header.
782   // When the user is done with appending to the buffer,
783   // WriteHeaderEndingToBuffer must be used to terminate the HTTP
784   // header in the buffer. This method is a no-op if key is empty.
785   template <typename Buffer>
WriteHeaderLineToBuffer(Buffer * buffer,absl::string_view key,absl::string_view value,CaseOption case_option)786   void WriteHeaderLineToBuffer(Buffer* buffer, absl::string_view key,
787                                absl::string_view value,
788                                CaseOption case_option) const {
789     // If the key is empty, we don't want to write the rest because it
790     // will not be a well-formed header line.
791     if (!key.empty()) {
792       WriteHeaderKeyToBuffer(buffer, key, case_option);
793       buffer->WriteString(": ");
794       buffer->WriteString(value);
795       buffer->WriteString("\r\n");
796     }
797   }
798 
799   // Takes a header line in the form of a key and vector of values and appends
800   // it to the buffer. This function should be called after WriteToBuffer to
801   // append additional header lines to the header without copying the header.
802   // When the user is done with appending to the buffer,
803   // WriteHeaderEndingToBuffer must be used to terminate the HTTP
804   // header in the buffer. This method is a no-op if the |key| is empty.
805   template <typename Buffer>
WriteHeaderLineValuesToBuffer(Buffer * buffer,absl::string_view key,const std::vector<absl::string_view> & values,CaseOption case_option)806   void WriteHeaderLineValuesToBuffer(
807       Buffer* buffer, absl::string_view key,
808       const std::vector<absl::string_view>& values,
809       CaseOption case_option) const {
810     // If the key is empty, we don't want to write the rest because it
811     // will not be a well-formed header line.
812     if (!key.empty()) {
813       WriteHeaderKeyToBuffer(buffer, key, case_option);
814       buffer->WriteString(": ");
815       for (auto it = values.begin();;) {
816         buffer->WriteString(*it);
817         if (++it == values.end()) {
818           break;
819         }
820         buffer->WriteString(",");
821       }
822       buffer->WriteString("\r\n");
823     }
824   }
825 
  // Dumps the textual representation of the header object to a string, which
827   // is suitable for writing out to logs. All CRLF will be printed out as \n.
828   // This function can be called on a header object in any state. Raw header
829   // data will be printed out if the header object is not completely parsed,
830   // e.g., when there was an error in the middle of parsing.
831   // The header content is appended to the string; the original content is not
832   // cleared.
833   // If used in test cases, WillNotWriteFromFramer() may be of interest.
834   void DumpToString(std::string* str) const;
835   std::string DebugString() const override;
836 
837   bool ForEachHeader(std::function<bool(const absl::string_view key,
838                                         const absl::string_view value)>
839                          fn) const override;
840 
841   void DumpToPrefixedString(const char* spaces, std::string* str) const;
842 
first_line()843   absl::string_view first_line() const {
844     QUICHE_DCHECK_GE(whitespace_4_idx_, non_whitespace_1_idx_);
845     return whitespace_4_idx_ == non_whitespace_1_idx_
846                ? ""
847                : absl::string_view(
848                      BeginningOfFirstLine() + non_whitespace_1_idx_,
849                      whitespace_4_idx_ - non_whitespace_1_idx_);
850   }
  // Returns an owning std::string copy of first_line().
  std::string first_line_of_request() const override {
    return std::string(first_line());
  }
854 
  // Returns the parsed value of the response code if it has been parsed
  // (the stored parsed_response_code_).
  // Guaranteed to return 0 when unparsed (though it is a much better idea to
  // verify that the BalsaFrame had no errors while parsing).
  // This may return response codes which are outside the normal bounds of
  // HTTP response codes-- it is up to the user of this class to ensure that
  // the response code is one which is interpretable.
  size_t parsed_response_code() const override { return parsed_response_code_; }
862 
request_method()863   absl::string_view request_method() const override {
864     QUICHE_DCHECK_GE(whitespace_2_idx_, non_whitespace_1_idx_);
865     return whitespace_2_idx_ == non_whitespace_1_idx_
866                ? ""
867                : absl::string_view(
868                      BeginningOfFirstLine() + non_whitespace_1_idx_,
869                      whitespace_2_idx_ - non_whitespace_1_idx_);
870   }
871 
  // Returns the first token of the first line, which for a response is the
  // version.
  absl::string_view response_version() const override {
    // Note: There is no difference between request_method() and
    // response_version(). They both could be called
    // GetFirstTokenFromFirstline()... but that wouldn't be anywhere near as
    // descriptive.
    return request_method();
  }
879 
request_uri()880   absl::string_view request_uri() const override {
881     QUICHE_DCHECK_GE(whitespace_3_idx_, non_whitespace_2_idx_);
882     return whitespace_3_idx_ == non_whitespace_2_idx_
883                ? ""
884                : absl::string_view(
885                      BeginningOfFirstLine() + non_whitespace_2_idx_,
886                      whitespace_3_idx_ - non_whitespace_2_idx_);
887   }
888 
  // Returns the second token of the first line, which for a response is the
  // status code as text.
  absl::string_view response_code() const override {
    // Note: There is no difference between request_uri() and response_code().
    // They both could be called GetSecondTokenFromFirstline(), but, as noted
    // in an earlier comment, that wouldn't be as descriptive.
    return request_uri();
  }
895 
request_version()896   absl::string_view request_version() const override {
897     QUICHE_DCHECK_GE(whitespace_4_idx_, non_whitespace_3_idx_);
898     return whitespace_4_idx_ == non_whitespace_3_idx_
899                ? ""
900                : absl::string_view(
901                      BeginningOfFirstLine() + non_whitespace_3_idx_,
902                      whitespace_4_idx_ - non_whitespace_3_idx_);
903   }
904 
  // Returns the third token of the first line, which for a response is the
  // reason phrase.
  absl::string_view response_reason_phrase() const override {
    // Note: There is no difference between request_version() and
    // response_reason_phrase(). They both could be called
    // GetThirdTokenFromFirstline(), but, as noted in an earlier comment, that
    // wouldn't be as descriptive.
    return request_version();
  }
912 
  // Sets the request first line from its three components by forwarding
  // |method|, |uri| and |version|, in that order, to
  // SetFirstlineFromStringPieces().
  void SetRequestFirstlineFromStringPieces(absl::string_view method,
                                           absl::string_view uri,
                                           absl::string_view version) {
    SetFirstlineFromStringPieces(method, uri, version);
  }
918 
919   void SetResponseFirstline(absl::string_view version,
920                             size_t parsed_response_code,
921                             absl::string_view reason_phrase);
922 
923   // These functions are exactly the same, except that their names are
924   // different. This is done so that the code using this class is more
925   // expressive.
926   void SetRequestMethod(absl::string_view method) override;
927   void SetResponseVersion(absl::string_view version) override;
928 
929   void SetRequestUri(absl::string_view uri) override;
930   void SetResponseCode(absl::string_view code) override;
  // Overwrites the stored numeric response code only; nothing else is touched
  // by this setter.
  void set_parsed_response_code(size_t parsed_response_code) {
    parsed_response_code_ = parsed_response_code;
  }
934   void SetParsedResponseCodeAndUpdateFirstline(
935       size_t parsed_response_code) override;
936 
937   // These functions are exactly the same, except that their names are
938   // different. This is done so that the code using this class is more
939   // expressive.
940   void SetRequestVersion(absl::string_view version) override;
941   void SetResponseReasonPhrase(absl::string_view reason_phrase) override;
942 
  // Simple accessors to some of the internal state.
  // Returns the stored transfer_encoding_is_chunked_ flag.
  bool transfer_encoding_is_chunked() const {
    return transfer_encoding_is_chunked_;
  }
947 
ResponseCodeImpliesNoBody(size_t code)948   static bool ResponseCodeImpliesNoBody(size_t code) {
949     // From HTTP spec section 6.1.1 all 1xx responses must not have a body,
950     // as well as 204 No Content and 304 Not Modified.
951     return ((code >= 100) && (code <= 199)) || (code == 204) || (code == 304);
952   }
953 
954   // Note: never check this for requests. Nothing bad will happen if you do,
955   // but spec does not allow requests framed by connection close.
956   // TODO(vitaliyl): refactor.
is_framed_by_connection_close()957   bool is_framed_by_connection_close() const {
958     // We declare that response is framed by connection close if it has no
959     // content-length, no transfer encoding, and is allowed to have a body by
960     // the HTTP spec.
961     // parsed_response_code_ is 0 for requests, so ResponseCodeImpliesNoBody
962     // will return false.
963     return (content_length_status_ == BalsaHeadersEnums::NO_CONTENT_LENGTH) &&
964            !transfer_encoding_is_chunked_ &&
965            !ResponseCodeImpliesNoBody(parsed_response_code_);
966   }
967 
  // Returns the stored content_length_ value.
  // NOTE(review): presumably only meaningful when content_length_valid() is
  // true -- confirm against the code that sets it.
  size_t content_length() const override { return content_length_; }
  // Returns the stored content-length status enum value.
  BalsaHeadersEnums::ContentLengthStatus content_length_status() const {
    return content_length_status_;
  }
  // Returns true only when the content-length status is exactly
  // VALID_CONTENT_LENGTH.
  bool content_length_valid() const override {
    return content_length_status_ == BalsaHeadersEnums::VALID_CONTENT_LENGTH;
  }
975 
976   // SetContentLength, SetTransferEncodingToChunkedAndClearContentLength, and
977   // SetNoTransferEncoding modifies the header object to use
978   // content-length and transfer-encoding headers in a consistent
979   // manner. They set all internal flags and status so client can get
980   // a consistent view from various accessors.
981   void SetContentLength(size_t length) override;
982   // Sets transfer-encoding to chunked and updates internal state.
983   void SetTransferEncodingToChunkedAndClearContentLength() override;
984   // Removes transfer-encoding headers and updates internal state.
985   void SetNoTransferEncoding() override;
986 
987   // If you have a response that needs framing by connection close, use this
988   // method instead of RemoveAllOfHeader("Content-Length"). Has no effect if
989   // transfer_encoding_is_chunked().
990   void ClearContentLength();
991 
  // This should be called if balsa headers are created entirely manually (not
  // by any of the framer classes) to make sure that function calls like
  // DumpToString will work correctly.
  // Implemented by telling the underlying buffer that no more writes to its
  // contiguous buffer will happen.
  void WillNotWriteFromFramer() {
    balsa_buffer_.NoMoreWriteToContiguousBuffer();
  }
998 
  // True if DoneWritingFromFramer or WillNotWriteFromFramer is called, i.e.
  // when the underlying buffer reports that its contiguous buffer can no
  // longer be written to.
  bool FramerIsDoneWriting() const {
    return !balsa_buffer_.can_write_to_contiguous_buffer();
  }
1003 
1004   bool IsEmpty() const override;
1005 
1006   // From HeaderApi and ConstHeaderApi.
1007   absl::string_view Authority() const override;
1008   void ReplaceOrAppendAuthority(absl::string_view value) override;
1009   void RemoveAuthority() override;
1010   void ApplyToCookie(
1011       std::function<void(absl::string_view cookie)> f) const override;
1012 
  // Sets the enforce_header_policy_ flag to |enforce|.
  void set_enforce_header_policy(bool enforce) override {
    enforce_header_policy_ = enforce;
  }
1016 
1017   // Removes the last token from the header value. In the presence of multiple
1018   // header lines with given key, will remove the last token of the last line.
1019   // Can be useful if the last encoding has to be removed.
1020   void RemoveLastTokenFromHeaderValue(absl::string_view key);
1021 
1022   // Gets the list of names of headers that are multivalued in Envoy.
1023   static const MultivaluedHeadersSet& multivalued_envoy_headers();
1024 
1025   // Returns true if HTTP responses with this response code have bodies.
1026   static bool ResponseCanHaveBody(int response_code);
1027 
  // Given a header value, populates the tokens list with the beginning and
  // end indices of all tokens present in the value string.
1031   static void ParseTokenList(absl::string_view header_value,
1032                              HeaderTokenList* tokens);
1033 
1034  private:
1035   typedef std::vector<HeaderLineDescription> HeaderLines;
1036 
1037   class iterator_base;
1038 
1039   friend class BalsaFrame;
1040   friend class gfe2::Http2HeaderValidator;
1041   friend class SpdyPayloadFramer;
1042   friend class HTTPMessage;
1043   friend class test::BalsaHeadersTestPeer;
1044 
1045   friend bool ParseHTTPFirstLine(const char* begin, const char* end,
1046                                  bool is_request, BalsaHeaders* headers,
1047                                  BalsaFrameEnums::ErrorCode* error_code);
1048 
1049   // Reverse iterators have been removed for lack of use, refer to
1050   // cl/30618773 in case they are needed.
1051 
  // Returns the pointer that GetPtr() yields for firstline_buffer_base_idx_.
  // The first-line index members are applied as offsets from this pointer.
  const char* BeginningOfFirstLine() const {
    return GetPtr(firstline_buffer_base_idx_);
  }

  // Mutable overload of the above.
  char* BeginningOfFirstLine() { return GetPtr(firstline_buffer_base_idx_); }
1057 
  // Returns balsa_buffer_'s pointer for the block at index |block_idx|.
  char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) {
    return balsa_buffer_.GetPtr(block_idx);
  }

  // Const overload of the above.
  const char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) const {
    return balsa_buffer_.GetPtr(block_idx);
  }
1065 
WriteFromFramer(const char * ptr,size_t size)1066   void WriteFromFramer(const char* ptr, size_t size) {
1067     balsa_buffer_.WriteToContiguousBuffer(absl::string_view(ptr, size));
1068   }
1069 
  // Tells the underlying buffer that no more writes to its contiguous buffer
  // will happen; afterwards FramerIsDoneWriting() is expected to be true.
  void DoneWritingFromFramer() {
    balsa_buffer_.NoMoreWriteToContiguousBuffer();
  }
1073 
  // Returns the start of the underlying buffer's first block.
  const char* OriginalHeaderStreamBegin() const {
    return balsa_buffer_.StartOfFirstBlock();
  }
1077 
  // Returns the end of the underlying buffer's first block.
  const char* OriginalHeaderStreamEnd() const {
    return balsa_buffer_.EndOfFirstBlock();
  }
1081 
  // Returns the number of readable bytes the underlying buffer reports for
  // its first block.
  size_t GetReadableBytesFromHeaderStream() const {
    return balsa_buffer_.GetReadableBytesOfFirstBlock();
  }
1085 
GetReadablePtrFromHeaderStream()1086   absl::string_view GetReadablePtrFromHeaderStream() {
1087     return {OriginalHeaderStreamBegin(), GetReadableBytesFromHeaderStream()};
1088   }
1089 
1090   absl::string_view GetValueFromHeaderLineDescription(
1091       const HeaderLineDescription& line) const;
1092 
1093   void AddAndMakeDescription(absl::string_view key, absl::string_view value,
1094                              HeaderLineDescription* d);
1095 
1096   void AppendAndMakeDescription(absl::string_view key, absl::string_view value,
1097                                 HeaderLineDescription* d);
1098 
1099   // Removes all header lines with the given key starting at start.
1100   void RemoveAllOfHeaderStartingAt(absl::string_view key,
1101                                    HeaderLines::iterator start);
1102 
1103   HeaderLines::const_iterator GetConstHeaderLinesIterator(
1104       absl::string_view key) const;
1105 
1106   HeaderLines::iterator GetHeaderLinesIterator(absl::string_view key,
1107                                                HeaderLines::iterator start);
1108 
1109   HeaderLines::iterator GetHeaderLinesIteratorForLastMultivaluedHeader(
1110       absl::string_view key);
1111 
1112   template <typename IteratorType>
1113   const IteratorType HeaderLinesBeginHelper() const;
1114 
1115   template <typename IteratorType>
1116   const IteratorType HeaderLinesEndHelper() const;
1117 
1118   // Helper function for HeaderHasValue and HeaderHasValueIgnoreCase that
1119   // does most of the work.
1120   bool HeaderHasValueHelper(absl::string_view key, absl::string_view value,
1121                             bool case_sensitive) const;
1122 
1123   // Called by header removal methods to reset internal values for transfer
1124   // encoding or content length if we're removing the corresponding headers.
1125   void MaybeClearSpecialHeaderValues(absl::string_view key);
1126 
1127   void SetFirstlineFromStringPieces(absl::string_view firstline_a,
1128                                     absl::string_view firstline_b,
1129                                     absl::string_view firstline_c);
1130   BalsaBuffer balsa_buffer_;
1131 
1132   size_t content_length_;
1133   BalsaHeadersEnums::ContentLengthStatus content_length_status_;
1134   size_t parsed_response_code_;
1135   // HTTP firstlines all have the following structure:
1136   //  LWS         NONWS  LWS    NONWS   LWS    NONWS   NOTCRLF  CRLF
1137   //  [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
1138   //  ws1        nws1    ws2    nws2    ws3    nws3             ws4
1139   //  |          [-------)      [-------)      [----------------)
1140   //    REQ:     method         request_uri    version
1141   //   RESP:     version        statuscode     reason
1142   //
1143   //   The first NONWS->LWS component we'll call firstline_a.
1144   //   The second firstline_b, and the third firstline_c.
1145   //
1146   //   firstline_a goes from nws1 to (but not including) ws2
1147   //   firstline_b goes from nws2 to (but not including) ws3
1148   //   firstline_c goes from nws3 to (but not including) ws4
1149   //
1150   // In the code:
1151   //    ws1 == whitespace_1_idx_
1152   //   nws1 == non_whitespace_1_idx_
1153   //    ws2 == whitespace_2_idx_
1154   //   nws2 == non_whitespace_2_idx_
1155   //    ws3 == whitespace_3_idx_
1156   //   nws3 == non_whitespace_3_idx_
1157   //    ws4 == whitespace_4_idx_
1158   BalsaBuffer::Blocks::size_type firstline_buffer_base_idx_;
1159   size_t whitespace_1_idx_;
1160   size_t non_whitespace_1_idx_;
1161   size_t whitespace_2_idx_;
1162   size_t non_whitespace_2_idx_;
1163   size_t whitespace_3_idx_;
1164   size_t non_whitespace_3_idx_;
1165   size_t whitespace_4_idx_;
1166 
1167   bool transfer_encoding_is_chunked_;
1168 
1169   // If true, QUICHE_BUG if a header that starts with an invalid prefix is
1170   // explicitly set.
1171   bool enforce_header_policy_ = true;
1172 
1173   HeaderLines header_lines_;
1174 };
1175 
1176 // Base class for iterating the headers in a BalsaHeaders object, returning a
1177 // pair of string_view's for each header.
1178 class QUICHE_EXPORT BalsaHeaders::iterator_base
1179     : public std::iterator<std::forward_iterator_tag,
1180                            std::pair<absl::string_view, absl::string_view>> {
1181  public:
iterator_base()1182   iterator_base() : headers_(nullptr), idx_(0) {}
1183 
1184   std::pair<absl::string_view, absl::string_view>& operator*() const {
1185     return Lookup(idx_);
1186   }
1187 
1188   std::pair<absl::string_view, absl::string_view>* operator->() const {
1189     return &(this->operator*());
1190   }
1191 
1192   bool operator==(const BalsaHeaders::iterator_base& it) const {
1193     return idx_ == it.idx_;
1194   }
1195 
1196   bool operator<(const BalsaHeaders::iterator_base& it) const {
1197     return idx_ < it.idx_;
1198   }
1199 
1200   bool operator<=(const BalsaHeaders::iterator_base& it) const {
1201     return idx_ <= it.idx_;
1202   }
1203 
1204   bool operator!=(const BalsaHeaders::iterator_base& it) const {
1205     return !(*this == it);
1206   }
1207 
1208   bool operator>(const BalsaHeaders::iterator_base& it) const {
1209     return it < *this;
1210   }
1211 
1212   bool operator>=(const BalsaHeaders::iterator_base& it) const {
1213     return it <= *this;
1214   }
1215 
1216   // This mainly exists so that we can have interesting output for
1217   // unittesting. The EXPECT_EQ, EXPECT_NE functions require that
1218   // operator<< work for the classes it sees.  It would be better if there
1219   // was an additional traits-like system for the gUnit output... but oh
1220   // well.
1221   friend QUICHE_EXPORT std::ostream& operator<<(std::ostream& os,
1222                                                 const iterator_base& it) {
1223     os << "[" << it.headers_ << ", " << it.idx_ << "]";
1224     return os;
1225   }
1226 
1227  private:
1228   friend class BalsaHeaders;
1229 
iterator_base(const BalsaHeaders * headers,HeaderLines::size_type index)1230   iterator_base(const BalsaHeaders* headers, HeaderLines::size_type index)
1231       : headers_(headers), idx_(index) {}
1232 
increment()1233   void increment() {
1234     value_.reset();
1235     const HeaderLines& header_lines = headers_->header_lines_;
1236     const HeaderLines::size_type header_lines_size = header_lines.size();
1237     const HeaderLines::size_type original_idx = idx_;
1238     do {
1239       ++idx_;
1240     } while (idx_ < header_lines_size && header_lines[idx_].skip == true);
1241     // The condition below exists so that ++(end() - 1) == end(), even
1242     // if there are only 'skip == true' elements between the end() iterator
1243     // and the end of the vector of HeaderLineDescriptions.
1244     if (idx_ == header_lines_size) {
1245       idx_ = original_idx + 1;
1246     }
1247   }
1248 
Lookup(HeaderLines::size_type index)1249   std::pair<absl::string_view, absl::string_view>& Lookup(
1250       HeaderLines::size_type index) const {
1251     QUICHE_DCHECK_LT(index, headers_->header_lines_.size());
1252     if (!value_.has_value()) {
1253       const HeaderLineDescription& line = headers_->header_lines_[index];
1254       const char* stream_begin = headers_->GetPtr(line.buffer_base_idx);
1255       value_ =
1256           std::make_pair(absl::string_view(stream_begin + line.first_char_idx,
1257                                            line.KeyLength()),
1258                          absl::string_view(stream_begin + line.value_begin_idx,
1259                                            line.ValuesLength()));
1260     }
1261     return value_.value();
1262   }
1263 
1264   const BalsaHeaders* headers_;
1265   HeaderLines::size_type idx_;
1266   mutable absl::optional<std::pair<absl::string_view, absl::string_view>>
1267       value_;
1268 };
1269 
1270 // A const iterator for all the header lines.
1271 class QUICHE_EXPORT BalsaHeaders::const_header_lines_iterator
1272     : public BalsaHeaders::iterator_base {
1273  public:
const_header_lines_iterator()1274   const_header_lines_iterator() : iterator_base() {}
1275 
1276   const_header_lines_iterator& operator++() {
1277     iterator_base::increment();
1278     return *this;
1279   }
1280 
1281  private:
1282   friend class BalsaHeaders;
1283 
const_header_lines_iterator(const BalsaHeaders * headers,HeaderLines::size_type index)1284   const_header_lines_iterator(const BalsaHeaders* headers,
1285                               HeaderLines::size_type index)
1286       : iterator_base(headers, index) {}
1287 };
1288 
1289 // A const iterator that stops only on header lines for a particular key.
1290 class QUICHE_EXPORT BalsaHeaders::const_header_lines_key_iterator
1291     : public BalsaHeaders::iterator_base {
1292  public:
1293   const_header_lines_key_iterator& operator++() {
1294     do {
1295       iterator_base::increment();
1296     } while (!AtEnd() && !absl::EqualsIgnoreCase(key_, (**this).first));
1297     return *this;
1298   }
1299 
1300   // Only forward-iteration makes sense, so no operator-- defined.
1301 
1302  private:
1303   friend class BalsaHeaders;
1304 
const_header_lines_key_iterator(const BalsaHeaders * headers,HeaderLines::size_type index,absl::string_view key)1305   const_header_lines_key_iterator(const BalsaHeaders* headers,
1306                                   HeaderLines::size_type index,
1307                                   absl::string_view key)
1308       : iterator_base(headers, index), key_(key) {}
1309 
1310   // Should only be used for creating an end iterator.
const_header_lines_key_iterator(const BalsaHeaders * headers,HeaderLines::size_type index)1311   const_header_lines_key_iterator(const BalsaHeaders* headers,
1312                                   HeaderLines::size_type index)
1313       : iterator_base(headers, index) {}
1314 
AtEnd()1315   bool AtEnd() const { return *this >= headers_->lines().end(); }
1316 
1317   absl::string_view key_;
1318 };
1319 
1320 inline BalsaHeaders::iterator_range<BalsaHeaders::const_header_lines_iterator>
lines()1321 BalsaHeaders::lines() const {
1322   return {HeaderLinesBeginHelper<const_header_lines_iterator>(),
1323           HeaderLinesEndHelper<const_header_lines_iterator>()};
1324 }
1325 
1326 inline BalsaHeaders::iterator_range<
1327     BalsaHeaders::const_header_lines_key_iterator>
lines(absl::string_view key)1328 BalsaHeaders::lines(absl::string_view key) const {
1329   return {GetIteratorForKey(key), header_lines_key_end()};
1330 }
1331 
// Returns the end iterator used for per-key iteration, as produced by
// HeaderLinesEndHelper().
inline BalsaHeaders::const_header_lines_key_iterator
BalsaHeaders::header_lines_key_end() const {
  return HeaderLinesEndHelper<const_header_lines_key_iterator>();
}
1336 
erase(const const_header_lines_iterator & it)1337 inline void BalsaHeaders::erase(const const_header_lines_iterator& it) {
1338   QUICHE_DCHECK_EQ(it.headers_, this);
1339   QUICHE_DCHECK_LT(it.idx_, header_lines_.size());
1340   header_lines_[it.idx_].skip = true;
1341 }
1342 
1343 template <typename Buffer>
WriteToBuffer(Buffer * buffer,CaseOption case_option,CoalesceOption coalesce_option)1344 void BalsaHeaders::WriteToBuffer(Buffer* buffer, CaseOption case_option,
1345                                  CoalesceOption coalesce_option) const {
1346   // write the first line.
1347   const absl::string_view firstline = first_line();
1348   if (!firstline.empty()) {
1349     buffer->WriteString(firstline);
1350   }
1351   buffer->WriteString("\r\n");
1352   if (coalesce_option != CoalesceOption::kCoalesce) {
1353     const HeaderLines::size_type end = header_lines_.size();
1354     for (HeaderLines::size_type i = 0; i < end; ++i) {
1355       const HeaderLineDescription& line = header_lines_[i];
1356       if (line.skip) {
1357         continue;
1358       }
1359       const char* line_ptr = GetPtr(line.buffer_base_idx);
1360       WriteHeaderLineToBuffer(
1361           buffer,
1362           absl::string_view(line_ptr + line.first_char_idx, line.KeyLength()),
1363           absl::string_view(line_ptr + line.value_begin_idx,
1364                             line.ValuesLength()),
1365           case_option);
1366     }
1367   } else {
1368     WriteToBufferCoalescingMultivaluedHeaders(
1369         buffer, multivalued_envoy_headers(), case_option);
1370   }
1371 }
1372 
GetValuesOfMultivaluedHeaders(const MultivaluedHeadersSet & multivalued_headers,MultivaluedHeadersValuesMap * multivalues)1373 inline void BalsaHeaders::GetValuesOfMultivaluedHeaders(
1374     const MultivaluedHeadersSet& multivalued_headers,
1375     MultivaluedHeadersValuesMap* multivalues) const {
1376   multivalues->reserve(header_lines_.capacity());
1377 
1378   // Find lines that need to be coalesced and store them in |multivalues|.
1379   for (const auto& line : header_lines_) {
1380     if (line.skip) {
1381       continue;
1382     }
1383     const char* line_ptr = GetPtr(line.buffer_base_idx);
1384     absl::string_view header_key =
1385         absl::string_view(line_ptr + line.first_char_idx, line.KeyLength());
1386     // If this is multivalued header, it may need to be coalesced.
1387     if (multivalued_headers.contains(header_key)) {
1388       absl::string_view header_value = absl::string_view(
1389           line_ptr + line.value_begin_idx, line.ValuesLength());
1390       // Add |header_value| to the vector of values for this |header_key|,
1391       // therefore preserving the order of values for the same key.
1392       (*multivalues)[header_key].push_back(header_value);
1393     }
1394   }
1395 }
1396 
1397 template <typename Buffer>
WriteToBufferCoalescingMultivaluedHeaders(Buffer * buffer,const MultivaluedHeadersSet & multivalued_headers,CaseOption case_option)1398 void BalsaHeaders::WriteToBufferCoalescingMultivaluedHeaders(
1399     Buffer* buffer, const MultivaluedHeadersSet& multivalued_headers,
1400     CaseOption case_option) const {
1401   MultivaluedHeadersValuesMap multivalues;
1402   GetValuesOfMultivaluedHeaders(multivalued_headers, &multivalues);
1403 
1404   // Write out header lines while coalescing those that need to be coalesced.
1405   for (const auto& line : header_lines_) {
1406     if (line.skip) {
1407       continue;
1408     }
1409     const char* line_ptr = GetPtr(line.buffer_base_idx);
1410     absl::string_view header_key =
1411         absl::string_view(line_ptr + line.first_char_idx, line.KeyLength());
1412     auto header_multivalue = multivalues.find(header_key);
1413     // If current line doesn't need to be coalesced (as it is either not
1414     // multivalue, or has just a single value so it equals to current line),
1415     // then just write it out.
1416     if (header_multivalue == multivalues.end() ||
1417         header_multivalue->second.size() == 1) {
1418       WriteHeaderLineToBuffer(buffer, header_key,
1419                               absl::string_view(line_ptr + line.value_begin_idx,
1420                                                 line.ValuesLength()),
1421                               case_option);
1422     } else {
1423       // If this line needs to be coalesced, then write all its values and clear
1424       // them, so the subsequent same header keys will not be written.
1425       if (!header_multivalue->second.empty()) {
1426         WriteHeaderLineValuesToBuffer(buffer, header_key,
1427                                       header_multivalue->second, case_option);
1428         // Clear the multivalue list as it is already written out, so subsequent
1429         // same header keys will not be written.
1430         header_multivalue->second.clear();
1431       }
1432     }
1433   }
1434 }
1435 
1436 template <typename IteratorType>
HeaderLinesBeginHelper()1437 const IteratorType BalsaHeaders::HeaderLinesBeginHelper() const {
1438   if (header_lines_.empty()) {
1439     return IteratorType(this, 0);
1440   }
1441   const HeaderLines::size_type header_lines_size = header_lines_.size();
1442   for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
1443     if (header_lines_[i].skip == false) {
1444       return IteratorType(this, i);
1445     }
1446   }
1447   return IteratorType(this, 0);
1448 }
1449 
1450 template <typename IteratorType>
HeaderLinesEndHelper()1451 const IteratorType BalsaHeaders::HeaderLinesEndHelper() const {
1452   if (header_lines_.empty()) {
1453     return IteratorType(this, 0);
1454   }
1455   const HeaderLines::size_type header_lines_size = header_lines_.size();
1456   HeaderLines::size_type i = header_lines_size;
1457   do {
1458     --i;
1459     if (header_lines_[i].skip == false) {
1460       return IteratorType(this, i + 1);
1461     }
1462   } while (i != 0);
1463   return IteratorType(this, 0);
1464 }
1465 
1466 }  // namespace quiche
1467 
1468 #endif  // QUICHE_BALSA_BALSA_HEADERS_H_
1469