• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
16 #define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
17 
18 #include <cassert>
19 
20 #include "absl/base/config.h"
21 #include "absl/strings/internal/cord_internal.h"
22 #include "absl/strings/internal/cord_rep_btree.h"
23 #include "absl/strings/internal/cord_rep_btree_navigator.h"
24 #include "absl/strings/internal/cord_rep_flat.h"
25 
26 namespace absl {
27 ABSL_NAMESPACE_BEGIN
28 namespace cord_internal {
29 
30 // CordRepBtreeReader implements logic to iterate over cord btrees.
31 // References to the underlying data are returned as absl::string_view values.
32 // The most typical use case is a forward only iteration over tree data.
33 // The class also provides `Skip()`, `Seek()` and `Read()` methods similar to
34 // CordRepBtreeNavigator that allow more advanced navigation. The class provides
35 // a `consumed` property which contains the end offset of the chunk last
36 // returned to the user which is useful in cord iteration logic.
37 //
38 // Example: iterate over all data inside a cord btree:
39 //
40 //   CordRepBtreeReader reader;
41 //   for (string_view sv = reader.Init(tree); !sv.Empty(); sv = sv.Next()) {
42 //     DoSomethingWithDataIn(sv);
43 //   }
44 //
45 // All navigation methods always return the next 'chunk' of data. The class
46 // assumes that all data is directly 'consumed' by the caller. For example:
47 // invoking `Skip()` will skip the desired number of bytes, and directly
48 // read and return the next chunk of data directly after the skipped bytes.
49 //
50 // Example: iterate over all data inside a btree skipping the first 100 bytes:
51 //
52 //   CordRepBtreeReader reader;
53 //   absl::string_view sv = reader.Init(tree);
54 //   if (sv.length() > 100) {
55 //     sv.RemovePrefix(100);
56 //   } else {
57 //     sv = reader.Skip(100 - sv.length());
58 //   }
59 //   while (!sv.empty()) {
60 //     DoSomethingWithDataIn(sv);
61 //     absl::string_view sv = reader.Next();
62 //   }
63 //
64 // It is important to notice that `consumed` represents the end position of the
65 // last data edge returned to the caller, not the cumulative data returned to
66 // the caller which can be less in cases of skipping or seeking over data.
67 //
68 // For example, consider a cord btree with five data edges: "abc", "def", "ghi",
69 // "jkl" and "mno":
70 //
71 //   absl::string_view sv;
72 //   CordRepBtreeReader reader;
73 //
74 //   sv = reader.Init(tree); // sv = "abc", reader.consumed() = 3
75 //   sv = reader.Skip(4);    // sv = "hi",  reader.consumed() = 9
76 //   sv = reader.Skip(2);    // sv = "l",   reader.consumed() = 12
77 //   sv = reader.Next();     // sv = "mno", reader.consumed() = 15
78 //
79 // In the above example, `reader.consumed()` reflects the data edges iterated
80 // over or skipped by the reader, not the amount of data 'consumed' by the
81 // caller.
82 class CordRepBtreeReader {
83  public:
84   using ReadResult = CordRepBtreeNavigator::ReadResult;
85   using Position = CordRepBtreeNavigator::Position;
86 
87   // Returns true if this instance is not empty.
88   explicit operator bool() const { return navigator_.btree() != nullptr; }
89 
90   // Returns the tree referenced by this instance or nullptr if empty.
btree()91   CordRepBtree* btree() const { return navigator_.btree(); }
92 
93   // Returns the current data edge inside the referenced btree.
94   // Requires that the current instance is not empty.
node()95   CordRep* node() const { return navigator_.Current(); }
96 
97   // Returns the length of the referenced tree.
98   // Requires that the current instance is not empty.
99   size_t length() const;
100 
101   // Returns the end offset of the last navigated to chunk, which represents the
102   // total bytes 'consumed' relative to the start of the tree. The returned
103   // value is never zero. For example, initializing a reader with a tree with a
104   // first data edge of 19 bytes will return `consumed() = 19`. See also the
105   // class comments on the meaning of `consumed`.
106   // Requires that the current instance is not empty.
107   size_t consumed() const;
108 
109   // Resets this instance to an empty value.
Reset()110   void Reset() { navigator_.Reset(); }
111 
112   // Initializes this instance with `tree`. `tree` must not be null.
113   // Returns a reference to the first data edge of the provided tree.
114   absl::string_view Init(CordRepBtree* tree);
115 
116   // Navigates to and returns the next data edge of the referenced tree.
117   // Returns an empty string_view if an attempt is made to read beyond the end
118   // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition.
119   // Requires that the current instance is not empty.
120   absl::string_view Next();
121 
122   // Skips the provided amount of bytes and returns a reference to the data
123   // directly following the skipped bytes.
124   absl::string_view Skip(size_t skip);
125 
126   // Reads `n` bytes into `tree`.
127   // If `chunk_size` is zero, starts reading at the next data edge. If
128   // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of
129   // the last returned data edge. Effectively, this means that the read starts
130   // at offset `consumed() - chunk_size`.
131   // Requires that `chunk_size` is less than or equal to the length of the
132   // last returned data edge. The purpose of `chunk_size` is to simplify code
133   // partially consuming a returned chunk and wanting to include the remaining
134   // bytes in the Read call. For example, the below code will read 1000 bytes of
135   // data into a cord tree if the first chunk starts with "big:":
136   //
137   //   CordRepBtreeReader reader;
138   //   absl::string_view sv = reader.Init(tree);
139   //   if (absl::StartsWith(sv, "big:")) {
140   //     CordRepBtree tree;
141   //     sv = reader.Read(1000, sv.size() - 4 /* "big:" */, &tree);
142   //   }
143   //
144   // This method will return an empty string view if all remaining data was
145   // read. If `n` exceeded the amount of remaining data this function will
146   // return an empty string view and `tree` will be set to nullptr.
147   // In both cases, `consumed` will be set to `length`.
148   absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree);
149 
150   // Navigates to the chunk at offset `offset`.
151   // Returns a reference into the navigated to chunk, adjusted for the relative
152   // position of `offset` into that chunk. For example, calling `Seek(13)` on a
153   // cord tree containing 2 chunks of 10 and 20 bytes respectively will return
154   // a string view into the second chunk starting at offset 3 with a size of 17.
155   // Returns an empty string view if `offset` is equal to or greater than the
156   // length of the referenced tree.
157   absl::string_view Seek(size_t offset);
158 
159  private:
160   size_t consumed_;
161   CordRepBtreeNavigator navigator_;
162 };
163 
length()164 inline size_t CordRepBtreeReader::length() const {
165   assert(btree() != nullptr);
166   return btree()->length;
167 }
168 
consumed()169 inline size_t CordRepBtreeReader::consumed() const {
170   assert(btree() != nullptr);
171   return consumed_;
172 }
173 
Init(CordRepBtree * tree)174 inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) {
175   assert(tree != nullptr);
176   const CordRep* edge = navigator_.InitFirst(tree);
177   consumed_ = edge->length;
178   return CordRepBtree::EdgeData(edge);
179 }
180 
Next()181 inline absl::string_view CordRepBtreeReader::Next() {
182   assert(consumed() < length());
183   const CordRep* edge = navigator_.Next();
184   assert(edge != nullptr);
185   consumed_ += edge->length;
186   return CordRepBtree::EdgeData(edge);
187 }
188 
Skip(size_t skip)189 inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
190   // As we are always positioned on the last 'consumed' edge, we
191   // need to skip the current edge as well as `skip`.
192   const size_t edge_length = navigator_.Current()->length;
193   CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length);
194   if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) {
195     consumed_ = length();
196     return {};
197   }
198   // The combined length of all edges skipped before `pos.edge` is `skip -
199   // pos.offset`, all of which are 'consumed', as well as the current edge.
200   consumed_ += skip - pos.offset + pos.edge->length;
201   return CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
202 }
203 
Seek(size_t offset)204 inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
205   const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset);
206   if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) {
207     consumed_ = length();
208     return {};
209   }
210   absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
211   consumed_ = offset + chunk.length();
212   return chunk;
213 }
214 
215 }  // namespace cord_internal
216 ABSL_NAMESPACE_END
217 }  // namespace absl
218 
219 #endif  // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
220