1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
16 #define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
17
18 #include <cassert>
19 #include <cstdint>
20 #include <iosfwd>
21
22 #include "absl/base/config.h"
23 #include "absl/base/internal/raw_logging.h"
24 #include "absl/base/optimization.h"
25 #include "absl/strings/internal/cord_data_edge.h"
26 #include "absl/strings/internal/cord_internal.h"
27 #include "absl/strings/internal/cord_rep_flat.h"
28 #include "absl/strings/string_view.h"
29 #include "absl/types/span.h"
30
31 namespace absl {
32 ABSL_NAMESPACE_BEGIN
33 namespace cord_internal {
34
35 // `SetCordBtreeExhaustiveValidation()` can be set to force exhaustive
36 // validation in debug assertions, and code that calls `IsValid()`
37 // explicitly. By default, assertions should be relatively cheap and
38 // AssertValid() can easily lead to O(n^2) complexity as recursive / full tree
39 // validation is O(n).
40 void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation);
41 bool IsCordBtreeExhaustiveValidationEnabled();
42
43 class CordRepBtreeNavigator;
44
45 // CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
46 // Data is stored at the leaf level only, non leaf nodes contain down pointers
47 // only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
48 // or EXTERNAL nodes. The implementation allows for data to be added to either
49 // end of the tree only, it does not provide any 'insert' logic. This has the
50 // benefit that we can expect good fill ratios: all nodes except the outer
51 // 'legs' will have 100% fill ratios for trees built using Append/Prepend
52 // methods. Merged trees will typically have a fill ratio well above 50% as in a
53 // similar fashion, one side of the merged tree will typically have a 100% fill
54 // ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
55 // better, and the tree never needs balancing.
56 //
57 // All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
58 // input unless explicitly stated otherwise. All functions returning a CordRep*
59 // or CordRepBtree* instance transfer a reference back to the caller.
60 // Simplified, callers both 'donate' and 'consume' a reference count on each
61 // call, simplifying the API. An example of building a tree:
62 //
63 // CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
64 // tree = CordRepBtree::Append(tree, MakeFlat("world"));
65 //
66 // In the above example, all inputs are consumed, making each call affecting
67 // `tree` reference count neutral. The returned `tree` value can be different
68 // from the input if the input is shared with other threads, or if the tree
69 // grows in height, but callers typically never have to concern themselves with
70 // that and trust that all methods DTRT at all times.
71 class CordRepBtree : public CordRep {
72 public:
73 // EdgeType identifies `front` and `back` enum values.
74 // Various implementations in CordRepBtree such as `Add` and `Edge` are
75 // generic and templated on operating on either of the boundary edges.
76 // For more information on the possible edges contained in a CordRepBtree
77 // instance see the documentation for `edges_`.
78 enum class EdgeType { kFront, kBack };
79
80 // Convenience constants into `EdgeType`
81 static constexpr EdgeType kFront = EdgeType::kFront;
82 static constexpr EdgeType kBack = EdgeType::kBack;
83
84 // Maximum number of edges: based on experiments and performance data, we can
85 // pick suitable values resulting in optimum cacheline aligned values. The
86 // preferred values are based on 64-bit systems where we aim to align this
87 // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
88 // TODO(b/192061034): experiment with alternative sizes.
89 static constexpr size_t kMaxCapacity = 6;
90
91 // Reasonable maximum height of the btree. We can expect a fill ratio of at
92 // least 50%: trees are always expanded at the front or back. Concatenating
93 // trees will then typically fold at the top most node, where the lower nodes
94 // are at least at capacity on one side of joined inputs. At a lower fill
95 // rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
96 // We will fail / abort if an application ever exceeds this height, which
97 // should be extremely rare (near impossible) and be an indication of an
98 // application error: we do not assume it reasonable for any application to
99 // operate correctly with such monster trees.
100 // Another compelling reason for the number `12` is that any contextual stack
101 // required for navigation or insertion requires 12 words and 12 bytes, which
102 // fits inside 2 cache lines with some room to spare, and is reasonable as a
103 // local stack variable compared to Cord's current near 400 bytes stack use.
104 // The maximum `height` value of a node is then `kMaxDepth - 1` as node height
105 // values start with a value of 0 for leaf nodes.
106 static constexpr size_t kMaxDepth = 12;
107 // See comments on height() for why this is an int and not a size_t.
108 static constexpr int kMaxHeight = static_cast<int>(kMaxDepth - 1);
109
110 // `Action` defines the action for unwinding changes done at the btree's leaf
111 // level that need to be propagated up to the parent node(s). Each operation
112 // on a node has an effect / action defined as follows:
113 // - kSelf
114 // The operation (add / update, etc) was performed directly on the node as
115 // the node is private to the current thread (i.e.: not shared directly or
116 // indirectly through a refcount > 1). Changes can be propagated directly to
117 // all parent nodes as all parent nodes are also then private to the current
118 // thread.
119 // - kCopied
120 // The operation (add / update, etc) was performed on a copy of the original
121 // node, as the node is (potentially) directly or indirectly shared with
122 // other threads. Changes need to be propagated into the parent nodes where
123 // the old down pointer must be unreffed and replaced with this new copy.
124 // Such changes to parent nodes may themselves require a copy if the parent
125 // node is also shared. A kCopied action can propagate all the way to the
126 // top node where we then must unref the `tree` input provided by the
127 // caller, and return the new copy.
128 // - kPopped
129 // The operation (typically add) could not be satisfied due to insufficient
130 // capacity in the targeted node, and a new 'leg' was created that needs to
131 // be added into the parent node. For example, adding a FLAT inside a leaf
132 // node that is at capacity will create a new leaf node containing that
133 // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
134 // cascade up the tree if parent nodes are also at capacity. A 'Popped'
135 // action propagating all the way to the top of the tree will result in
136 // the tree becoming one level higher than the current tree through a final
137 // `CordRepBtree::New(tree, popped)` call, resulting in a new top node
138 // referencing the old tree and the new (fully popped upwards) 'leg'.
139 enum Action { kSelf, kCopied, kPopped };
140
141 // Result of an operation on a node. See the `Action` enum for details.
142 struct OpResult {
143 CordRepBtree* tree;
144 Action action;
145 };
146
147 // Return value of the CopyPrefix and CopySuffix methods which can
148 // return a node or data edge at any height inside the tree.
149 // A height of 0 defines the lowest (leaf) node, a height of -1 identifies
150 // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
151 struct CopyResult {
152 CordRep* edge;
153 int height;
154 };
155
156 // Logical position inside a node:
157 // - index: index of the edge.
158 // - n: size or offset value depending on context.
159 struct Position {
160 size_t index;
161 size_t n;
162 };
163
164 // Creates a btree from the given input. Adopts a ref of `rep`.
165 // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
166 // function immediately returns `rep->btree()`. If the input is a valid data
167 // edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
168 // as the sole data edge. Else, the input is assumed to be a (legacy) concat
169 // tree, and the input is consumed and transformed into a btree().
170 static CordRepBtree* Create(CordRep* rep);
171
172 // Destroys the provided tree. Should only be called by cord internal API's,
173 // typically after a ref_count.Decrement() on the last reference count.
174 static void Destroy(CordRepBtree* tree);
175
176 // Destruction
Delete(CordRepBtree * tree)177 static void Delete(CordRepBtree* tree) { delete tree; }
178
179 // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>.
180 using CordRep::Unref;
181
182 // Unrefs all edges in `edges` which are assumed to be 'likely one'.
183 static void Unref(absl::Span<CordRep* const> edges);
184
185 // Appends / Prepends an existing CordRep instance to this tree.
186 // The below methods accept three types of input:
187 // 1) `rep` is a data node (See `IsDataNode` for valid data edges).
188 // `rep` is appended or prepended to this tree 'as is'.
189 // 2) `rep` is a BTREE.
190 // `rep` is merged into `tree` respecting the Append/Prepend order.
191 // 3) `rep` is some other (legacy) type.
192 // `rep` is converted in place and added to `tree`
193 // Requires `tree` and `rep` to be not null.
194 static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
195 static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
196
197 // Append/Prepend the data in `data` to this tree.
198 // The `extra` parameter defines how much extra capacity should be allocated
199 // for any additional FLAT being allocated. This is an optimization hint from
200 // the caller. For example, a caller may need to add 2 string_views of data
201 // "abc" and "defghi" which are not consecutive. The caller can in this case
202 // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
203 // where possible with at least 6 bytes of extra capacity beyond `length`.
204 // This helps avoiding data getting fragmented over multiple flats.
205 // There is no limit on the size of `data`. If `data` can not be stored inside
206 // a single flat, then the function will iteratively add flats until all data
207 // has been consumed and appended or prepended to the tree.
208 static CordRepBtree* Append(CordRepBtree* tree, string_view data,
209 size_t extra = 0);
210 static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
211 size_t extra = 0);
212
213 // Returns a new tree, containing `n` bytes of data from this instance
214 // starting at offset `offset`. Where possible, the returned tree shares
215 // (re-uses) data edges and nodes with this instance to minimize the
216 // combined memory footprint of both trees.
217 // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
218 CordRep* SubTree(size_t offset, size_t n);
219
220 // Removes `n` trailing bytes from `tree`, and returns the resulting tree
221 // or data edge. Returns `tree` if n is zero, and nullptr if n == length.
222 // This function is logically identical to:
223 // result = tree->SubTree(0, tree->length - n);
224 // Unref(tree);
225 // return result;
226 // However, the actual implementation will as much as possible perform 'in
227 // place' modifications on the tree on all nodes and edges that are mutable.
228 // For example, in a fully privately owned tree with the last edge being a
229 // flat of length 12, RemoveSuffix(1) will simply set the length of that data
230 // edge to 11, and reduce the length of all nodes on the edge path by 1.
231 static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n);
232
233 // Returns the character at the given offset.
234 char GetCharacter(size_t offset) const;
235
236 // Returns true if this node holds a single data edge, and if so, sets
237 // `fragment` to reference the contained data. `fragment` is an optional
238 // output parameter and allowed to be null.
239 bool IsFlat(absl::string_view* fragment) const;
240
241 // Returns true if the data of `n` bytes starting at offset `offset`
242 // is contained in a single data edge, and if so, sets fragment to reference
243 // the contained data. `fragment` is an optional output parameter and allowed
244 // to be null.
245 bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
246
247 // Returns a span (mutable range of bytes) of up to `size` bytes into the
248 // last FLAT data edge inside this tree under the following conditions:
249 // - none of the nodes down into the FLAT node are shared.
250 // - the last data edge in this tree is a non-shared FLAT.
251 // - the referenced FLAT has additional capacity available.
252 // If all these conditions are met, a non-empty span is returned, and the
253 // length of the flat node and involved tree nodes have been increased by
254 // `span.length()`. The caller is responsible for immediately assigning values
255 // to all uninitialized data reference by the returned span.
256 // Requires `this->refcount.IsOne()`: this function forces the caller to do
257 // this fast path check on the top level node, as this is the most commonly
258 // shared node of a cord tree.
259 Span<char> GetAppendBuffer(size_t size);
260
261 // Extracts the right-most data edge from this tree iff:
262 // - the tree and all internal edges to the right-most node are not shared.
263 // - the right-most node is a FLAT node and not shared.
264 // - the right-most node has at least the desired extra capacity.
265 //
266 // Returns {tree, nullptr} if any of the above conditions are not met.
267 // This method effectively removes data from the tree. The intent of this
268 // method is to allow applications appending small string data to use
269 // pre-existing capacity, and add the modified rep back to the tree.
270 //
271 // Simplified such code would look similar to this:
272 // void MyTreeBuilder::Append(string_view data) {
273 // ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1);
274 // if (CordRep* rep = result.extracted) {
275 // size_t available = rep->Capacity() - rep->length;
276 // size_t n = std::min(data.size(), n);
277 // memcpy(rep->Data(), data.data(), n);
278 // rep->length += n;
279 // data.remove_prefix(n);
280 // if (!result.tree->IsBtree()) {
281 // tree_ = CordRepBtree::Create(result.tree);
282 // }
283 // tree_ = CordRepBtree::Append(tree_, rep);
284 // }
285 // ...
286 // // Remaining edge in `result.tree`.
287 // }
288 static ExtractResult ExtractAppendBuffer(CordRepBtree* tree,
289 size_t extra_capacity = 1);
290
291 // Returns the `height` of the tree. The height of a tree is limited to
292 // kMaxHeight. `height` is implemented as an `int` as in some places we
293 // use negative (-1) values for 'data edges'.
height()294 int height() const { return static_cast<int>(storage[0]); }
295
296 // Properties: begin, back, end, front/back boundary indexes.
begin()297 size_t begin() const { return static_cast<size_t>(storage[1]); }
back()298 size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
end()299 size_t end() const { return static_cast<size_t>(storage[2]); }
index(EdgeType edge)300 size_t index(EdgeType edge) const {
301 return edge == kFront ? begin() : back();
302 }
303
304 // Properties: size and capacity.
305 // `capacity` contains the current capacity of this instance, where
306 // `kMaxCapacity` contains the maximum capacity of a btree node.
307 // For now, `capacity` and `kMaxCapacity` return the same value, but this may
308 // change in the future if we see benefit in dynamically sizing 'small' nodes
309 // to 'large' nodes for large data trees.
size()310 size_t size() const { return end() - begin(); }
capacity()311 size_t capacity() const { return kMaxCapacity; }
312
313 // Edge access
314 inline CordRep* Edge(size_t index) const;
315 inline CordRep* Edge(EdgeType edge_type) const;
316 inline absl::Span<CordRep* const> Edges() const;
317 inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
318
319 // Returns reference to the data edge at `index`.
320 // Requires this instance to be a leaf node, and `index` to be valid index.
321 inline absl::string_view Data(size_t index) const;
322
323 // Diagnostics: returns true if `tree` is valid and internally consistent.
324 // If `shallow` is false, then the provided top level node and all child nodes
325 // below it are recursively checked. If `shallow` is true, only the provided
326 // node in `tree` and the cumulative length, type and height of the direct
327 // child nodes of `tree` are checked. The value of `shallow` is ignored if the
328 // internal `cord_btree_exhaustive_validation` diagnostics variable is true,
329 // in which case the performed validations works as if `shallow` were false.
330 // This function is intended for debugging and testing purposes only.
331 static bool IsValid(const CordRepBtree* tree, bool shallow = false);
332
333 // Diagnostics: asserts that the provided tree is valid.
334 // `AssertValid()` performs a shallow validation by default. `shallow` can be
335 // set to false in which case an exhaustive validation is performed. This
336 // function is implemented in terms of calling `IsValid()` and asserting the
337 // return value to be true. See `IsValid()` for more information.
338 // This function is intended for debugging and testing purposes only.
339 static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
340 static const CordRepBtree* AssertValid(const CordRepBtree* tree,
341 bool shallow = true);
342
343 // Diagnostics: dump the contents of this tree to `stream`.
344 // This function is intended for debugging and testing purposes only.
345 static void Dump(const CordRep* rep, std::ostream& stream);
346 static void Dump(const CordRep* rep, absl::string_view label,
347 std::ostream& stream);
348 static void Dump(const CordRep* rep, absl::string_view label,
349 bool include_contents, std::ostream& stream);
350
351 // Adds the edge `edge` to this node if possible. `owned` indicates if the
352 // current node is potentially shared or not with other threads. Returns:
353 // - {kSelf, <this>}
354 // The edge was directly added to this node.
355 // - {kCopied, <node>}
356 // The edge was added to a copy of this node.
357 // - {kPopped, New(edge, height())}
358 // A new leg with the edge was created as this node has no extra capacity.
359 template <EdgeType edge_type>
360 inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
361
362 // Replaces the front or back edge with the provided new edge. Returns:
363 // - {kSelf, <this>}
364 // The edge was directly set in this node. The old edge is unreffed.
365 // - {kCopied, <node>}
366 // A copy of this node was created with the new edge value.
367 // In both cases, the function adopts a reference on `edge`.
368 template <EdgeType edge_type>
369 OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
370
371 // Creates a new empty node at the specified height.
372 static CordRepBtree* New(int height = 0);
373
374 // Creates a new node containing `rep`, with the height being computed
375 // automatically based on the type of `rep`.
376 static CordRepBtree* New(CordRep* rep);
377
378 // Creates a new node containing both `front` and `back` at height
379 // `front.height() + 1`. Requires `back.height() == front.height()`.
380 static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
381
382 // Creates a fully balanced tree from the provided tree by rebuilding a new
383 // tree from all data edges in the input. This function is automatically
384 // invoked internally when the tree exceeds the maximum height.
385 static CordRepBtree* Rebuild(CordRepBtree* tree);
386
387 private:
388 CordRepBtree() = default;
389 ~CordRepBtree() = default;
390
391 // Initializes the main properties `tag`, `begin`, `end`, `height`.
392 inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
393
394 // Direct property access begin / end
set_begin(size_t begin)395 void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
set_end(size_t end)396 void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
397
398 // Decreases the value of `begin` by `n`, and returns the new value. Notice
399 // how this returns the new value unlike atomic::fetch_add which returns the
400 // old value. This is because this is used to prepend edges at 'begin - 1'.
sub_fetch_begin(size_t n)401 size_t sub_fetch_begin(size_t n) {
402 storage[1] -= static_cast<uint8_t>(n);
403 return storage[1];
404 }
405
406 // Increases the value of `end` by `n`, and returns the previous value. This
407 // function is typically used to append edges at 'end'.
fetch_add_end(size_t n)408 size_t fetch_add_end(size_t n) {
409 const uint8_t current = storage[2];
410 storage[2] = static_cast<uint8_t>(current + n);
411 return current;
412 }
413
414 // Returns the index of the last edge starting on, or before `offset`, with
415 // `n` containing the relative offset of `offset` inside that edge.
416 // Requires `offset` < length.
417 Position IndexOf(size_t offset) const;
418
419 // Returns the index of the last edge starting before `offset`, with `n`
420 // containing the relative offset of `offset` inside that edge.
421 // This function is useful to find the edges for some span of bytes ending at
422 // `offset` (i.e., `n` bytes). For example:
423 //
424 // Position pos = IndexBefore(n)
425 // edges = Edges(begin(), pos.index) // All full edges (may be empty)
426 // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
427 //
428 // Requires 0 < `offset` <= length.
429 Position IndexBefore(size_t offset) const;
430
431 // Returns the index of the edge ending at (or on) length `length`, and the
432 // number of bytes inside that edge up to `length`. For example, if we have a
433 // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27)
434 // will return {1, 17}, and IndexOfLength(10) will return {0, 10}.
435 Position IndexOfLength(size_t n) const;
436
437 // Identical to the above function except starting from the position `front`.
438 // This function is equivalent to `IndexBefore(front.n + offset)`, with
439 // the difference that this function is optimized to start at `front.index`.
440 Position IndexBefore(Position front, size_t offset) const;
441
442 // Returns the index of the edge directly beyond the edge containing offset
443 // `offset`, with `n` containing the distance of that edge from `offset`.
444 // This function is useful for iteratively finding suffix nodes and remaining
445 // partial bytes in left-most suffix nodes as for example in CopySuffix.
446 // Requires `offset` < length.
447 Position IndexBeyond(size_t offset) const;
448
449 // Creates a new leaf node containing as much data as possible from `data`.
450 // The data is added either forwards or reversed depending on `edge_type`.
451 // Callers must check the length of the returned node to determine if all data
452 // was copied or not.
453 // See the `Append/Prepend` function for the meaning and purpose of `extra`.
454 template <EdgeType edge_type>
455 static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
456
457 // Creates a raw copy of this Btree node with the specified length, copying
458 // all properties, but without adding any references to existing edges.
459 CordRepBtree* CopyRaw(size_t new_length) const;
460
461 // Creates a full copy of this Btree node, adding a reference on all edges.
462 CordRepBtree* Copy() const;
463
464 // Creates a partial copy of this Btree node, copying all edges up to `end`,
465 // adding a reference on each copied edge, and sets the length of the newly
466 // created copy to `new_length`.
467 CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
468
469 // Returns a tree containing the edges [tree->begin(), end) and length
470 // of `new_length`. This method consumes a reference on the provided
471 // tree, and logically performs the following operation:
472 // result = tree->CopyBeginTo(end, new_length);
473 // CordRep::Unref(tree);
474 // return result;
475 static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end,
476 size_t new_length);
477
478 // Creates a partial copy of this Btree node, copying all edges starting at
479 // `begin`, adding a reference on each copied edge, and sets the length of
480 // the newly created copy to `new_length`.
481 CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
482
483 // Extracts and returns the front edge from the provided tree.
484 // This method consumes a reference on the provided tree, and logically
485 // performs the following operation:
486 // edge = CordRep::Ref(tree->Edge(kFront));
487 // CordRep::Unref(tree);
488 // return edge;
489 static CordRep* ExtractFront(CordRepBtree* tree);
490
491 // Returns a tree containing the result of appending `right` to `left`.
492 static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
493
494 // Fallback functions for `Create()`, `Append()` and `Prepend()` which
495 // deal with legacy / non conforming input, i.e.: CONCAT trees.
496 static CordRepBtree* CreateSlow(CordRep* rep);
497 static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
498 static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
499
500 // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the
501 // function will consume a reference on `tree`. `stack` is a null terminated
502 // array containing the new tree's state, with the current leaf node at
503 // stack[0], and parent nodes above that, or null for 'top of tree'.
504 static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume);
505
506 // Aligns existing edges to start at index 0, to allow for a new edge to be
507 // added to the back of the current edges.
508 inline void AlignBegin();
509
510 // Aligns existing edges to end at `capacity`, to allow for a new edge to be
511 // added in front of the current edges.
512 inline void AlignEnd();
513
514 // Adds the provided edge to this node.
515 // Requires this node to have capacity for the edge. Realigns / moves
516 // existing edges as needed to prepend or append the new edge.
517 template <EdgeType edge_type>
518 inline void Add(CordRep* rep);
519
520 // Adds the provided edges to this node.
521 // Requires this node to have capacity for the edges. Realigns / moves
522 // existing edges as needed to prepend or append the new edges.
523 template <EdgeType edge_type>
524 inline void Add(absl::Span<CordRep* const>);
525
526 // Adds data from `data` to this node until either all data has been consumed,
527 // or there is no more capacity for additional flat nodes inside this node.
528 // Requires the current node to be a leaf node, data to be non empty, and the
529 // current node to have capacity for at least one more data edge.
530 // Returns any remaining data from `data` that was not added, which is
531 // depending on the edge type (front / back) either the remaining prefix of
532 // suffix of the input.
533 // See the `Append/Prepend` function for the meaning and purpose of `extra`.
534 template <EdgeType edge_type>
535 absl::string_view AddData(absl::string_view data, size_t extra);
536
537 // Replace the front or back edge with the provided value.
538 // Adopts a reference on `edge` and unrefs the old edge.
539 template <EdgeType edge_type>
540 inline void SetEdge(CordRep* edge);
541
542 // Returns a partial copy of the current tree containing the first `n` bytes
543 // of data. `CopyResult` contains both the resulting edge and its height. The
544 // resulting tree may be less high than the current tree, or even be a single
545 // matching data edge if `allow_folding` is set to true.
546 // For example, if `n == 1`, then the result will be the single data edge, and
547 // height will be set to -1 (one below the owning leaf node). If n == 0, this
548 // function returns null. Requires `n <= length`
549 CopyResult CopyPrefix(size_t n, bool allow_folding = true);
550
551 // Returns a partial copy of the current tree containing all data starting
552 // after `offset`. `CopyResult` contains both the resulting edge and its
553 // height. The resulting tree may be less high than the current tree, or even
554 // be a single matching data edge. For example, if `n == length - 1`, then the
555 // result will be a single data edge, and height will be set to -1 (one below
556 // the owning leaf node).
557 // Requires `offset < length`
558 CopyResult CopySuffix(size_t offset);
559
560 // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
561 // depending on the value of `owned`.
562 inline OpResult ToOpResult(bool owned);
563
564 // Adds `rep` to the specified tree, returning the modified tree.
565 template <EdgeType edge_type>
566 static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
567
568 // Adds `data` to the specified tree, returning the modified tree.
569 // See the `Append/Prepend` function for the meaning and purpose of `extra`.
570 template <EdgeType edge_type>
571 static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
572 size_t extra = 0);
573
574 // Merges `src` into `dst` with `src` being added either before (kFront) or
575 // after (kBack) `dst`. Requires the height of `dst` to be greater than or
576 // equal to the height of `src`.
577 template <EdgeType edge_type>
578 static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
579
580 // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
581 // implements an inlined version for trees of limited height (3 levels),
582 // GetAppendBufferSlow implements the logic for large trees.
583 Span<char> GetAppendBufferSlow(size_t size);
584
585 // `edges_` contains all edges starting from this instance.
586 // These are explicitly `child` edges only, a cord btree (or any cord tree in
587 // that respect) does not store `parent` pointers anywhere: multiple trees /
588 // parents can reference the same shared child edge. The type of these edges
589 // depends on the height of the node. `Leaf nodes` (height == 0) contain `data
590 // edges` (external or flat nodes, or sub-strings thereof). All other nodes
591 // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
592 CordRep* edges_[kMaxCapacity];
593
594 friend class CordRepBtreeTestPeer;
595 friend class CordRepBtreeNavigator;
596 };
597
btree()598 inline CordRepBtree* CordRep::btree() {
599 assert(IsBtree());
600 return static_cast<CordRepBtree*>(this);
601 }
602
btree()603 inline const CordRepBtree* CordRep::btree() const {
604 assert(IsBtree());
605 return static_cast<const CordRepBtree*>(this);
606 }
607
InitInstance(int height,size_t begin,size_t end)608 inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
609 tag = BTREE;
610 storage[0] = static_cast<uint8_t>(height);
611 storage[1] = static_cast<uint8_t>(begin);
612 storage[2] = static_cast<uint8_t>(end);
613 }
614
Edge(size_t index)615 inline CordRep* CordRepBtree::Edge(size_t index) const {
616 assert(index >= begin());
617 assert(index < end());
618 return edges_[index];
619 }
620
Edge(EdgeType edge_type)621 inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
622 return edges_[edge_type == kFront ? begin() : back()];
623 }
624
Edges()625 inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
626 return {edges_ + begin(), size()};
627 }
628
Edges(size_t begin,size_t end)629 inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
630 size_t end) const {
631 assert(begin <= end);
632 assert(begin >= this->begin());
633 assert(end <= this->end());
634 return {edges_ + begin, static_cast<size_t>(end - begin)};
635 }
636
Data(size_t index)637 inline absl::string_view CordRepBtree::Data(size_t index) const {
638 assert(height() == 0);
639 return EdgeData(Edge(index));
640 }
641
New(int height)642 inline CordRepBtree* CordRepBtree::New(int height) {
643 CordRepBtree* tree = new CordRepBtree;
644 tree->length = 0;
645 tree->InitInstance(height);
646 return tree;
647 }
648
New(CordRep * rep)649 inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
650 CordRepBtree* tree = new CordRepBtree;
651 int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0;
652 tree->length = rep->length;
653 tree->InitInstance(height, /*begin=*/0, /*end=*/1);
654 tree->edges_[0] = rep;
655 return tree;
656 }
657
New(CordRepBtree * front,CordRepBtree * back)658 inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
659 CordRepBtree* back) {
660 assert(front->height() == back->height());
661 CordRepBtree* tree = new CordRepBtree;
662 tree->length = front->length + back->length;
663 tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
664 tree->edges_[0] = front;
665 tree->edges_[1] = back;
666 return tree;
667 }
668
Unref(absl::Span<CordRep * const> edges)669 inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
670 for (CordRep* edge : edges) {
671 if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) {
672 CordRep::Destroy(edge);
673 }
674 }
675 }
676
CopyRaw(size_t new_length)677 inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
678 CordRepBtree* tree = new CordRepBtree;
679
680 // `length` and `refcount` are the first members of `CordRepBtree`.
681 // We initialize `length` using the given length, have `refcount` be set to
682 // ref = 1 through its default constructor, and copy all data beyond
683 // 'refcount' which starts with `tag` using a single memcpy: all contents
684 // except `refcount` is trivially copyable, and the compiler does not
685 // efficiently coalesce member-wise copy of these members.
686 // See https://gcc.godbolt.org/z/qY8zsca6z
687 // LINT.IfChange(copy_raw)
688 tree->length = new_length;
689 uint8_t* dst = &tree->tag;
690 const uint8_t* src = &tag;
691 const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
692 memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
693 return tree;
694 // LINT.ThenChange()
695 }
696
Copy()697 inline CordRepBtree* CordRepBtree::Copy() const {
698 CordRepBtree* tree = CopyRaw(length);
699 for (CordRep* rep : Edges()) CordRep::Ref(rep);
700 return tree;
701 }
702
CopyToEndFrom(size_t begin,size_t new_length)703 inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
704 size_t new_length) const {
705 assert(begin >= this->begin());
706 assert(begin <= this->end());
707 CordRepBtree* tree = CopyRaw(new_length);
708 tree->set_begin(begin);
709 for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
710 return tree;
711 }
712
CopyBeginTo(size_t end,size_t new_length)713 inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
714 size_t new_length) const {
715 assert(end <= capacity());
716 assert(end >= this->begin());
717 CordRepBtree* tree = CopyRaw(new_length);
718 tree->set_end(end);
719 for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
720 return tree;
721 }
722
AlignBegin()723 inline void CordRepBtree::AlignBegin() {
724 // The below code itself does not need to be fast as typically we have
725 // mono-directional append/prepend calls, and `begin` / `end` are typically
726 // adjusted no more than once. But we want to avoid potential register clobber
727 // effects, making the compiler emit register save/store/spills, and minimize
728 // the size of code.
729 const size_t delta = begin();
730 if (ABSL_PREDICT_FALSE(delta != 0)) {
731 const size_t new_end = end() - delta;
732 set_begin(0);
733 set_end(new_end);
734 // TODO(mvels): we can write this using 2 loads / 2 stores depending on
735 // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
736 // size, and then do overlapping load/store of up to 4 pointers (inlined as
737 // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
738 // compact and b) not clobbering any registers.
739 ABSL_ASSUME(new_end <= kMaxCapacity);
740 #ifdef __clang__
741 #pragma unroll 1
742 #endif
743 for (size_t i = 0; i < new_end; ++i) {
744 edges_[i] = edges_[i + delta];
745 }
746 }
747 }
748
AlignEnd()749 inline void CordRepBtree::AlignEnd() {
750 // See comments in `AlignBegin` for motivation on the hand-rolled for loops.
751 const size_t delta = capacity() - end();
752 if (delta != 0) {
753 const size_t new_begin = begin() + delta;
754 const size_t new_end = end() + delta;
755 set_begin(new_begin);
756 set_end(new_end);
757 ABSL_ASSUME(new_end <= kMaxCapacity);
758 #ifdef __clang__
759 #pragma unroll 1
760 #endif
761 for (size_t i = new_end - 1; i >= new_begin; --i) {
762 edges_[i] = edges_[i - delta];
763 }
764 }
765 }
766
767 template <>
768 inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
769 AlignBegin();
770 edges_[fetch_add_end(1)] = rep;
771 }
772
773 template <>
774 inline void CordRepBtree::Add<CordRepBtree::kBack>(
775 absl::Span<CordRep* const> edges) {
776 AlignBegin();
777 size_t new_end = end();
778 for (CordRep* edge : edges) edges_[new_end++] = edge;
779 set_end(new_end);
780 }
781
782 template <>
783 inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
784 AlignEnd();
785 edges_[sub_fetch_begin(1)] = rep;
786 }
787
788 template <>
789 inline void CordRepBtree::Add<CordRepBtree::kFront>(
790 absl::Span<CordRep* const> edges) {
791 AlignEnd();
792 size_t new_begin = begin() - edges.size();
793 set_begin(new_begin);
794 for (CordRep* edge : edges) edges_[new_begin++] = edge;
795 }
796
797 template <CordRepBtree::EdgeType edge_type>
SetEdge(CordRep * edge)798 inline void CordRepBtree::SetEdge(CordRep* edge) {
799 const int idx = edge_type == kFront ? begin() : back();
800 CordRep::Unref(edges_[idx]);
801 edges_[idx] = edge;
802 }
803
ToOpResult(bool owned)804 inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
805 return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
806 }
807
IndexOf(size_t offset)808 inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
809 assert(offset < length);
810 size_t index = begin();
811 while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
812 return {index, offset};
813 }
814
IndexBefore(size_t offset)815 inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
816 assert(offset > 0);
817 assert(offset <= length);
818 size_t index = begin();
819 while (offset > edges_[index]->length) offset -= edges_[index++]->length;
820 return {index, offset};
821 }
822
IndexBefore(Position front,size_t offset)823 inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
824 size_t offset) const {
825 size_t index = front.index;
826 offset = offset + front.n;
827 while (offset > edges_[index]->length) offset -= edges_[index++]->length;
828 return {index, offset};
829 }
830
IndexOfLength(size_t n)831 inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const {
832 assert(n <= length);
833 size_t index = back();
834 size_t strip = length - n;
835 while (strip >= edges_[index]->length) strip -= edges_[index--]->length;
836 return {index, edges_[index]->length - strip};
837 }
838
IndexBeyond(const size_t offset)839 inline CordRepBtree::Position CordRepBtree::IndexBeyond(
840 const size_t offset) const {
841 // We need to find the edge which `starting offset` is beyond (>=)`offset`.
842 // For this we can't use the `offset -= length` logic of IndexOf. Instead, we
843 // track the offset of the `current edge` in `off`, which we increase as we
844 // iterate over the edges until we find the matching edge.
845 size_t off = 0;
846 size_t index = begin();
847 while (offset > off) off += edges_[index++]->length;
848 return {index, off - offset};
849 }
850
Create(CordRep * rep)851 inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
852 if (IsDataEdge(rep)) return New(rep);
853 return CreateSlow(rep);
854 }
855
GetAppendBuffer(size_t size)856 inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
857 assert(refcount.IsOne());
858 CordRepBtree* tree = this;
859 const int height = this->height();
860 CordRepBtree* n1 = tree;
861 CordRepBtree* n2 = tree;
862 CordRepBtree* n3 = tree;
863 switch (height) {
864 case 3:
865 tree = tree->Edge(kBack)->btree();
866 if (!tree->refcount.IsOne()) return {};
867 n2 = tree;
868 ABSL_FALLTHROUGH_INTENDED;
869 case 2:
870 tree = tree->Edge(kBack)->btree();
871 if (!tree->refcount.IsOne()) return {};
872 n1 = tree;
873 ABSL_FALLTHROUGH_INTENDED;
874 case 1:
875 tree = tree->Edge(kBack)->btree();
876 if (!tree->refcount.IsOne()) return {};
877 ABSL_FALLTHROUGH_INTENDED;
878 case 0:
879 CordRep* edge = tree->Edge(kBack);
880 if (!edge->refcount.IsOne()) return {};
881 if (edge->tag < FLAT) return {};
882 size_t avail = edge->flat()->Capacity() - edge->length;
883 if (avail == 0) return {};
884 size_t delta = (std::min)(size, avail);
885 Span<char> span = {edge->flat()->Data() + edge->length, delta};
886 edge->length += delta;
887 switch (height) {
888 case 3:
889 n3->length += delta;
890 ABSL_FALLTHROUGH_INTENDED;
891 case 2:
892 n2->length += delta;
893 ABSL_FALLTHROUGH_INTENDED;
894 case 1:
895 n1->length += delta;
896 ABSL_FALLTHROUGH_INTENDED;
897 case 0:
898 tree->length += delta;
899 return span;
900 }
901 break;
902 }
903 return GetAppendBufferSlow(size);
904 }
905
906 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
907 CordRepBtree* tree, CordRep* rep);
908
909 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
910 CordRepBtree* tree, CordRep* rep);
911
Append(CordRepBtree * tree,CordRep * rep)912 inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
913 if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
914 return CordRepBtree::AddCordRep<kBack>(tree, rep);
915 }
916 return AppendSlow(tree, rep);
917 }
918
Prepend(CordRepBtree * tree,CordRep * rep)919 inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
920 if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
921 return CordRepBtree::AddCordRep<kFront>(tree, rep);
922 }
923 return PrependSlow(tree, rep);
924 }
925
926 #ifdef NDEBUG
927
AssertValid(CordRepBtree * tree,bool)928 inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
929 bool /* shallow */) {
930 return tree;
931 }
932
AssertValid(const CordRepBtree * tree,bool)933 inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
934 bool /* shallow */) {
935 return tree;
936 }
937
938 #endif
939
940 } // namespace cord_internal
941 ABSL_NAMESPACE_END
942 } // namespace absl
943
944 #endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
945