// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_

#include <cassert>
#include <cstdint>
#include <iosfwd>

#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

class CordRepBtreeNavigator;

// CordRepBtree is, as the name implies, a btree implementation of a CordRep
// tree. Data is stored at the leaf level only; non-leaf nodes contain down
// pointers only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs
// of FLAT or EXTERNAL nodes. The implementation only allows data to be added
// to either end of the tree; it does not provide any 'insert' logic. This has
// the benefit that we can expect good fill ratios: all nodes except the outer
// 'legs' will have 100% fill ratios for trees built using the Append/Prepend
// methods. Merged trees will typically have a fill ratio well above 50%, as in
// a similar fashion, one side of the merged tree will typically have a 100%
// fill ratio, and the 'open' end will average 50%. All operations are
// O(log(n)) or better, and the tree never needs balancing.
//
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
// input unless explicitly stated otherwise. All functions returning a CordRep*
// or CordRepBtree* instance transfer a reference back to the caller.
// Simplified, callers both 'donate' and 'consume' a reference count on each
// call, simplifying the API. An example of building a tree:
//
//   CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
//   tree = CordRepBtree::Append(tree, MakeFlat("world"));
//
// In the above example, all inputs are consumed, making each call that affects
// `tree` reference-count neutral. The returned `tree` value can be different
// from the input if the input is shared with other threads, or if the tree
// grows in height, but callers typically never have to concern themselves with
// that and can trust that all methods do the right thing at all times.
class CordRepBtree : public CordRep {
 public:
  // EdgeType identifies `front` and `back` enum values.
  // Various implementations in CordRepBtree such as `Add` and `Edge` are
  // generic and templated to operate on either of the boundary edges.
  // For more information on the possible edges contained in a CordRepBtree
  // instance see the documentation for `edges_`.
  enum class EdgeType { kFront, kBack };

  // Convenience constants for `EdgeType`.
  static constexpr EdgeType kFront = EdgeType::kFront;
  static constexpr EdgeType kBack = EdgeType::kBack;

  // Maximum number of edges: based on experiments and performance data, we can
  // pick suitable values resulting in optimum cacheline aligned values. The
  // preferred values are based on 64-bit systems where we aim to align this
  // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
  // TODO(b/192061034): experiment with alternative sizes.
  static constexpr size_t kMaxCapacity = 6;

  // Reasonable maximum height of the btree. We can expect a fill ratio of at
  // least 50%: trees are always expanded at the front or back. Concatenating
  // trees will then typically fold at the top most node, where the lower nodes
  // are at least at capacity on one side of joined inputs. Even at a lower
  // fill rate of 4 edges per node, we have capacity for ~16 million leaf nodes
  // (4^12 = 16,777,216).
  // We will fail / abort if an application ever exceeds this height, which
  // should be extremely rare (near impossible) and be an indication of an
  // application error: we do not assume it reasonable for any application to
  // operate correctly with such monster trees.
  // Another compelling reason for the number `12` is that any contextual stack
  // required for navigation or insertion requires 12 words and 12 bytes, which
  // fits inside 2 cache lines with some room to spare, and is reasonable as a
  // local stack variable compared to Cord's current near 400 bytes stack use.
  // The maximum `height` value of a node is then `kMaxDepth - 1` as node
  // height values start with a value of 0 for leaf nodes.
  static constexpr int kMaxDepth = 12;
  static constexpr int kMaxHeight = kMaxDepth - 1;

  // `Action` defines the action for unwinding changes done at the btree's leaf
  // level that need to be propagated up to the parent node(s). Each operation
  // on a node has an effect / action defined as follows:
  // - kSelf
  //   The operation (add / update, etc) was performed directly on the node as
  //   the node is private to the current thread (i.e.: not shared directly or
  //   indirectly through a refcount > 1). Changes can be propagated directly
  //   to all parent nodes as all parent nodes are also then private to the
  //   current thread.
  // - kCopied
  //   The operation (add / update, etc) was performed on a copy of the
  //   original node, as the node is (potentially) directly or indirectly
  //   shared with other threads. Changes need to be propagated into the parent
  //   nodes where the old down pointer must be unreffed and replaced with this
  //   new copy. Such changes to parent nodes may themselves require a copy if
  //   the parent node is also shared. A kCopied action can propagate all the
  //   way to the top node where we then must unref the `tree` input provided
  //   by the caller, and return the new copy.
  // - kPopped
  //   The operation (typically add) could not be satisfied due to insufficient
  //   capacity in the targeted node, and a new 'leg' was created that needs to
  //   be added into the parent node. For example, adding a FLAT inside a leaf
  //   node that is at capacity will create a new leaf node containing that
  //   FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
  //   cascade up the tree if parent nodes are also at capacity. A 'Popped'
  //   action propagating all the way to the top of the tree will result in
  //   the tree becoming one level higher than the current tree through a final
  //   `CordRepBtree::New(tree, popped)` call, resulting in a new top node
  //   referencing the old tree and the new (fully popped upwards) 'leg'.
  enum Action { kSelf, kCopied, kPopped };

  // Result of an operation on a node. See the `Action` enum for details.
  struct OpResult {
    CordRepBtree* tree;
    Action action;
  };

  // Return value of the CopyPrefix and CopySuffix methods which can
  // return a node or data edge at any height inside the tree.
  // A height of 0 defines the lowest (leaf) node, a height of -1 identifies
  // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
  struct CopyResult {
    CordRep* edge;
    int height;
  };

  // Logical position inside a node:
  // - index: index of the edge.
  // - n: size or offset value depending on context.
  struct Position {
    size_t index;
    size_t n;
  };

  // Creates a btree from the given input. Adopts a ref of `rep`.
  // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
  // function immediately returns `rep->btree()`. If the input is a valid data
  // edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
  // as the sole data edge. Else, the input is assumed to be a (legacy) concat
  // tree, and the input is consumed and transformed into a btree().
  static CordRepBtree* Create(CordRep* rep);

  // Destroys the provided tree. Should only be called by cord internal API's,
  // typically after a ref_count.Decrement() on the last reference count.
  static void Destroy(CordRepBtree* tree);

  // Appends / Prepends an existing CordRep instance to this tree.
  // The below methods accept three types of input:
  // 1) `rep` is a data node (see `IsDataEdge` for valid data edges).
  //    `rep` is appended or prepended to this tree 'as is'.
  // 2) `rep` is a BTREE.
  //    `rep` is merged into `tree` respecting the Append/Prepend order.
  // 3) `rep` is some other (legacy) type.
  //    `rep` is converted in place and added to `tree`.
  // Requires `tree` and `rep` to be not null.
  static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
  static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
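
  // For illustration, a minimal usage sketch mirroring the Append example in
  // the class comment above; as there, `MakeFlat` is assumed to produce a
  // FLAT data edge:
  //
  //   tree = CordRepBtree::Prepend(tree, MakeFlat("Say: "));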

  // Append/Prepend the data in `data` to this tree.
  // The `extra` parameter defines how much extra capacity should be allocated
  // for any additional FLAT being allocated. This is an optimization hint from
  // the caller. For example, a caller may need to add 2 string_views of data
  // "abc" and "defghi" which are not consecutive. The caller can in this case
  // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
  // where possible with at least 6 bytes of extra capacity beyond `length`.
  // This helps avoid data getting fragmented over multiple flats.
  // There is no limit on the size of `data`. If `data` cannot be stored inside
  // a single flat, then the function will iteratively add flats until all data
  // has been consumed and appended or prepended to the tree.
  static CordRepBtree* Append(CordRepBtree* tree, string_view data,
                              size_t extra = 0);
  static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
                               size_t extra = 0);
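
  // A sketch of the `extra` hint described above, using the two illustrative
  // pieces "abc" and "defghi" from the comment; the first call reserves room
  // so the second piece can land in the same flat where possible:
  //
  //   tree = CordRepBtree::Append(tree, "abc", /*extra=*/6);
  //   tree = CordRepBtree::Append(tree, "defghi");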

  // Returns a new tree, containing `n` bytes of data from this instance
  // starting at offset `offset`. Where possible, the returned tree shares
  // (re-uses) data edges and nodes with this instance to minimize the
  // combined memory footprint of both trees.
  // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
  CordRep* SubTree(size_t offset, size_t n);
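
  // For example (offsets are illustrative only), a 10 byte slice starting at
  // offset 5 that shares edges and nodes with this tree:
  //
  //   CordRep* slice = tree->SubTree(/*offset=*/5, /*n=*/10);
  //   ...
  //   CordRep::Unref(slice);  // Release the slice when done.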

  // Returns the character at the given offset.
  char GetCharacter(size_t offset) const;

  // Returns true if this node holds a single data edge, and if so, sets
  // `fragment` to reference the contained data. `fragment` is an optional
  // output parameter and allowed to be null.
  bool IsFlat(absl::string_view* fragment) const;

  // Returns true if the data of `n` bytes starting at offset `offset`
  // is contained in a single data edge, and if so, sets `fragment` to
  // reference the contained data. `fragment` is an optional output parameter
  // and allowed to be null.
  bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
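
  // A usage sketch: read `n` bytes at `offset` directly when they happen to
  // live in a single data edge, falling back to generic iteration otherwise.
  // `Use` and `ReadSlow` are hypothetical helpers, not part of this header:
  //
  //   absl::string_view fragment;
  //   if (tree->IsFlat(offset, n, &fragment)) {
  //     Use(fragment);
  //   } else {
  //     ReadSlow(tree, offset, n);
  //   }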

  // Returns a span (mutable range of bytes) of up to `size` bytes into the
  // last FLAT data edge inside this tree under the following conditions:
  // - none of the nodes down into the FLAT node are shared.
  // - the last data edge in this tree is a non-shared FLAT.
  // - the referenced FLAT has additional capacity available.
  // If all these conditions are met, a non-empty span is returned, and the
  // length of the flat node and involved tree nodes have been increased by
  // `span.length()`. The caller is responsible for immediately assigning
  // values to all uninitialized data referenced by the returned span.
  // Requires `this->refcount.IsOne()`: this function forces the caller to do
  // this fast path check on the top level node, as this is the most commonly
  // shared node of a cord tree.
  Span<char> GetAppendBuffer(size_t size);
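
  // A sketch of the intended use, where the caller holds the only reference
  // to `tree` and copies from `src`, a hypothetical pending buffer of `n`
  // bytes; any bytes that do not fit are added through `Append()`:
  //
  //   if (tree->refcount.IsOne()) {
  //     absl::Span<char> span = tree->GetAppendBuffer(n);
  //     memcpy(span.data(), src, span.size());
  //     src += span.size();
  //     n -= span.size();
  //   }
  //   if (n > 0) tree = CordRepBtree::Append(tree, absl::string_view(src, n));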

  // Returns the `height` of the tree. The height of a tree is limited to
  // kMaxHeight. `height` is implemented as an `int` as in some places we
  // use negative (-1) values for 'data edges'.
  int height() const { return static_cast<int>(storage[0]); }

  // Properties: begin, back, end, front/back boundary indexes.
  size_t begin() const { return static_cast<size_t>(storage[1]); }
  size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
  size_t end() const { return static_cast<size_t>(storage[2]); }
  size_t index(EdgeType edge) const {
    return edge == kFront ? begin() : back();
  }

  // Properties: size and capacity.
  // `capacity` contains the current capacity of this instance, where
  // `kMaxCapacity` contains the maximum capacity of a btree node.
  // For now, `capacity` and `kMaxCapacity` return the same value, but this may
  // change in the future if we see benefit in dynamically sizing 'small' nodes
  // to 'large' nodes for large data trees.
  size_t size() const { return end() - begin(); }
  size_t capacity() const { return kMaxCapacity; }

  // Edge access
  inline CordRep* Edge(size_t index) const;
  inline CordRep* Edge(EdgeType edge_type) const;
  inline absl::Span<CordRep* const> Edges() const;
  inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;

  // Returns a reference to the data edge at `index`.
  // Requires this instance to be a leaf node, and `index` to be a valid index.
  inline absl::string_view Data(size_t index) const;
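
  // For illustration, a sketch that concatenates the contents of a leaf node
  // `leaf` (i.e., `leaf->height() == 0`) into a string:
  //
  //   std::string contents;
  //   for (size_t i = leaf->begin(); i < leaf->end(); ++i) {
  //     absl::StrAppend(&contents, leaf->Data(i));
  //   }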

  // Returns a pointer to, respectively a string_view of, the data contained in
  // the provided data edge `r`. Requires `IsDataEdge(r)` to be true.
  static const char* EdgeDataPtr(const CordRep* r);
  static absl::string_view EdgeData(const CordRep* r);

  // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
  // holding a FLAT or EXTERNAL child rep.
  static bool IsDataEdge(const CordRep* rep);

  // Diagnostics: returns true if `tree` is valid and internally consistent.
  // If `shallow` is false, then the provided top level node and all child
  // nodes below it are recursively checked. If `shallow` is true, only the
  // provided node in `tree` and the cumulative length, type and height of the
  // direct child nodes of `tree` are checked. The value of `shallow` is
  // ignored if the internal `cord_btree_exhaustive_validation` diagnostics
  // variable is true, in which case the performed validation works as if
  // `shallow` were false.
  // This function is intended for debugging and testing purposes only.
  static bool IsValid(const CordRepBtree* tree, bool shallow = false);

  // Diagnostics: asserts that the provided tree is valid.
  // `AssertValid()` performs a shallow validation by default. `shallow` can be
  // set to false in which case an exhaustive validation is performed. This
  // function is implemented in terms of calling `IsValid()` and asserting the
  // return value to be true. See `IsValid()` for more information.
  // This function is intended for debugging and testing purposes only.
  static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
  static const CordRepBtree* AssertValid(const CordRepBtree* tree,
                                         bool shallow = true);

  // Diagnostics: dump the contents of this tree to `stream`.
  // This function is intended for debugging and testing purposes only.
  static void Dump(const CordRep* rep, std::ostream& stream);
  static void Dump(const CordRep* rep, absl::string_view label,
                   std::ostream& stream);
  static void Dump(const CordRep* rep, absl::string_view label,
                   bool include_contents, std::ostream& stream);

  // Adds the edge `edge` to this node if possible. `owned` indicates whether
  // the current node is potentially shared with other threads. Returns:
  // - {kSelf, <this>}
  //   The edge was directly added to this node.
  // - {kCopied, <node>}
  //   The edge was added to a copy of this node.
  // - {kPopped, New(edge, height())}
  //   A new leg with the edge was created as this node has no extra capacity.
  template <EdgeType edge_type>
  inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
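
  // A simplified sketch of how a caller might dispatch on the returned
  // action; `ReplaceChild` and `AddNewLeg` are hypothetical helpers standing
  // in for the propagation logic described under `Action`:
  //
  //   OpResult result = node->AddEdge<kBack>(owned, edge, delta);
  //   switch (result.action) {
  //     case kSelf: break;                               // Modified in place.
  //     case kCopied: ReplaceChild(result.tree); break;  // Swap down pointer.
  //     case kPopped: AddNewLeg(result.tree); break;     // New leg for parent.
  //   }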

  // Replaces the front or back edge with the provided new edge. Returns:
  // - {kSelf, <this>}
  //   The edge was directly set in this node. The old edge is unreffed.
  // - {kCopied, <node>}
  //   A copy of this node was created with the new edge value.
  // In both cases, the function adopts a reference on `edge`.
  template <EdgeType edge_type>
  OpResult SetEdge(bool owned, CordRep* edge, size_t delta);

  // Creates a new empty node at the specified height.
  static CordRepBtree* New(int height = 0);

  // Creates a new node containing `rep`, with the height being computed
  // automatically based on the type of `rep`.
  static CordRepBtree* New(CordRep* rep);

  // Creates a new node containing both `front` and `back` at height
  // `front.height() + 1`. Requires `back.height() == front.height()`.
  static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);

 private:
  CordRepBtree() = default;
  ~CordRepBtree() = default;

  // Initializes the main properties `tag`, `begin`, `end`, `height`.
  inline void InitInstance(int height, size_t begin = 0, size_t end = 0);

  // Direct property access begin / end
  void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
  void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }

  // Decreases the value of `begin` by `n`, and returns the new value. Notice
  // how this returns the new value unlike atomic::fetch_add which returns the
  // old value. This is because this is used to prepend edges at 'begin - 1'.
  size_t sub_fetch_begin(size_t n) {
    storage[1] -= static_cast<uint8_t>(n);
    return storage[1];
  }

  // Increases the value of `end` by `n`, and returns the previous value. This
  // function is typically used to append edges at 'end'.
  size_t fetch_add_end(size_t n) {
    const uint8_t current = storage[2];
    storage[2] = static_cast<uint8_t>(current + n);
    return current;
  }

  // Returns the index of the last edge starting on, or before, `offset`, with
  // `n` containing the relative offset of `offset` inside that edge.
  // Requires `offset` < length.
  Position IndexOf(size_t offset) const;

  // Returns the index of the last edge starting before `offset`, with `n`
  // containing the relative offset of `offset` inside that edge.
  // This function is useful to find the edges for some span of bytes ending at
  // `offset` (i.e., `n` bytes). For example:
  //
  //   Position pos = IndexBefore(n)
  //   edges = Edges(begin(), pos.index)       // All full edges (may be empty)
  //   last = Sub(Edge(pos.index), 0, pos.n)   // Last partial edge (may be empty)
  //
  // Requires 0 < `offset` <= length.
  Position IndexBefore(size_t offset) const;

  // Identical to the above function except starting from the position `front`.
  // This function is equivalent to `IndexBefore(front.n + offset)`, with
  // the difference that this function is optimized to start at `front.index`.
  Position IndexBefore(Position front, size_t offset) const;

  // Returns the index of the edge directly beyond the edge containing offset
  // `offset`, with `n` containing the distance of that edge from `offset`.
  // This function is useful for iteratively finding suffix nodes and remaining
  // partial bytes in left-most suffix nodes as for example in CopySuffix.
  // Requires `offset` < length.
  Position IndexBeyond(size_t offset) const;

  // Destruction
  static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end);
  static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end);
  static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end);
  static void Delete(CordRepBtree* tree) { delete tree; }

  // Creates a new leaf node containing as much data as possible from `data`.
  // The data is added either forwards or reversed depending on `edge_type`.
  // Callers must check the length of the returned node to determine if all
  // data was copied or not.
  // See the `Append/Prepend` function for the meaning and purpose of `extra`.
  template <EdgeType edge_type>
  static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);

  // Creates a raw copy of this Btree node, copying all properties, but
  // without adding any references to existing edges.
  CordRepBtree* CopyRaw() const;

  // Creates a full copy of this Btree node, adding a reference on all edges.
  CordRepBtree* Copy() const;

  // Creates a partial copy of this Btree node, copying all edges up to `end`,
  // adding a reference on each copied edge, and setting the length of the
  // newly created copy to `new_length`.
  CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;

  // Creates a partial copy of this Btree node, copying all edges starting at
  // `begin`, adding a reference on each copied edge, and setting the length of
  // the newly created copy to `new_length`.
  CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;

  // Returns a tree containing the result of appending `right` to `left`.
  static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);

  // Fallback functions for `Create()`, `Append()` and `Prepend()` which
  // deal with legacy / non conforming input, i.e.: CONCAT trees.
  static CordRepBtree* CreateSlow(CordRep* rep);
  static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
  static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);

  // Aligns existing edges to start at index 0, to allow for a new edge to be
  // added to the back of the current edges.
  inline void AlignBegin();

  // Aligns existing edges to end at `capacity`, to allow for a new edge to be
  // added in front of the current edges.
  inline void AlignEnd();

  // Adds the provided edge to this node.
  // Requires this node to have capacity for the edge. Realigns / moves
  // existing edges as needed to prepend or append the new edge.
  template <EdgeType edge_type>
  inline void Add(CordRep* rep);

  // Adds the provided edges to this node.
  // Requires this node to have capacity for the edges. Realigns / moves
  // existing edges as needed to prepend or append the new edges.
  template <EdgeType edge_type>
  inline void Add(absl::Span<CordRep* const>);

  // Adds data from `data` to this node until either all data has been
  // consumed, or there is no more capacity for additional flat nodes inside
  // this node.
  // Requires the current node to be a leaf node, data to be non empty, and the
  // current node to have capacity for at least one more data edge.
  // Returns any remaining data from `data` that was not added, which is,
  // depending on the edge type (front / back), either the remaining prefix or
  // suffix of the input.
  // See the `Append/Prepend` function for the meaning and purpose of `extra`.
  template <EdgeType edge_type>
  absl::string_view AddData(absl::string_view data, size_t extra);

  // Replace the front or back edge with the provided value.
  // Adopts a reference on `edge` and unrefs the old edge.
  template <EdgeType edge_type>
  inline void SetEdge(CordRep* edge);

  // Returns a partial copy of the current tree containing the first `n` bytes
  // of data. `CopyResult` contains both the resulting edge and its height. The
  // resulting tree may be less high than the current tree, or even be a single
  // matching data edge. For example, if `n == 1`, then the result will be the
  // single data edge, and height will be set to -1 (one below the owning leaf
  // node). If n == 0, this function returns null.
  // Requires `n <= length`.
  CopyResult CopyPrefix(size_t n);

  // Returns a partial copy of the current tree containing all data starting
  // after `offset`. `CopyResult` contains both the resulting edge and its
  // height. The resulting tree may be less high than the current tree, or even
  // be a single matching data edge. For example, if `offset == length - 1`
  // (a single byte suffix), then the result will be a single data edge, and
  // height will be set to -1 (one below the owning leaf node).
  // Requires `offset < length`.
  CopyResult CopySuffix(size_t offset);

  // Returns an OpResult value of {this, kSelf} or {Copy(), kCopied}
  // depending on the value of `owned`.
  inline OpResult ToOpResult(bool owned);

  // Adds `rep` to the specified tree, returning the modified tree.
  template <EdgeType edge_type>
  static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);

  // Adds `data` to the specified tree, returning the modified tree.
  // See the `Append/Prepend` function for the meaning and purpose of `extra`.
  template <EdgeType edge_type>
  static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
                               size_t extra = 0);

  // Merges `src` into `dst` with `src` being added either before (kFront) or
  // after (kBack) `dst`. Requires the height of `dst` to be greater than or
  // equal to the height of `src`.
  template <EdgeType edge_type>
  static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);

  // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
  // implements an inlined version for trees of limited height (3 levels),
  // GetAppendBufferSlow implements the logic for large trees.
  Span<char> GetAppendBufferSlow(size_t size);

  // `edges_` contains all edges starting from this instance.
  // These are explicitly `child` edges only, a cord btree (or any cord tree in
  // that respect) does not store `parent` pointers anywhere: multiple trees /
  // parents can reference the same shared child edge. The type of these edges
  // depends on the height of the node. `Leaf nodes` (height == 0) contain
  // `data edges` (external or flat nodes, or sub-strings thereof). All other
  // nodes (height > 0) contain pointers to BTREE nodes with a height of
  // `height - 1`.
  CordRep* edges_[kMaxCapacity];

  friend class CordRepBtreeTestPeer;
  friend class CordRepBtreeNavigator;
};

inline CordRepBtree* CordRep::btree() {
  assert(IsBtree());
  return static_cast<CordRepBtree*>(this);
}

inline const CordRepBtree* CordRep::btree() const {
  assert(IsBtree());
  return static_cast<const CordRepBtree*>(this);
}

inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
  tag = BTREE;
  storage[0] = static_cast<uint8_t>(height);
  storage[1] = static_cast<uint8_t>(begin);
  storage[2] = static_cast<uint8_t>(end);
}

inline CordRep* CordRepBtree::Edge(size_t index) const {
  assert(index >= begin());
  assert(index < end());
  return edges_[index];
}

inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
  return edges_[edge_type == kFront ? begin() : back()];
}

inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
  return {edges_ + begin(), size()};
}

inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
                                                      size_t end) const {
  assert(begin <= end);
  assert(begin >= this->begin());
  assert(end <= this->end());
  return {edges_ + begin, static_cast<size_t>(end - begin)};
}

inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
  assert(IsDataEdge(r));
  size_t offset = 0;
  if (r->tag == SUBSTRING) {
    offset = r->substring()->start;
    r = r->substring()->child;
  }
  return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
}

inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
  return absl::string_view(EdgeDataPtr(r), r->length);
}

inline absl::string_view CordRepBtree::Data(size_t index) const {
  assert(height() == 0);
  return EdgeData(Edge(index));
}

inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
  // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
  // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
  // check in the slow path after the SUBSTRING check to optimize for the hot
  // path.
  if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
  if (rep->tag == SUBSTRING) rep = rep->substring()->child;
  return rep->tag == EXTERNAL || rep->tag >= FLAT;
}

inline CordRepBtree* CordRepBtree::New(int height) {
  CordRepBtree* tree = new CordRepBtree;
  tree->length = 0;
  tree->InitInstance(height);
  return tree;
}

inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
  CordRepBtree* tree = new CordRepBtree;
  int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0;
  tree->length = rep->length;
  tree->InitInstance(height, /*begin=*/0, /*end=*/1);
  tree->edges_[0] = rep;
  return tree;
}

inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
                                       CordRepBtree* back) {
  assert(front->height() == back->height());
  CordRepBtree* tree = new CordRepBtree;
  tree->length = front->length + back->length;
  tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
  tree->edges_[0] = front;
  tree->edges_[1] = back;
  return tree;
}

inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
                                      size_t end) {
  if (tree->height() == 0) {
    DestroyLeaf(tree, begin, end);
  } else {
    DestroyNonLeaf(tree, begin, end);
  }
}

inline void CordRepBtree::Destroy(CordRepBtree* tree) {
  DestroyTree(tree, tree->begin(), tree->end());
}

inline CordRepBtree* CordRepBtree::CopyRaw() const {
  auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
  memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
  new (&tree->refcount) RefcountAndFlags;
  return tree;
}

inline CordRepBtree* CordRepBtree::Copy() const {
  CordRepBtree* tree = CopyRaw();
  for (CordRep* rep : Edges()) CordRep::Ref(rep);
  return tree;
}

inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
                                                 size_t new_length) const {
  assert(begin >= this->begin());
  assert(begin <= this->end());
  CordRepBtree* tree = CopyRaw();
  tree->length = new_length;
  tree->set_begin(begin);
  for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
  return tree;
}

inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
                                               size_t new_length) const {
  assert(end <= capacity());
  assert(end >= this->begin());
  CordRepBtree* tree = CopyRaw();
  tree->length = new_length;
  tree->set_end(end);
  for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
  return tree;
}

inline void CordRepBtree::AlignBegin() {
  // The below code itself does not need to be fast as typically we have
  // mono-directional append/prepend calls, and `begin` / `end` are typically
  // adjusted no more than once. But we want to avoid potential register
  // clobber effects, making the compiler emit register save/store/spills, and
  // minimize the size of code.
  const size_t delta = begin();
  if (ABSL_PREDICT_FALSE(delta != 0)) {
    const size_t new_end = end() - delta;
    set_begin(0);
    set_end(new_end);
    // TODO(mvels): we can write this using 2 loads / 2 stores depending on
    // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch)
    // on size, and then do overlapping load/store of up to 4 pointers (inlined
    // as XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which
    // is a) compact and b) not clobbering any registers.
    ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
    for (size_t i = 0; i < new_end; ++i) {
      edges_[i] = edges_[i + delta];
    }
  }
}

inline void CordRepBtree::AlignEnd() {
  // See comments in `AlignBegin` for motivation on the hand-rolled for loops.
  const size_t delta = capacity() - end();
  if (delta != 0) {
    const size_t new_begin = begin() + delta;
    const size_t new_end = end() + delta;
    set_begin(new_begin);
    set_end(new_end);
    ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
    for (size_t i = new_end - 1; i >= new_begin; --i) {
      edges_[i] = edges_[i - delta];
    }
  }
}

template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
  AlignBegin();
  edges_[fetch_add_end(1)] = rep;
}

template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(
    absl::Span<CordRep* const> edges) {
  AlignBegin();
  size_t new_end = end();
  for (CordRep* edge : edges) edges_[new_end++] = edge;
  set_end(new_end);
}

template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
  AlignEnd();
  edges_[sub_fetch_begin(1)] = rep;
}

template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(
    absl::Span<CordRep* const> edges) {
  AlignEnd();
  size_t new_begin = begin() - edges.size();
  set_begin(new_begin);
  for (CordRep* edge : edges) edges_[new_begin++] = edge;
}

template <CordRepBtree::EdgeType edge_type>
inline void CordRepBtree::SetEdge(CordRep* edge) {
  const int idx = edge_type == kFront ? begin() : back();
  CordRep::Unref(edges_[idx]);
  edges_[idx] = edge;
}

inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
  return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
}

inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
  assert(offset < length);
  size_t index = begin();
  while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
  return {index, offset};
}

inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
  assert(offset > 0);
  assert(offset <= length);
  size_t index = begin();
  while (offset > edges_[index]->length) offset -= edges_[index++]->length;
  return {index, offset};
}

inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
                                                        size_t offset) const {
  size_t index = front.index;
  offset = offset + front.n;
  while (offset > edges_[index]->length) offset -= edges_[index++]->length;
  return {index, offset};
}

inline CordRepBtree::Position CordRepBtree::IndexBeyond(
    const size_t offset) const {
  // We need to find the edge whose starting offset is at or beyond (>=)
  // `offset`. For this we can't use the `offset -= length` logic of IndexOf.
  // Instead, we track the offset of the `current edge` in `off`, which we
  // increase as we iterate over the edges until we find the matching edge.
  size_t off = 0;
  size_t index = begin();
  while (offset > off) off += edges_[index++]->length;
  return {index, off - offset};
}

inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
  if (IsDataEdge(rep)) return New(rep);
  return CreateSlow(rep);
}

inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
  assert(refcount.IsOne());
  CordRepBtree* tree = this;
  const int height = this->height();
  CordRepBtree* n1 = tree;
  CordRepBtree* n2 = tree;
  CordRepBtree* n3 = tree;
  switch (height) {
    case 3:
      tree = tree->Edge(kBack)->btree();
      if (!tree->refcount.IsOne()) return {};
      n2 = tree;
      ABSL_FALLTHROUGH_INTENDED;
    case 2:
      tree = tree->Edge(kBack)->btree();
      if (!tree->refcount.IsOne()) return {};
      n1 = tree;
      ABSL_FALLTHROUGH_INTENDED;
    case 1:
      tree = tree->Edge(kBack)->btree();
      if (!tree->refcount.IsOne()) return {};
      ABSL_FALLTHROUGH_INTENDED;
    case 0:
      CordRep* edge = tree->Edge(kBack);
      if (!edge->refcount.IsOne()) return {};
      if (edge->tag < FLAT) return {};
      size_t avail = edge->flat()->Capacity() - edge->length;
      if (avail == 0) return {};
      size_t delta = (std::min)(size, avail);
      Span<char> span = {edge->flat()->Data() + edge->length, delta};
      edge->length += delta;
      switch (height) {
        case 3:
          n3->length += delta;
          ABSL_FALLTHROUGH_INTENDED;
        case 2:
          n2->length += delta;
          ABSL_FALLTHROUGH_INTENDED;
        case 1:
          n1->length += delta;
          ABSL_FALLTHROUGH_INTENDED;
        case 0:
          tree->length += delta;
          return span;
      }
      break;
  }
  return GetAppendBufferSlow(size);
}

extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
    CordRepBtree* tree, CordRep* rep);

extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
    CordRepBtree* tree, CordRep* rep);

inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
    return CordRepBtree::AddCordRep<kBack>(tree, rep);
  }
  return AppendSlow(tree, rep);
}

inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
    return CordRepBtree::AddCordRep<kFront>(tree, rep);
  }
  return PrependSlow(tree, rep);
}

#ifdef NDEBUG

inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
                                               bool /* shallow */) {
  return tree;
}

inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
                                                     bool /* shallow */) {
  return tree;
}

#endif

}  // namespace cord_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_