1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/cord_rep_btree.h"
16
17 #include <atomic>
18 #include <cassert>
19 #include <cstdint>
20 #include <iostream>
21 #include <ostream>
22 #include <string>
23
24 #include "absl/base/attributes.h"
25 #include "absl/base/config.h"
26 #include "absl/base/internal/raw_logging.h"
27 #include "absl/base/optimization.h"
28 #include "absl/strings/internal/cord_data_edge.h"
29 #include "absl/strings/internal/cord_internal.h"
30 #include "absl/strings/internal/cord_rep_consume.h"
31 #include "absl/strings/internal/cord_rep_flat.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/string_view.h"
34
35 namespace absl {
36 ABSL_NAMESPACE_BEGIN
37 namespace cord_internal {
38
39 #ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
40 constexpr size_t CordRepBtree::kMaxCapacity;
41 #endif
42
43 namespace {
44
// Fixed-size stack of node pointers, deep enough for a maximal-height tree.
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
using EdgeType = CordRepBtree::EdgeType;
using OpResult = CordRepBtree::OpResult;
using CopyResult = CordRepBtree::CopyResult;

constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;

// Process-wide flag enabling exhaustive (deep) validation in IsValid().
// Read/written with relaxed ordering: it is a debug toggle, not a
// synchronization point.
ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
54
55 // Implementation of the various 'Dump' functions.
56 // Prints the entire tree structure or 'rep'. External callers should
57 // not specify 'depth' and leave it to its default (0) value.
58 // Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
void DumpAll(const CordRep* rep,
             bool include_contents,
             std::ostream& stream,
             size_t depth = 0) {
  // Allow for full height trees + substring -> flat / external nodes.
  assert(depth <= CordRepBtree::kMaxDepth + 2);
  // `IsOne()` is non-const upstream, hence the const_cast on a logically
  // read-only query.
  std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
                            ? std::string("Private")
                            : absl::StrCat("Shared(", rep->refcount.Get(), ")");
  std::string sptr = absl::StrCat("0x", absl::Hex(rep));

  // Dumps the data contents of `rep` if `include_contents` is true.
  // Always emits a new line character.
  auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
    if (include_contents) {
      // Allow for up to 60 wide display of content data, which with some
      // indentation and prefix / labels keeps us within roughly 80-100 wide.
      constexpr size_t kMaxDataLength = 60;
      stream << ", data = \""
             << EdgeData(r).substr(0, kMaxDataLength)
             << (r->length > kMaxDataLength ? "\"..." : "\"");
    }
    stream << '\n';
  };

  // For each level, we print the 'shared/private' state and the rep pointer,
  // indented by two spaces per recursive depth.
  stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";

  if (rep->IsBtree()) {
    // Btree nodes print their header and recurse into every edge.
    const CordRepBtree* node = rep->btree();
    std::string label =
        node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
    stream << label << ", len = " << node->length
           << ", begin = " << node->begin() << ", end = " << node->end()
           << "\n";
    for (CordRep* edge : node->Edges()) {
      DumpAll(edge, include_contents, stream, depth + 1);
    }
  } else if (rep->tag == SUBSTRING) {
    // Substring nodes recurse into their single child.
    const CordRepSubstring* substring = rep->substring();
    stream << "Substring, len = " << rep->length
           << ", start = " << substring->start;
    maybe_dump_data(rep);
    DumpAll(substring->child, include_contents, stream, depth + 1);
  } else if (rep->tag >= FLAT) {
    stream << "Flat, len = " << rep->length
           << ", cap = " << rep->flat()->Capacity();
    maybe_dump_data(rep);
  } else if (rep->tag == EXTERNAL) {
    stream << "Extn, len = " << rep->length;
    maybe_dump_data(rep);
  }
}
113
114 // TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
115 // small data out of large reps, and general efficiency of 'always copy small
116 // data'. Consider making this a cord rep internal library function.
CreateSubstring(CordRep * rep,size_t offset,size_t n)117 CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
118 assert(n != 0);
119 assert(offset + n <= rep->length);
120 assert(offset != 0 || n != rep->length);
121
122 if (rep->tag == SUBSTRING) {
123 CordRepSubstring* substring = rep->substring();
124 offset += substring->start;
125 rep = CordRep::Ref(substring->child);
126 CordRep::Unref(substring);
127 }
128 assert(rep->IsExternal() || rep->IsFlat());
129 CordRepSubstring* substring = new CordRepSubstring();
130 substring->length = n;
131 substring->tag = SUBSTRING;
132 substring->start = offset;
133 substring->child = rep;
134 return substring;
135 }
136
137 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset,size_t n)138 inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
139 if (n == rep->length) return rep;
140 if (n == 0) return CordRep::Unref(rep), nullptr;
141 return CreateSubstring(rep, offset, n);
142 }
143
144 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset)145 inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
146 if (offset == 0) return rep;
147 return CreateSubstring(rep, offset, rep->length - offset);
148 }
149
150 // Resizes `edge` to the provided `length`. Adopts a reference on `edge`.
151 // This method directly returns `edge` if `length` equals `edge->length`.
152 // If `is_mutable` is set to true, this function may return `edge` with
153 // `edge->length` set to the new length depending on the type and size of
154 // `edge`. Otherwise, this function returns a new CordRepSubstring value.
155 // Requires `length > 0 && length <= edge->length`.
ResizeEdge(CordRep * edge,size_t length,bool is_mutable)156 CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) {
157 assert(length > 0);
158 assert(length <= edge->length);
159 assert(IsDataEdge(edge));
160 if (length >= edge->length) return edge;
161
162 if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) {
163 edge->length = length;
164 return edge;
165 }
166
167 return CreateSubstring(edge, 0, length);
168 }
169
170 template <EdgeType edge_type>
Consume(absl::string_view s,size_t n)171 inline absl::string_view Consume(absl::string_view s, size_t n) {
172 return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
173 }
174
175 template <EdgeType edge_type>
Consume(char * dst,absl::string_view s,size_t n)176 inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
177 if (edge_type == kBack) {
178 memcpy(dst, s.data(), n);
179 return s.substr(n);
180 } else {
181 const size_t offset = s.size() - n;
182 memcpy(dst, s.data() + offset, n);
183 return s.substr(0, offset);
184 }
185 }
186
187 // Known issue / optimization weirdness: the store associated with the
188 // decrement introduces traffic between cpus (even if the result of that
189 // traffic does nothing), making this faster than a single call to
190 // refcount.Decrement() checking the zero refcount condition.
// Unrefs `r`, invoking `fn(r)` to destroy it when the reference is released.
// A sole owner skips the decrement entirely (see the note above on cross-cpu
// store traffic); otherwise `fn` runs only if our decrement hit zero.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
  if (r->refcount.IsOne() || !r->refcount.DecrementExpectHighRefcount()) {
    fn(r);
  }
}
199
200
DeleteSubstring(CordRepSubstring * substring)201 void DeleteSubstring(CordRepSubstring* substring) {
202 CordRep* rep = substring->child;
203 if (!rep->refcount.Decrement()) {
204 if (rep->tag >= FLAT) {
205 CordRepFlat::Delete(rep->flat());
206 } else {
207 assert(rep->tag == EXTERNAL);
208 CordRepExternal::Delete(rep->external());
209 }
210 }
211 delete substring;
212 }
213
214 // Deletes a leaf node data edge. Requires `IsDataEdge(rep)`.
DeleteLeafEdge(CordRep * rep)215 void DeleteLeafEdge(CordRep* rep) {
216 assert(IsDataEdge(rep));
217 if (rep->tag >= FLAT) {
218 CordRepFlat::Delete(rep->flat());
219 } else if (rep->tag == EXTERNAL) {
220 CordRepExternal::Delete(rep->external());
221 } else {
222 DeleteSubstring(rep->substring());
223 }
224 }
225
226 // StackOperations contains the logic to build a left-most or right-most stack
227 // (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
228 // propagate node changes up the stack.
template <EdgeType edge_type>
struct StackOperations {
  // Returns true if the node at 'depth' is not shared, i.e. has a refcount
  // of one and all of its parent nodes have a refcount of one.
  inline bool owned(int depth) const { return depth < share_depth; }

  // Returns the node at 'depth'.
  inline CordRepBtree* node(int depth) const { return stack[depth]; }

  // Builds a `depth` levels deep stack starting at `tree` recording which nodes
  // are private in the form of the 'share depth' where nodes are shared.
  inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
    assert(depth <= tree->height());
    int current_depth = 0;
    // First leg: descend while every node seen so far is privately owned.
    while (current_depth < depth && tree->refcount.IsOne()) {
      stack[current_depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    // Record the depth at which sharing starts; +1 if the node we stopped on
    // is itself still private (i.e. the stack is fully owned so far).
    share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
    // Second leg: finish the descent; ownership no longer changes the result.
    while (current_depth < depth) {
      stack[current_depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    return tree;
  }

  // Builds a stack with the invariant that all nodes are private owned / not
  // shared. This is used in iterative updates where a previous propagation
  // guaranteed all nodes are owned / private.
  inline void BuildOwnedStack(CordRepBtree* tree, int height) {
    assert(height <= CordRepBtree::kMaxHeight);
    int depth = 0;
    while (depth < height) {
      assert(tree->refcount.IsOne());
      stack[depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    assert(tree->refcount.IsOne());
    // All nodes private: share_depth is one past the deepest stack entry.
    share_depth = depth + 1;
  }

  // Processes the final 'top level' result action for the tree.
  // See the 'Action' enum for the various action implications.
  static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
    switch (result.action) {
      case CordRepBtree::kPopped:
        // A node was popped past the root: grow the tree by one level,
        // rebuilding if that would exceed the maximum height.
        tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree)
                                  : CordRepBtree::New(result.tree, tree);
        if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) {
          tree = CordRepBtree::Rebuild(tree);
          ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight,
                         "Max height exceeded");
        }
        return tree;
      case CordRepBtree::kCopied:
        // The root was copied: release the original.
        CordRep::Unref(tree);
        ABSL_FALLTHROUGH_INTENDED;
      case CordRepBtree::kSelf:
        return result.tree;
    }
    ABSL_UNREACHABLE();
    return result.tree;
  }

  // Propagate the action result in 'result' up into all nodes of the stack
  // starting at depth 'depth'. 'length' contains the extra length of data that
  // was added at the lowest level, and is updated into all nodes of the stack.
  // See the 'Action' enum for the various action implications.
  // If 'propagate' is true, then any copied node values are updated into the
  // stack, which is used for iterative processing on the same stack.
  template <bool propagate = false>
  inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
                              OpResult result) {
    // TODO(mvels): revisit the below code to check if 3 loops with 3
    // (incremental) conditions is faster than 1 loop with a switch.
    // Benchmarking and perf recordings indicate the loop with switch is
    // fastest, likely because of indirect jumps on the tight case values and
    // dense branches. But it's worth considering 3 loops, as the `action`
    // transitions are mono directional. E.g.:
    //   while (action == kPopped) {
    //     ...
    //   }
    //   while (action == kCopied) {
    //     ...
    //   }
    //   ...
    // We also found that an "if () do {}" loop here seems faster, possibly
    // because it allows the branch predictor more granular heuristics on
    // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
    // which appear to be the most common use cases.
    if (depth != 0) {
      do {
        CordRepBtree* node = stack[--depth];
        const bool owned = depth < share_depth;
        switch (result.action) {
          case CordRepBtree::kPopped:
            assert(!propagate);
            result = node->AddEdge<edge_type>(owned, result.tree, length);
            break;
          case CordRepBtree::kCopied:
            result = node->SetEdge<edge_type>(owned, result.tree, length);
            if (propagate) stack[depth] = result.tree;
            break;
          case CordRepBtree::kSelf:
            // Fast path: nothing structural changed below; only the lengths
            // of the remaining ancestors need updating.
            node->length += length;
            while (depth > 0) {
              node = stack[--depth];
              node->length += length;
            }
            return node;
        }
      } while (depth > 0);
    }
    return Finalize(tree, result);
  }

  // Invokes `Unwind` with `propagate=true` to update the stack node values.
  inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
                                 OpResult result) {
    return Unwind</*propagate=*/true>(tree, depth, length, result);
  }

  // `share_depth` contains the depth at which the nodes in the stack become
  // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
  // then `share_depth` is 0. If the 2nd node is shared (and implicitly all
  // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
  // than the depth of the stack indicates that none of the nodes in the stack
  // are shared.
  int share_depth;

  NodeStack stack;
};
361
362 } // namespace
363
// Enables / disables exhaustive validation in AssertValid / IsValid.
// Relaxed ordering: this is a best-effort debug toggle, not a
// synchronization point.
void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation) {
  cord_btree_exhaustive_validation.store(do_exaustive_validation,
                                         std::memory_order_relaxed);
}
368
// Returns true if exhaustive (deep) validation is currently enabled.
bool IsCordBtreeExhaustiveValidationEnabled() {
  return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
}
372
Dump(const CordRep * rep,absl::string_view label,bool include_contents,std::ostream & stream)373 void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
374 bool include_contents, std::ostream& stream) {
375 stream << "===================================\n";
376 if (!label.empty()) {
377 stream << label << '\n';
378 stream << "-----------------------------------\n";
379 }
380 if (rep) {
381 DumpAll(rep, include_contents, stream);
382 } else {
383 stream << "NULL\n";
384 }
385 }
386
// Convenience overload: dumps `rep` under `label` without edge contents.
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
                        std::ostream& stream) {
  Dump(rep, label, false, stream);
}
391
// Convenience overload: dumps `rep` without a label and without contents.
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
  Dump(rep, absl::string_view(), false, stream);
}
395
// Destroys `tree`, a node of height 1 (`size == 1`) or >= 2 (`size == 2`, see
// the call sites in CordRepBtree::Destroy). Decrements the refcount of each
// child node and each grandchild edge, deleting those that reach zero. For
// `size == 1` the grandchild edges are data edges; otherwise they are
// subtrees destroyed recursively through CordRepBtree::Destroy.
template <size_t size>
static void DestroyTree(CordRepBtree* tree) {
  for (CordRep* node : tree->Edges()) {
    if (node->refcount.Decrement()) continue;  // Still shared: keep alive.
    for (CordRep* edge : node->btree()->Edges()) {
      if (edge->refcount.Decrement()) continue;
      if (size == 1) {
        DeleteLeafEdge(edge);
      } else {
        CordRepBtree::Destroy(edge->btree());
      }
    }
    CordRepBtree::Delete(node->btree());
  }
  CordRepBtree::Delete(tree);
}
412
Destroy(CordRepBtree * tree)413 void CordRepBtree::Destroy(CordRepBtree* tree) {
414 switch (tree->height()) {
415 case 0:
416 for (CordRep* edge : tree->Edges()) {
417 if (!edge->refcount.Decrement()) {
418 DeleteLeafEdge(edge);
419 }
420 }
421 return CordRepBtree::Delete(tree);
422 case 1:
423 return DestroyTree<1>(tree);
424 default:
425 return DestroyTree<2>(tree);
426 }
427 }
428
// Validates the structural invariants of `tree`, logging the first failed
// check and returning false. When `shallow` is true only this node (and for
// inner nodes the heights of its direct children) is checked, unless
// exhaustive validation is globally enabled.
bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
#define NODE_CHECK_VALID(x)                                           \
  if (!(x)) {                                                         \
    ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
    return false;                                                     \
  }
#define NODE_CHECK_EQ(x, y)                                                    \
  if ((x) != (y)) {                                                            \
    ABSL_RAW_LOG(ERROR,                                                        \
                 "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
                 #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str());        \
    return false;                                                              \
  }

  NODE_CHECK_VALID(tree != nullptr);
  NODE_CHECK_VALID(tree->IsBtree());
  NODE_CHECK_VALID(tree->height() <= kMaxHeight);
  NODE_CHECK_VALID(tree->begin() < tree->capacity());
  NODE_CHECK_VALID(tree->end() <= tree->capacity());
  NODE_CHECK_VALID(tree->begin() <= tree->end());
  size_t child_length = 0;
  for (CordRep* edge : tree->Edges()) {
    NODE_CHECK_VALID(edge != nullptr);
    if (tree->height() > 0) {
      // Inner node: all edges are btrees exactly one level shorter.
      NODE_CHECK_VALID(edge->IsBtree());
      NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
    } else {
      // Leaf node: all edges are data edges (FLAT / EXTERNAL / SUBSTRING).
      NODE_CHECK_VALID(IsDataEdge(edge));
    }
    child_length += edge->length;
  }
  // A node's length must equal the sum of its edge lengths.
  NODE_CHECK_EQ(child_length, tree->length);
  // Recurse into children unless this is a shallow check (and exhaustive
  // validation is not globally enabled).
  if ((!shallow || IsCordBtreeExhaustiveValidationEnabled()) &&
      tree->height() > 0) {
    for (CordRep* edge : tree->Edges()) {
      if (!IsValid(edge->btree(), shallow)) return false;
    }
  }
  return true;

#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
472
473 #ifndef NDEBUG
474
AssertValid(CordRepBtree * tree,bool shallow)475 CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
476 if (!IsValid(tree, shallow)) {
477 Dump(tree, "CordRepBtree validation failed:", false, std::cout);
478 ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
479 }
480 return tree;
481 }
482
// Const overload of AssertValid: dumps and aborts on validation failure,
// otherwise returns `tree` unchanged.
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
                                              bool shallow) {
  if (IsValid(tree, shallow)) return tree;
  Dump(tree, "CordRepBtree validation failed:", false, std::cout);
  ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  return tree;
}
491
492 #endif // NDEBUG
493
494 template <EdgeType edge_type>
AddEdge(bool owned,CordRep * edge,size_t delta)495 inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
496 if (size() >= kMaxCapacity) return {New(edge), kPopped};
497 OpResult result = ToOpResult(owned);
498 result.tree->Add<edge_type>(edge);
499 result.tree->length += delta;
500 return result;
501 }
502
// Replaces the front (kFront) or back (kBack) edge of this node with `edge`,
// adding `delta` to the node's length. Mutates in place when `owned`,
// otherwise returns a copy with references taken on all unchanged edges.
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
  OpResult result;
  const size_t idx = index(edge_type);
  if (owned) {
    result = {this, kSelf};
    // Drop the reference on the edge being replaced.
    CordRep::Unref(edges_[idx]);
  } else {
    // Create a copy containing all unchanged edges. Unchanged edges are the
    // open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
    // We conveniently cover both case using a constexpr `shift` being 0 or 1
    // as `end :== back + 1`.
    result = {CopyRaw(length), kCopied};
    constexpr int shift = edge_type == kFront ? 1 : 0;
    for (CordRep* r : Edges(begin() + shift, back() + shift)) {
      CordRep::Ref(r);
    }
  }
  result.tree->edges_[idx] = edge;
  result.tree->length += delta;
  return result;
}
525
526 template <EdgeType edge_type>
AddCordRep(CordRepBtree * tree,CordRep * rep)527 CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
528 const int depth = tree->height();
529 const size_t length = rep->length;
530 StackOperations<edge_type> ops;
531 CordRepBtree* leaf = ops.BuildStack(tree, depth);
532 const OpResult result =
533 leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
534 return ops.Unwind(tree, depth, length, result);
535 }
536
537 template <>
NewLeaf(absl::string_view data,size_t extra)538 CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
539 size_t extra) {
540 CordRepBtree* leaf = CordRepBtree::New(0);
541 size_t length = 0;
542 size_t end = 0;
543 const size_t cap = leaf->capacity();
544 while (!data.empty() && end != cap) {
545 auto* flat = CordRepFlat::New(data.length() + extra);
546 flat->length = (std::min)(data.length(), flat->Capacity());
547 length += flat->length;
548 leaf->edges_[end++] = flat;
549 data = Consume<kBack>(flat->Data(), data, flat->length);
550 }
551 leaf->length = length;
552 leaf->set_end(end);
553 return leaf;
554 }
555
556 template <>
NewLeaf(absl::string_view data,size_t extra)557 CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
558 size_t extra) {
559 CordRepBtree* leaf = CordRepBtree::New(0);
560 size_t length = 0;
561 size_t begin = leaf->capacity();
562 leaf->set_end(leaf->capacity());
563 while (!data.empty() && begin != 0) {
564 auto* flat = CordRepFlat::New(data.length() + extra);
565 flat->length = (std::min)(data.length(), flat->Capacity());
566 length += flat->length;
567 leaf->edges_[--begin] = flat;
568 data = Consume<kFront>(flat->Data(), data, flat->length);
569 }
570 leaf->length = length;
571 leaf->set_begin(begin);
572 return leaf;
573 }
574
575 template <>
AddData(absl::string_view data,size_t extra)576 absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
577 size_t extra) {
578 assert(!data.empty());
579 assert(size() < capacity());
580 AlignBegin();
581 const size_t cap = capacity();
582 do {
583 CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
584 const size_t n = (std::min)(data.length(), flat->Capacity());
585 flat->length = n;
586 edges_[fetch_add_end(1)] = flat;
587 data = Consume<kBack>(flat->Data(), data, n);
588 } while (!data.empty() && end() != cap);
589 return data;
590 }
591
592 template <>
AddData(absl::string_view data,size_t extra)593 absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
594 size_t extra) {
595 assert(!data.empty());
596 assert(size() < capacity());
597 AlignEnd();
598 do {
599 CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
600 const size_t n = (std::min)(data.length(), flat->Capacity());
601 flat->length = n;
602 edges_[sub_fetch_begin(1)] = flat;
603 data = Consume<kFront>(flat->Data(), data, n);
604 } while (!data.empty() && begin() != 0);
605 return data;
606 }
607
// Adds `data` to `tree` at the front/back, filling the existing edge-most
// leaf first and then appending freshly built leaves until all data is
// consumed. `extra` reserves headroom in each allocated flat.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
                                    size_t extra) {
  if (ABSL_PREDICT_FALSE(data.empty())) return tree;

  const size_t original_data_size = data.size();
  int depth = tree->height();
  StackOperations<edge_type> ops;
  CordRepBtree* leaf = ops.BuildStack(tree, depth);

  // If there is capacity in the last edge, append as much data
  // as possible into this last edge.
  if (leaf->size() < leaf->capacity()) {
    OpResult result = leaf->ToOpResult(ops.owned(depth));
    data = result.tree->AddData<edge_type>(data, extra);
    if (data.empty()) {
      // Everything fit in the existing leaf: account the length and unwind.
      result.tree->length += original_data_size;
      return ops.Unwind(tree, depth, original_data_size, result);
    }

    // We added some data into this leaf, but not all. Propagate the added
    // length to the top most node, and rebuild the stack with any newly copied
    // or updated nodes. From this point on, the path (leg) from the top most
    // node to the right-most node towards the leaf node is privately owned.
    size_t delta = original_data_size - data.size();
    assert(delta > 0);
    result.tree->length += delta;
    tree = ops.Propagate(tree, depth, delta, result);
    ops.share_depth = depth + 1;
  }

  // We were unable to append all data into the existing right-most leaf node.
  // This means all remaining data must be put into (a) new leaf node(s) which
  // we append to the tree. To make this efficient, we iteratively build full
  // leaf nodes from `data` until the created leaf contains all remaining data.
  // We utilize the `Unwind` method to merge the created leaf into the first
  // level towards root that has capacity. On each iteration with remaining
  // data, we rebuild the stack in the knowledge that right-most nodes are
  // privately owned after the first `Unwind` completes.
  for (;;) {
    OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
    if (result.tree->length == data.size()) {
      // Final leaf holds all remaining data: unwind and we are done.
      return ops.Unwind(tree, depth, result.tree->length, result);
    }
    data = Consume<edge_type>(data, result.tree->length);
    tree = ops.Unwind(tree, depth, result.tree->length, result);
    depth = tree->height();
    ops.BuildOwnedStack(tree, depth);
  }
}
658
// Merges `src` into `dst` at the front/back, adopting the references on both
// trees. Requires `dst->height() >= src->height()`.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
  assert(dst->height() >= src->height());

  // Capture source length as we may consume / destroy `src`.
  const size_t length = src->length;

  // We attempt to merge `src` at its corresponding height in `dst`.
  const int depth = dst->height() - src->height();
  StackOperations<edge_type> ops;
  CordRepBtree* merge_node = ops.BuildStack(dst, depth);

  // If there is enough space in `merge_node` for all edges from `src`, add all
  // edges to this node, making a fresh copy as needed if not privately owned.
  // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
  // `Finalize` to merge `src` into the first level towards `root` where there
  // is capacity for another edge, or create a new top level node.
  OpResult result;
  if (merge_node->size() + src->size() <= kMaxCapacity) {
    result = merge_node->ToOpResult(ops.owned(depth));
    result.tree->Add<edge_type>(src->Edges());
    result.tree->length += src->length;
    if (src->refcount.IsOne()) {
      // Privately owned: `src`'s edge references transfer to `result.tree`,
      // so only the node shell is deleted.
      Delete(src);
    } else {
      // Shared: take new references on the adopted edges before releasing
      // our reference on `src`.
      for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
      CordRepBtree::Unref(src);
    }
  } else {
    result = {src, kPopped};
  }

  // Unless we merged at the top level (i.e.: src and dst are equal height),
  // unwind the result towards the top level, and finalize the result.
  if (depth) {
    return ops.Unwind(dst, depth, length, result);
  }
  return ops.Finalize(dst, result);
}
698
// Returns a copy of the suffix of this tree starting at `offset`, together
// with the height of the resulting tree (-1 for a plain data edge).
// Requires `offset < length`.
CopyResult CordRepBtree::CopySuffix(size_t offset) {
  assert(offset < this->length);

  // As long as `offset` starts inside the last edge, we can 'drop' the current
  // depth. For the most extreme example: if offset references the last data
  // edge in the tree, there is only a single edge / path from the top of the
  // tree to that last edge, so we can drop all the nodes except that edge.
  // The fast path check for this is `back->length >= length - offset`.
  int height = this->height();
  CordRepBtree* node = this;
  size_t len = node->length - offset;
  CordRep* back = node->Edge(kBack);
  while (back->length >= len) {
    offset = back->length - len;
    if (--height < 0) {
      // Reached the data edge level: the suffix is (a substring of) `back`.
      return {MakeSubstring(CordRep::Ref(back), offset), height};
    }
    node = back->btree();
    back = node->Edge(kBack);
  }
  if (offset == 0) return {CordRep::Ref(node), height};

  // Offset does not point into the last edge, so we span at least two edges.
  // Find the index of offset with `IndexBeyond` which provides us the edge
  // 'beyond' the offset if offset is not a clean starting point of an edge.
  Position pos = node->IndexBeyond(offset);
  CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
  const CopyResult result = {sub, height};

  // `pos.n` contains a non zero value if the offset is not an exact starting
  // point of an edge. In this case, `pos.n` contains the 'trailing' amount of
  // bytes of the edge preceding that in `pos.index`. We need to iteratively
  // adjust the preceding edge with the 'broken' offset until we have a perfect
  // start of the edge.
  while (pos.n != 0) {
    assert(pos.index >= 1);
    const size_t begin = pos.index - 1;
    sub->set_begin(begin);
    CordRep* const edge = node->Edge(begin);

    len = pos.n;
    offset = edge->length - len;

    if (--height < 0) {
      // Data edge level: materialize the partial edge as a substring.
      sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
      return result;
    }

    node = edge->btree();
    pos = node->IndexBeyond(offset);

    CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
    sub->edges_[begin] = nsub;
    sub = nsub;
  }
  sub->set_begin(pos.index);
  return result;
}
757
// Returns a copy of the first `n` bytes of this tree, together with the
// height of the resulting tree (-1 for a plain data edge). When
// `allow_folding` is true, levels whose first edge already covers `n` bytes
// are dropped. Requires `0 < n <= length`.
CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) {
  assert(n > 0);
  assert(n <= this->length);

  // As long as `n` does not exceed the length of the first edge, we can 'drop'
  // the current depth. For the most extreme example: if we'd copy a 1 byte
  // prefix from a tree, there is only a single edge / path from the top of the
  // tree to the single data edge containing this byte, so we can drop all the
  // nodes except the data node.
  int height = this->height();
  CordRepBtree* node = this;
  CordRep* front = node->Edge(kFront);
  if (allow_folding) {
    while (front->length >= n) {
      if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
      node = front->btree();
      front = node->Edge(kFront);
    }
  }
  if (node->length == n) return {CordRep::Ref(node), height};

  // `n` spans at least two nodes, find the end point of the span.
  Position pos = node->IndexOf(n);

  // Create a partial copy of the node up to `pos.index`, with a defined length
  // of `n`. Any 'partial last edge' is added further below as needed.
  CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
  const CopyResult result = {sub, height};

  // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
  // this is not zero, we don't have a 'clean cut', so we need to make a
  // (partial) copy of that last edge, and repeat this until pos.n is zero.
  while (pos.n != 0) {
    size_t end = pos.index;
    n = pos.n;

    CordRep* edge = node->Edge(pos.index);
    if (--height < 0) {
      // Data edge level: materialize the partial edge as a substring.
      sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
      sub->set_end(end);
      AssertValid(result.edge->btree());
      return result;
    }

    node = edge->btree();
    pos = node->IndexOf(n);
    CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
    sub->edges_[end++] = nsub;
    sub->set_end(end);
    sub = nsub;
  }
  sub->set_end(pos.index);
  AssertValid(result.edge->btree());
  return result;
}
813
// Extracts and returns the front edge of `tree`, consuming the caller's
// reference on `tree`. The caller receives a reference on the returned edge.
CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) {
  CordRep* front = tree->Edge(tree->begin());
  if (tree->refcount.IsOne()) {
    // Privately owned: steal the node's reference on `front`, release all
    // remaining edges, and delete the node itself (Delete does not touch
    // the edges' refcounts).
    Unref(tree->Edges(tree->begin() + 1, tree->end()));
    CordRepBtree::Delete(tree);
  } else {
    // Shared: take a new reference on `front` before releasing our reference
    // on `tree`, so `front` stays alive if `tree` is destroyed.
    CordRep::Ref(front);
    CordRep::Unref(tree);
  }
  return front;
}
825
// Returns a tree holding the edges [tree->begin(), end) of `tree`, with its
// length set to `new_length`. Consumes the caller's reference on `tree`:
// a privately owned tree is trimmed in place, a shared tree is copied.
CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end,
                                           size_t new_length) {
  assert(end <= tree->end());
  if (tree->refcount.IsOne()) {
    // Privately owned: release the trailing edges and shrink in place.
    Unref(tree->Edges(end, tree->end()));
    tree->set_end(end);
    tree->length = new_length;
  } else {
    // Shared: create a partial copy, then release our reference on the input.
    CordRepBtree* old = tree;
    tree = tree->CopyBeginTo(end, new_length);
    CordRep::Unref(old);
  }
  return tree;
}
840
// Removes the last `n` bytes of `tree`, returning the resulting rep — which
// may be a data edge rather than a btree node — or nullptr if all bytes are
// removed. Consumes the caller's reference on `tree`; privately owned nodes
// are mutated in place, shared nodes are (partially) copied.
CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) {
  // Check input and deal with trivial cases 'Remove all/none'
  assert(tree != nullptr);
  assert(n <= tree->length);
  const size_t len = tree->length;
  if (ABSL_PREDICT_FALSE(n == 0)) {
    return tree;
  }
  if (ABSL_PREDICT_FALSE(n >= len)) {
    CordRepBtree::Unref(tree);
    return nullptr;
  }

  size_t length = len - n;
  int height = tree->height();
  bool is_mutable = tree->refcount.IsOne();

  // Extract all top nodes which are reduced to size = 1
  Position pos = tree->IndexOfLength(length);
  while (pos.index == tree->begin()) {
    // The remaining bytes fit entirely inside the first edge: pop this node
    // and descend into that edge.
    CordRep* edge = ExtractFront(tree);
    // In-place edge resizing requires the whole chain down to be unshared.
    is_mutable &= edge->refcount.IsOne();
    if (height-- == 0) return ResizeEdge(edge, length, is_mutable);
    tree = edge->btree();
    pos = tree->IndexOfLength(length);
  }

  // Repeat the following sequence traversing down the tree:
  // - Crop the top node to the 'last remaining edge' adjusting length.
  // - Set the length for down edges to the partial length in that last edge.
  // - Repeat this until the last edge is 'included in full'
  // - If we hit the data edge level, resize and return the last data edge
  CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length);
  CordRep* edge = tree->Edge(pos.index);
  length = pos.n;
  while (length != edge->length) {
    // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy.
    assert(tree->refcount.IsOne());
    const bool edge_is_mutable = edge->refcount.IsOne();

    if (height-- == 0) {
      // Data edge level: replace the last edge with a resized version.
      tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable);
      return AssertValid(top);
    }

    if (!edge_is_mutable) {
      // We can't 'in place' remove any suffixes down this edge.
      // Replace this edge with a prefix copy instead.
      tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge;
      CordRep::Unref(edge);
      return AssertValid(top);
    }

    // Move down one level, rinse repeat.
    tree = edge->btree();
    pos = tree->IndexOfLength(length);
    tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length);
    edge = tree->Edge(pos.index);
    length = pos.n;
  }

  return AssertValid(top);
}
904
// Returns a new rep (tree or data edge) representing the `n` bytes starting
// at `offset`. Does not modify `this`; the result holds its own references.
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
  assert(n <= this->length);
  assert(offset <= this->length - n);
  if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;

  // Descend while the entire substring fits inside a single edge; if we reach
  // the data edge level this way, return a (substring of) that data edge.
  CordRepBtree* node = this;
  int height = node->height();
  Position front = node->IndexOf(offset);
  CordRep* left = node->edges_[front.index];
  while (front.n + n <= left->length) {
    if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
    node = left->btree();
    front = node->IndexOf(front.n);
    left = node->edges_[front.index];
  }

  // The substring spans multiple edges of `node`: `front`/`left` identify the
  // first edge and the offset inside it, `back`/`right` the last edge and the
  // length consumed from it.
  const Position back = node->IndexBefore(front, n);
  CordRep* const right = node->edges_[back.index];
  assert(back.index > front.index);

  // Get partial suffix and prefix entries.
  CopyResult prefix;
  CopyResult suffix;
  if (height > 0) {
    // Copy prefix and suffix of the boundary nodes.
    prefix = left->btree()->CopySuffix(front.n);
    suffix = right->btree()->CopyPrefix(back.n);

    // If there is an edge between the prefix and suffix edges, then the tree
    // must remain at its previous (full) height. If we have no edges between
    // prefix and suffix edges, then the tree must be as high as either the
    // suffix or prefix edges (which are collapsed to their minimum heights).
    if (front.index + 1 == back.index) {
      height = (std::max)(prefix.height, suffix.height) + 1;
    }

    // Raise prefix and suffixes to the new tree height.
    for (int h = prefix.height + 1; h < height; ++h) {
      prefix.edge = CordRepBtree::New(prefix.edge);
    }
    for (int h = suffix.height + 1; h < height; ++h) {
      suffix.edge = CordRepBtree::New(suffix.edge);
    }
  } else {
    // Leaf node, simply take substrings for prefix and suffix.
    prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
    suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
  }

  // Compose resulting tree: the prefix edge, references to all middle edges,
  // and the suffix edge.
  CordRepBtree* sub = CordRepBtree::New(height);
  size_t end = 0;
  sub->edges_[end++] = prefix.edge;
  for (CordRep* r : node->Edges(front.index + 1, back.index)) {
    sub->edges_[end++] = CordRep::Ref(r);
  }
  sub->edges_[end++] = suffix.edge;
  sub->set_end(end);
  sub->length = n;
  return AssertValid(sub);
}
966
MergeTrees(CordRepBtree * left,CordRepBtree * right)967 CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
968 CordRepBtree* right) {
969 return left->height() >= right->height() ? Merge<kBack>(left, right)
970 : Merge<kFront>(right, left);
971 }
972
IsFlat(absl::string_view * fragment) const973 bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
974 if (height() == 0 && size() == 1) {
975 if (fragment) *fragment = Data(begin());
976 return true;
977 }
978 return false;
979 }
980
// Returns true if the `n` bytes starting at `offset` are contained inside a
// single data edge, setting `fragment` (if non-null) to a view of them.
bool CordRepBtree::IsFlat(size_t offset, const size_t n,
                          absl::string_view* fragment) const {
  assert(n <= this->length);
  assert(offset <= this->length - n);
  if (ABSL_PREDICT_FALSE(n == 0)) return false;
  int height = this->height();
  const CordRepBtree* node = this;
  for (;;) {
    // Locate the edge containing `offset`; `front.n` is the offset inside it.
    const Position front = node->IndexOf(offset);
    const CordRep* edge = node->Edge(front.index);
    // The requested span must fit entirely inside this edge to be flat.
    if (edge->length < front.n + n) return false;
    if (--height < 0) {
      // Reached the data edge level: the span is flat.
      if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
      return true;
    }
    offset = front.n;
    node = node->Edge(front.index)->btree();
  }
}
1000
GetCharacter(size_t offset) const1001 char CordRepBtree::GetCharacter(size_t offset) const {
1002 assert(offset < length);
1003 const CordRepBtree* node = this;
1004 int height = node->height();
1005 for (;;) {
1006 Position front = node->IndexOf(offset);
1007 if (--height < 0) return node->Data(front.index)[front.n];
1008 offset = front.n;
1009 node = node->Edge(front.index)->btree();
1010 }
1011 }
1012
// Returns a writable span over the unused capacity of the last flat in this
// tree (up to `size` bytes), extending the lengths of the flat and all nodes
// on the path to it. Returns an empty span if any node or the flat is shared,
// or if the flat has no spare capacity.
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
  // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
  assert(height() >= 4);
  assert(refcount.IsOne());

  // Build a stack of nodes we may potentially need to update if we find a
  // non-shared FLAT with capacity at the leaf level.
  const int depth = height();
  CordRepBtree* node = this;
  CordRepBtree* stack[kMaxDepth];
  for (int i = 0; i < depth; ++i) {
    node = node->Edge(kBack)->btree();
    if (!node->refcount.IsOne()) return {};
    stack[i] = node;
  }

  // Must be a privately owned, mutable flat.
  CordRep* const edge = node->Edge(kBack);
  if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};

  // Must have capacity.
  const size_t avail = edge->flat()->Capacity() - edge->length;
  if (avail == 0) return {};

  // Build span on remaining capacity.
  size_t delta = (std::min)(size, avail);
  Span<char> span = {edge->flat()->Data() + edge->length, delta};
  edge->length += delta;
  this->length += delta;
  // Propagate the added length to every node on the path to the flat, so all
  // ancestor lengths stay consistent with the sum of their edges.
  for (int i = 0; i < depth; ++i) {
    stack[i]->length += delta;
  }
  return span;
}
1047
CreateSlow(CordRep * rep)1048 CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
1049 if (rep->IsBtree()) return rep->btree();
1050
1051 CordRepBtree* node = nullptr;
1052 auto consume = [&node](CordRep* r, size_t offset, size_t length) {
1053 r = MakeSubstring(r, offset, length);
1054 if (node == nullptr) {
1055 node = New(r);
1056 } else {
1057 node = CordRepBtree::AddCordRep<kBack>(node, r);
1058 }
1059 };
1060 Consume(rep, consume);
1061 return node;
1062 }
1063
AppendSlow(CordRepBtree * tree,CordRep * rep)1064 CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
1065 if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
1066 return MergeTrees(tree, rep->btree());
1067 }
1068 auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
1069 r = MakeSubstring(r, offset, length);
1070 tree = CordRepBtree::AddCordRep<kBack>(tree, r);
1071 };
1072 Consume(rep, consume);
1073 return tree;
1074 }
1075
PrependSlow(CordRepBtree * tree,CordRep * rep)1076 CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
1077 if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
1078 return MergeTrees(rep->btree(), tree);
1079 }
1080 auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
1081 r = MakeSubstring(r, offset, length);
1082 tree = CordRepBtree::AddCordRep<kFront>(tree, r);
1083 };
1084 ReverseConsume(rep, consume);
1085 return tree;
1086 }
1087
// Appends `data` at the back of `tree`. `extra` is forwarded to AddData<>
// (presumably an extra-capacity hint for newly created flats — see AddData).
CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
                                   size_t extra) {
  return CordRepBtree::AddData<kBack>(tree, data, extra);
}
1092
// Prepends `data` at the front of `tree`. `extra` is forwarded to AddData<>
// (presumably an extra-capacity hint for newly created flats — see AddData).
CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
                                    size_t extra) {
  return CordRepBtree::AddData<kFront>(tree, data, extra);
}
1097
// Explicit instantiations of AddCordRep<> and AddData<> for both insertion
// directions, so out-of-line callers can link against them.
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
                                                        CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
                                                       CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
                                                     absl::string_view data,
                                                     size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
                                                    absl::string_view data,
                                                    size_t extra);
1108
// Rebuilds `tree` into the partially built tree held in `stack`, where
// `stack[0]` is the leaf being filled and `stack[i]` its ancestor at height
// `i`, terminated by a nullptr entry (see Rebuild(CordRepBtree*) below).
// If `consume` is true, consumes the caller's reference on `tree`.
void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree,
                           bool consume) {
  bool owned = consume && tree->refcount.IsOne();
  if (tree->height() == 0) {
    // Leaf node: move each data edge into the tree under construction.
    for (CordRep* edge : tree->Edges()) {
      // If `tree` is privately owned we steal its edge references; otherwise
      // we must add our own reference per edge.
      if (!owned) edge = CordRep::Ref(edge);
      size_t height = 0;
      size_t length = edge->length;
      CordRepBtree* node = stack[0];
      OpResult result = node->AddEdge<kBack>(true, edge, length);
      // `kPopped` means the node was full: `result.tree` holds the overflow
      // node which must be inserted at the next level up.
      while (result.action == CordRepBtree::kPopped) {
        stack[height] = result.tree;
        if (stack[++height] == nullptr) {
          // No parent yet: grow the tree by one level.
          result.action = CordRepBtree::kSelf;
          stack[height] = CordRepBtree::New(node, result.tree);
        } else {
          node = stack[height];
          result = node->AddEdge<kBack>(true, result.tree, length);
        }
      }
      // Add the edge's length to all remaining ancestors above the point
      // where the insert (or pop cascade) settled.
      while (stack[++height] != nullptr) {
        stack[height]->length += length;
      }
    }
  } else {
    // Internal node: recurse into children. Ownership transfers down only
    // when this node itself is privately owned.
    for (CordRep* rep : tree->Edges()) {
      Rebuild(stack, rep->btree(), owned);
    }
  }
  if (consume) {
    if (owned) {
      // Edge references were stolen above, so delete the node only.
      CordRepBtree::Delete(tree);
    } else {
      CordRepBtree::Unref(tree);
    }
  }
}
1146
Rebuild(CordRepBtree * tree)1147 CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) {
1148 // Set up initial stack with empty leaf node.
1149 CordRepBtree* node = CordRepBtree::New();
1150 CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node};
1151
1152 // Recursively build the tree, consuming the input tree.
1153 Rebuild(stack, tree, /* consume reference */ true);
1154
1155 // Return top most node
1156 for (CordRepBtree* parent : stack) {
1157 if (parent == nullptr) return node;
1158 node = parent;
1159 }
1160
1161 // Unreachable
1162 assert(false);
1163 return nullptr;
1164 }
1165
ExtractAppendBuffer(CordRepBtree * tree,size_t extra_capacity)1166 CordRepBtree::ExtractResult CordRepBtree::ExtractAppendBuffer(
1167 CordRepBtree* tree, size_t extra_capacity) {
1168 int depth = 0;
1169 NodeStack stack;
1170
1171 // Set up default 'no success' result which is {tree, nullptr}.
1172 ExtractResult result;
1173 result.tree = tree;
1174 result.extracted = nullptr;
1175
1176 // Dive down the right side of the tree, making sure no edges are shared.
1177 while (tree->height() > 0) {
1178 if (!tree->refcount.IsOne()) return result;
1179 stack[depth++] = tree;
1180 tree = tree->Edge(kBack)->btree();
1181 }
1182 if (!tree->refcount.IsOne()) return result;
1183
1184 // Validate we ended on a non shared flat.
1185 CordRep* rep = tree->Edge(kBack);
1186 if (!(rep->IsFlat() && rep->refcount.IsOne())) return result;
1187
1188 // Verify it has at least the requested extra capacity.
1189 CordRepFlat* flat = rep->flat();
1190 const size_t length = flat->length;
1191 const size_t avail = flat->Capacity() - flat->length;
1192 if (extra_capacity > avail) return result;
1193
1194 // Set the extracted flat in the result.
1195 result.extracted = flat;
1196
1197 // Cascading delete all nodes that become empty.
1198 while (tree->size() == 1) {
1199 CordRepBtree::Delete(tree);
1200 if (--depth < 0) {
1201 // We consumed the entire tree: return nullptr for new tree.
1202 result.tree = nullptr;
1203 return result;
1204 }
1205 rep = tree;
1206 tree = stack[depth];
1207 }
1208
1209 // Remove the edge or cascaded up parent node.
1210 tree->set_end(tree->end() - 1);
1211 tree->length -= length;
1212
1213 // Adjust lengths up the tree.
1214 while (depth > 0) {
1215 tree = stack[--depth];
1216 tree->length -= length;
1217 }
1218
1219 // Remove unnecessary top nodes with size = 1. This may iterate all the way
1220 // down to the leaf node in which case we simply return the remaining last
1221 // edge in that node and the extracted flat.
1222 while (tree->size() == 1) {
1223 int height = tree->height();
1224 rep = tree->Edge(kBack);
1225 Delete(tree);
1226 if (height == 0) {
1227 // We consumed the leaf: return the sole data edge as the new tree.
1228 result.tree = rep;
1229 return result;
1230 }
1231 tree = rep->btree();
1232 }
1233
1234 // Done: return the (new) top level node and extracted flat.
1235 result.tree = tree;
1236 return result;
1237 }
1238
1239 } // namespace cord_internal
1240 ABSL_NAMESPACE_END
1241 } // namespace absl
1242