• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/cord_rep_btree.h"
16 
17 #include <cassert>
18 #include <cstdint>
19 #include <iostream>
20 #include <string>
21 
22 #include "absl/base/attributes.h"
23 #include "absl/base/config.h"
24 #include "absl/base/internal/raw_logging.h"
25 #include "absl/strings/internal/cord_internal.h"
26 #include "absl/strings/internal/cord_rep_consume.h"
27 #include "absl/strings/internal/cord_rep_flat.h"
28 #include "absl/strings/str_cat.h"
29 #include "absl/strings/string_view.h"
30 
31 namespace absl {
32 ABSL_NAMESPACE_BEGIN
33 namespace cord_internal {
34 
35 constexpr size_t CordRepBtree::kMaxCapacity;  // NOLINT: needed for c++ < c++17
36 
37 namespace {
38 
39 using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
40 using EdgeType = CordRepBtree::EdgeType;
41 using OpResult = CordRepBtree::OpResult;
42 using CopyResult = CordRepBtree::CopyResult;
43 
44 constexpr auto kFront = CordRepBtree::kFront;
45 constexpr auto kBack = CordRepBtree::kBack;
46 
exhaustive_validation()47 inline bool exhaustive_validation() {
48   return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
49 }
50 
// Implementation of the various 'Dump' functions.
// Recursively prints the tree structure of 'rep', one line per rep, indented
// two spaces per recursion level. Rep may be a CordRepBtree tree, or a
// SUBSTRING / EXTERNAL / FLAT data edge. External callers should not specify
// 'depth' and leave it to its default (0) value.
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
             int depth = 0) {
  // Allow for full height trees + substring -> flat / external nodes.
  assert(depth <= CordRepBtree::kMaxDepth + 2);
  // `IsOne()` is non-const, hence the const_cast; the query does not
  // logically modify the refcount.
  std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
                            ? std::string("Private")
                            : absl::StrCat("Shared(", rep->refcount.Get(), ")");
  std::string sptr = absl::StrCat("0x", absl::Hex(rep));

  // Dumps the data contents of `rep` if `include_contents` is true.
  // Always emits a new line character.
  auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
    if (include_contents) {
      // Allow for up to 60 wide display of content data, which with some
      // indentation and prefix / labels keeps us within roughly 80-100 wide.
      constexpr size_t kMaxDataLength = 60;
      stream << ", data = \""
             << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
             << (r->length > kMaxDataLength ? "\"..." : "\"");
    }
    stream << '\n';
  };

  // For each level, we print the 'shared/private' state and the rep pointer,
  // indented by two spaces per recursive depth.
  stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";

  if (rep->IsBtree()) {
    const CordRepBtree* node = rep->btree();
    std::string label =
        node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
    stream << label << ", len = " << node->length
           << ", begin = " << node->begin() << ", end = " << node->end()
           << "\n";
    // Recurse into every child edge one level deeper.
    for (CordRep* edge : node->Edges()) {
      DumpAll(edge, include_contents, stream, depth + 1);
    }
  } else if (rep->tag == SUBSTRING) {
    const CordRepSubstring* substring = rep->substring();
    stream << "Substring, len = " << rep->length
           << ", start = " << substring->start;
    maybe_dump_data(rep);
    // A substring has exactly one child holding the underlying data.
    DumpAll(substring->child, include_contents, stream, depth + 1);
  } else if (rep->tag >= FLAT) {
    // All tag values >= FLAT denote flat nodes (the tag encodes capacity).
    stream << "Flat, len = " << rep->length;
    maybe_dump_data(rep);
  } else if (rep->tag == EXTERNAL) {
    stream << "Extn, len = " << rep->length;
    maybe_dump_data(rep);
  }
}
106 
107 // TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
108 // small data out of large reps, and general efficiency of 'always copy small
109 // data'. Consider making this a cord rep internal library function.
CreateSubstring(CordRep * rep,size_t offset,size_t n)110 CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
111   assert(n != 0);
112   assert(offset + n <= rep->length);
113   assert(offset != 0 || n != rep->length);
114 
115   if (rep->tag == SUBSTRING) {
116     CordRepSubstring* substring = rep->substring();
117     offset += substring->start;
118     rep = CordRep::Ref(substring->child);
119     CordRep::Unref(substring);
120   }
121   CordRepSubstring* substring = new CordRepSubstring();
122   substring->length = n;
123   substring->tag = SUBSTRING;
124   substring->start = offset;
125   substring->child = rep;
126   return substring;
127 }
128 
129 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset,size_t n)130 inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
131   if (n == rep->length) return rep;
132   if (n == 0) return CordRep::Unref(rep), nullptr;
133   return CreateSubstring(rep, offset, n);
134 }
135 
136 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset)137 inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
138   if (offset == 0) return rep;
139   return CreateSubstring(rep, offset, rep->length - offset);
140 }
141 
142 template <EdgeType edge_type>
Consume(absl::string_view s,size_t n)143 inline absl::string_view Consume(absl::string_view s, size_t n) {
144   return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
145 }
146 
147 template <EdgeType edge_type>
Consume(char * dst,absl::string_view s,size_t n)148 inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
149   if (edge_type == kBack) {
150     memcpy(dst, s.data(), n);
151     return s.substr(n);
152   } else {
153     const size_t offset = s.size() - n;
154     memcpy(dst, s.data() + offset, n);
155     return s.substr(0, offset);
156   }
157 }
158 
// Unrefs `r`, invoking `fn(r)` when the reference is (or drops to) the last
// one. Known issue / optimization weirdness: the store associated with the
// decrement introduces traffic between cpus (even if the result of that
// traffic does nothing), making the explicit `IsOne()` fast path faster than
// a single call to refcount.Decrement() checking the zero refcount condition.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
  if (r->refcount.IsOne() || !r->refcount.DecrementExpectHighRefcount()) {
    fn(r);
  }
}
171 
// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
// node, or a SUBSTRING of an EXTERNAL or FLAT node.
void DeleteLeafEdge(CordRep* rep) {
  for (;;) {
    // All tag values >= FLAT denote flat nodes (the tag encodes capacity).
    if (rep->tag >= FLAT) {
      CordRepFlat::Delete(rep->flat());
      return;
    }
    if (rep->tag == EXTERNAL) {
      CordRepExternal::Delete(rep->external());
      return;
    }
    // The only remaining valid tag is SUBSTRING: delete the wrapper, then
    // loop to release its child, which must be an EXTERNAL or FLAT node.
    assert(rep->tag == SUBSTRING);
    CordRepSubstring* substring = rep->substring();
    rep = substring->child;
    assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
    delete substring;
    // If the child is still referenced elsewhere, we are done.
    if (rep->refcount.Decrement()) return;
  }
}
192 
// StackOperations contains the logic to build a left-most or right-most stack
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
// propagate node changes up the stack.
template <EdgeType edge_type>
struct StackOperations {
  // Returns true if the node at 'depth' is not shared, i.e. has a refcount
  // of one and all of its parent nodes have a refcount of one.
  inline bool owned(int depth) const { return depth < share_depth; }

  // Returns the node at 'depth'.
  inline CordRepBtree* node(int depth) const { return stack[depth]; }

  // Builds a `depth` levels deep stack starting at `tree` recording which nodes
  // are private in the form of the 'share depth' where nodes are shared.
  inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
    assert(depth <= tree->height());
    // First descend while nodes are privately owned (refcount of one)...
    int current_depth = 0;
    while (current_depth < depth && tree->refcount.IsOne()) {
      stack[current_depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    // ...record where sharing starts: +1 when the node where we stopped is
    // itself still private (i.e., we stopped because `depth` was reached).
    share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
    // Finish the descent without further refcount checks: once a node is
    // shared, all nodes below it are implicitly shared as well.
    while (current_depth < depth) {
      stack[current_depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    return tree;
  }

  // Builds a stack with the invariant that all nodes are private owned / not
  // shared. This is used in iterative updates where a previous propagation
  // guaranteed all nodes are owned / private.
  inline void BuildOwnedStack(CordRepBtree* tree, int height) {
    assert(height <= CordRepBtree::kMaxHeight);
    int depth = 0;
    while (depth < height) {
      assert(tree->refcount.IsOne());
      stack[depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    assert(tree->refcount.IsOne());
    // `share_depth` beyond the stack depth: no node in the stack is shared.
    share_depth = depth + 1;
  }

  // Processes the final 'top level' result action for the tree.
  // See the 'Action' enum for the various action implications.
  static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
    switch (result.action) {
      case CordRepBtree::kPopped:
        // A popped node at the top level grows the tree by one level.
        if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) {
          ABSL_RAW_LOG(FATAL, "Max height exceeded");
        }
        return edge_type == kBack ? CordRepBtree::New(tree, result.tree)
                                  : CordRepBtree::New(result.tree, tree);
      case CordRepBtree::kCopied:
        // The tree was copied: release the reference on the original.
        CordRep::Unref(tree);
        ABSL_FALLTHROUGH_INTENDED;
      case CordRepBtree::kSelf:
        return result.tree;
    }
    ABSL_INTERNAL_UNREACHABLE;
    return result.tree;
  }

  // Propagate the action result in 'result' up into all nodes of the stack
  // starting at depth 'depth'. 'length' contains the extra length of data that
  // was added at the lowest level, and is updated into all nodes of the stack.
  // See the 'Action' enum for the various action implications.
  // If 'propagate' is true, then any copied node values are updated into the
  // stack, which is used for iterative processing on the same stack.
  template <bool propagate = false>
  inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
                              OpResult result) {
    // TODO(mvels): revisit the below code to check if 3 loops with 3
    // (incremental) conditions is faster than 1 loop with a switch.
    // Benchmarking and perf recordings indicate the loop with switch is
    // fastest, likely because of indirect jumps on the tight case values and
    // dense branches. But it's worth considering 3 loops, as the `action`
    // transitions are mono directional. E.g.:
    //   while (action == kPopped) {
    //     ...
    //   }
    //   while (action == kCopied) {
    //     ...
    //   }
    //   ...
    // We also  found that an "if () do {}" loop here seems faster, possibly
    // because it allows the branch predictor more granular heuristics on
    // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
    // which appear to be the most common use cases.
    if (depth != 0) {
      do {
        CordRepBtree* node = stack[--depth];
        const bool owned = depth < share_depth;
        switch (result.action) {
          case CordRepBtree::kPopped:
            // kPopped never occurs on propagation: iterative updates operate
            // on fully owned stacks where nodes do not pop.
            assert(!propagate);
            result = node->AddEdge<edge_type>(owned, result.tree, length);
            break;
          case CordRepBtree::kCopied:
            result = node->SetEdge<edge_type>(owned, result.tree, length);
            if (propagate) stack[depth] = result.tree;
            break;
          case CordRepBtree::kSelf:
            // In-place update: only the lengths of the remaining parent
            // nodes need adjusting; no further edge changes can occur.
            node->length += length;
            while (depth > 0) {
              node = stack[--depth];
              node->length += length;
            }
            return node;
        }
      } while (depth > 0);
    }
    return Finalize(tree, result);
  }

  // Invokes `Unwind` with `propagate=true` to update the stack node values.
  inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
                                 OpResult result) {
    return Unwind</*propagate=*/true>(tree, depth, length, result);
  }

  // `share_depth` contains the depth at which the nodes in the stack become
  // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
  // then `share_depth` is 0. If the 2nd node is shared (and implicitly all
  // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
  // than the depth of the stack indicates that none of the nodes in the stack
  // are shared.
  int share_depth;

  NodeStack stack;
};
325 
326 }  // namespace
327 
Dump(const CordRep * rep,absl::string_view label,bool include_contents,std::ostream & stream)328 void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
329                         bool include_contents, std::ostream& stream) {
330   stream << "===================================\n";
331   if (!label.empty()) {
332     stream << label << '\n';
333     stream << "-----------------------------------\n";
334   }
335   if (rep) {
336     DumpAll(rep, include_contents, stream);
337   } else {
338     stream << "NULL\n";
339   }
340 }
341 
Dump(const CordRep * rep,absl::string_view label,std::ostream & stream)342 void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
343                         std::ostream& stream) {
344   Dump(rep, label, false, stream);
345 }
346 
Dump(const CordRep * rep,std::ostream & stream)347 void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
348   Dump(rep, absl::string_view(), false, stream);
349 }
350 
DestroyLeaf(CordRepBtree * tree,size_t begin,size_t end)351 void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
352   for (CordRep* edge : tree->Edges(begin, end)) {
353     FastUnref(edge, DeleteLeafEdge);
354   }
355   Delete(tree);
356 }
357 
DestroyNonLeaf(CordRepBtree * tree,size_t begin,size_t end)358 void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
359                                   size_t end) {
360   for (CordRep* edge : tree->Edges(begin, end)) {
361     FastUnref(edge->btree(), Destroy);
362   }
363   Delete(tree);
364 }
365 
// Validates the invariants of `tree`: node type, height / begin / end
// bounds, edge types matching the node height, and the node length equaling
// the sum of its edge lengths. Recurses into children unless `shallow` is
// true and exhaustive validation is not globally enabled. Returns false
// (after logging the failed condition) on the first violation found.
bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
#define NODE_CHECK_VALID(x)                                           \
  if (!(x)) {                                                         \
    ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
    return false;                                                     \
  }
#define NODE_CHECK_EQ(x, y)                                                    \
  if ((x) != (y)) {                                                            \
    ABSL_RAW_LOG(ERROR,                                                        \
                 "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
                 #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str());        \
    return false;                                                              \
  }

  NODE_CHECK_VALID(tree != nullptr);
  NODE_CHECK_VALID(tree->IsBtree());
  NODE_CHECK_VALID(tree->height() <= kMaxHeight);
  NODE_CHECK_VALID(tree->begin() < tree->capacity());
  NODE_CHECK_VALID(tree->end() <= tree->capacity());
  NODE_CHECK_VALID(tree->begin() <= tree->end());
  size_t child_length = 0;
  for (CordRep* edge : tree->Edges()) {
    NODE_CHECK_VALID(edge != nullptr);
    if (tree->height() > 0) {
      // Inner node: every edge is a btree node exactly one level down.
      NODE_CHECK_VALID(edge->IsBtree());
      NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
    } else {
      // Leaf node: every edge is a data edge (FLAT / EXTERNAL / SUBSTRING).
      NODE_CHECK_VALID(IsDataEdge(edge));
    }
    child_length += edge->length;
  }
  NODE_CHECK_EQ(child_length, tree->length);
  // Deep validation: recurse into child nodes when requested.
  if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
    for (CordRep* edge : tree->Edges()) {
      if (!IsValid(edge->btree(), shallow)) return false;
    }
  }
  return true;

#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
408 
409 #ifndef NDEBUG
410 
AssertValid(CordRepBtree * tree,bool shallow)411 CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
412   if (!IsValid(tree, shallow)) {
413     Dump(tree, "CordRepBtree validation failed:", false, std::cout);
414     ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
415   }
416   return tree;
417 }
418 
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
                                              bool shallow) {
  // Const overload: dumps the tree and terminates the process if validation
  // fails; otherwise returns `tree` unchanged for fluent use by callers.
  const bool valid = IsValid(tree, shallow);
  if (!valid) {
    Dump(tree, "CordRepBtree validation failed:", false, std::cout);
    ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  }
  return tree;
}
427 
428 #endif  // NDEBUG
429 
// Adds `edge` to this node on the `edge_type` side, growing the node length
// by `delta`.
template <EdgeType edge_type>
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
  // A node at maximum capacity cannot take another edge: wrap `edge` in a
  // fresh node and report `kPopped` so callers insert it one level up.
  if (size() >= kMaxCapacity) return {New(edge), kPopped};
  // Otherwise mutate in place when `owned`, or work on a copy: `ToOpResult`
  // returns {this, kSelf} or {copy, kCopied} accordingly.
  OpResult result = ToOpResult(owned);
  result.tree->Add<edge_type>(edge);
  result.tree->length += delta;
  return result;
}
438 
// Replaces the front or back edge of this node with `edge`, growing the
// node length by `delta`. Mutates in place when `owned`; otherwise works on
// a raw copy that takes a new reference on every unchanged edge.
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
  OpResult result;
  const size_t idx = index(edge_type);
  if (owned) {
    result = {this, kSelf};
    // Drop the reference on the edge being replaced.
    CordRep::Unref(edges_[idx]);
  } else {
    // Create a copy containing all unchanged edges. Unchanged edges are the
    // open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
    // We conveniently cover both case using a constexpr `shift` being 0 or 1
    // as `end :== back + 1`.
    result = {CopyRaw(), kCopied};
    constexpr int shift = edge_type == kFront ? 1 : 0;
    for (CordRep* r : Edges(begin() + shift, back() + shift)) {
      CordRep::Ref(r);
    }
  }
  result.tree->edges_[idx] = edge;
  result.tree->length += delta;
  return result;
}
461 
462 template <EdgeType edge_type>
AddCordRep(CordRepBtree * tree,CordRep * rep)463 CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
464   const int depth = tree->height();
465   const size_t length = rep->length;
466   StackOperations<edge_type> ops;
467   CordRepBtree* leaf = ops.BuildStack(tree, depth);
468   const OpResult result =
469       leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
470   return ops.Unwind(tree, depth, length, result);
471 }
472 
473 template <>
NewLeaf(absl::string_view data,size_t extra)474 CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
475                                            size_t extra) {
476   CordRepBtree* leaf = CordRepBtree::New(0);
477   size_t length = 0;
478   size_t end = 0;
479   const size_t cap = leaf->capacity();
480   while (!data.empty() && end != cap) {
481     auto* flat = CordRepFlat::New(data.length() + extra);
482     flat->length = (std::min)(data.length(), flat->Capacity());
483     length += flat->length;
484     leaf->edges_[end++] = flat;
485     data = Consume<kBack>(flat->Data(), data, flat->length);
486   }
487   leaf->length = length;
488   leaf->set_end(end);
489   return leaf;
490 }
491 
492 template <>
NewLeaf(absl::string_view data,size_t extra)493 CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
494                                             size_t extra) {
495   CordRepBtree* leaf = CordRepBtree::New(0);
496   size_t length = 0;
497   size_t begin = leaf->capacity();
498   leaf->set_end(leaf->capacity());
499   while (!data.empty() && begin != 0) {
500     auto* flat = CordRepFlat::New(data.length() + extra);
501     flat->length = (std::min)(data.length(), flat->Capacity());
502     length += flat->length;
503     leaf->edges_[--begin] = flat;
504     data = Consume<kFront>(flat->Data(), data, flat->length);
505   }
506   leaf->length = length;
507   leaf->set_begin(begin);
508   return leaf;
509 }
510 
// Appends `data` at the back of this (leaf) node, creating new flat nodes
// with `extra` reserve capacity, until the data is exhausted or the node is
// full. Returns the remaining (not yet added) data. Does NOT update the
// length of this node; the caller is responsible for that.
template <>
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
                                               size_t extra) {
  assert(!data.empty());
  assert(size() < capacity());
  // Shift existing edges to the front to maximize room at the back.
  AlignBegin();
  const size_t cap = capacity();
  do {
    CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    edges_[fetch_add_end(1)] = flat;
    data = Consume<kBack>(flat->Data(), data, n);
  } while (!data.empty() && end() != cap);
  return data;
}
527 
// Prepends `data` at the front of this (leaf) node, creating new flat nodes
// with `extra` reserve capacity, until the data is exhausted or the node is
// full. Returns the remaining (not yet added) data. Does NOT update the
// length of this node; the caller is responsible for that.
template <>
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
                                                size_t extra) {
  assert(!data.empty());
  assert(size() < capacity());
  // Shift existing edges to the back to maximize room at the front.
  AlignEnd();
  do {
    CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    edges_[sub_fetch_begin(1)] = flat;
    data = Consume<kFront>(flat->Data(), data, n);
  } while (!data.empty() && begin() != 0);
  return data;
}
543 
// Adds `data` to `tree` on the `edge_type` side, filling any remaining
// capacity in the boundary leaf first and appending new leaf nodes for the
// remainder. `extra` reserves additional capacity in newly created flat
// nodes. Returns the (possibly new) tree root.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
                                    size_t extra) {
  if (ABSL_PREDICT_FALSE(data.empty())) return tree;

  const size_t original_data_size = data.size();
  int depth = tree->height();
  StackOperations<edge_type> ops;
  CordRepBtree* leaf = ops.BuildStack(tree, depth);

  // If there is capacity in the last edge, append as much data
  // as possible into this last edge.
  if (leaf->size() < leaf->capacity()) {
    OpResult result = leaf->ToOpResult(ops.owned(depth));
    data = result.tree->AddData<edge_type>(data, extra);
    if (data.empty()) {
      // All data fit in the existing leaf: adjust lengths and unwind.
      result.tree->length += original_data_size;
      return ops.Unwind(tree, depth, original_data_size, result);
    }

    // We added some data into this leaf, but not all. Propagate the added
    // length to the top most node, and rebuild the stack with any newly copied
    // or updated nodes. From this point on, the path (leg) from the top most
    // node to the right-most node towards the leaf node is privately owned.
    size_t delta = original_data_size - data.size();
    assert(delta > 0);
    result.tree->length += delta;
    tree = ops.Propagate(tree, depth, delta, result);
    ops.share_depth = depth + 1;
  }

  // We were unable to append all data into the existing right-most leaf node.
  // This means all remaining data must be put into (a) new leaf node(s) which
  // we append to the tree. To make this efficient, we iteratively build full
  // leaf nodes from `data` until the created leaf contains all remaining data.
  // We utilize the `Unwind` method to merge the created leaf into the first
  // level towards root that has capacity. On each iteration with remaining
  // data, we rebuild the stack in the knowledge that right-most nodes are
  // privately owned after the first `Unwind` completes.
  for (;;) {
    OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
    if (result.tree->length == data.size()) {
      // The new leaf holds all remaining data: final unwind and return.
      return ops.Unwind(tree, depth, result.tree->length, result);
    }
    data = Consume<edge_type>(data, result.tree->length);
    tree = ops.Unwind(tree, depth, result.tree->length, result);
    depth = tree->height();
    ops.BuildOwnedStack(tree, depth);
  }
}
594 
// Merges `src` into `dst` on the `edge_type` side. Requires the height of
// `dst` to be greater than or equal to the height of `src`. Consumes the
// references on both trees and returns the merged tree.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
  assert(dst->height() >= src->height());

  // Capture source length as we may consume / destroy `src`.
  const size_t length = src->length;

  // We attempt to merge `src` at its corresponding height in `dst`.
  const int depth = dst->height() - src->height();
  StackOperations<edge_type> ops;
  CordRepBtree* merge_node = ops.BuildStack(dst, depth);

  // If there is enough space in `merge_node` for all edges from `src`, add all
  // edges to this node, making a fresh copy as needed if not privately owned.
  // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
  // `Finalize` to merge `src` into the first level towards `root` where there
  // is capacity for another edge, or create a new top level node.
  OpResult result;
  if (merge_node->size() + src->size() <= kMaxCapacity) {
    result = merge_node->ToOpResult(ops.owned(depth));
    result.tree->Add<edge_type>(src->Edges());
    result.tree->length += src->length;
    if (src->refcount.IsOne()) {
      // `src` was private: its edges moved into `result`, delete the shell.
      Delete(src);
    } else {
      // `src` is shared: the edges gain an owner; drop our tree reference.
      for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
      CordRepBtree::Unref(src);
    }
  } else {
    result = {src, kPopped};
  }

  // Unless we merged at the top level (i.e.: src and dst are equal height),
  // unwind the result towards the top level, and finalize the result.
  if (depth) {
    return ops.Unwind(dst, depth, length, result);
  }
  return ops.Finalize(dst, result);
}
634 
// Returns a partial copy of this tree containing the suffix starting at
// `offset`. The result holds the copied tree and its height, which may be
// lower than this tree's height (or -1 for a plain data edge) when the
// suffix collapses into fewer levels.
CopyResult CordRepBtree::CopySuffix(size_t offset) {
  assert(offset < this->length);

  // As long as `offset` starts inside the last edge, we can 'drop' the current
  // depth. For the most extreme example: if offset references the last data
  // edge in the tree, there is only a single edge / path from the top of the
  // tree to that last edge, so we can drop all the nodes except that edge.
  // The fast path check for this is `back->length >= length - offset`.
  int height = this->height();
  CordRepBtree* node = this;
  size_t len = node->length - offset;
  CordRep* back = node->Edge(kBack);
  while (back->length >= len) {
    offset = back->length - len;
    if (--height < 0) {
      // Reached a data edge: a (possibly trivial) substring of it suffices.
      return {MakeSubstring(CordRep::Ref(back), offset), height};
    }
    node = back->btree();
    back = node->Edge(kBack);
  }
  // A zero offset at this level means the whole node is the suffix.
  if (offset == 0) return {CordRep::Ref(node), height};

  // Offset does not point into the last edge, so we span at least two edges.
  // Find the index of offset with `IndexBeyond` which provides us the edge
  // 'beyond' the offset if offset is not a clean starting point of an edge.
  Position pos = node->IndexBeyond(offset);
  CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
  const CopyResult result = {sub, height};

  // `pos.n` contains a non zero value if the offset is not an exact starting
  // point of an edge. In this case, `pos.n` contains the 'trailing' amount of
  // bytes of the edge preceding that in `pos.index`. We need to iteratively
  // adjust the preceding edge with the 'broken' offset until we have a perfect
  // start of the edge.
  while (pos.n != 0) {
    assert(pos.index >= 1);
    const size_t begin = pos.index - 1;
    sub->set_begin(begin);
    CordRep* const edge = node->Edge(begin);

    len = pos.n;
    offset = edge->length - len;

    if (--height < 0) {
      // Bottomed out: wrap the partial data edge in a substring and stop.
      sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
      return result;
    }

    node = edge->btree();
    pos = node->IndexBeyond(offset);

    // Copy the descendant's tail and splice it in as the new first edge.
    CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
    sub->edges_[begin] = nsub;
    sub = nsub;
  }
  sub->set_begin(pos.index);
  return result;
}
693 
// Returns a partial copy of this tree containing the first `n` bytes. The
// result holds the copied tree and its height, which may be lower than this
// tree's height (or -1 for a plain data edge) when the prefix collapses
// into fewer levels.
CopyResult CordRepBtree::CopyPrefix(size_t n) {
  assert(n > 0);
  assert(n <= this->length);

  // As long as `n` does not exceed the length of the first edge, we can 'drop'
  // the current depth. For the most extreme example: if we'd copy a 1 byte
  // prefix from a tree, there is only a single edge / path from the top of the
  // tree to the single data edge containing this byte, so we can drop all the
  // nodes except the data node.
  int height = this->height();
  CordRepBtree* node = this;
  CordRep* front = node->Edge(kFront);
  while (front->length >= n) {
    // Reached a data edge: a (possibly trivial) substring of it suffices.
    if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
    node = front->btree();
    front = node->Edge(kFront);
  }
  // The whole node is exactly the prefix.
  if (node->length == n) return {CordRep::Ref(node), height};

  // `n` spans at least two nodes, find the end point of the span.
  Position pos = node->IndexOf(n);

  // Create a partial copy of the node up to `pos.index`, with a defined length
  // of `n`. Any 'partial last edge' is added further below as needed.
  CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
  const CopyResult result = {sub, height};

  // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
  // this is not zero, we don't have a 'clean cut', so we need to make a
  // (partial) copy of that last edge, and repeat this until pos.n is zero.
  while (pos.n != 0) {
    size_t end = pos.index;
    n = pos.n;

    CordRep* edge = node->Edge(pos.index);
    if (--height < 0) {
      // Bottomed out: wrap the partial data edge in a substring and stop.
      sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
      sub->set_end(end);
      AssertValid(result.edge->btree());
      return result;
    }

    node = edge->btree();
    pos = node->IndexOf(n);
    // Copy the descendant's head and splice it in as the new last edge.
    CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
    sub->edges_[end++] = nsub;
    sub->set_end(end);
    sub = nsub;
  }
  sub->set_end(pos.index);
  AssertValid(result.edge->btree());
  return result;
}
747 
// Returns a new tree (or data edge) holding the `n` bytes starting at
// `offset` in this tree. Returns nullptr when `n` is zero.
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
  assert(n <= this->length);
  assert(offset <= this->length - n);
  if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;

  // Descend while [offset, offset + n) is fully contained in a single edge:
  // each such level can be dropped from the result.
  CordRepBtree* node = this;
  int height = node->height();
  Position front = node->IndexOf(offset);
  CordRep* left = node->edges_[front.index];
  while (front.n + n <= left->length) {
    // Data edge level: a plain substring covers the whole range.
    if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
    node = left->btree();
    front = node->IndexOf(front.n);
    left = node->edges_[front.index];
  }

  // The range spans at least two edges of `node`: a partial suffix of `left`
  // through a partial prefix of `right`.
  const Position back = node->IndexBefore(front, n);
  CordRep* const right = node->edges_[back.index];
  assert(back.index > front.index);

  // Get partial suffix and prefix entries.
  CopyResult prefix;
  CopyResult suffix;
  if (height > 0) {
    // Copy prefix and suffix of the boundary nodes.
    prefix = left->btree()->CopySuffix(front.n);
    suffix = right->btree()->CopyPrefix(back.n);

    // If there is an edge between the prefix and suffix edges, then the tree
    // must remain at its previous (full) height. If we have no edges between
    // prefix and suffix edges, then the tree must be as high as either the
    // suffix or prefix edges (which are collapsed to their minimum heights).
    if (front.index + 1 == back.index) {
      height = (std::max)(prefix.height, suffix.height) + 1;
    }

    // Raise prefix and suffixes to the new tree height.
    for (int h = prefix.height + 1; h < height; ++h) {
      prefix.edge = CordRepBtree::New(prefix.edge);
    }
    for (int h = suffix.height + 1; h < height; ++h) {
      suffix.edge = CordRepBtree::New(suffix.edge);
    }
  } else {
    // Leaf node, simply take substrings for prefix and suffix.
    prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
    suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
  }

  // Compose resulting tree: prefix edge, any fully-covered middle edges
  // (shared by reference), then the suffix edge.
  CordRepBtree* sub = CordRepBtree::New(height);
  size_t end = 0;
  sub->edges_[end++] = prefix.edge;
  for (CordRep* r : node->Edges(front.index + 1, back.index)) {
    sub->edges_[end++] = CordRep::Ref(r);
  }
  sub->edges_[end++] = suffix.edge;
  sub->set_end(end);
  sub->length = n;
  return AssertValid(sub);
}
809 
MergeTrees(CordRepBtree * left,CordRepBtree * right)810 CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
811                                        CordRepBtree* right) {
812   return left->height() >= right->height() ? Merge<kBack>(left, right)
813                                            : Merge<kFront>(right, left);
814 }
815 
IsFlat(absl::string_view * fragment) const816 bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
817   if (height() == 0 && size() == 1) {
818     if (fragment) *fragment = Data(begin());
819     return true;
820   }
821   return false;
822 }
823 
// Returns true if the `n` bytes starting at `offset` are contained in a
// single data edge, optionally exporting that span into `fragment`.
// Returns false for n == 0 and for ranges crossing an edge boundary.
bool CordRepBtree::IsFlat(size_t offset, const size_t n,
                          absl::string_view* fragment) const {
  assert(n <= this->length);
  assert(offset <= this->length - n);
  if (ABSL_PREDICT_FALSE(n == 0)) return false;
  int height = this->height();
  const CordRepBtree* node = this;
  for (;;) {
    const Position front = node->IndexOf(offset);
    const CordRep* edge = node->Edge(front.index);
    // The range must be fully contained in this edge, else it is not flat.
    if (edge->length < front.n + n) return false;
    if (--height < 0) {
      // Data edge level: the span is flat; export it if requested.
      if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
      return true;
    }
    // Descend: the remaining offset is relative to the selected edge.
    offset = front.n;
    node = node->Edge(front.index)->btree();
  }
}
843 
GetCharacter(size_t offset) const844 char CordRepBtree::GetCharacter(size_t offset) const {
845   assert(offset < length);
846   const CordRepBtree* node = this;
847   int height = node->height();
848   for (;;) {
849     Position front = node->IndexOf(offset);
850     if (--height < 0) return node->Data(front.index)[front.n];
851     offset = front.n;
852     node = node->Edge(front.index)->btree();
853   }
854 }
855 
// Returns a writable span of up to `size` bytes of unused capacity in the
// last data edge, provided the entire path to that edge (and the edge
// itself) is privately owned. Returns an empty span otherwise. Updates the
// lengths along the path to account for the claimed bytes.
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
  // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
  assert(height() >= 4);
  assert(refcount.IsOne());

  // Build a stack of nodes we may potentially need to update if we find a
  // non-shared FLAT with capacity at the leaf level.
  const int depth = height();
  CordRepBtree* node = this;
  CordRepBtree* stack[kMaxDepth];
  for (int i = 0; i < depth; ++i) {
    node = node->Edge(kBack)->btree();
    // Any shared node on the path makes in-place mutation unsafe.
    if (!node->refcount.IsOne()) return {};
    stack[i] = node;
  }

  // Must be a privately owned flat.
  CordRep* const edge = node->Edge(kBack);
  if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};

  // Must have capacity.
  const size_t avail = edge->flat()->Capacity() - edge->length;
  if (avail == 0) return {};

  // Build span on remaining capacity.
  size_t delta = (std::min)(size, avail);
  Span<char> span = {edge->flat()->Data() + edge->length, delta};
  // Claim the bytes: grow the flat, this node, and every node on the path.
  edge->length += delta;
  this->length += delta;
  for (int i = 0; i < depth; ++i) {
    stack[i]->length += delta;
  }
  return span;
}
890 
CreateSlow(CordRep * rep)891 CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
892   if (rep->IsBtree()) return rep->btree();
893 
894   CordRepBtree* node = nullptr;
895   auto consume = [&node](CordRep* r, size_t offset, size_t length) {
896     r = MakeSubstring(r, offset, length);
897     if (node == nullptr) {
898       node = New(r);
899     } else {
900       node = CordRepBtree::AddCordRep<kBack>(node, r);
901     }
902   };
903   Consume(rep, consume);
904   return node;
905 }
906 
AppendSlow(CordRepBtree * tree,CordRep * rep)907 CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
908   if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
909     return MergeTrees(tree, rep->btree());
910   }
911   auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
912     r = MakeSubstring(r, offset, length);
913     tree = CordRepBtree::AddCordRep<kBack>(tree, r);
914   };
915   Consume(rep, consume);
916   return tree;
917 }
918 
PrependSlow(CordRepBtree * tree,CordRep * rep)919 CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
920   if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
921     return MergeTrees(rep->btree(), tree);
922   }
923   auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
924     r = MakeSubstring(r, offset, length);
925     tree = CordRepBtree::AddCordRep<kFront>(tree, r);
926   };
927   ReverseConsume(rep, consume);
928   return tree;
929 }
930 
Append(CordRepBtree * tree,absl::string_view data,size_t extra)931 CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
932                                    size_t extra) {
933   return CordRepBtree::AddData<kBack>(tree, data, extra);
934 }
935 
Prepend(CordRepBtree * tree,absl::string_view data,size_t extra)936 CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
937                                     size_t extra) {
938   return CordRepBtree::AddData<kFront>(tree, data, extra);
939 }
940 
// Explicit instantiations of the AddCordRep / AddData member templates for
// both edge types, so callers in other translation units can link against
// them without seeing the template definitions.
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
                                                        CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
                                                       CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
                                                     absl::string_view data,
                                                     size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
                                                    absl::string_view data,
                                                    size_t extra);
951 
952 }  // namespace cord_internal
953 ABSL_NAMESPACE_END
954 }  // namespace absl
955