1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/cord_rep_btree.h"
16
17 #include <cassert>
18 #include <cstdint>
19 #include <iostream>
20 #include <string>
21
22 #include "absl/base/attributes.h"
23 #include "absl/base/config.h"
24 #include "absl/base/internal/raw_logging.h"
25 #include "absl/strings/internal/cord_internal.h"
26 #include "absl/strings/internal/cord_rep_consume.h"
27 #include "absl/strings/internal/cord_rep_flat.h"
28 #include "absl/strings/str_cat.h"
29 #include "absl/strings/string_view.h"
30
31 namespace absl {
32 ABSL_NAMESPACE_BEGIN
33 namespace cord_internal {
34
35 constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17
36
37 namespace {
38
39 using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
40 using EdgeType = CordRepBtree::EdgeType;
41 using OpResult = CordRepBtree::OpResult;
42 using CopyResult = CordRepBtree::CopyResult;
43
44 constexpr auto kFront = CordRepBtree::kFront;
45 constexpr auto kBack = CordRepBtree::kBack;
46
exhaustive_validation()47 inline bool exhaustive_validation() {
48 return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
49 }
50
51 // Implementation of the various 'Dump' functions.
52 // Prints the entire tree structure or 'rep'. External callers should
53 // not specify 'depth' and leave it to its default (0) value.
54 // Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure of 'rep' recursively. External callers
// should not specify 'depth' and leave it to its default (0) value.
// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
             int depth = 0) {
  // Allow for full height trees + substring -> flat / external nodes.
  assert(depth <= CordRepBtree::kMaxDepth + 2);
  // Note: refcount.IsOne() is non-const, hence the const_cast; this is a
  // read-only query and does not modify `rep`.
  std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
                            ? std::string("Private")
                            : absl::StrCat("Shared(", rep->refcount.Get(), ")");
  std::string sptr = absl::StrCat("0x", absl::Hex(rep));

  // Dumps the data contents of `rep` if `include_contents` is true.
  // Always emits a new line character.
  auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
    if (include_contents) {
      // Allow for up to 60 wide display of content data, which with some
      // indentation and prefix / labels keeps us within roughly 80-100 wide.
      constexpr size_t kMaxDataLength = 60;
      stream << ", data = \""
             << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
             << (r->length > kMaxDataLength ? "\"..." : "\"");
    }
    stream << '\n';
  };

  // For each level, we print the 'shared/private' state and the rep pointer,
  // indented by two spaces per recursive depth.
  stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";

  if (rep->IsBtree()) {
    const CordRepBtree* node = rep->btree();
    std::string label =
        node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
    stream << label << ", len = " << node->length
           << ", begin = " << node->begin() << ", end = " << node->end()
           << "\n";
    for (CordRep* edge : node->Edges()) {
      DumpAll(edge, include_contents, stream, depth + 1);
    }
  } else if (rep->tag == SUBSTRING) {
    const CordRepSubstring* substring = rep->substring();
    stream << "Substring, len = " << rep->length
           << ", start = " << substring->start;
    maybe_dump_data(rep);
    DumpAll(substring->child, include_contents, stream, depth + 1);
  } else if (rep->tag >= FLAT) {
    stream << "Flat, len = " << rep->length;
    maybe_dump_data(rep);
  } else if (rep->tag == EXTERNAL) {
    stream << "Extn, len = " << rep->length;
    maybe_dump_data(rep);
  }
}
106
107 // TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
108 // small data out of large reps, and general efficiency of 'always copy small
109 // data'. Consider making this a cord rep internal library function.
CreateSubstring(CordRep * rep,size_t offset,size_t n)110 CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
111 assert(n != 0);
112 assert(offset + n <= rep->length);
113 assert(offset != 0 || n != rep->length);
114
115 if (rep->tag == SUBSTRING) {
116 CordRepSubstring* substring = rep->substring();
117 offset += substring->start;
118 rep = CordRep::Ref(substring->child);
119 CordRep::Unref(substring);
120 }
121 CordRepSubstring* substring = new CordRepSubstring();
122 substring->length = n;
123 substring->tag = SUBSTRING;
124 substring->start = offset;
125 substring->child = rep;
126 return substring;
127 }
128
129 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset,size_t n)130 inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
131 if (n == rep->length) return rep;
132 if (n == 0) return CordRep::Unref(rep), nullptr;
133 return CreateSubstring(rep, offset, n);
134 }
135
136 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset)137 inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
138 if (offset == 0) return rep;
139 return CreateSubstring(rep, offset, rep->length - offset);
140 }
141
142 template <EdgeType edge_type>
Consume(absl::string_view s,size_t n)143 inline absl::string_view Consume(absl::string_view s, size_t n) {
144 return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
145 }
146
147 template <EdgeType edge_type>
Consume(char * dst,absl::string_view s,size_t n)148 inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
149 if (edge_type == kBack) {
150 memcpy(dst, s.data(), n);
151 return s.substr(n);
152 } else {
153 const size_t offset = s.size() - n;
154 memcpy(dst, s.data() + offset, n);
155 return s.substr(0, offset);
156 }
157 }
158
159 // Known issue / optimization weirdness: the store associated with the
160 // decrement introduces traffic between cpus (even if the result of that
161 // traffic does nothing), making this faster than a single call to
162 // refcount.Decrement() checking the zero refcount condition.
// Unrefs `r`, invoking `fn(r)` to destroy it when the reference count reaches
// zero. Checks IsOne() first (see the note above on why this two-step check
// outperforms a single Decrement()).
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
  // Short-circuit: Decrement is only attempted when this is not the sole
  // reference, preserving the original call sequence exactly.
  if (r->refcount.IsOne() || !r->refcount.DecrementExpectHighRefcount()) {
    fn(r);
  }
}
171
172 // Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
173 // node, or a SUBSTRING of an EXTERNAL or FLAT node.
DeleteLeafEdge(CordRep * rep)174 void DeleteLeafEdge(CordRep* rep) {
175 for (;;) {
176 if (rep->tag >= FLAT) {
177 CordRepFlat::Delete(rep->flat());
178 return;
179 }
180 if (rep->tag == EXTERNAL) {
181 CordRepExternal::Delete(rep->external());
182 return;
183 }
184 assert(rep->tag == SUBSTRING);
185 CordRepSubstring* substring = rep->substring();
186 rep = substring->child;
187 assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
188 delete substring;
189 if (rep->refcount.Decrement()) return;
190 }
191 }
192
193 // StackOperations contains the logic to build a left-most or right-most stack
194 // (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
195 // propagate node changes up the stack.
// StackOperations contains the logic to build a left-most or right-most stack
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods
// to propagate node changes up the stack.
template <EdgeType edge_type>
struct StackOperations {
  // Returns true if the node at 'depth' is not shared, i.e. has a refcount
  // of one and all of its parent nodes have a refcount of one.
  inline bool owned(int depth) const { return depth < share_depth; }

  // Returns the node at 'depth'.
  inline CordRepBtree* node(int depth) const { return stack[depth]; }

  // Builds a `depth` levels deep stack starting at `tree` recording which
  // nodes are private in the form of the 'share depth' where nodes are shared.
  inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
    assert(depth <= tree->height());
    int current_depth = 0;
    // First loop: descend while nodes are privately owned.
    while (current_depth < depth && tree->refcount.IsOne()) {
      stack[current_depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    // Record the depth at which sharing starts; +1 if the node where the
    // first loop stopped is itself still private (i.e. we stopped on depth).
    share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
    // Second loop: finish the descent through (implicitly) shared nodes.
    while (current_depth < depth) {
      stack[current_depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    return tree;
  }

  // Builds a stack with the invariant that all nodes are private owned / not
  // shared. This is used in iterative updates where a previous propagation
  // guaranteed all nodes are owned / private.
  inline void BuildOwnedStack(CordRepBtree* tree, int height) {
    assert(height <= CordRepBtree::kMaxHeight);
    int depth = 0;
    while (depth < height) {
      assert(tree->refcount.IsOne());
      stack[depth++] = tree;
      tree = tree->Edge(edge_type)->btree();
    }
    assert(tree->refcount.IsOne());
    // All nodes private: share_depth beyond the stack depth.
    share_depth = depth + 1;
  }

  // Processes the final 'top level' result action for the tree.
  // See the 'Action' enum for the various action implications.
  static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
    switch (result.action) {
      case CordRepBtree::kPopped:
        // A new edge was popped out of the top node: grow the tree by one
        // level, placing the popped edge on the updated side.
        if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) {
          ABSL_RAW_LOG(FATAL, "Max height exceeded");
        }
        return edge_type == kBack ? CordRepBtree::New(tree, result.tree)
                                  : CordRepBtree::New(result.tree, tree);
      case CordRepBtree::kCopied:
        // The top node was copied: release the original.
        CordRep::Unref(tree);
        ABSL_FALLTHROUGH_INTENDED;
      case CordRepBtree::kSelf:
        return result.tree;
    }
    ABSL_INTERNAL_UNREACHABLE;
    return result.tree;
  }

  // Propagate the action result in 'result' up into all nodes of the stack
  // starting at depth 'depth'. 'length' contains the extra length of data that
  // was added at the lowest level, and is updated into all nodes of the stack.
  // See the 'Action' enum for the various action implications.
  // If 'propagate' is true, then any copied node values are updated into the
  // stack, which is used for iterative processing on the same stack.
  template <bool propagate = false>
  inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
                              OpResult result) {
    // TODO(mvels): revisit the below code to check if 3 loops with 3
    // (incremental) conditions is faster than 1 loop with a switch.
    // Benchmarking and perf recordings indicate the loop with switch is
    // fastest, likely because of indirect jumps on the tight case values and
    // dense branches. But it's worth considering 3 loops, as the `action`
    // transitions are mono directional. E.g.:
    //   while (action == kPopped) {
    //     ...
    //   }
    //   while (action == kCopied) {
    //     ...
    //   }
    //   ...
    // We also found that an "if () do {}" loop here seems faster, possibly
    // because it allows the branch predictor more granular heuristics on
    // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
    // which appear to be the most common use cases.
    if (depth != 0) {
      do {
        CordRepBtree* node = stack[--depth];
        const bool owned = depth < share_depth;
        switch (result.action) {
          case CordRepBtree::kPopped:
            assert(!propagate);
            result = node->AddEdge<edge_type>(owned, result.tree, length);
            break;
          case CordRepBtree::kCopied:
            result = node->SetEdge<edge_type>(owned, result.tree, length);
            if (propagate) stack[depth] = result.tree;
            break;
          case CordRepBtree::kSelf:
            // In-place update: only lengths remain to be fixed up; run a
            // plain loop over the remaining (owned) ancestors and return.
            node->length += length;
            while (depth > 0) {
              node = stack[--depth];
              node->length += length;
            }
            return node;
        }
      } while (depth > 0);
    }
    return Finalize(tree, result);
  }

  // Invokes `Unwind` with `propagate=true` to update the stack node values.
  inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
                                 OpResult result) {
    return Unwind</*propagate=*/true>(tree, depth, length, result);
  }

  // `share_depth` contains the depth at which the nodes in the stack become
  // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
  // then `share_depth` is 0. If the 2nd node is shared (and implicitly all
  // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
  // than the depth of the stack indicates that none of the nodes in the stack
  // are shared.
  int share_depth;

  NodeStack stack;
};
325
326 } // namespace
327
Dump(const CordRep * rep,absl::string_view label,bool include_contents,std::ostream & stream)328 void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
329 bool include_contents, std::ostream& stream) {
330 stream << "===================================\n";
331 if (!label.empty()) {
332 stream << label << '\n';
333 stream << "-----------------------------------\n";
334 }
335 if (rep) {
336 DumpAll(rep, include_contents, stream);
337 } else {
338 stream << "NULL\n";
339 }
340 }
341
// Convenience overload: dumps `rep` with `label`, without data contents.
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
                        std::ostream& stream) {
  Dump(rep, label, false, stream);
}
346
// Convenience overload: dumps `rep` without a label and without contents.
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
  Dump(rep, absl::string_view(), false, stream);
}
350
// Destroys a leaf node: unrefs all data edges in [begin, end) — destroying
// any that reach refcount zero — then deletes the node itself.
void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
  for (CordRep* edge : tree->Edges(begin, end)) {
    FastUnref(edge, DeleteLeafEdge);
  }
  Delete(tree);
}
357
// Destroys an inner node: recursively destroys all (btree) child edges in
// [begin, end) that reach refcount zero, then deletes the node itself.
void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
                                  size_t end) {
  for (CordRep* edge : tree->Edges(begin, end)) {
    FastUnref(edge->btree(), Destroy);
  }
  Delete(tree);
}
365
// Validates the structural invariants of `tree`, logging (via RAW_LOG) and
// returning false on the first violation. With `shallow` true, child nodes
// are not recursed into unless exhaustive validation is globally enabled.
bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
#define NODE_CHECK_VALID(x)                                           \
  if (!(x)) {                                                         \
    ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
    return false;                                                     \
  }
#define NODE_CHECK_EQ(x, y)                                                \
  if ((x) != (y)) {                                                        \
    ABSL_RAW_LOG(ERROR,                                                    \
                 "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", \
                 #x, #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str());\
    return false;                                                          \
  }

  NODE_CHECK_VALID(tree != nullptr);
  NODE_CHECK_VALID(tree->IsBtree());
  NODE_CHECK_VALID(tree->height() <= kMaxHeight);
  NODE_CHECK_VALID(tree->begin() < tree->capacity());
  NODE_CHECK_VALID(tree->end() <= tree->capacity());
  NODE_CHECK_VALID(tree->begin() <= tree->end());
  size_t child_length = 0;
  for (CordRep* edge : tree->Edges()) {
    NODE_CHECK_VALID(edge != nullptr);
    if (tree->height() > 0) {
      // Inner node: all edges must be btree nodes exactly one level down.
      NODE_CHECK_VALID(edge->IsBtree());
      NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
    } else {
      // Leaf node: all edges must be data edges (FLAT / EXTERNAL / SUBSTRING).
      NODE_CHECK_VALID(IsDataEdge(edge));
    }
    child_length += edge->length;
  }
  // A node's length must equal the sum of its edges' lengths.
  NODE_CHECK_EQ(child_length, tree->length);
  if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
    for (CordRep* edge : tree->Edges()) {
      if (!IsValid(edge->btree(), shallow)) return false;
    }
  }
  return true;

#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
408
409 #ifndef NDEBUG
410
AssertValid(CordRepBtree * tree,bool shallow)411 CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
412 if (!IsValid(tree, shallow)) {
413 Dump(tree, "CordRepBtree validation failed:", false, std::cout);
414 ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
415 }
416 return tree;
417 }
418
// Asserts that `tree` is valid, dumping the tree (structure only, no data
// contents) and aborting (FATAL) when it is not. Returns `tree` unchanged.
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
                                              bool shallow) {
  if (!IsValid(tree, shallow)) {
    Dump(tree, "CordRepBtree validation failed:", false, std::cout);
    ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  }
  return tree;
}
427
428 #endif // NDEBUG
429
// Adds `edge` to this node on the `edge_type` side, growing `length` by
// `delta`. If the node is full, returns a new single-edge node with action
// kPopped; otherwise returns this node (kSelf) or a copy (kCopied) per
// `owned`. Consumes the caller's reference on `edge`.
template <EdgeType edge_type>
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
  if (size() >= kMaxCapacity) return {New(edge), kPopped};
  OpResult result = ToOpResult(owned);
  result.tree->Add<edge_type>(edge);
  result.tree->length += delta;
  return result;
}
438
// Replaces the front or back edge (per `edge_type`) with `edge`, adding
// `delta` to the node length. Operates in place when `owned`, otherwise on a
// copy with all unchanged edges re-referenced. Consumes the caller's
// reference on `edge`; the replaced edge's reference is transferred to the
// result (in-place) or dropped implicitly with the copy.
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
  OpResult result;
  const size_t idx = index(edge_type);
  if (owned) {
    result = {this, kSelf};
    // In-place replacement: release the edge being overwritten.
    CordRep::Unref(edges_[idx]);
  } else {
    // Create a copy containing all unchanged edges. Unchanged edges are the
    // open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
    // We conveniently cover both case using a constexpr `shift` being 0 or 1
    // as `end :== back + 1`.
    result = {CopyRaw(), kCopied};
    constexpr int shift = edge_type == kFront ? 1 : 0;
    for (CordRep* r : Edges(begin() + shift, back() + shift)) {
      CordRep::Ref(r);
    }
  }
  result.tree->edges_[idx] = edge;
  result.tree->length += delta;
  return result;
}
461
// Appends (kBack) or prepends (kFront) data edge `rep` to `tree`, returning
// the updated tree. Builds the edge-side stack down to the leaf, adds the
// edge there, and unwinds the result back up to (and possibly above) the top.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
  const int depth = tree->height();
  const size_t length = rep->length;
  StackOperations<edge_type> ops;
  CordRepBtree* leaf = ops.BuildStack(tree, depth);
  const OpResult result =
      leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
  return ops.Unwind(tree, depth, length, result);
}
472
// Creates a new leaf filled back-to-front order-preserving with as much of
// `data` as fits in up to `capacity()` newly allocated FLAT nodes, each
// allocated with `extra` reserve capacity. May leave a tail of `data`
// unconsumed if the leaf fills up first.
template <>
CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
                                           size_t extra) {
  CordRepBtree* leaf = CordRepBtree::New(0);
  size_t length = 0;
  size_t end = 0;
  const size_t cap = leaf->capacity();
  while (!data.empty() && end != cap) {
    auto* flat = CordRepFlat::New(data.length() + extra);
    // The allocator may return less than requested; cap at flat capacity.
    flat->length = (std::min)(data.length(), flat->Capacity());
    length += flat->length;
    leaf->edges_[end++] = flat;
    data = Consume<kBack>(flat->Data(), data, flat->length);
  }
  leaf->length = length;
  leaf->set_end(end);
  return leaf;
}
491
// Creates a new leaf filled front-to-back from the *end* of `data` (edges are
// written right-to-left so the resulting leaf preserves data order), each
// FLAT allocated with `extra` reserve capacity. May leave a head of `data`
// unconsumed if the leaf fills up first.
template <>
CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
                                            size_t extra) {
  CordRepBtree* leaf = CordRepBtree::New(0);
  size_t length = 0;
  size_t begin = leaf->capacity();
  leaf->set_end(leaf->capacity());
  while (!data.empty() && begin != 0) {
    auto* flat = CordRepFlat::New(data.length() + extra);
    // The allocator may return less than requested; cap at flat capacity.
    flat->length = (std::min)(data.length(), flat->Capacity());
    length += flat->length;
    leaf->edges_[--begin] = flat;
    data = Consume<kFront>(flat->Data(), data, flat->length);
  }
  leaf->length = length;
  leaf->set_begin(begin);
  return leaf;
}
510
// Appends `data` into new FLAT edges at the back of this (non-full) leaf,
// after aligning edges to begin() == 0. Returns any remaining data that did
// not fit. Note: does NOT update `length`; the caller is responsible.
template <>
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
                                               size_t extra) {
  assert(!data.empty());
  assert(size() < capacity());
  AlignBegin();
  const size_t cap = capacity();
  do {
    CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    edges_[fetch_add_end(1)] = flat;
    data = Consume<kBack>(flat->Data(), data, n);
  } while (!data.empty() && end() != cap);
  return data;
}
527
// Prepends `data` into new FLAT edges at the front of this (non-full) leaf,
// after aligning edges to end() == capacity(). Consumes `data` from its end
// so data order is preserved. Returns any remaining data that did not fit.
// Note: does NOT update `length`; the caller is responsible.
template <>
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
                                                size_t extra) {
  assert(!data.empty());
  assert(size() < capacity());
  AlignEnd();
  do {
    CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    edges_[sub_fetch_begin(1)] = flat;
    data = Consume<kFront>(flat->Data(), data, n);
  } while (!data.empty() && begin() != 0);
  return data;
}
543
// Appends (kBack) or prepends (kFront) `data` to `tree`, copied into newly
// allocated FLAT nodes (each with `extra` reserve capacity), returning the
// updated tree.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
                                    size_t extra) {
  if (ABSL_PREDICT_FALSE(data.empty())) return tree;

  const size_t original_data_size = data.size();
  int depth = tree->height();
  StackOperations<edge_type> ops;
  CordRepBtree* leaf = ops.BuildStack(tree, depth);

  // If there is capacity in the last edge, append as much data
  // as possible into this last edge.
  if (leaf->size() < leaf->capacity()) {
    OpResult result = leaf->ToOpResult(ops.owned(depth));
    data = result.tree->AddData<edge_type>(data, extra);
    if (data.empty()) {
      // All data fit in the existing leaf: fix up the (not yet updated)
      // leaf length and propagate the delta up the stack.
      result.tree->length += original_data_size;
      return ops.Unwind(tree, depth, original_data_size, result);
    }

    // We added some data into this leaf, but not all. Propagate the added
    // length to the top most node, and rebuild the stack with any newly copied
    // or updated nodes. From this point on, the path (leg) from the top most
    // node to the right-most node towards the leaf node is privately owned.
    size_t delta = original_data_size - data.size();
    assert(delta > 0);
    result.tree->length += delta;
    tree = ops.Propagate(tree, depth, delta, result);
    ops.share_depth = depth + 1;
  }

  // We were unable to append all data into the existing right-most leaf node.
  // This means all remaining data must be put into (a) new leaf node(s) which
  // we append to the tree. To make this efficient, we iteratively build full
  // leaf nodes from `data` until the created leaf contains all remaining data.
  // We utilize the `Unwind` method to merge the created leaf into the first
  // level towards root that has capacity. On each iteration with remaining
  // data, we rebuild the stack in the knowledge that right-most nodes are
  // privately owned after the first `Unwind` completes.
  for (;;) {
    OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
    if (result.tree->length == data.size()) {
      return ops.Unwind(tree, depth, result.tree->length, result);
    }
    data = Consume<edge_type>(data, result.tree->length);
    tree = ops.Unwind(tree, depth, result.tree->length, result);
    depth = tree->height();
    ops.BuildOwnedStack(tree, depth);
  }
}
594
// Merges `src` into `dst` on the `edge_type` side, at the depth in `dst`
// matching `src`'s height. Requires dst->height() >= src->height().
// Consumes references on both trees; returns the merged tree.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
  assert(dst->height() >= src->height());

  // Capture source length as we may consume / destroy `src`.
  const size_t length = src->length;

  // We attempt to merge `src` at its corresponding height in `dst`.
  const int depth = dst->height() - src->height();
  StackOperations<edge_type> ops;
  CordRepBtree* merge_node = ops.BuildStack(dst, depth);

  // If there is enough space in `merge_node` for all edges from `src`, add all
  // edges to this node, making a fresh copy as needed if not privately owned.
  // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
  // `Finalize` to merge `src` into the first level towards `root` where there
  // is capacity for another edge, or create a new top level node.
  OpResult result;
  if (merge_node->size() + src->size() <= kMaxCapacity) {
    result = merge_node->ToOpResult(ops.owned(depth));
    result.tree->Add<edge_type>(src->Edges());
    result.tree->length += src->length;
    if (src->refcount.IsOne()) {
      // `src` was privately owned: its edge references transfer to the
      // merged node; delete only the (now empty) `src` node itself.
      Delete(src);
    } else {
      // `src` is shared: the merged node needs its own references.
      for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
      CordRepBtree::Unref(src);
    }
  } else {
    result = {src, kPopped};
  }

  // Unless we merged at the top level (i.e.: src and dst are equal height),
  // unwind the result towards the top level, and finalize the result.
  if (depth) {
    return ops.Unwind(dst, depth, length, result);
  }
  return ops.Finalize(dst, result);
}
634
// Returns a copy of the suffix of this tree starting at `offset`, collapsed
// to its minimum height. The returned CopyResult holds the new (referenced)
// edge and its height (-1 for a plain data edge). Requires offset < length.
CopyResult CordRepBtree::CopySuffix(size_t offset) {
  assert(offset < this->length);

  // As long as `offset` starts inside the last edge, we can 'drop' the current
  // depth. For the most extreme example: if offset references the last data
  // edge in the tree, there is only a single edge / path from the top of the
  // tree to that last edge, so we can drop all the nodes except that edge.
  // The fast path check for this is `back->length >= length - offset`.
  int height = this->height();
  CordRepBtree* node = this;
  size_t len = node->length - offset;
  CordRep* back = node->Edge(kBack);
  while (back->length >= len) {
    offset = back->length - len;
    if (--height < 0) {
      return {MakeSubstring(CordRep::Ref(back), offset), height};
    }
    node = back->btree();
    back = node->Edge(kBack);
  }
  if (offset == 0) return {CordRep::Ref(node), height};

  // Offset does not point into the last edge, so we span at least two edges.
  // Find the index of offset with `IndexBeyond` which provides us the edge
  // 'beyond' the offset if offset is not a clean starting point of an edge.
  Position pos = node->IndexBeyond(offset);
  CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
  const CopyResult result = {sub, height};

  // `pos.n` contains a non zero value if the offset is not an exact starting
  // point of an edge. In this case, `pos.n` contains the 'trailing' amount of
  // bytes of the edge preceding that in `pos.index`. We need to iteratively
  // adjust the preceding edge with the 'broken' offset until we have a perfect
  // start of the edge.
  while (pos.n != 0) {
    assert(pos.index >= 1);
    const size_t begin = pos.index - 1;
    sub->set_begin(begin);
    CordRep* const edge = node->Edge(begin);

    len = pos.n;
    offset = edge->length - len;

    if (--height < 0) {
      // Reached the data edge level: a substring of the partial edge
      // completes the copy.
      sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
      return result;
    }

    node = edge->btree();
    pos = node->IndexBeyond(offset);

    CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
    sub->edges_[begin] = nsub;
    sub = nsub;
  }
  sub->set_begin(pos.index);
  return result;
}
693
// Returns a copy of the first `n` bytes of this tree, collapsed to its
// minimum height. The returned CopyResult holds the new (referenced) edge
// and its height (-1 for a plain data edge). Requires 0 < n <= length.
CopyResult CordRepBtree::CopyPrefix(size_t n) {
  assert(n > 0);
  assert(n <= this->length);

  // As long as `n` does not exceed the length of the first edge, we can 'drop'
  // the current depth. For the most extreme example: if we'd copy a 1 byte
  // prefix from a tree, there is only a single edge / path from the top of the
  // tree to the single data edge containing this byte, so we can drop all the
  // nodes except the data node.
  int height = this->height();
  CordRepBtree* node = this;
  CordRep* front = node->Edge(kFront);
  while (front->length >= n) {
    if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
    node = front->btree();
    front = node->Edge(kFront);
  }
  if (node->length == n) return {CordRep::Ref(node), height};

  // `n` spans at least two nodes, find the end point of the span.
  Position pos = node->IndexOf(n);

  // Create a partial copy of the node up to `pos.index`, with a defined length
  // of `n`. Any 'partial last edge' is added further below as needed.
  CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
  const CopyResult result = {sub, height};

  // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
  // this is not zero, we don't have a 'clean cut', so we need to make a
  // (partial) copy of that last edge, and repeat this until pos.n is zero.
  while (pos.n != 0) {
    size_t end = pos.index;
    n = pos.n;

    CordRep* edge = node->Edge(pos.index);
    if (--height < 0) {
      // Reached the data edge level: a substring of the partial edge
      // completes the copy.
      sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
      sub->set_end(end);
      AssertValid(result.edge->btree());
      return result;
    }

    node = edge->btree();
    pos = node->IndexOf(n);
    CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
    sub->edges_[end++] = nsub;
    sub->set_end(end);
    sub = nsub;
  }
  sub->set_end(pos.index);
  AssertValid(result.edge->btree());
  return result;
}
747
// Returns a new tree (or data edge) holding the `n` bytes of this tree
// starting at `offset`; returns nullptr for n == 0. Shares unmodified
// interior edges with this tree; partial boundary edges are copied.
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
  assert(n <= this->length);
  assert(offset <= this->length - n);
  if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;

  // Descend while the entire range falls inside a single edge: the result
  // then collapses to (a subtree or substring of) that edge.
  CordRepBtree* node = this;
  int height = node->height();
  Position front = node->IndexOf(offset);
  CordRep* left = node->edges_[front.index];
  while (front.n + n <= left->length) {
    if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
    node = left->btree();
    front = node->IndexOf(front.n);
    left = node->edges_[front.index];
  }

  // The range spans multiple edges of `node`: locate the last edge touched.
  const Position back = node->IndexBefore(front, n);
  CordRep* const right = node->edges_[back.index];
  assert(back.index > front.index);

  // Get partial suffix and prefix entries.
  CopyResult prefix;
  CopyResult suffix;
  if (height > 0) {
    // Copy prefix and suffix of the boundary nodes.
    prefix = left->btree()->CopySuffix(front.n);
    suffix = right->btree()->CopyPrefix(back.n);

    // If there is an edge between the prefix and suffix edges, then the tree
    // must remain at its previous (full) height. If we have no edges between
    // prefix and suffix edges, then the tree must be as high as either the
    // suffix or prefix edges (which are collapsed to their minimum heights).
    if (front.index + 1 == back.index) {
      height = (std::max)(prefix.height, suffix.height) + 1;
    }

    // Raise prefix and suffixes to the new tree height.
    for (int h = prefix.height + 1; h < height; ++h) {
      prefix.edge = CordRepBtree::New(prefix.edge);
    }
    for (int h = suffix.height + 1; h < height; ++h) {
      suffix.edge = CordRepBtree::New(suffix.edge);
    }
  } else {
    // Leaf node, simply take substrings for prefix and suffix.
    prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
    suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
  }

  // Compose resulting tree: prefix edge, shared middle edges, suffix edge.
  CordRepBtree* sub = CordRepBtree::New(height);
  size_t end = 0;
  sub->edges_[end++] = prefix.edge;
  for (CordRep* r : node->Edges(front.index + 1, back.index)) {
    sub->edges_[end++] = CordRep::Ref(r);
  }
  sub->edges_[end++] = suffix.edge;
  sub->set_end(end);
  sub->length = n;
  return AssertValid(sub);
}
809
MergeTrees(CordRepBtree * left,CordRepBtree * right)810 CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
811 CordRepBtree* right) {
812 return left->height() >= right->height() ? Merge<kBack>(left, right)
813 : Merge<kFront>(right, left);
814 }
815
IsFlat(absl::string_view * fragment) const816 bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
817 if (height() == 0 && size() == 1) {
818 if (fragment) *fragment = Data(begin());
819 return true;
820 }
821 return false;
822 }
823
IsFlat(size_t offset,const size_t n,absl::string_view * fragment) const824 bool CordRepBtree::IsFlat(size_t offset, const size_t n,
825 absl::string_view* fragment) const {
826 assert(n <= this->length);
827 assert(offset <= this->length - n);
828 if (ABSL_PREDICT_FALSE(n == 0)) return false;
829 int height = this->height();
830 const CordRepBtree* node = this;
831 for (;;) {
832 const Position front = node->IndexOf(offset);
833 const CordRep* edge = node->Edge(front.index);
834 if (edge->length < front.n + n) return false;
835 if (--height < 0) {
836 if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
837 return true;
838 }
839 offset = front.n;
840 node = node->Edge(front.index)->btree();
841 }
842 }
843
GetCharacter(size_t offset) const844 char CordRepBtree::GetCharacter(size_t offset) const {
845 assert(offset < length);
846 const CordRepBtree* node = this;
847 int height = node->height();
848 for (;;) {
849 Position front = node->IndexOf(offset);
850 if (--height < 0) return node->Data(front.index)[front.n];
851 offset = front.n;
852 node = node->Edge(front.index)->btree();
853 }
854 }
855
GetAppendBufferSlow(size_t size)856 Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
857 // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
858 assert(height() >= 4);
859 assert(refcount.IsOne());
860
861 // Build a stack of nodes we may potentially need to update if we find a
862 // non-shared FLAT with capacity at the leaf level.
863 const int depth = height();
864 CordRepBtree* node = this;
865 CordRepBtree* stack[kMaxDepth];
866 for (int i = 0; i < depth; ++i) {
867 node = node->Edge(kBack)->btree();
868 if (!node->refcount.IsOne()) return {};
869 stack[i] = node;
870 }
871
872 // Must be a privately owned flat.
873 CordRep* const edge = node->Edge(kBack);
874 if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};
875
876 // Must have capacity.
877 const size_t avail = edge->flat()->Capacity() - edge->length;
878 if (avail == 0) return {};
879
880 // Build span on remaining capacity.
881 size_t delta = (std::min)(size, avail);
882 Span<char> span = {edge->flat()->Data() + edge->length, delta};
883 edge->length += delta;
884 this->length += delta;
885 for (int i = 0; i < depth; ++i) {
886 stack[i]->length += delta;
887 }
888 return span;
889 }
890
CreateSlow(CordRep * rep)891 CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
892 if (rep->IsBtree()) return rep->btree();
893
894 CordRepBtree* node = nullptr;
895 auto consume = [&node](CordRep* r, size_t offset, size_t length) {
896 r = MakeSubstring(r, offset, length);
897 if (node == nullptr) {
898 node = New(r);
899 } else {
900 node = CordRepBtree::AddCordRep<kBack>(node, r);
901 }
902 };
903 Consume(rep, consume);
904 return node;
905 }
906
AppendSlow(CordRepBtree * tree,CordRep * rep)907 CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
908 if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
909 return MergeTrees(tree, rep->btree());
910 }
911 auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
912 r = MakeSubstring(r, offset, length);
913 tree = CordRepBtree::AddCordRep<kBack>(tree, r);
914 };
915 Consume(rep, consume);
916 return tree;
917 }
918
PrependSlow(CordRepBtree * tree,CordRep * rep)919 CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
920 if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
921 return MergeTrees(rep->btree(), tree);
922 }
923 auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
924 r = MakeSubstring(r, offset, length);
925 tree = CordRepBtree::AddCordRep<kFront>(tree, r);
926 };
927 ReverseConsume(rep, consume);
928 return tree;
929 }
930
Append(CordRepBtree * tree,absl::string_view data,size_t extra)931 CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
932 size_t extra) {
933 return CordRepBtree::AddData<kBack>(tree, data, extra);
934 }
935
Prepend(CordRepBtree * tree,absl::string_view data,size_t extra)936 CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
937 size_t extra) {
938 return CordRepBtree::AddData<kFront>(tree, data, extra);
939 }
940
// Explicit instantiations of the AddCordRep / AddData member templates for
// both edge directions (kFront / kBack), so their definitions can remain in
// this translation unit while headers only declare them.
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
                                                        CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
                                                       CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
                                                     absl::string_view data,
                                                     size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
                                                    absl::string_view data,
                                                    size_t extra);
951
952 } // namespace cord_internal
953 ABSL_NAMESPACE_END
954 } // namespace absl
955