// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/cord.h"

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <ostream>
#include <sstream>
#include <type_traits>
#include <unordered_set>
#include <vector>

#include "absl/base/casts.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

using ::absl::cord_internal::CordRep;
using ::absl::cord_internal::CordRepConcat;
using ::absl::cord_internal::CordRepExternal;
using ::absl::cord_internal::CordRepSubstring;

// Various representations that we allow
enum CordRepKind {
  CONCAT = 0,
  EXTERNAL = 1,
  SUBSTRING = 2,

  // We have different tags for different sized flat arrays,
  // starting with FLAT
  FLAT = 3,
};

namespace cord_internal {

inline CordRepConcat* CordRep::concat() {
  assert(tag == CONCAT);
  return static_cast<CordRepConcat*>(this);
}

inline const CordRepConcat* CordRep::concat() const {
  assert(tag == CONCAT);
  return static_cast<const CordRepConcat*>(this);
}

inline CordRepSubstring* CordRep::substring() {
  assert(tag == SUBSTRING);
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(tag == SUBSTRING);
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(tag == EXTERNAL);
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(tag == EXTERNAL);
  return static_cast<const CordRepExternal*>(this);
}

}  // namespace cord_internal

static const size_t kFlatOverhead = offsetof(CordRep, data);

// Largest and smallest flat node lengths we are willing to allocate
// Flat allocation size is stored in tag, which currently can encode sizes up
// to 4K, encoded as multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
static constexpr size_t kMaxFlatSize = 4096;
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;

// Prefer copying blocks of at most this size, otherwise reference count.
static const size_t kMaxBytesToCopy = 511;

// Helper functions for rounded div, and rounding to exact sizes.
static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; }
static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; }

// Returns the size to the nearest equal or larger value that can be
// expressed exactly as a tag value.
static size_t RoundUpForTag(size_t size) {
  return RoundUp(size, (size <= 1024) ? 8 : 32);
}

// Converts the allocated size to a tag, rounding down if the size
// does not exactly match a 'tag expressible' size value. The result is
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
static uint8_t AllocatedSizeToTag(size_t size) {
  const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32;
  assert(tag <= std::numeric_limits<uint8_t>::max());
  return tag;
}

// Converts the provided tag to the corresponding allocated size
static constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32);
}

// Converts the provided tag to the corresponding available data length
static constexpr size_t TagToLength(uint8_t tag) {
  return TagToAllocatedSize(tag) - kFlatOverhead;
}

// Enforce that kMaxFlatSize maps to a well-known exact tag value.
static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");

constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
  return n == 0 ? a : Fibonacci(n - 1, b, a + b);
}

static_assert(Fibonacci(63) == 6557470319842,
              "Fibonacci values computed incorrectly");

// Minimum length required for a given depth tree -- a tree is considered
// balanced if
//   length(t) >= min_length[depth(t)]
// The root node depth is allowed to become twice as large to reduce rebalancing
// for larger strings (see IsRootBalanced).
static constexpr uint64_t min_length[] = {
    Fibonacci(2),  Fibonacci(3),  Fibonacci(4),  Fibonacci(5),
    Fibonacci(6),  Fibonacci(7),  Fibonacci(8),  Fibonacci(9),
    Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13),
    Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17),
    Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21),
    Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25),
    Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29),
    Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33),
    Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37),
    Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41),
    Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45),
    Fibonacci(46), Fibonacci(47),
    0xffffffffffffffffull,  // Avoid overflow
};

static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
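
// For example (reading off the table above): min_length[d] == Fibonacci(d + 2),
// so a tree of depth 6 is balanced only if it holds at least
// Fibonacci(8) == 21 bytes, and a depth-20 tree needs at least
// Fibonacci(22) == 17711 bytes.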

// The inlined size to use with absl::InlinedVector.
//
// Note: The InlinedVectors in this file (and in cord.h) do not need to use
// the same value for their inlined size. The fact that they do is historical.
// It may be desirable for each to use a different inlined size optimized for
// that InlinedVector's usage.
//
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
static const int kInlinedVectorSize = 47;

static inline bool IsRootBalanced(CordRep* node) {
  if (node->tag != CONCAT) {
    return true;
  } else if (node->concat()->depth() <= 15) {
    return true;
  } else if (node->concat()->depth() > kMinLengthSize) {
    return false;
  } else {
    // Allow depth to become twice as large as implied by fibonacci rule to
    // reduce rebalancing for larger strings.
    return (node->length >= min_length[node->concat()->depth() / 2]);
  }
}
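
// For example, a root of depth 20 passes the check above as long as it holds
// at least min_length[10] == Fibonacci(12) == 144 bytes: only half the depth
// is measured against the Fibonacci rule at the root.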

static CordRep* Rebalance(CordRep* node);
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os);
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation);

static inline CordRep* VerifyTree(CordRep* node) {
  // Verification is expensive, so only do it in debug mode.
  // Even in debug mode we normally do only light validation.
  // If you are debugging Cord itself, you should define the
  // macro EXTRA_CORD_VALIDATION, e.g. by adding
  // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
#ifdef EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
#else   // EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
#endif  // EXTRA_CORD_VALIDATION
  static_cast<void>(&VerifyNode);

  return node;
}

// --------------------------------------------------------------------
// Memory management

inline CordRep* Ref(CordRep* rep) {
  if (rep != nullptr) {
    rep->refcount.Increment();
  }
  return rep;
}

// This internal routine is called from the cold path of Unref below. Keeping it
// in a separate routine allows good inlining of Unref into many profitable call
// sites. However, the call to this function can be highly disruptive to the
// register pressure in those callers. To minimize the cost to callers, we use
// a special LLVM calling convention that preserves most registers. This allows
// the call to this routine in cold paths to not disrupt the caller's register
// pressure. This calling convention is not available on all platforms; we
// intentionally allow LLVM to ignore the attribute rather than attempting to
// hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
__attribute__((preserve_most))
#pragma clang diagnostic pop
#endif
static void UnrefInternal(CordRep* rep) {
  assert(rep != nullptr);

  absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
  while (true) {
    if (rep->tag == CONCAT) {
      CordRepConcat* rep_concat = rep->concat();
      CordRep* right = rep_concat->right;
      if (!right->refcount.Decrement()) {
        pending.push_back(right);
      }
      CordRep* left = rep_concat->left;
      delete rep_concat;
      rep = nullptr;
      if (!left->refcount.Decrement()) {
        rep = left;
        continue;
      }
    } else if (rep->tag == EXTERNAL) {
      CordRepExternal* rep_external = rep->external();
      rep_external->releaser_invoker(rep_external);
      rep = nullptr;
    } else if (rep->tag == SUBSTRING) {
      CordRepSubstring* rep_substring = rep->substring();
      CordRep* child = rep_substring->child;
      delete rep_substring;
      rep = nullptr;
      if (!child->refcount.Decrement()) {
        rep = child;
        continue;
      }
    } else {
      // Flat CordReps are allocated and constructed with raw ::operator new
      // and placement new, and must be destructed and deallocated
      // accordingly.
#if defined(__cpp_sized_deallocation)
      size_t size = TagToAllocatedSize(rep->tag);
      rep->~CordRep();
      ::operator delete(rep, size);
#else
      rep->~CordRep();
      ::operator delete(rep);
#endif
      rep = nullptr;
    }

    if (!pending.empty()) {
      rep = pending.back();
      pending.pop_back();
    } else {
      break;
    }
  }
}

inline void Unref(CordRep* rep) {
  // Fast-path for two common, hot cases: a null rep and a shared root.
  if (ABSL_PREDICT_TRUE(rep == nullptr ||
                        rep->refcount.DecrementExpectHighRefcount())) {
    return;
  }

  UnrefInternal(rep);
}

// Return the depth of a node
static int Depth(const CordRep* rep) {
  if (rep->tag == CONCAT) {
    return rep->concat()->depth();
  } else {
    return 0;
  }
}

static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
                              CordRep* right) {
  concat->left = left;
  concat->right = right;

  concat->length = left->length + right->length;
  concat->set_depth(1 + std::max(Depth(left), Depth(right)));
}

// Create a concatenation of the specified nodes.
// Does not change the refcounts of "left" and "right".
// The returned node has a refcount of 1.
static CordRep* RawConcat(CordRep* left, CordRep* right) {
  // Avoid making degenerate concat nodes (one child is empty)
  if (left == nullptr || left->length == 0) {
    Unref(left);
    return right;
  }
  if (right == nullptr || right->length == 0) {
    Unref(right);
    return left;
  }

  CordRepConcat* rep = new CordRepConcat();
  rep->tag = CONCAT;
  SetConcatChildren(rep, left, right);

  return rep;
}

static CordRep* Concat(CordRep* left, CordRep* right) {
  CordRep* rep = RawConcat(left, right);
  if (rep != nullptr && !IsRootBalanced(rep)) {
    rep = Rebalance(rep);
  }
  return VerifyTree(rep);
}

// Make a balanced tree out of an array of leaf nodes.
static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
  // Make repeated passes over the array, merging adjacent pairs
  // until we are left with just a single node.
  while (n > 1) {
    size_t dst = 0;
    for (size_t src = 0; src < n; src += 2) {
      if (src + 1 < n) {
        reps[dst] = Concat(reps[src], reps[src + 1]);
      } else {
        reps[dst] = reps[src];
      }
      dst++;
    }
    n = dst;
  }

  return reps[0];
}
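
// For example, five leaves [a, b, c, d, e] are merged as
//   pass 1: [ab, cd, e]
//   pass 2: [abcd, e]
//   pass 3: [abcde]
// yielding a tree of depth ceil(log2(n)) for n leaves.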

// Create a new flat node.
static CordRep* NewFlat(size_t length_hint) {
  if (length_hint <= kMinFlatLength) {
    length_hint = kMinFlatLength;
  } else if (length_hint > kMaxFlatLength) {
    length_hint = kMaxFlatLength;
  }

  // Round size up so it matches a size we can exactly express in a tag.
  const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
  void* const raw_rep = ::operator new(size);
  CordRep* rep = new (raw_rep) CordRep();
  rep->tag = AllocatedSizeToTag(size);
  return VerifyTree(rep);
}

// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
static CordRep* NewTree(const char* data,
                        size_t length,
                        size_t alloc_hint) {
  if (length == 0) return nullptr;
  absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
  size_t n = 0;
  do {
    const size_t len = std::min(length, kMaxFlatLength);
    CordRep* rep = NewFlat(len + alloc_hint);
    rep->length = len;
    memcpy(rep->data, data, len);
    reps[n++] = VerifyTree(rep);
    data += len;
    length -= len;
  } while (length != 0);
  return MakeBalancedTree(reps.data(), n);
}
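
// As a rough example (the exact split depends on kFlatOverhead, which is
// platform-dependent; assume 13 bytes on a typical 64-bit build): a
// 10000-byte input with kMaxFlatLength == 4083 is chopped into three flat
// nodes of 4083, 4083 and 1834 bytes, which MakeBalancedTree then joins into
// a two-level tree.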

namespace cord_internal {

void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
  assert(!data.empty());
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  VerifyTree(rep);
}

}  // namespace cord_internal

static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
  // Never create empty substring nodes
  if (length == 0) {
    Unref(child);
    return nullptr;
  } else {
    CordRepSubstring* rep = new CordRepSubstring();
    assert((offset + length) <= child->length);
    rep->length = length;
    rep->tag = SUBSTRING;
    rep->start = offset;
    rep->child = child;
    return VerifyTree(rep);
  }
}

// --------------------------------------------------------------------
// Cord::InlineRep functions

constexpr unsigned char Cord::InlineRep::kMaxInline;

inline void Cord::InlineRep::set_data(const char* data, size_t n,
                                      bool nullify_tail) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");

  cord_internal::SmallMemmove(data_, data, n, nullify_tail);
  data_[kMaxInline] = static_cast<char>(n);
}

inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  memset(data_, 0, sizeof(data_));
  data_[kMaxInline] = static_cast<char>(n);
  return data_;
}

inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
  size_t len = data_[kMaxInline];
  CordRep* result;
  if (len > kMaxInline) {
    memcpy(&result, data_, sizeof(result));
  } else {
    result = NewFlat(len + extra_hint);
    result->length = len;
    memcpy(result->data, data_, len);
    set_tree(result);
  }
  return result;
}

inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = data_[kMaxInline];
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_ + tag, 0, n);
  data_[kMaxInline] = static_cast<char>(tag);
}

inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n);
  reduce_size(n);
}

void Cord::InlineRep::AppendTree(CordRep* tree) {
  if (tree == nullptr) return;
  size_t len = data_[kMaxInline];
  if (len == 0) {
    set_tree(tree);
  } else {
    set_tree(Concat(force_tree(0), tree));
  }
}

void Cord::InlineRep::PrependTree(CordRep* tree) {
  assert(tree != nullptr);
  size_t len = data_[kMaxInline];
  if (len == 0) {
    set_tree(tree);
  } else {
    set_tree(Concat(tree, force_tree(0)));
  }
}

// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Search down the right-hand path for a non-full FLAT node.
  CordRep* dst = root;
  while (dst->tag == CONCAT && dst->refcount.IsOne()) {
    dst = dst->concat()->right;
  }

  if (dst->tag < FLAT || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = TagToLength(dst->tag);
  if (in_use == capacity) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  size_t size_increase = std::min(capacity - in_use, max_length);

  // We need to update the length fields for all nodes, including the leaf node.
  for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
    rep->length += size_increase;
  }
  dst->length += size_increase;

  *region = dst->data + in_use;
  *size = size_increase;
  return true;
}
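
// For example, if the rightmost leaf is an unshared FLAT holding 10 of 100
// usable bytes and the caller asks for up to 500 bytes, *region points at
// byte 10 of the leaf, *size is set to 90, and every node along the
// right-hand path grows by 90 bytes.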

void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
                                      size_t max_length) {
  if (max_length == 0) {
    *region = nullptr;
    *size = 0;
    return;
  }

  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) {
    *region = data_ + inline_length;
    *size = max_length;
    data_[kMaxInline] = static_cast<char>(inline_length + max_length);
    return;
  }

  CordRep* root = force_tree(max_length);

  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node =
      NewFlat(std::max(static_cast<size_t>(root->length), max_length));
  new_node->length =
      std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}

void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) {
  const size_t max_length = std::numeric_limits<size_t>::max();

  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline) {
    *region = data_ + inline_length;
    *size = kMaxInline - inline_length;
    data_[kMaxInline] = kMaxInline;
    return;
  }

  CordRep* root = force_tree(max_length);

  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node = NewFlat(root->length);
  new_node->length = TagToLength(new_node->tag);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}

// If the rep is a leaf, this will increment the value at total_mem_usage and
// will return true.
static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
  if (rep->tag >= FLAT) {
    *total_mem_usage += TagToAllocatedSize(rep->tag);
    return true;
  }
  if (rep->tag == EXTERNAL) {
    *total_mem_usage += sizeof(CordRepConcat) + rep->length;
    return true;
  }
  return false;
}

void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  ClearSlow();

  memcpy(data_, src.data_, sizeof(data_));
  if (is_tree()) {
    Ref(tree());
  }
}

void Cord::InlineRep::ClearSlow() {
  if (is_tree()) {
    Unref(tree());
  }
  memset(data_, 0, sizeof(data_));
}

// --------------------------------------------------------------------
// Constructors and destructors

Cord::Cord(const Cord& src) : contents_(src.contents_) {
  Ref(contents_.tree());  // Does nothing if contents_ has embedded data
}

Cord::Cord(absl::string_view src) {
  const size_t n = src.size();
  if (n <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), n, false);
  } else {
    contents_.set_tree(NewTree(src.data(), n, 0));
  }
}

template <typename T, Cord::EnableIfString<T>>
Cord::Cord(T&& src) {
  if (
      // String is short: copy data to avoid external block overhead.
      src.size() <= kMaxBytesToCopy ||
      // String is wasteful: copy data to avoid pinning too much unused memory.
      src.size() < src.capacity() / 2
  ) {
    if (src.size() <= InlineRep::kMaxInline) {
      contents_.set_data(src.data(), src.size(), false);
    } else {
      contents_.set_tree(NewTree(src.data(), src.size(), 0));
    }
  } else {
    struct StringReleaser {
      void operator()(absl::string_view /* data */) {}
      std::string data;
    };
    const absl::string_view original_data = src;
    auto* rep = static_cast<
        ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
        absl::cord_internal::NewExternalRep(
            original_data, StringReleaser{std::forward<T>(src)}));
    // Moving src may have invalidated its data pointer, so adjust it.
    rep->base = rep->template get<0>().data.data();
    contents_.set_tree(rep);
  }
}

template Cord::Cord(std::string&& src);

// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
void Cord::DestroyCordSlow() {
  Unref(VerifyTree(contents_.tree()));
}

// --------------------------------------------------------------------
// Mutators

void Cord::Clear() {
  Unref(contents_.clear());
}

Cord& Cord::operator=(absl::string_view src) {
  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_
    contents_.set_data(data, length, true);
    Unref(tree);
    return *this;
  }
  if (tree != nullptr && tree->tag >= FLAT &&
      TagToLength(tree->tag) >= length && tree->refcount.IsOne()) {
    // Copy in place if the existing FLAT node is reusable.
    memmove(tree->data, data, length);
    tree->length = length;
    VerifyTree(tree);
    return *this;
  }
  contents_.set_tree(NewTree(data, length, 0));
  Unref(tree);
  return *this;
}

template <typename T, Cord::EnableIfString<T>>
Cord& Cord::operator=(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    *this = absl::string_view(src);
  } else {
    *this = Cord(std::forward<T>(src));
  }
  return *this;
}

template Cord& Cord::operator=(std::string&& src);

// TODO(sanjay): Move to Cord::InlineRep section of file. For now,
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) {
  if (src_size == 0) return;  // memcpy(_, nullptr, 0) is undefined.
  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) {
    // Append new data to embedded array
    data_[kMaxInline] = static_cast<char>(inline_length + src_size);
    memcpy(data_ + inline_length, src_data, src_size);
    return;
  }

  CordRep* root = tree();

  size_t appended = 0;
  if (root) {
    char* region;
    if (PrepareAppendRegion(root, &region, &appended, src_size)) {
      memcpy(region, src_data, appended);
    }
  } else {
    // It is possible that src_data == data_, but when we transition from an
    // InlineRep to a tree we need to assign data_ = root via set_tree. To
    // avoid corrupting the source data before we copy it, delay calling
    // set_tree until after we've copied data.
    // We are going from an inline size to beyond inline size. Make the new size
    // either double the inlined size, or the added size + 10%.
    const size_t size1 = inline_length * 2 + src_size;
    const size_t size2 = inline_length + src_size / 10;
    root = NewFlat(std::max<size_t>(size1, size2));
    appended = std::min(src_size, TagToLength(root->tag) - inline_length);
    memcpy(root->data, data_, inline_length);
    memcpy(root->data + inline_length, src_data, appended);
    root->length = inline_length + appended;
    set_tree(root);
  }

  src_data += appended;
  src_size -= appended;
  if (src_size == 0) {
    return;
  }

  // Use new block(s) for any remaining bytes that were not handled above.
  // Alloc extra memory only if the right child of the root of the new tree is
  // going to be a FLAT node, which will permit further inplace appends.
  size_t length = src_size;
  if (src_size < kMaxFlatLength) {
    // The new length is either
    //  - old size + 10%
    //  - old_size + src_size
    // This will cause a reasonable conservative step-up in size that is still
    // large enough to avoid excessive amounts of small fragments being added.
    length = std::max<size_t>(root->length / 10, src_size);
  }
  set_tree(Concat(root, NewTree(src_data, src_size, length - src_size)));
}
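
// For example, appending 100 bytes to a cord that currently holds 15 inline
// bytes takes the `root == nullptr` branch above: size1 == 15 * 2 + 100 == 130
// and size2 == 15 + 100 / 10 == 25, so the new flat is sized for
// max(130, 25) == 130 bytes and the inline data plus the new bytes are copied
// into it.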

inline CordRep* Cord::TakeRep() const& {
  return Ref(contents_.tree());
}

inline CordRep* Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}

template <typename C>
inline void Cord::AppendImpl(C&& src) {
  if (empty()) {
    // In case of an empty destination avoid allocating a new node, do not copy
    // data.
    *this = std::forward<C>(src);
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray(src.contents_.data(), src_size);
      return;
    }
    if (src_tree->tag >= FLAT) {
      // src tree just has one flat node.
      contents_.AppendArray(src_tree->data, src_size);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  contents_.AppendTree(std::forward<C>(src).TakeRep());
}

void Cord::Append(const Cord& src) { AppendImpl(src); }

void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }

template <typename T, Cord::EnableIfString<T>>
void Cord::Append(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Append(absl::string_view(src));
  } else {
    Append(Cord(std::forward<T>(src)));
  }
}

template void Cord::Append(std::string&& src);

void Cord::Prepend(const Cord& src) {
  CordRep* src_tree = src.contents_.tree();
  if (src_tree != nullptr) {
    Ref(src_tree);
    contents_.PrependTree(src_tree);
    return;
  }

  // `src` cord is inlined.
  absl::string_view src_contents(src.contents_.data(), src.contents_.size());
  return Prepend(src_contents);
}

void Cord::Prepend(absl::string_view src) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
  size_t cur_size = contents_.size();
  if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) {
    // Use embedded storage.
    char data[InlineRep::kMaxInline + 1] = {0};
    data[InlineRep::kMaxInline] = cur_size + src.size();  // set size
    memcpy(data, src.data(), src.size());
    memcpy(data + src.size(), contents_.data(), cur_size);
    memcpy(reinterpret_cast<void*>(&contents_), data,
           InlineRep::kMaxInline + 1);
  } else {
    contents_.PrependTree(NewTree(src.data(), src.size(), 0));
  }
}

template <typename T, Cord::EnableIfString<T>>
inline void Cord::Prepend(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Prepend(absl::string_view(src));
  } else {
    Prepend(Cord(std::forward<T>(src)));
  }
}

template void Cord::Prepend(std::string&& src);

static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->left->length) {
      // Push right to stack, descend left.
      rhs_stack.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Drop left, descend right.
      n -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else {
    size_t start = n;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Consider in-place update of node, similar to in RemoveSuffixFrom().
      start += node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  while (!rhs_stack.empty()) {
    node = Concat(node, Ref(rhs_stack.back()));
    rhs_stack.pop_back();
  }
  return node;
}

// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
// exception that removing a suffix has an optimization where a node may be
// edited in place iff that node and all its ancestors have a refcount of 1.
static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
  bool inplace_ok = node->refcount.IsOne();

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->right->length) {
      // Push left to stack, descend right.
      lhs_stack.push_back(node->concat()->left);
      node = node->concat()->right;
    } else {
      // Drop right, descend left.
      n -= node->concat()->right->length;
      node = node->concat()->left;
    }
    inplace_ok = inplace_ok && node->refcount.IsOne();
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else if (inplace_ok && node->tag != EXTERNAL) {
    // Consider making a new buffer if the current node capacity is much
    // larger than the new length.
    Ref(node);
    node->length -= n;
  } else {
    size_t start = 0;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      start = node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  while (!lhs_stack.empty()) {
    node = Concat(Ref(lhs_stack.back()), node);
    lhs_stack.pop_back();
  }
  return node;
}

void Cord::RemovePrefix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested prefix size ", n,
                                   " exceeds Cord's size ", size()));
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.remove_prefix(n);
  } else {
    CordRep* newrep = RemovePrefixFrom(tree, n);
    Unref(tree);
    contents_.replace_tree(VerifyTree(newrep));
  }
}

void Cord::RemoveSuffix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested suffix size ", n,
                                   " exceeds Cord's size ", size()));
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.reduce_size(n);
  } else {
    CordRep* newrep = RemoveSuffixFrom(tree, n);
    Unref(tree);
    contents_.replace_tree(VerifyTree(newrep));
  }
}

// Work item for NewSubRange().
struct SubRange {
  SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
      : node(a_node), pos(a_pos), n(a_n) {}
  CordRep* node;  // nullptr means concat last 2 results.
  size_t pos;
  size_t n;
};

static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
  absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
  absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
  todo.push_back(SubRange(node, pos, n));
  do {
    const SubRange& sr = todo.back();
    node = sr.node;
    pos = sr.pos;
    n = sr.n;
    todo.pop_back();

    if (node == nullptr) {
      assert(results.size() >= 2);
      CordRep* right = results.back();
      results.pop_back();
      CordRep* left = results.back();
      results.pop_back();
      results.push_back(Concat(left, right));
    } else if (pos == 0 && n == node->length) {
      results.push_back(Ref(node));
    } else if (node->tag != CONCAT) {
      if (node->tag == SUBSTRING) {
        pos += node->substring()->start;
        node = node->substring()->child;
      }
      results.push_back(NewSubstring(Ref(node), pos, n));
    } else if (pos + n <= node->concat()->left->length) {
      todo.push_back(SubRange(node->concat()->left, pos, n));
    } else if (pos >= node->concat()->left->length) {
      pos -= node->concat()->left->length;
      todo.push_back(SubRange(node->concat()->right, pos, n));
    } else {
      size_t left_n = node->concat()->left->length - pos;
      todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
      todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
      todo.push_back(SubRange(node->concat()->left, pos, left_n));
    }
  } while (!todo.empty());
  assert(results.size() == 1);
  return results[0];
}
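
// For example, extracting a range that straddles both children of a CONCAT
// pushes three work items: a nullptr marker (the deferred Concat), the right
// piece, and the left piece. Since the stack is LIFO, the left piece is
// resolved first, then the right, and the marker finally concatenates the
// two results.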

Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // sub_cord is newly constructed, no need to re-zero-out the tail of
    // contents_ memory.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
  } else if (new_size == 0) {
    // We want to return empty subcord, so nothing to do.
  } else if (new_size <= InlineRep::kMaxInline) {
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    char* dest = sub_cord.contents_.data_;
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size;
  } else {
    sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size));
  }
  return sub_cord;
}

// --------------------------------------------------------------------
// Balancing

class CordForest {
 public:
  explicit CordForest(size_t length)
      : root_length_(length), trees_(kMinLengthSize, nullptr) {}

  void Build(CordRep* cord_root) {
    std::vector<CordRep*> pending = {cord_root};

    while (!pending.empty()) {
      CordRep* node = pending.back();
      pending.pop_back();
      CheckNode(node);
      if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
        AddNode(node);
        continue;
      }

      CordRepConcat* concat_node = node->concat();
      if (concat_node->depth() >= kMinLengthSize ||
          concat_node->length < min_length[concat_node->depth()]) {
        pending.push_back(concat_node->right);
        pending.push_back(concat_node->left);

        if (concat_node->refcount.IsOne()) {
          concat_node->left = concat_freelist_;
          concat_freelist_ = concat_node;
        } else {
          Ref(concat_node->right);
          Ref(concat_node->left);
          Unref(concat_node);
        }
      } else {
        AddNode(node);
      }
    }
  }

  CordRep* ConcatNodes() {
    CordRep* sum = nullptr;
    for (auto* node : trees_) {
      if (node == nullptr) continue;

      sum = PrependNode(node, sum);
      root_length_ -= node->length;
      if (root_length_ == 0) break;
    }
    ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
    return VerifyTree(sum);
  }

 private:
  CordRep* AppendNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(sum, node);
  }

  CordRep* PrependNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(node, sum);
  }

  void AddNode(CordRep* node) {
    CordRep* sum = nullptr;

    // Collect together everything that we will merge with node
    int i = 0;
    for (; node->length > min_length[i + 1]; ++i) {
      auto& tree_at_i = trees_[i];

      if (tree_at_i == nullptr) continue;
      sum = PrependNode(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    sum = AppendNode(node, sum);

    // Insert sum into appropriate place in the forest
    for (; sum->length >= min_length[i]; ++i) {
      auto& tree_at_i = trees_[i];
      if (tree_at_i == nullptr) continue;

      sum = MakeConcat(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    // min_length[0] == 1, which means sum->length >= min_length[0]
    assert(i > 0);
    trees_[i - 1] = sum;
  }
  // Make concat node, trying to reuse existing CordRepConcat nodes we
  // already collected in the concat_freelist_.
  CordRep* MakeConcat(CordRep* left, CordRep* right) {
    if (concat_freelist_ == nullptr) return RawConcat(left, right);

    CordRepConcat* rep = concat_freelist_;
    if (concat_freelist_->left == nullptr) {
      concat_freelist_ = nullptr;
    } else {
      concat_freelist_ = concat_freelist_->left->concat();
    }
    SetConcatChildren(rep, left, right);

    return rep;
  }

  static void CheckNode(CordRep* node) {
    ABSL_INTERNAL_CHECK(node->length != 0u, "");
    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
      ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
                                           node->concat()->right->length),
                          "");
    }
  }

  size_t root_length_;

  // Use an inlined vector instead of a flat array to get bounds checking.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;

  // List of concat nodes we can re-use for Cord balancing.
  CordRepConcat* concat_freelist_ = nullptr;
};

static CordRep* Rebalance(CordRep* node) {
  VerifyTree(node);
  assert(node->tag == CONCAT);

  if (node->length == 0) {
    return nullptr;
  }

  CordForest forest(node->length);
  forest.Build(node);
  return forest.ConcatNodes();
}

// --------------------------------------------------------------------
// Comparators

namespace {

int ClampResult(int memcmp_res) {
  return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
}
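
// For example, ClampResult(-37) == -1, ClampResult(0) == 0, and
// ClampResult(42) == 1, so callers can rely on results in {-1, 0, 1}.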

int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
                  size_t* size_to_compare) {
  size_t compared_size = std::min(lhs->size(), rhs->size());
  assert(*size_to_compare >= compared_size);
  *size_to_compare -= compared_size;

  int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
  if (memcmp_res != 0) return memcmp_res;

  lhs->remove_prefix(compared_size);
  rhs->remove_prefix(compared_size);

  return 0;
}

// This overload set computes comparison results from a memcmp result. This
// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp the result to the
// {-1, 0, 1} set. For bool we are only interested in whether the value == 0.
template <typename ResultType>
ResultType ComputeCompareResult(int memcmp_res) {
  return ClampResult(memcmp_res);
}
template <>
bool ComputeCompareResult<bool>(int memcmp_res) {
  return memcmp_res == 0;
}

}  // namespace

// Helper routine. Locates the first flat chunk of the Cord without
// initializing the iterator.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  size_t n = data_[kMaxInline];
  if (n <= kMaxInline) {
    return absl::string_view(data_, n);
  }

  CordRep* node = tree();
  if (node->tag >= FLAT) {
    return absl::string_view(node->data, node->length);
  }

  if (node->tag == EXTERNAL) {
    return absl::string_view(node->external()->base, node->length);
  }

  // Walk down the left branches until we hit a non-CONCAT node.
  while (node->tag == CONCAT) {
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->tag >= FLAT) {
    return absl::string_view(node->data + offset, length);
  }

  assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}

inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}

inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}

inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
  return c.contents_.FindFlatStartPiece();
}
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
  return sv;
}

// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
// that 'size_to_compare' is greater than the size of the smaller of the two
// first chunks.
template <typename ResultType, typename RHS>
ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
                          size_t size_to_compare) {
  absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
  absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);

  size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
  assert(size_to_compare >= compared_size);
  int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
  if (compared_size == size_to_compare || memcmp_res != 0) {
    return ComputeCompareResult<ResultType>(memcmp_res);
  }

  return ComputeCompareResult<ResultType>(
      lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
}

bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

template <typename RHS>
inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
  size_t lhs_size = lhs.size();
  size_t rhs_size = rhs.size();
  if (lhs_size == rhs_size) {
    return GenericCompare<int>(lhs, rhs, lhs_size);
  }
  if (lhs_size < rhs_size) {
    auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
    return data_comp_res == 0 ? -1 : data_comp_res;
  }

  auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
  return data_comp_res == 0 ? +1 : data_comp_res;
}
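
// For example, comparing "abc" with "abcd" compares the common 3-byte prefix
// first; since that prefix is equal and the left side is shorter, the result
// is -1.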

int Cord::Compare(absl::string_view rhs) const {
  return SharedCompareImpl(*this, rhs);
}

int Cord::CompareImpl(const Cord& rhs) const {
  return SharedCompareImpl(*this, rhs);
}

bool Cord::EndsWith(absl::string_view rhs) const {
  size_t my_size = size();
  size_t rhs_size = rhs.size();

  if (my_size < rhs_size) return false;

  Cord tmp(*this);
  tmp.RemovePrefix(my_size - rhs_size);
  return tmp.EqualsImpl(rhs, rhs_size);
}

bool Cord::EndsWith(const Cord& rhs) const {
  size_t my_size = size();
  size_t rhs_size = rhs.size();

  if (my_size < rhs_size) return false;

  Cord tmp(*this);
  tmp.RemovePrefix(my_size - rhs_size);
  return tmp.EqualsImpl(rhs, rhs_size);
}

// --------------------------------------------------------------------
// Misc.

Cord::operator std::string() const {
  std::string s;
  absl::CopyCordToString(*this, &s);
  return s;
}

void CopyCordToString(const Cord& src, std::string* dst) {
  if (!src.contents_.is_tree()) {
    src.contents_.CopyTo(dst);
  } else {
    absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
    src.CopyToArraySlowPath(&(*dst)[0]);
  }
}

void Cord::CopyToArraySlowPath(char* dst) const {
  assert(contents_.is_tree());
  absl::string_view fragment;
  if (GetFlatAux(contents_.tree(), &fragment)) {
    memcpy(dst, fragment.data(), fragment.size());
    return;
  }
  for (absl::string_view chunk : Chunks()) {
    memcpy(dst, chunk.data(), chunk.size());
    dst += chunk.size();
  }
}

Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
  ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 &&
                        "Attempted to iterate past `end()`");
  assert(bytes_remaining_ >= current_chunk_.size());
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children_.back();
  stack_of_right_children_.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    stack_of_right_children_.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length != 0);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}
1524
AdvanceAndReadBytes(size_t n)1525 Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
1526 ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
1527 "Attempted to iterate past `end()`");
1528 Cord subcord;
1529
  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = Ref(current_leaf_);
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.set_tree(VerifyTree(subnode));
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf_, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.set_tree(VerifyTree(subnode));
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save
  // the right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n));
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.set_tree(VerifyTree(subnode));
  return subcord;
}
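
// Illustrative sketch, not part of the implementation: the read-and-advance
// logic above backs the public helper Cord::AdvanceAndRead(). A caller that
// consumes a cord in fixed-size records (16 bytes here, chosen arbitrarily)
// might look like:
//
//   absl::Cord::ChunkIterator it = cord.chunk_begin();
//   for (size_t left = cord.size(); left >= 16; left -= 16) {
//     absl::Cord record = absl::Cord::AdvanceAndRead(&it, 16);
//     Process(record);  // Process() is a hypothetical consumer.
//   }
//
// Reads that fit in inline storage are flattened and copied; larger reads
// share tree nodes with the source cord instead of copying bytes.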

void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, skipping whole subtrees depending
  // on their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save
  // the right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}
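
// Worked example (illustrative): with the tree
// CONCAT(FLAT("abcd"), CONCAT(FLAT("efgh"), FLAT("ijklmnop"))) and the
// iterator positioned at "abcd", advancing by n = 10 consumes the 4-byte
// current chunk, pops the 12-byte right subtree (too long to skip whole),
// skips its 4-byte left leaf "efgh" during the descent, and lands 2 bytes
// into "ijklmnop", leaving current_chunk_ == "klmnop".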

char Cord::operator[](size_t i) const {
  ABSL_HARDENING_ASSERT(i < size());
  size_t offset = i;
  const CordRep* rep = contents_.tree();
  if (rep == nullptr) {
    return contents_.data()[i];
  }
  while (true) {
    assert(rep != nullptr);
    assert(offset < rep->length);
    if (rep->tag >= FLAT) {
      // Get the "i"th character directly from the flat array.
      return rep->data[offset];
    } else if (rep->tag == EXTERNAL) {
      // Get the "i"th character from the external array.
      return rep->external()->base[offset];
    } else if (rep->tag == CONCAT) {
      // Recursively branch to the side of the concatenation that the "i"th
      // character is on.
      size_t left_length = rep->concat()->left->length;
      if (offset < left_length) {
        rep = rep->concat()->left;
      } else {
        offset -= left_length;
        rep = rep->concat()->right;
      }
    } else {
      // This must be a substring node, so bypass it to get to the child.
      assert(rep->tag == SUBSTRING);
      offset += rep->substring()->start;
      rep = rep->substring()->child;
    }
  }
}
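
// Usage note (illustrative): unlike std::string, indexing walks the tree from
// the root, so cord[i] costs O(tree depth) rather than O(1). Occasional
// positional reads are fine; bulk scans should iterate chunks instead:
//
//   char c = cord[5];
//   for (absl::string_view chunk : cord.Chunks()) {
//     Use(chunk);  // Use() is a hypothetical consumer.
//   }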

absl::string_view Cord::FlattenSlowPath() {
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = NewFlat(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->data;
    CopyToArraySlowPath(new_buffer);
  } else {
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  Unref(contents_.tree());
  contents_.set_tree(new_rep);
  return absl::string_view(new_buffer, total_size);
}
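
// Caller's view (illustrative): Flatten() pays this copy at most once until
// the cord is modified again; afterwards the cord is backed by a single flat
// or external node and re-flattening is cheap:
//
//   absl::string_view flat = cord.Flatten();  // may copy the whole cord
//   ParseContiguous(flat);  // ParseContiguous() is a hypothetical parser.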

/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
  assert(rep != nullptr);
  if (rep->tag >= FLAT) {
    *fragment = absl::string_view(rep->data, rep->length);
    return true;
  } else if (rep->tag == EXTERNAL) {
    *fragment = absl::string_view(rep->external()->base, rep->length);
    return true;
  } else if (rep->tag == SUBSTRING) {
    CordRep* child = rep->substring()->child;
    if (child->tag >= FLAT) {
      *fragment =
          absl::string_view(child->data + rep->substring()->start, rep->length);
      return true;
    } else if (child->tag == EXTERNAL) {
      *fragment = absl::string_view(
          child->external()->base + rep->substring()->start, rep->length);
      return true;
    }
  }
  return false;
}
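
// Illustrative note: GetFlatAux succeeds exactly when the representation is
// already a single contiguous fragment (FLAT, EXTERNAL, or a SUBSTRING of
// either). It backs the zero-copy fast path exposed as Cord::TryFlat(),
// assuming that accessor in the public header:
//
//   if (absl::optional<absl::string_view> flat = cord.TryFlat()) {
//     UseZeroCopy(*flat);  // UseZeroCopy() is a hypothetical consumer.
//   }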

/* static */ void Cord::ForEachChunkAux(
    absl::cord_internal::CordRep* rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  assert(rep != nullptr);
  int stack_pos = 0;
  constexpr int stack_max = 128;
  // Stack of right branches for tree traversal.
  absl::cord_internal::CordRep* stack[stack_max];
  absl::cord_internal::CordRep* current_node = rep;
  while (true) {
    if (current_node->tag == CONCAT) {
      if (stack_pos == stack_max) {
        // There's no more room on our stack array to add another right branch,
        // and the idea is to avoid allocations, so call this function
        // recursively to navigate this subtree further. (This is not something
        // we expect to happen in practice).
        ForEachChunkAux(current_node, callback);

        // Pop the next right branch and iterate.
        current_node = stack[--stack_pos];
        continue;
      } else {
        // Save the right branch for later traversal and continue down the left
        // branch.
        stack[stack_pos++] = current_node->concat()->right;
        current_node = current_node->concat()->left;
        continue;
      }
    }
    // This is a leaf node, so invoke our callback.
    absl::string_view chunk;
    bool success = GetFlatAux(current_node, &chunk);
    assert(success);
    if (success) {
      callback(chunk);
    }
    if (stack_pos == 0) {
      // End of traversal.
      return;
    }
    current_node = stack[--stack_pos];
  }
}
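
// Illustrative note: this is a depth-first, left-to-right walk that keeps only
// right branches on a fixed 128-entry stack, recursing only for pathologically
// deep trees. The same visit order is available through the public API:
//
//   for (absl::string_view chunk : cord.Chunks()) {
//     callback(chunk);
//   }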

static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) {
  const int kIndentStep = 1;
  int indent = 0;
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    *os << " " << std::setw(indent) << "";
    if (rep->tag == CONCAT) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->tag == SUBSTRING) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf
      if (rep->tag == EXTERNAL) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else {
        *os << "FLAT cap=" << TagToLength(rep->tag) << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->data, rep->length));
        *os << "]\n";
      }
      if (stack.empty()) break;
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}
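
// Illustrative dump (hypothetical values): each line shows "refcount length
// [pointer when include_data] balanced-flag node-kind", with children indented
// one space per level, e.g. for a two-leaf cord:
//
//     1       8 [0x...] b CONCAT depth=1
//     1       5 [0x...] b  FLAT cap=... [hello]
//     1       3 [0x...] b  EXTERNAL [foo]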

static std::string ReportError(CordRep* root, CordRep* node) {
  std::ostringstream buf;
  buf << "Error at node " << node << " in:";
  DumpNode(root, true, &buf);
  return buf.str();
}

static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation) {
  absl::InlinedVector<CordRep*, 2> worklist;
  worklist.push_back(start_node);
  do {
    CordRep* node = worklist.back();
    worklist.pop_back();

    ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
    if (node != root) {
      ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
    }

    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
                                               node->concat()->right->length),
                          ReportError(root, node));
      if (full_validation) {
        worklist.push_back(node->concat()->right);
        worklist.push_back(node->concat()->left);
      }
    } else if (node->tag >= FLAT) {
      ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag),
                          ReportError(root, node));
    } else if (node->tag == EXTERNAL) {
      ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
                          ReportError(root, node));
    } else if (node->tag == SUBSTRING) {
      ABSL_INTERNAL_CHECK(
          node->substring()->start < node->substring()->child->length,
          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
                              node->substring()->child->length,
                          ReportError(root, node));
    }
  } while (!worklist.empty());
  return true;
}
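
// Summary of the invariants enforced above: non-root nodes must be non-empty;
// a CONCAT's length equals the sum of its children's lengths; a FLAT's length
// fits its tag-encoded capacity; an EXTERNAL has a non-null base; and a
// SUBSTRING lies fully within its child.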

// Traverses the tree and computes the total memory allocated.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    const CordRep* next_node = nullptr;

    if (cur_node->tag == CONCAT) {
      total_mem_usage += sizeof(CordRepConcat);
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        if (next_node) {
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else {
      // Since cur_node is not a leaf or a concat node, it must be a substring.
      assert(cur_node->tag == SUBSTRING);
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        next_node = nullptr;
      }
    }

    if (!next_node) {
      if (tree_stack.empty()) {
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}
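
// Illustrative note: this total backs Cord::EstimatedMemoryUsage(). Nodes
// shared between cords (or reachable more than once within one cord) are
// counted each time they are visited, so the result is an estimate rather
// than a count of uniquely owned bytes:
//
//   size_t bytes = cord.EstimatedMemoryUsage();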

std::ostream& operator<<(std::ostream& out, const Cord& cord) {
  for (absl::string_view chunk : cord.Chunks()) {
    out.write(chunk.data(), chunk.size());
  }
  return out;
}
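
// Usage note (illustrative): streaming writes each chunk directly, so printing
// a large cord never materializes a contiguous copy the way flattening first
// would:
//
//   std::cout << cord;  // chunk-by-chunk, no flattening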

namespace strings_internal {
size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; }
size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; }
size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  return TagToLength(tag);
}
uint8_t CordTestAccess::LengthToTag(size_t s) {
  ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
  return AllocatedSizeToTag(s + kFlatOverhead);
}
size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
size_t CordTestAccess::SizeofCordRepExternal() {
  return sizeof(CordRepExternal);
}
size_t CordTestAccess::SizeofCordRepSubstring() {
  return sizeof(CordRepSubstring);
}
}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl