1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/cord.h"
16
17 #include <algorithm>
18 #include <cstddef>
19 #include <cstdio>
20 #include <cstdlib>
21 #include <iomanip>
22 #include <limits>
23 #include <ostream>
24 #include <sstream>
25 #include <type_traits>
26 #include <unordered_set>
27 #include <vector>
28
29 #include "absl/base/casts.h"
30 #include "absl/base/internal/raw_logging.h"
31 #include "absl/base/port.h"
32 #include "absl/container/fixed_array.h"
33 #include "absl/container/inlined_vector.h"
34 #include "absl/strings/escaping.h"
35 #include "absl/strings/internal/cord_internal.h"
36 #include "absl/strings/internal/resize_uninitialized.h"
37 #include "absl/strings/str_cat.h"
38 #include "absl/strings/str_format.h"
39 #include "absl/strings/str_join.h"
40 #include "absl/strings/string_view.h"
41
42 namespace absl {
43 ABSL_NAMESPACE_BEGIN
44
45 using ::absl::cord_internal::CordRep;
46 using ::absl::cord_internal::CordRepConcat;
47 using ::absl::cord_internal::CordRepExternal;
48 using ::absl::cord_internal::CordRepSubstring;
49
// Various representations that we allow.  The numeric values are stored in
// CordRep::tag and must not be reordered: any tag value >= FLAT denotes a
// flat node whose allocated size is encoded in the tag itself (see
// AllocatedSizeToTag / TagToAllocatedSize below).
enum CordRepKind {
  CONCAT = 0,
  EXTERNAL = 1,
  SUBSTRING = 2,

  // We have different tags for different sized flat arrays,
  // starting with FLAT
  FLAT = 3,
};
60
namespace {

// Type used with std::allocator for allocating and deallocating
// `CordRepExternal`. std::allocator is used because it opaquely handles the
// different new / delete overloads available on a given platform.
// Its size equals its alignment so that allocation counts are easy to compute.
struct alignas(absl::cord_internal::ExternalRepAlignment()) ExternalAllocType {
  unsigned char value[absl::cord_internal::ExternalRepAlignment()];
};

// Returns the number of objects to pass in to std::allocator<ExternalAllocType>
// allocate() and deallocate() to create enough room for `CordRepExternal` with
// `releaser_size` bytes on the end.
constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) {
  // Be sure to round up since `releaser_size` could be smaller than
  // `sizeof(ExternalAllocType)`.
  return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) -
          1) /
         sizeof(ExternalAllocType);
}

// Allocates enough memory for `CordRepExternal` and a releaser with size
// `releaser_size` bytes.
void* AllocateExternal(size_t releaser_size) {
  return std::allocator<ExternalAllocType>().allocate(
      GetExternalAllocNumObjects(releaser_size));
}

// Deallocates the memory for a `CordRepExternal` assuming it was allocated with
// a releaser of given size and alignment.
void DeallocateExternal(CordRepExternal* p, size_t releaser_size) {
  std::allocator<ExternalAllocType>().deallocate(
      reinterpret_cast<ExternalAllocType*>(p),
      GetExternalAllocNumObjects(releaser_size));
}

// Returns a pointer to the type erased releaser for the given CordRepExternal.
// The releaser is stored in the trailing bytes immediately past the header.
void* GetExternalReleaser(CordRepExternal* rep) {
  return rep + 1;
}

}  // namespace
102
namespace cord_internal {

// Checked downcasts from CordRep to its concrete representations.  Each
// accessor asserts that the node's tag matches the requested kind before
// performing the static_cast.

inline CordRepConcat* CordRep::concat() {
  assert(tag == CONCAT);
  return static_cast<CordRepConcat*>(this);
}

inline const CordRepConcat* CordRep::concat() const {
  assert(tag == CONCAT);
  return static_cast<const CordRepConcat*>(this);
}

inline CordRepSubstring* CordRep::substring() {
  assert(tag == SUBSTRING);
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(tag == SUBSTRING);
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(tag == EXTERNAL);
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(tag == EXTERNAL);
  return static_cast<const CordRepExternal*>(this);
}

}  // namespace cord_internal
136
// Bytes of bookkeeping a flat node carries before its data payload begins.
static const size_t kFlatOverhead = offsetof(CordRep, data);

static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13");

// Largest and smallest flat node lengths we are willing to allocate
// Flat allocation size is stored in tag, which currently can encode sizes up
// to 4K, encoded as multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
static constexpr size_t kMaxFlatSize = 4096;
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;

// Prefer copying blocks of at most this size, otherwise reference count.
static const size_t kMaxBytesToCopy = 511;
151
// Helper functions for rounded div, and rounding to exact sizes.

// Returns ceil(dividend / divisor); `divisor` must be non-zero.
static size_t DivUp(size_t dividend, size_t divisor) {
  const size_t biased = dividend + divisor - 1;
  return biased / divisor;
}
RoundUp(size_t n,size_t m)154 static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; }
155
156 // Returns the size to the nearest equal or larger value that can be
157 // expressed exactly as a tag value.
RoundUpForTag(size_t size)158 static size_t RoundUpForTag(size_t size) {
159 return RoundUp(size, (size <= 1024) ? 8 : 32);
160 }
161
// Converts the allocated size to a tag, rounding down if the size
// does not exactly match a 'tag expressible' size value. The result is
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
static uint8_t AllocatedSizeToTag(size_t size) {
  size_t tag;
  if (size <= 1024) {
    // 8-byte granularity for small allocations.
    tag = size / 8;
  } else {
    // 32-byte granularity beyond 1024 bytes; tags 129+ cover this range.
    tag = 128 + size / 32 - 1024 / 32;
  }
  assert(tag <= std::numeric_limits<uint8_t>::max());
  return tag;
}
171
// Converts the provided tag to the corresponding allocated size.
// Tags up to 128 encode multiples of 8 bytes; larger tags encode 32-byte
// steps past 1024 bytes.  (Single-return form keeps this a C++11 constexpr.)
static constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag > 128) ? (1024 + (tag - 128) * 32) : (tag * 8);
}
176
// Converts the provided tag to the corresponding available data length:
// the allocated size minus the flat node's bookkeeping overhead.
static constexpr size_t TagToLength(uint8_t tag) {
  return TagToAllocatedSize(tag) - kFlatOverhead;
}

// Enforce that kMaxFlatSize maps to a well-known exact tag value.
static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");
184
// Computes fib(n) via tail recursion, which keeps this a valid C++11
// constexpr.  `prev`/`curr` accumulate the pair (fib(k), fib(k+1)).
constexpr uint64_t Fibonacci(unsigned char n, uint64_t prev = 0,
                             uint64_t curr = 1) {
  return n != 0 ? Fibonacci(n - 1, curr, prev + curr) : prev;
}

static_assert(Fibonacci(63) == 6557470319842,
              "Fibonacci values computed incorrectly");
191
// Minimum length required for a given depth tree -- a tree is considered
// balanced if
//      length(t) >= min_length[depth(t)]
// The root node depth is allowed to become twice as large to reduce rebalancing
// for larger strings (see IsRootBalanced).
static constexpr uint64_t min_length[] = {
    Fibonacci(2),  Fibonacci(3),  Fibonacci(4),  Fibonacci(5),  Fibonacci(6),
    Fibonacci(7),  Fibonacci(8),  Fibonacci(9),  Fibonacci(10), Fibonacci(11),
    Fibonacci(12), Fibonacci(13), Fibonacci(14), Fibonacci(15), Fibonacci(16),
    Fibonacci(17), Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21),
    Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25), Fibonacci(26),
    Fibonacci(27), Fibonacci(28), Fibonacci(29), Fibonacci(30), Fibonacci(31),
    Fibonacci(32), Fibonacci(33), Fibonacci(34), Fibonacci(35), Fibonacci(36),
    Fibonacci(37), Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41),
    Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45), Fibonacci(46),
    Fibonacci(47),
    0xffffffffffffffffull,  // Avoid overflow
};

// Number of entries in min_length[], and therefore the maximum depth a
// non-root concat node may reach before we force a rebalance.
static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);

// The inlined size to use with absl::InlinedVector.
//
// Note: The InlinedVectors in this file (and in cord.h) do not need to use
// the same value for their inlined size. The fact that they do is historical.
// It may be desirable for each to use a different inlined size optimized for
// that InlinedVector's usage.
//
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
static const int kInlinedVectorSize = 47;
259
IsRootBalanced(CordRep * node)260 static inline bool IsRootBalanced(CordRep* node) {
261 if (node->tag != CONCAT) {
262 return true;
263 } else if (node->concat()->depth() <= 15) {
264 return true;
265 } else if (node->concat()->depth() > kMinLengthSize) {
266 return false;
267 } else {
268 // Allow depth to become twice as large as implied by fibonacci rule to
269 // reduce rebalancing for larger strings.
270 return (node->length >= min_length[node->concat()->depth() / 2]);
271 }
272 }
273
// Forward declarations for helpers defined later in this file.
static CordRep* Rebalance(CordRep* node);
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os);
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation);
278
// Returns `node` unchanged; in debug builds first asserts tree invariants.
static inline CordRep* VerifyTree(CordRep* node) {
  // Verification is expensive, so only do it in debug mode.
  // Even in debug mode we normally do only light validation.
  // If you are debugging Cord itself, you should define the
  // macro EXTRA_CORD_VALIDATION, e.g. by adding
  // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
#ifdef EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
#else   // EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
#endif  // EXTRA_CORD_VALIDATION
  // Keep VerifyNode referenced so NDEBUG builds don't flag it as unused.
  static_cast<void>(&VerifyNode);

  return node;
}
294
295 // --------------------------------------------------------------------
296 // Memory management
297
Ref(CordRep * rep)298 inline CordRep* Ref(CordRep* rep) {
299 if (rep != nullptr) {
300 rep->refcount.Increment();
301 }
302 return rep;
303 }
304
// This internal routine is called from the cold path of Unref below. Keeping it
// in a separate routine allows good inlining of Unref into many profitable call
// sites. However, the call to this function can be highly disruptive to the
// register pressure in those callers. To minimize the cost to callers, we use
// a special LLVM calling convention that preserves most registers. This allows
// the call to this routine in cold paths to not disrupt the caller's register
// pressure. This calling convention is not available on all platforms; we
// intentionally allow LLVM to ignore the attribute rather than attempting to
// hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
__attribute__((preserve_most))
#pragma clang diagnostic pop
#endif
// Destroys `rep`, whose refcount has already reached zero, together with any
// children whose refcounts drop to zero as a result.  Uses an explicit
// pending stack instead of recursion to avoid stack overflow on deep trees.
static void UnrefInternal(CordRep* rep) {
  assert(rep != nullptr);

  absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
  while (true) {
    if (rep->tag == CONCAT) {
      CordRepConcat* rep_concat = rep->concat();
      CordRep* right = rep_concat->right;
      if (!right->refcount.Decrement()) {
        // Right child also died; defer its destruction.
        pending.push_back(right);
      }
      CordRep* left = rep_concat->left;
      delete rep_concat;
      rep = nullptr;
      if (!left->refcount.Decrement()) {
        // Destroy the left child next without going through the stack.
        rep = left;
        continue;
      }
    } else if (rep->tag == EXTERNAL) {
      CordRepExternal* rep_external = rep->external();
      absl::string_view data(rep_external->base, rep->length);
      void* releaser = GetExternalReleaser(rep_external);
      // Invoking the releaser destroys it and reports its size, which we need
      // to deallocate the node together with its trailing releaser storage.
      size_t releaser_size = rep_external->releaser_invoker(releaser, data);
      rep_external->~CordRepExternal();
      DeallocateExternal(rep_external, releaser_size);
      rep = nullptr;
    } else if (rep->tag == SUBSTRING) {
      CordRepSubstring* rep_substring = rep->substring();
      CordRep* child = rep_substring->child;
      delete rep_substring;
      rep = nullptr;
      if (!child->refcount.Decrement()) {
        rep = child;
        continue;
      }
    } else {
      // Flat CordReps are allocated and constructed with raw ::operator new
      // and placement new, and must be destructed and deallocated
      // accordingly.
#if defined(__cpp_sized_deallocation)
      size_t size = TagToAllocatedSize(rep->tag);
      rep->~CordRep();
      ::operator delete(rep, size);
#else
      rep->~CordRep();
      ::operator delete(rep);
#endif
      rep = nullptr;
    }

    // Pick up any deferred sibling; stop once everything is destroyed.
    if (!pending.empty()) {
      rep = pending.back();
      pending.pop_back();
    } else {
      break;
    }
  }
}
378
Unref(CordRep * rep)379 inline void Unref(CordRep* rep) {
380 // Fast-path for two common, hot cases: a null rep and a shared root.
381 if (ABSL_PREDICT_TRUE(rep == nullptr ||
382 rep->refcount.DecrementExpectHighRefcount())) {
383 return;
384 }
385
386 UnrefInternal(rep);
387 }
388
389 // Return the depth of a node
Depth(const CordRep * rep)390 static int Depth(const CordRep* rep) {
391 if (rep->tag == CONCAT) {
392 return rep->concat()->depth();
393 } else {
394 return 0;
395 }
396 }
397
// Installs `left` and `right` as the children of `concat`, and recomputes the
// derived length and depth fields from the children.
static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
                              CordRep* right) {
  concat->left = left;
  concat->right = right;

  concat->length = left->length + right->length;
  concat->set_depth(1 + std::max(Depth(left), Depth(right)));
}
406
407 // Create a concatenation of the specified nodes.
408 // Does not change the refcounts of "left" and "right".
409 // The returned node has a refcount of 1.
RawConcat(CordRep * left,CordRep * right)410 static CordRep* RawConcat(CordRep* left, CordRep* right) {
411 // Avoid making degenerate concat nodes (one child is empty)
412 if (left == nullptr || left->length == 0) {
413 Unref(left);
414 return right;
415 }
416 if (right == nullptr || right->length == 0) {
417 Unref(right);
418 return left;
419 }
420
421 CordRepConcat* rep = new CordRepConcat();
422 rep->tag = CONCAT;
423 SetConcatChildren(rep, left, right);
424
425 return rep;
426 }
427
Concat(CordRep * left,CordRep * right)428 static CordRep* Concat(CordRep* left, CordRep* right) {
429 CordRep* rep = RawConcat(left, right);
430 if (rep != nullptr && !IsRootBalanced(rep)) {
431 rep = Rebalance(rep);
432 }
433 return VerifyTree(rep);
434 }
435
436 // Make a balanced tree out of an array of leaf nodes.
MakeBalancedTree(CordRep ** reps,size_t n)437 static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
438 // Make repeated passes over the array, merging adjacent pairs
439 // until we are left with just a single node.
440 while (n > 1) {
441 size_t dst = 0;
442 for (size_t src = 0; src < n; src += 2) {
443 if (src + 1 < n) {
444 reps[dst] = Concat(reps[src], reps[src + 1]);
445 } else {
446 reps[dst] = reps[src];
447 }
448 dst++;
449 }
450 n = dst;
451 }
452
453 return reps[0];
454 }
455
456 // Create a new flat node.
NewFlat(size_t length_hint)457 static CordRep* NewFlat(size_t length_hint) {
458 if (length_hint <= kMinFlatLength) {
459 length_hint = kMinFlatLength;
460 } else if (length_hint > kMaxFlatLength) {
461 length_hint = kMaxFlatLength;
462 }
463
464 // Round size up so it matches a size we can exactly express in a tag.
465 const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
466 void* const raw_rep = ::operator new(size);
467 CordRep* rep = new (raw_rep) CordRep();
468 rep->tag = AllocatedSizeToTag(size);
469 return VerifyTree(rep);
470 }
471
// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
static CordRep* NewTree(const char* data,
                        size_t length,
                        size_t alloc_hint) {
  if (length == 0) return nullptr;
  // One flat node per kMaxFlatLength-sized chunk of input.
  absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
  size_t n = 0;
  do {
    const size_t len = std::min(length, kMaxFlatLength);
    // `alloc_hint` reserves extra capacity in each flat for future appends.
    CordRep* rep = NewFlat(len + alloc_hint);
    rep->length = len;
    memcpy(rep->data, data, len);
    reps[n++] = VerifyTree(rep);
    data += len;
    length -= len;
  } while (length != 0);
  return MakeBalancedTree(reps.data(), n);
}
491
namespace cord_internal {

// Allocates an EXTERNAL rep referencing `data`, with uninitialized trailing
// storage for a releaser of `releaser_size` bytes.  The caller must construct
// the releaser in the returned slot before the rep can be safely unreffed.
ExternalRepReleaserPair NewExternalWithUninitializedReleaser(
    absl::string_view data, ExternalReleaserInvoker invoker,
    size_t releaser_size) {
  assert(!data.empty());

  void* raw_rep = AllocateExternal(releaser_size);
  auto* rep = new (raw_rep) CordRepExternal();
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  rep->releaser_invoker = invoker;
  return {VerifyTree(rep), GetExternalReleaser(rep)};
}

}  // namespace cord_internal
509
NewSubstring(CordRep * child,size_t offset,size_t length)510 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
511 // Never create empty substring nodes
512 if (length == 0) {
513 Unref(child);
514 return nullptr;
515 } else {
516 CordRepSubstring* rep = new CordRepSubstring();
517 assert((offset + length) <= child->length);
518 rep->length = length;
519 rep->tag = SUBSTRING;
520 rep->start = offset;
521 rep->child = child;
522 return VerifyTree(rep);
523 }
524 }
525
// --------------------------------------------------------------------
// Cord::InlineRep functions

// Out-of-line definition for the in-class static constant (required when the
// constant is odr-used, pre-C++17).
// This will trigger LNK2005 in MSVC.
#ifndef COMPILER_MSVC
const unsigned char Cord::InlineRep::kMaxInline;
#endif  // COMPILER_MSVC
533
// Copies `n` bytes (n <= kMaxInline) into the inline buffer and records the
// size in the final byte.  `nullify_tail` zero-fills the unused remainder.
inline void Cord::InlineRep::set_data(const char* data, size_t n,
                                      bool nullify_tail) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");

  cord_internal::SmallMemmove(data_, data, n, nullify_tail);
  data_[kMaxInline] = static_cast<char>(n);
}
541
// Zeroes the inline buffer, records size `n`, and returns a pointer for the
// caller to fill in up to `n` bytes of data.
inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  memset(data_, 0, sizeof(data_));
  data_[kMaxInline] = static_cast<char>(n);
  return data_;
}
548
// Returns the tree root, first converting inline data into a FLAT node if
// this rep is not yet a tree.  `extra_hint` requests additional capacity in
// a newly created flat node.
inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
  size_t len = data_[kMaxInline];
  CordRep* result;
  if (len > kMaxInline) {
    // Already a tree: the root pointer is stored in the inline buffer.
    memcpy(&result, data_, sizeof(result));
  } else {
    // Inline data: copy it into a fresh flat node and switch to tree form.
    result = NewFlat(len + extra_hint);
    result->length = len;
    memcpy(result->data, data_, len);
    set_tree(result);
  }
  return result;
}
562
// Shrinks the inline representation by `n` bytes, zeroing the removed tail.
// Requires inline storage (not a tree) currently holding at least `n` bytes.
inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = data_[kMaxInline];
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_ + tag, 0, n);
  data_[kMaxInline] = static_cast<char>(tag);
}
571
// Drops the first `n` bytes of the inline representation (inline data only).
inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n);
  reduce_size(n);
}
576
AppendTree(CordRep * tree)577 void Cord::InlineRep::AppendTree(CordRep* tree) {
578 if (tree == nullptr) return;
579 size_t len = data_[kMaxInline];
580 if (len == 0) {
581 set_tree(tree);
582 } else {
583 set_tree(Concat(force_tree(0), tree));
584 }
585 }
586
PrependTree(CordRep * tree)587 void Cord::InlineRep::PrependTree(CordRep* tree) {
588 if (tree == nullptr) return;
589 size_t len = data_[kMaxInline];
590 if (len == 0) {
591 set_tree(tree);
592 } else {
593 set_tree(Concat(tree, force_tree(0)));
594 }
595 }
596
// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Search down the right-hand path for a non-full FLAT node.  Only
  // uniquely-owned nodes may be edited in place.
  CordRep* dst = root;
  while (dst->tag == CONCAT && dst->refcount.IsOne()) {
    dst = dst->concat()->right;
  }

  if (dst->tag < FLAT || !dst->refcount.IsOne()) {
    // Not a flat leaf, or shared with another cord: cannot append in place.
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = TagToLength(dst->tag);
  if (in_use == capacity) {
    // Leaf is already full.
    *region = nullptr;
    *size = 0;
    return false;
  }

  size_t size_increase = std::min(capacity - in_use, max_length);

  // We need to update the length fields for all nodes, including the leaf node.
  for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
    rep->length += size_increase;
  }
  dst->length += size_increase;

  *region = dst->data + in_use;
  *size = size_increase;
  return true;
}
635
// Returns a writable region of up to `max_length` bytes at the end of the
// cord, reusing inline space or existing flat capacity when possible.
void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
                                      size_t max_length) {
  if (max_length == 0) {
    *region = nullptr;
    *size = 0;
    return;
  }

  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) {
    *region = data_ + inline_length;
    *size = max_length;
    data_[kMaxInline] = static_cast<char>(inline_length + max_length);
    return;
  }

  CordRep* root = force_tree(max_length);

  // Reuse spare capacity in the rightmost flat leaf when possible.
  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node =
      NewFlat(std::max(static_cast<size_t>(root->length), max_length));
  new_node->length =
      std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}
668
// Unbounded variant: returns all remaining writable capacity at the end of
// the cord, allocating a new flat node if nothing is reusable.
void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) {
  const size_t max_length = std::numeric_limits<size_t>::max();

  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline) {
    *region = data_ + inline_length;
    *size = kMaxInline - inline_length;
    // Hand out the full inline buffer; the caller owns the returned bytes.
    data_[kMaxInline] = kMaxInline;
    return;
  }

  CordRep* root = force_tree(max_length);

  // Reuse spare capacity in the rightmost flat leaf when possible.
  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node = NewFlat(root->length);
  new_node->length = TagToLength(new_node->tag);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}
694
695 // If the rep is a leaf, this will increment the value at total_mem_usage and
696 // will return true.
RepMemoryUsageLeaf(const CordRep * rep,size_t * total_mem_usage)697 static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
698 if (rep->tag >= FLAT) {
699 *total_mem_usage += TagToAllocatedSize(rep->tag);
700 return true;
701 }
702 if (rep->tag == EXTERNAL) {
703 *total_mem_usage += sizeof(CordRepConcat) + rep->length;
704 return true;
705 }
706 return false;
707 }
708
// Replaces this rep's contents with a copy of `src`, adding a reference when
// `src` holds a tree.
void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  ClearSlow();

  memcpy(data_, src.data_, sizeof(data_));
  if (is_tree()) {
    Ref(tree());  // both reps now share the same root
  }
}
717
// Releases any tree reference and resets to the empty inline representation.
void Cord::InlineRep::ClearSlow() {
  if (is_tree()) {
    Unref(tree());
  }
  memset(data_, 0, sizeof(data_));
}
724
// --------------------------------------------------------------------
// Constructors and destructors

// Copy constructor: bitwise-copies the inline rep, then adds a reference
// when the source holds a tree.
Cord::Cord(const Cord& src) : contents_(src.contents_) {
  Ref(contents_.tree());  // Does nothing if contents_ has embedded data
}
731
Cord(absl::string_view src)732 Cord::Cord(absl::string_view src) {
733 const size_t n = src.size();
734 if (n <= InlineRep::kMaxInline) {
735 contents_.set_data(src.data(), n, false);
736 } else {
737 contents_.set_tree(NewTree(src.data(), n, 0));
738 }
739 }
740
// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
void Cord::DestroyCordSlow() {
  Unref(VerifyTree(contents_.tree()));
}
746
// --------------------------------------------------------------------
// Mutators

void Cord::Clear() {
  // clear() detaches and returns the previous root (if any) for unreffing.
  Unref(contents_.clear());
}
753
// Replaces the contents with a copy of `src`.  `src` may alias the cord's
// own storage, hence the use of memmove on the in-place path.
Cord& Cord::operator=(absl::string_view src) {
  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_
    contents_.set_data(data, length, true);
    Unref(tree);
    return *this;
  }
  if (tree != nullptr && tree->tag >= FLAT &&
      TagToLength(tree->tag) >= length && tree->refcount.IsOne()) {
    // Copy in place if the existing FLAT node is reusable.
    memmove(tree->data, data, length);
    tree->length = length;
    VerifyTree(tree);
    return *this;
  }
  contents_.set_tree(NewTree(data, length, 0));
  Unref(tree);
  return *this;
}
777
// TODO(sanjay): Move to Cord::InlineRep section of file. For now,
// we keep it here to make diffs easier.

// Appends `src_size` bytes from `src_data`, preferring in-place growth
// (inline buffer, then spare flat capacity) before allocating new nodes.
void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) {
  if (src_size == 0) return;  // memcpy(_, nullptr, 0) is undefined.
  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) {
    // Append new data to embedded array
    data_[kMaxInline] = static_cast<char>(inline_length + src_size);
    memcpy(data_ + inline_length, src_data, src_size);
    return;
  }

  CordRep* root = tree();

  size_t appended = 0;
  if (root) {
    char* region;
    if (PrepareAppendRegion(root, &region, &appended, src_size)) {
      memcpy(region, src_data, appended);
    }
  } else {
    // It is possible that src_data == data_, but when we transition from an
    // InlineRep to a tree we need to assign data_ = root via set_tree. To
    // avoid corrupting the source data before we copy it, delay calling
    // set_tree until after we've copied data.
    // We are going from an inline size to beyond inline size. Make the new size
    // either double the inlined size, or the added size + 10%.
    const size_t size1 = inline_length * 2 + src_size;
    const size_t size2 = inline_length + src_size / 10;
    root = NewFlat(std::max<size_t>(size1, size2));
    appended = std::min(src_size, TagToLength(root->tag) - inline_length);
    memcpy(root->data, data_, inline_length);
    memcpy(root->data + inline_length, src_data, appended);
    root->length = inline_length + appended;
    set_tree(root);
  }

  src_data += appended;
  src_size -= appended;
  if (src_size == 0) {
    return;
  }

  // Use new block(s) for any remaining bytes that were not handled above.
  // Alloc extra memory only if the right child of the root of the new tree is
  // going to be a FLAT node, which will permit further inplace appends.
  size_t length = src_size;
  if (src_size < kMaxFlatLength) {
    // The new length is either
    //   - old size + 10%
    //   - old_size + src_size
    // This will cause a reasonable conservative step-up in size that is still
    // large enough to avoid excessive amounts of small fragments being added.
    length = std::max<size_t>(root->length / 10, src_size);
  }
  set_tree(Concat(root, NewTree(src_data, src_size, length - src_size)));
}
836
// Lvalue overload: returns a new reference to this cord's tree (may be null).
inline CordRep* Cord::TakeRep() const& {
  return Ref(contents_.tree());
}
840
// Rvalue overload: steals the tree reference, leaving this cord empty.
inline CordRep* Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}
846
// Shared implementation for copy- and move-append; `C` is deduced as
// `const Cord&` or `Cord` (rvalue).
template <typename C>
inline void Cord::AppendImpl(C&& src) {
  if (empty()) {
    // In case of an empty destination avoid allocating a new node, do not copy
    // data.
    *this = std::forward<C>(src);
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray(src.contents_.data(), src_size);
      return;
    }
    if (src_tree->tag >= FLAT) {
      // src tree just has one flat node.
      contents_.AppendArray(src_tree->data, src_size);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  // Long source: splice its tree in, stealing the reference from rvalues.
  contents_.AppendTree(std::forward<C>(src).TakeRep());
}
884
// Public Append overloads; both funnel into AppendImpl above.
void Cord::Append(const Cord& src) { AppendImpl(src); }

void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
888
// Prepends `src` to this cord, sharing its tree when it has one.
void Cord::Prepend(const Cord& src) {
  CordRep* src_tree = src.contents_.tree();
  if (src_tree != nullptr) {
    Ref(src_tree);  // the tree is now shared with `src`
    contents_.PrependTree(src_tree);
    return;
  }

  // `src` cord is inlined.
  absl::string_view src_contents(src.contents_.data(), src.contents_.size());
  return Prepend(src_contents);
}
901
// Prepends the bytes of `src`, staying inline when the combined size fits.
void Cord::Prepend(absl::string_view src) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
  size_t cur_size = contents_.size();
  if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) {
    // Use embedded storage.
    // Build the combined buffer locally first, since `src` could in principle
    // alias contents_, then overwrite contents_ wholesale.
    char data[InlineRep::kMaxInline + 1] = {0};
    data[InlineRep::kMaxInline] = cur_size + src.size();  // set size
    memcpy(data, src.data(), src.size());
    memcpy(data + src.size(), contents_.data(), cur_size);
    memcpy(reinterpret_cast<void*>(&contents_), data,
           InlineRep::kMaxInline + 1);
  } else {
    contents_.PrependTree(NewTree(src.data(), src.size(), 0));
  }
}
917
// Returns a node representing node[n, node->length), sharing subtrees of the
// input where possible.  Returns nullptr when the result would be empty.
// Does not consume the caller's reference to `node`.
static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  // Right-hand subtrees that survive the cut, collected top-down.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->left->length) {
      // Push right to stack, descend left.
      rhs_stack.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Drop left, descend right.
      n -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else {
    size_t start = n;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Consider in-place update of node, similar to in RemoveSuffixFrom().
      // Flatten nested substrings by offsetting into the underlying child.
      start += node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  // Reassemble: concat the trimmed leaf with the saved right subtrees,
  // innermost (deepest) first.
  while (!rhs_stack.empty()) {
    node = Concat(node, Ref(rhs_stack.back()));
    rhs_stack.pop_back();
  }
  return node;
}
955
956 // RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
957 // exception that removing a suffix has an optimization where a node may be
958 // edited in place iff that node and all its ancestors have a refcount of 1.
// Returns a new reference to a tree equivalent to `node` with the last
// `n` bytes removed, or nullptr when nothing remains. Unlike
// RemovePrefixFrom(), the trailing leaf may be truncated in place when the
// whole path from the root is uniquely owned.
static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  // Left-hand subtrees that survive the removal; reattached at the end.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
  // True while every node from the root down to `node` has refcount 1,
  // which permits the in-place length adjustment below.
  bool inplace_ok = node->refcount.IsOne();

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->right->length) {
      // Push left to stack, descend right.
      lhs_stack.push_back(node->concat()->left);
      node = node->concat()->right;
    } else {
      // Drop right, descend left.
      n -= node->concat()->right->length;
      node = node->concat()->left;
    }
    inplace_ok = inplace_ok && node->refcount.IsOne();
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else if (inplace_ok && node->tag != EXTERNAL) {
    // Consider making a new buffer if the current node capacity is much
    // larger than the new length.
    Ref(node);
    node->length -= n;
  } else {
    size_t start = 0;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Collapse nested substrings by addressing the child directly.
      start = node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  // Reattach the saved left-hand subtrees.
  while (!lhs_stack.empty()) {
    node = Concat(Ref(lhs_stack.back()), node);
    lhs_stack.pop_back();
  }
  return node;
}
1002
RemovePrefix(size_t n)1003 void Cord::RemovePrefix(size_t n) {
1004 ABSL_INTERNAL_CHECK(n <= size(),
1005 absl::StrCat("Requested prefix size ", n,
1006 " exceeds Cord's size ", size()));
1007 CordRep* tree = contents_.tree();
1008 if (tree == nullptr) {
1009 contents_.remove_prefix(n);
1010 } else {
1011 CordRep* newrep = RemovePrefixFrom(tree, n);
1012 Unref(tree);
1013 contents_.replace_tree(VerifyTree(newrep));
1014 }
1015 }
1016
RemoveSuffix(size_t n)1017 void Cord::RemoveSuffix(size_t n) {
1018 ABSL_INTERNAL_CHECK(n <= size(),
1019 absl::StrCat("Requested suffix size ", n,
1020 " exceeds Cord's size ", size()));
1021 CordRep* tree = contents_.tree();
1022 if (tree == nullptr) {
1023 contents_.reduce_size(n);
1024 } else {
1025 CordRep* newrep = RemoveSuffixFrom(tree, n);
1026 Unref(tree);
1027 contents_.replace_tree(VerifyTree(newrep));
1028 }
1029 }
1030
1031 // Work item for NewSubRange().
1032 struct SubRange {
SubRangeabsl::SubRange1033 SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
1034 : node(a_node), pos(a_pos), n(a_n) {}
1035 CordRep* node; // nullptr means concat last 2 results.
1036 size_t pos;
1037 size_t n;
1038 };
1039
// Returns a new reference to a tree representing the byte range
// [pos, pos + n) of `node`. Iterative: `todo` holds pending work items and
// a work item with a null node means "concatenate the last two partial
// results on the `results` stack".
static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
  absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
  absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
  todo.push_back(SubRange(node, pos, n));
  do {
    const SubRange& sr = todo.back();
    node = sr.node;
    pos = sr.pos;
    n = sr.n;
    todo.pop_back();

    if (node == nullptr) {
      // Marker item: combine the two most recent partial results.
      assert(results.size() >= 2);
      CordRep* right = results.back();
      results.pop_back();
      CordRep* left = results.back();
      results.pop_back();
      results.push_back(Concat(left, right));
    } else if (pos == 0 && n == node->length) {
      // The whole node is in range; share it as-is.
      results.push_back(Ref(node));
    } else if (node->tag != CONCAT) {
      if (node->tag == SUBSTRING) {
        // Collapse nested substrings by addressing the child directly.
        pos += node->substring()->start;
        node = node->substring()->child;
      }
      results.push_back(NewSubstring(Ref(node), pos, n));
    } else if (pos + n <= node->concat()->left->length) {
      // Range lies entirely within the left child.
      todo.push_back(SubRange(node->concat()->left, pos, n));
    } else if (pos >= node->concat()->left->length) {
      // Range lies entirely within the right child.
      pos -= node->concat()->left->length;
      todo.push_back(SubRange(node->concat()->right, pos, n));
    } else {
      // Range straddles both children: split it, then concat the pieces.
      size_t left_n = node->concat()->left->length - pos;
      todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
      todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
      todo.push_back(SubRange(node->concat()->left, pos, left_n));
    }
  } while (!todo.empty());
  assert(results.size() == 1);
  return results[0];
}
1081
// Returns a new cord holding the byte range [pos, pos + new_size) of this
// cord. Out-of-range arguments are clamped to the cord's bounds.
Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // sub_cord is newly constructed, no need to re-zero-out the tail of
    // contents_ memory.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
  } else if (new_size == 0) {
    // We want to return empty subcord, so nothing to do.
  } else if (new_size <= InlineRep::kMaxInline) {
    // Result fits in inline storage: copy the bytes chunk by chunk.
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    char* dest = sub_cord.contents_.data_;
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size;  // set size
  } else {
    // Build a substring tree over the existing representation.
    sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size));
  }
  return sub_cord;
}
1112
1113 // --------------------------------------------------------------------
1114 // Balancing
1115
// Helper for Rebalance(): decomposes an unbalanced concat tree into a
// forest of subtrees bucketed by length (trees_[i] holds a combined tree
// whose length falls in the bucket bounded by min_length[i]), then
// reassembles the buckets into a more balanced tree.
class CordForest {
 public:
  explicit CordForest(size_t length)
      : root_length_(length), trees_(kMinLengthSize, nullptr) {}

  // Tears down `cord_root`, adding sufficiently balanced subtrees to the
  // forest and recycling uniquely owned CONCAT nodes via concat_freelist_.
  void Build(CordRep* cord_root) {
    std::vector<CordRep*> pending = {cord_root};

    while (!pending.empty()) {
      CordRep* node = pending.back();
      pending.pop_back();
      CheckNode(node);
      if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
        AddNode(node);
        continue;
      }

      CordRepConcat* concat_node = node->concat();
      if (concat_node->depth() >= kMinLengthSize ||
          concat_node->length < min_length[concat_node->depth()]) {
        // Too deep, or too short for its depth: keep decomposing.
        pending.push_back(concat_node->right);
        pending.push_back(concat_node->left);

        if (concat_node->refcount.IsOne()) {
          // Sole owner: keep this concat node for reuse by MakeConcat().
          // Its `left` pointer doubles as the freelist link (the children
          // were already saved on `pending` above).
          concat_node->left = concat_freelist_;
          concat_freelist_ = concat_node;
        } else {
          // Shared: take references on the children before dropping our
          // reference on the parent.
          Ref(concat_node->right);
          Ref(concat_node->left);
          Unref(concat_node);
        }
      } else {
        // Balanced enough: keep this subtree intact.
        AddNode(node);
      }
    }
  }

  // Concatenates all remaining buckets into a single tree and returns it.
  CordRep* ConcatNodes() {
    CordRep* sum = nullptr;
    for (auto* node : trees_) {
      if (node == nullptr) continue;

      sum = PrependNode(node, sum);
      root_length_ -= node->length;
      if (root_length_ == 0) break;  // All bytes accounted for.
    }
    ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
    return VerifyTree(sum);
  }

 private:
  // Appends `node` after `sum`, or starts a new sum when `sum` is null.
  CordRep* AppendNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(sum, node);
  }

  // Prepends `node` before `sum`, or starts a new sum when `sum` is null.
  CordRep* PrependNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(node, sum);
  }

  // Inserts `node` into the forest, merging it with any smaller trees it
  // displaces and cascading merges on bucket collisions.
  void AddNode(CordRep* node) {
    CordRep* sum = nullptr;

    // Collect together everything with which we will merge node
    int i = 0;
    for (; node->length > min_length[i + 1]; ++i) {
      auto& tree_at_i = trees_[i];

      if (tree_at_i == nullptr) continue;
      sum = PrependNode(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    sum = AppendNode(node, sum);

    // Insert sum into appropriate place in the forest
    for (; sum->length >= min_length[i]; ++i) {
      auto& tree_at_i = trees_[i];
      if (tree_at_i == nullptr) continue;

      // Bucket occupied: merge and try the next bucket up.
      sum = MakeConcat(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    // min_length[0] == 1, which means sum->length >= min_length[0]
    assert(i > 0);
    trees_[i - 1] = sum;
  }

  // Make concat node trying to reuse existing CordRepConcat nodes we
  // already collected in the concat_freelist_.
  CordRep* MakeConcat(CordRep* left, CordRep* right) {
    if (concat_freelist_ == nullptr) return RawConcat(left, right);

    CordRepConcat* rep = concat_freelist_;
    // Advance the freelist (linked through the `left` pointers).
    if (concat_freelist_->left == nullptr) {
      concat_freelist_ = nullptr;
    } else {
      concat_freelist_ = concat_freelist_->left->concat();
    }
    SetConcatChildren(rep, left, right);

    return rep;
  }

  // Verifies basic structural invariants of `node`.
  static void CheckNode(CordRep* node) {
    ABSL_INTERNAL_CHECK(node->length != 0u, "");
    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
      ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
                                           node->concat()->right->length),
                          "");
    }
  }

  // Bytes not yet consumed by ConcatNodes().
  size_t root_length_;

  // use an inlined vector instead of a flat array to get bounds checking
  absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;

  // List of concat nodes we can re-use for Cord balancing.
  CordRepConcat* concat_freelist_ = nullptr;
};
1239
Rebalance(CordRep * node)1240 static CordRep* Rebalance(CordRep* node) {
1241 VerifyTree(node);
1242 assert(node->tag == CONCAT);
1243
1244 if (node->length == 0) {
1245 return nullptr;
1246 }
1247
1248 CordForest forest(node->length);
1249 forest.Build(node);
1250 return forest.ConcatNodes();
1251 }
1252
1253 // --------------------------------------------------------------------
1254 // Comparators
1255
1256 namespace {
1257
// Maps an arbitrary memcmp() result onto the canonical {-1, 0, 1} set.
int ClampResult(int memcmp_res) {
  if (memcmp_res < 0) return -1;
  if (memcmp_res > 0) return 1;
  return 0;
}
1261
CompareChunks(absl::string_view * lhs,absl::string_view * rhs,size_t * size_to_compare)1262 int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
1263 size_t* size_to_compare) {
1264 size_t compared_size = std::min(lhs->size(), rhs->size());
1265 assert(*size_to_compare >= compared_size);
1266 *size_to_compare -= compared_size;
1267
1268 int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
1269 if (memcmp_res != 0) return memcmp_res;
1270
1271 lhs->remove_prefix(compared_size);
1272 rhs->remove_prefix(compared_size);
1273
1274 return 0;
1275 }
1276
// This overload set computes comparison results from memcmp result. This
// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp the result to the
// {-1, 0, 1} set. For bool we are just interested in "value == 0".
template <typename ResultType>
ResultType ComputeCompareResult(int memcmp_res) {
  // Generic (int) case: clamp to {-1, 0, 1}.
  return ClampResult(memcmp_res);
}
template <>
bool ComputeCompareResult<bool>(int memcmp_res) {
  // Equality case: only "compared equal" matters.
  return memcmp_res == 0;
}
1289
1290 } // namespace
1291
1292 // Helper routine. Locates the first flat chunk of the Cord without
1293 // initializing the iterator.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  size_t n = data_[kMaxInline];
  if (n <= kMaxInline) {
    // Inline representation: the bytes live directly in data_.
    return absl::string_view(data_, n);
  }

  CordRep* node = tree();
  if (node->tag >= FLAT) {
    // Whole cord is a single flat buffer.
    return absl::string_view(node->data, node->length);
  }

  if (node->tag == EXTERNAL) {
    // Whole cord is a single external buffer.
    return absl::string_view(node->external()->base, node->length);
  }

  // Walk down the left branches until we hit a non-CONCAT node.
  while (node->tag == CONCAT) {
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->tag >= FLAT) {
    return absl::string_view(node->data + offset, length);
  }

  assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}
1332
// Continues a comparison against `rhs` after the caller has already
// compared (equal) the first `compared_size` bytes; at most
// `size_to_compare` bytes participate in total.
inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator when it runs dry; returns false once
  // the cord is exhausted.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  // All compared bytes were equal: the side with data remaining is greater.
  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}
1362
// Cord-vs-cord analogue of the string_view overload above: continues a
// comparison after the first `compared_size` (equal) bytes; at most
// `size_to_compare` bytes participate in total.
inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator when it runs dry; returns false once
  // the cord is exhausted.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  // All compared bytes were equal: the side with data remaining is greater.
  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}
1396
GetFirstChunk(const Cord & c)1397 inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
1398 return c.contents_.FindFlatStartPiece();
1399 }
GetFirstChunk(absl::string_view sv)1400 inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
1401 return sv;
1402 }
1403
// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
// that 'size_to_compare' is greater than the size of the smaller of the
// first chunks.
template <typename ResultType, typename RHS>
ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
                          size_t size_to_compare) {
  absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
  absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);

  size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
  assert(size_to_compare >= compared_size);
  int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
  if (compared_size == size_to_compare || memcmp_res != 0) {
    // Fast path: the answer is fully determined by the first chunks.
    return ComputeCompareResult<ResultType>(memcmp_res);
  }

  // Slow path: walk the remaining chunks.
  return ComputeCompareResult<ResultType>(
      lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
}
1422
EqualsImpl(absl::string_view rhs,size_t size_to_compare) const1423 bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
1424 return GenericCompare<bool>(*this, rhs, size_to_compare);
1425 }
1426
EqualsImpl(const Cord & rhs,size_t size_to_compare) const1427 bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
1428 return GenericCompare<bool>(*this, rhs, size_to_compare);
1429 }
1430
1431 template <typename RHS>
SharedCompareImpl(const Cord & lhs,const RHS & rhs)1432 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1433 size_t lhs_size = lhs.size();
1434 size_t rhs_size = rhs.size();
1435 if (lhs_size == rhs_size) {
1436 return GenericCompare<int>(lhs, rhs, lhs_size);
1437 }
1438 if (lhs_size < rhs_size) {
1439 auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1440 return data_comp_res == 0 ? -1 : data_comp_res;
1441 }
1442
1443 auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1444 return data_comp_res == 0 ? +1 : data_comp_res;
1445 }
1446
Compare(absl::string_view rhs) const1447 int Cord::Compare(absl::string_view rhs) const {
1448 return SharedCompareImpl(*this, rhs);
1449 }
1450
CompareImpl(const Cord & rhs) const1451 int Cord::CompareImpl(const Cord& rhs) const {
1452 return SharedCompareImpl(*this, rhs);
1453 }
1454
EndsWith(absl::string_view rhs) const1455 bool Cord::EndsWith(absl::string_view rhs) const {
1456 size_t my_size = size();
1457 size_t rhs_size = rhs.size();
1458
1459 if (my_size < rhs_size) return false;
1460
1461 Cord tmp(*this);
1462 tmp.RemovePrefix(my_size - rhs_size);
1463 return tmp.EqualsImpl(rhs, rhs_size);
1464 }
1465
EndsWith(const Cord & rhs) const1466 bool Cord::EndsWith(const Cord& rhs) const {
1467 size_t my_size = size();
1468 size_t rhs_size = rhs.size();
1469
1470 if (my_size < rhs_size) return false;
1471
1472 Cord tmp(*this);
1473 tmp.RemovePrefix(my_size - rhs_size);
1474 return tmp.EqualsImpl(rhs, rhs_size);
1475 }
1476
1477 // --------------------------------------------------------------------
1478 // Misc.
1479
operator std::string() const1480 Cord::operator std::string() const {
1481 std::string s;
1482 absl::CopyCordToString(*this, &s);
1483 return s;
1484 }
1485
CopyCordToString(const Cord & src,std::string * dst)1486 void CopyCordToString(const Cord& src, std::string* dst) {
1487 if (!src.contents_.is_tree()) {
1488 src.contents_.CopyTo(dst);
1489 } else {
1490 absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1491 src.CopyToArraySlowPath(&(*dst)[0]);
1492 }
1493 }
1494
CopyToArraySlowPath(char * dst) const1495 void Cord::CopyToArraySlowPath(char* dst) const {
1496 assert(contents_.is_tree());
1497 absl::string_view fragment;
1498 if (GetFlatAux(contents_.tree(), &fragment)) {
1499 memcpy(dst, fragment.data(), fragment.size());
1500 return;
1501 }
1502 for (absl::string_view chunk : Chunks()) {
1503 memcpy(dst, chunk.data(), chunk.size());
1504 dst += chunk.size();
1505 }
1506 }
1507
// Advances the iterator to the next leaf chunk of the cord.
Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
  assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`");
  assert(bytes_remaining_ >= current_chunk_.size());
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children_.back();
  stack_of_right_children_.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    stack_of_right_children_.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Leaves are always FLAT or EXTERNAL.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length != 0);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}
1546
// Returns a cord holding the next `n` bytes and advances the iterator past
// them. Small reads are flattened into inline storage; larger reads share
// (ref) existing tree nodes instead of copying bytes.
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  Cord subcord;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = Ref(current_leaf_);
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.set_tree(VerifyTree(subnode));
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    // Only part of the leaf remains; wrap it in a substring node.
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf_, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.set_tree(VerifyTree(subnode));
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n));
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.set_tree(VerifyTree(subnode));
  return subcord;
}
1659
// Advances the iterator `n` bytes, skipping whole subtrees where possible.
// Called only when the target position lies at or beyond the end of the
// current chunk.
void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // The target position is `n` bytes into this leaf.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}
1717
operator [](size_t i) const1718 char Cord::operator[](size_t i) const {
1719 assert(i < size());
1720 size_t offset = i;
1721 const CordRep* rep = contents_.tree();
1722 if (rep == nullptr) {
1723 return contents_.data()[i];
1724 }
1725 while (true) {
1726 assert(rep != nullptr);
1727 assert(offset < rep->length);
1728 if (rep->tag >= FLAT) {
1729 // Get the "i"th character directly from the flat array.
1730 return rep->data[offset];
1731 } else if (rep->tag == EXTERNAL) {
1732 // Get the "i"th character from the external array.
1733 return rep->external()->base[offset];
1734 } else if (rep->tag == CONCAT) {
1735 // Recursively branch to the side of the concatenation that the "i"th
1736 // character is on.
1737 size_t left_length = rep->concat()->left->length;
1738 if (offset < left_length) {
1739 rep = rep->concat()->left;
1740 } else {
1741 offset -= left_length;
1742 rep = rep->concat()->right;
1743 }
1744 } else {
1745 // This must be a substring a node, so bypass it to get to the child.
1746 assert(rep->tag == SUBSTRING);
1747 offset += rep->substring()->start;
1748 rep = rep->substring()->child;
1749 }
1750 }
1751 }
1752
// Flattens a tree-backed cord into one contiguous buffer, re-points the
// cord at the new flat (or, for very large sizes, external) representation,
// and returns a view of the bytes.
absl::string_view Cord::FlattenSlowPath() {
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = NewFlat(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->data;
    CopyToArraySlowPath(new_buffer);
  } else {
    // External rep whose releaser frees the heap buffer allocated here.
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  // Swap in the new representation, releasing the old tree.
  Unref(contents_.tree());
  contents_.set_tree(new_rep);
  return absl::string_view(new_buffer, total_size);
}
1778
GetFlatAux(CordRep * rep,absl::string_view * fragment)1779 /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
1780 assert(rep != nullptr);
1781 if (rep->tag >= FLAT) {
1782 *fragment = absl::string_view(rep->data, rep->length);
1783 return true;
1784 } else if (rep->tag == EXTERNAL) {
1785 *fragment = absl::string_view(rep->external()->base, rep->length);
1786 return true;
1787 } else if (rep->tag == SUBSTRING) {
1788 CordRep* child = rep->substring()->child;
1789 if (child->tag >= FLAT) {
1790 *fragment =
1791 absl::string_view(child->data + rep->substring()->start, rep->length);
1792 return true;
1793 } else if (child->tag == EXTERNAL) {
1794 *fragment = absl::string_view(
1795 child->external()->base + rep->substring()->start, rep->length);
1796 return true;
1797 }
1798 }
1799 return false;
1800 }
1801
// Invokes `callback` on every leaf chunk of `rep`, left to right. Uses a
// fixed-size explicit stack of right children to avoid allocation; on
// overflow, recurses into the offending subtree.
/* static */ void Cord::ForEachChunkAux(
    absl::cord_internal::CordRep* rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  assert(rep != nullptr);
  int stack_pos = 0;
  constexpr int stack_max = 128;
  // Stack of right branches for tree traversal
  absl::cord_internal::CordRep* stack[stack_max];
  absl::cord_internal::CordRep* current_node = rep;
  while (true) {
    if (current_node->tag == CONCAT) {
      if (stack_pos == stack_max) {
        // There's no more room on our stack array to add another right branch,
        // and the idea is to avoid allocations, so call this function
        // recursively to navigate this subtree further. (This is not something
        // we expect to happen in practice).
        ForEachChunkAux(current_node, callback);

        // Pop the next right branch and iterate.
        current_node = stack[--stack_pos];
        continue;
      } else {
        // Save the right branch for later traversal and continue down the left
        // branch.
        stack[stack_pos++] = current_node->concat()->right;
        current_node = current_node->concat()->left;
        continue;
      }
    }
    // This is a leaf node, so invoke our callback.
    absl::string_view chunk;
    bool success = GetFlatAux(current_node, &chunk);
    assert(success);
    if (success) {
      callback(chunk);
    }
    if (stack_pos == 0) {
      // end of traversal
      return;
    }
    current_node = stack[--stack_pos];
  }
}
1845
// Writes a human-readable dump of the tree rooted at `rep` to `os`, one node
// per line: refcount, length, optional node address, a balance flag ('b' if
// IsRootBalanced(rep), else 'u'), then an indented tag-specific description.
// When `include_data` is true, node addresses and (for leaves) escaped data
// bytes are printed as well.
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) {
  const int kIndentStep = 1;
  int indent = 0;
  // `stack` holds deferred right children of CONCAT nodes; `indents` holds
  // the indent level to restore when each deferred child is resumed.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    // std::setw with an empty string emits `indent` spaces of padding.
    *os << " " << std::setw(indent) << "";
    if (rep->tag == CONCAT) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      // Defer the right child; record the indent it should resume at.
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->tag == SUBSTRING) {
      // SUBSTRING indents but does not push: it has exactly one child, so
      // traversal simply continues downward.
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf
      if (rep->tag == EXTERNAL) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else {
        *os << "FLAT cap=" << TagToLength(rep->tag) << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->data, rep->length));
        *os << "]\n";
      }
      // A leaf ends the current left spine; resume at the most recently
      // deferred right child, restoring its recorded indent.
      if (stack.empty()) break;
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  // Every pushed indent must have been consumed by a matching leaf pop.
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}
1890
ReportError(CordRep * root,CordRep * node)1891 static std::string ReportError(CordRep* root, CordRep* node) {
1892 std::ostringstream buf;
1893 buf << "Error at node " << node << " in:";
1894 DumpNode(root, true, &buf);
1895 return buf.str();
1896 }
1897
VerifyNode(CordRep * root,CordRep * start_node,bool full_validation)1898 static bool VerifyNode(CordRep* root, CordRep* start_node,
1899 bool full_validation) {
1900 absl::InlinedVector<CordRep*, 2> worklist;
1901 worklist.push_back(start_node);
1902 do {
1903 CordRep* node = worklist.back();
1904 worklist.pop_back();
1905
1906 ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
1907 if (node != root) {
1908 ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
1909 }
1910
1911 if (node->tag == CONCAT) {
1912 ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
1913 ReportError(root, node));
1914 ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
1915 ReportError(root, node));
1916 ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
1917 node->concat()->right->length),
1918 ReportError(root, node));
1919 if (full_validation) {
1920 worklist.push_back(node->concat()->right);
1921 worklist.push_back(node->concat()->left);
1922 }
1923 } else if (node->tag >= FLAT) {
1924 ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag),
1925 ReportError(root, node));
1926 } else if (node->tag == EXTERNAL) {
1927 ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
1928 ReportError(root, node));
1929 } else if (node->tag == SUBSTRING) {
1930 ABSL_INTERNAL_CHECK(
1931 node->substring()->start < node->substring()->child->length,
1932 ReportError(root, node));
1933 ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
1934 node->substring()->child->length,
1935 ReportError(root, node));
1936 }
1937 } while (!worklist.empty());
1938 return true;
1939 }
1940
// Traverses the tree and computes the total memory allocated.  Leaf nodes
// (flat/external) are accounted directly by RepMemoryUsageLeaf as they are
// encountered, so only interior nodes (concat/substring) are ever iterated
// or stashed on the stack.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    const CordRep* next_node = nullptr;

    if (cur_node->tag == CONCAT) {
      total_mem_usage += sizeof(CordRepConcat);
      // RepMemoryUsageLeaf accumulates a leaf child's usage as a side effect
      // and returns true; a false return means the child is interior and
      // must be walked.
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        if (next_node) {
          // Both children are interior: defer the left and descend into the
          // right first.
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else {
      // Since cur_node is not a leaf or a concat node it must be a substring.
      assert(cur_node->tag == SUBSTRING);
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        // Child was a leaf and has been accounted; nothing left to descend.
        next_node = nullptr;
      }
    }

    if (!next_node) {
      if (tree_stack.empty()) {
        // All interior nodes visited; the accumulated total is complete.
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}
1992
operator <<(std::ostream & out,const Cord & cord)1993 std::ostream& operator<<(std::ostream& out, const Cord& cord) {
1994 for (absl::string_view chunk : cord.Chunks()) {
1995 out.write(chunk.data(), chunk.size());
1996 }
1997 return out;
1998 }
1999
2000 namespace strings_internal {
FlatOverhead()2001 size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; }
MaxFlatLength()2002 size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; }
FlatTagToLength(uint8_t tag)2003 size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
2004 return TagToLength(tag);
2005 }
LengthToTag(size_t s)2006 uint8_t CordTestAccess::LengthToTag(size_t s) {
2007 ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
2008 return AllocatedSizeToTag(s + kFlatOverhead);
2009 }
SizeofCordRepConcat()2010 size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
SizeofCordRepExternal()2011 size_t CordTestAccess::SizeofCordRepExternal() {
2012 return sizeof(CordRepExternal);
2013 }
SizeofCordRepSubstring()2014 size_t CordTestAccess::SizeofCordRepSubstring() {
2015 return sizeof(CordRepSubstring);
2016 }
2017 } // namespace strings_internal
2018 ABSL_NAMESPACE_END
2019 } // namespace absl
2020