1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/cord.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cstddef>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <iomanip>
23 #include <iostream>
24 #include <limits>
25 #include <ostream>
26 #include <sstream>
27 #include <type_traits>
28 #include <unordered_set>
29 #include <vector>
30
31 #include "absl/base/casts.h"
32 #include "absl/base/internal/raw_logging.h"
33 #include "absl/base/macros.h"
34 #include "absl/base/port.h"
35 #include "absl/container/fixed_array.h"
36 #include "absl/container/inlined_vector.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/internal/cord_internal.h"
39 #include "absl/strings/internal/cord_rep_flat.h"
40 #include "absl/strings/internal/cord_rep_ring.h"
41 #include "absl/strings/internal/cordz_statistics.h"
42 #include "absl/strings/internal/cordz_update_scope.h"
43 #include "absl/strings/internal/cordz_update_tracker.h"
44 #include "absl/strings/internal/resize_uninitialized.h"
45 #include "absl/strings/str_cat.h"
46 #include "absl/strings/str_format.h"
47 #include "absl/strings/str_join.h"
48 #include "absl/strings/string_view.h"
49
50 namespace absl {
51 ABSL_NAMESPACE_BEGIN
52
53 using ::absl::cord_internal::CordRep;
54 using ::absl::cord_internal::CordRepConcat;
55 using ::absl::cord_internal::CordRepExternal;
56 using ::absl::cord_internal::CordRepFlat;
57 using ::absl::cord_internal::CordRepRing;
58 using ::absl::cord_internal::CordRepSubstring;
59 using ::absl::cord_internal::CordzUpdateTracker;
60 using ::absl::cord_internal::InlineData;
61 using ::absl::cord_internal::kMaxFlatLength;
62 using ::absl::cord_internal::kMinFlatLength;
63
64 using ::absl::cord_internal::CONCAT;
65 using ::absl::cord_internal::EXTERNAL;
66 using ::absl::cord_internal::FLAT;
67 using ::absl::cord_internal::RING;
68 using ::absl::cord_internal::SUBSTRING;
69
70 using ::absl::cord_internal::kInlinedVectorSize;
71 using ::absl::cord_internal::kMaxBytesToCopy;
72
// Returns the n-th element of the Fibonacci-like sequence seeded with
// `a` (element 0) and `b` (element 1). With the default seeds this is the
// standard Fibonacci sequence. Usable at compile time (constexpr).
constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
  while (n != 0) {
    const uint64_t next = a + b;
    a = b;
    b = next;
    --n;
  }
  return a;
}

static_assert(Fibonacci(63) == 6557470319842,
              "Fibonacci values computed incorrectly");
79
// Minimum length required for a given depth tree -- a tree is considered
// balanced if
//      length(t) >= min_length[depth(t)]
// The root node depth is allowed to become twice as large to reduce rebalancing
// for larger strings (see IsRootBalanced).
static constexpr uint64_t min_length[] = {
    Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5),
    Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9),
    Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13),
    Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17),
    Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21),
    Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25),
    Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29),
    Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33),
    Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37),
    Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41),
    Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45),
    Fibonacci(46), Fibonacci(47),
    0xffffffffffffffffull,  // Avoid overflow
};

// Number of entries in min_length[]. Depths beyond this bound are always
// treated as unbalanced (see IsRootBalanced).
static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
102
// Returns true if the experimental CordRepRing representation is enabled
// via the global `cord_ring_buffer_enabled` flag. Relaxed ordering is
// sufficient: this is a best-effort feature toggle, not a synchronization
// point.
static inline bool cord_ring_enabled() {
  return cord_internal::cord_ring_buffer_enabled.load(
      std::memory_order_relaxed);
}
107
IsRootBalanced(CordRep * node)108 static inline bool IsRootBalanced(CordRep* node) {
109 if (node->tag != CONCAT) {
110 return true;
111 } else if (node->concat()->depth() <= 15) {
112 return true;
113 } else if (node->concat()->depth() > kMinLengthSize) {
114 return false;
115 } else {
116 // Allow depth to become twice as large as implied by fibonacci rule to
117 // reduce rebalancing for larger strings.
118 return (node->length >= min_length[node->concat()->depth() / 2]);
119 }
120 }
121
// Forward declarations for helpers defined later in this file.
static CordRep* Rebalance(CordRep* node);
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
                     int indent = 0);
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation);

// Returns `node` unchanged, after optionally validating the tree rooted at
// it. In release builds (NDEBUG) the asserts compile away and this is a
// no-op pass-through.
static inline CordRep* VerifyTree(CordRep* node) {
  // Verification is expensive, so only do it in debug mode.
  // Even in debug mode we normally do only light validation.
  // If you are debugging Cord itself, you should define the
  // macro EXTRA_CORD_VALIDATION, e.g. by adding
  // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
#ifdef EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
#else   // EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
#endif  // EXTRA_CORD_VALIDATION
  // Keep VerifyNode referenced so it is not flagged as unused when the
  // asserts above are compiled out.
  static_cast<void>(&VerifyNode);

  return node;
}
143
144 // Return the depth of a node
Depth(const CordRep * rep)145 static int Depth(const CordRep* rep) {
146 if (rep->tag == CONCAT) {
147 return rep->concat()->depth();
148 } else {
149 return 0;
150 }
151 }
152
// Points `concat` at children `left` and `right`, and updates the derived
// fields: length becomes the sum of the children's lengths, and depth one
// more than the deeper child. Does not adjust any refcounts.
static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
                              CordRep* right) {
  concat->left = left;
  concat->right = right;

  concat->length = left->length + right->length;
  concat->set_depth(1 + std::max(Depth(left), Depth(right)));
}
161
162 // Create a concatenation of the specified nodes.
163 // Does not change the refcounts of "left" and "right".
164 // The returned node has a refcount of 1.
RawConcat(CordRep * left,CordRep * right)165 static CordRep* RawConcat(CordRep* left, CordRep* right) {
166 // Avoid making degenerate concat nodes (one child is empty)
167 if (left == nullptr) return right;
168 if (right == nullptr) return left;
169 if (left->length == 0) {
170 CordRep::Unref(left);
171 return right;
172 }
173 if (right->length == 0) {
174 CordRep::Unref(right);
175 return left;
176 }
177
178 CordRepConcat* rep = new CordRepConcat();
179 rep->tag = CONCAT;
180 SetConcatChildren(rep, left, right);
181
182 return rep;
183 }
184
Concat(CordRep * left,CordRep * right)185 static CordRep* Concat(CordRep* left, CordRep* right) {
186 CordRep* rep = RawConcat(left, right);
187 if (rep != nullptr && !IsRootBalanced(rep)) {
188 rep = Rebalance(rep);
189 }
190 return VerifyTree(rep);
191 }
192
193 // Make a balanced tree out of an array of leaf nodes.
MakeBalancedTree(CordRep ** reps,size_t n)194 static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
195 // Make repeated passes over the array, merging adjacent pairs
196 // until we are left with just a single node.
197 while (n > 1) {
198 size_t dst = 0;
199 for (size_t src = 0; src < n; src += 2) {
200 if (src + 1 < n) {
201 reps[dst] = Concat(reps[src], reps[src + 1]);
202 } else {
203 reps[dst] = reps[src];
204 }
205 dst++;
206 }
207 n = dst;
208 }
209
210 return reps[0];
211 }
212
// Creates a single flat node holding a copy of `data[0, length)`, sized to
// leave room for `alloc_hint` additional bytes of future appends.
static CordRepFlat* CreateFlat(const char* data, size_t length,
                               size_t alloc_hint) {
  CordRepFlat* flat = CordRepFlat::New(length + alloc_hint);
  flat->length = length;
  memcpy(flat->Data(), data, length);
  return flat;
}
220
// Creates a new flat or ringbuffer out of the specified array.
// The returned node has a refcount of 1.
static CordRep* RingNewTree(const char* data, size_t length,
                            size_t alloc_hint) {
  // Short data fits in a single flat node; no ring is needed.
  if (length <= kMaxFlatLength) {
    return CreateFlat(data, length, alloc_hint);
  }
  // First flat takes the initial kMaxFlatLength bytes; the remainder is
  // appended to a ring created with capacity for `extra` more entries
  // (one per additional kMaxFlatLength-sized chunk, rounded up).
  CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0);
  data += kMaxFlatLength;
  length -= kMaxFlatLength;
  size_t extra = (length - 1) / kMaxFlatLength + 1;
  auto* root = CordRepRing::Create(flat, extra);
  return CordRepRing::Append(root, {data, length}, alloc_hint);
}
235
// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) {
  if (length == 0) return nullptr;
  if (cord_ring_enabled()) {
    return RingNewTree(data, length, alloc_hint);
  }
  // Chop the data into at most kMaxFlatLength-sized flat nodes, then
  // combine them into a balanced concat tree.
  absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
  size_t n = 0;
  do {
    const size_t len = std::min(length, kMaxFlatLength);
    CordRepFlat* rep = CordRepFlat::New(len + alloc_hint);
    rep->length = len;
    memcpy(rep->Data(), data, len);
    reps[n++] = VerifyTree(rep);
    data += len;
    length -= len;
  } while (length != 0);
  return MakeBalancedTree(reps.data(), n);
}
256
namespace cord_internal {

// Initializes `rep` as an EXTERNAL node viewing `data`. Requires non-empty
// data; the caller guarantees the viewed bytes outlive the rep (the rep's
// releaser is responsible for cleanup when the last reference drops).
void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
  assert(!data.empty());
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  VerifyTree(rep);
}

}  // namespace cord_internal
268
NewSubstring(CordRep * child,size_t offset,size_t length)269 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
270 // Never create empty substring nodes
271 if (length == 0) {
272 CordRep::Unref(child);
273 return nullptr;
274 } else {
275 CordRepSubstring* rep = new CordRepSubstring();
276 assert((offset + length) <= child->length);
277 rep->length = length;
278 rep->tag = SUBSTRING;
279 rep->start = offset;
280 rep->child = child;
281 return VerifyTree(rep);
282 }
283 }
284
// Creates a CordRep from the provided string. If the string is large enough,
// and not wasteful, we move the string into an external cord rep, preserving
// the already allocated string contents.
// Requires the provided string length to be larger than `kMaxInline`.
static CordRep* CordRepFromString(std::string&& src) {
  assert(src.length() > cord_internal::kMaxInline);
  if (
      // String is short: copy data to avoid external block overhead.
      src.size() <= kMaxBytesToCopy ||
      // String is wasteful: copy data to avoid pinning too much unused memory.
      src.size() < src.capacity() / 2
  ) {
    return NewTree(src.data(), src.size(), 0);
  }

  // The releaser's only job is to own the moved-in string so its buffer
  // stays alive for the lifetime of the external rep; release is a no-op.
  struct StringReleaser {
    void operator()(absl::string_view /* data */) {}
    std::string data;
  };
  const absl::string_view original_data = src;
  auto* rep =
      static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
          absl::cord_internal::NewExternalRep(original_data,
                                              StringReleaser{std::move(src)}));
  // Moving src may have invalidated its data pointer, so adjust it.
  rep->base = rep->template get<0>().data.data();
  return rep;
}
313
// --------------------------------------------------------------------
// Cord::InlineRep functions

// Out-of-line definition for the in-class constant (pre-C++17 ODR-use).
constexpr unsigned char Cord::InlineRep::kMaxInline;

// Copies `n` bytes (n <= kMaxInline) from `data` into the inline buffer
// and records the new inline size. If `nullify_tail` is true, the unused
// tail of the buffer is zero-filled as well.
inline void Cord::InlineRep::set_data(const char* data, size_t n,
                                      bool nullify_tail) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");

  cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail);
  set_inline_size(n);
}
326
// Resets this rep to empty, reserves `n` inline bytes (n <= kMaxInline),
// and returns a pointer to the writable inline buffer for the caller to
// fill.
inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  ResetToEmpty();
  set_inline_size(n);
  return data_.as_chars();
}
333
// Shrinks the inline representation by `n` bytes (n <= current inline
// size), zero-filling the vacated tail bytes.
inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = inline_size();
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_.as_chars() + tag, 0, n);
  set_inline_size(static_cast<char>(tag));
}
342
// Removes the first `n` inline bytes by shifting the remainder forward,
// then reducing the size (which also zeroes the freed tail).
inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n,
                              inline_size() - n);
  reduce_size(n);
}
348
349 // Returns `rep` converted into a CordRepRing.
350 // Directly returns `rep` if `rep` is already a CordRepRing.
ForceRing(CordRep * rep,size_t extra)351 static CordRepRing* ForceRing(CordRep* rep, size_t extra) {
352 return (rep->tag == RING) ? rep->ring() : CordRepRing::Create(rep, extra);
353 }
354
// Appends `tree` to a cord currently holding only inline data; takes
// ownership of `tree`. Existing inline bytes are first copied into a flat
// node so they precede the appended tree.
void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
                                          MethodIdentifier method) {
  assert(!is_tree());
  if (!data_.is_empty()) {
    CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
    if (cord_ring_enabled()) {
      tree = CordRepRing::Append(CordRepRing::Create(flat, 1), tree);
    } else {
      tree = Concat(flat, tree);
    }
  }
  EmplaceTree(tree, method);
}
368
// Appends `tree` to a cord that already holds a tree root; takes ownership
// of `tree`.
void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) {
  assert(is_tree());
  // Scope notifies cordz sampling of the update for its duration.
  const CordzUpdateScope scope(data_.cordz_info(), method);
  if (cord_ring_enabled()) {
    tree = CordRepRing::Append(ForceRing(data_.as_tree(), 1), tree);
  } else {
    tree = Concat(data_.as_tree(), tree);
  }
  SetTree(tree, scope);
}
379
AppendTree(CordRep * tree,MethodIdentifier method)380 void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) {
381 if (tree == nullptr) return;
382 if (data_.is_tree()) {
383 AppendTreeToTree(tree, method);
384 } else {
385 AppendTreeToInlined(tree, method);
386 }
387 }
388
// Prepends `tree` to a cord currently holding only inline data; takes
// ownership of `tree`. Existing inline bytes are first copied into a flat
// node so they follow the prepended tree.
void Cord::InlineRep::PrependTreeToInlined(CordRep* tree,
                                           MethodIdentifier method) {
  assert(!is_tree());
  if (!data_.is_empty()) {
    CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
    if (cord_ring_enabled()) {
      tree = CordRepRing::Prepend(CordRepRing::Create(flat, 1), tree);
    } else {
      tree = Concat(tree, flat);
    }
  }
  EmplaceTree(tree, method);
}
402
// Prepends `tree` to a cord that already holds a tree root; takes
// ownership of `tree`.
void Cord::InlineRep::PrependTreeToTree(CordRep* tree,
                                        MethodIdentifier method) {
  assert(is_tree());
  // Scope notifies cordz sampling of the update for its duration.
  const CordzUpdateScope scope(data_.cordz_info(), method);
  if (cord_ring_enabled()) {
    tree = CordRepRing::Prepend(ForceRing(data_.as_tree(), 1), tree);
  } else {
    tree = Concat(tree, data_.as_tree());
  }
  SetTree(tree, scope);
}
414
PrependTree(CordRep * tree,MethodIdentifier method)415 void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) {
416 assert(tree != nullptr);
417 if (data_.is_tree()) {
418 PrependTreeToTree(tree, method);
419 } else {
420 PrependTreeToInlined(tree, method);
421 }
422 }
423
// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Ring reps manage their own append buffer; only usable when unshared.
  if (root->tag == RING && root->refcount.IsOne()) {
    Span<char> span = root->ring()->GetAppendBuffer(max_length);
    if (!span.empty()) {
      *region = span.data();
      *size = span.size();
      return true;
    }
  }

  // Search down the right-hand path for a non-full FLAT node.
  CordRep* dst = root;
  while (dst->tag == CONCAT && dst->refcount.IsOne()) {
    dst = dst->concat()->right;
  }

  // The leaf must be an unshared flat for in-place writes to be safe.
  if (dst->tag < FLAT || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = dst->flat()->Capacity();
  if (in_use == capacity) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  size_t size_increase = std::min(capacity - in_use, max_length);

  // We need to update the length fields for all nodes, including the leaf node.
  for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
    rep->length += size_increase;
  }
  dst->length += size_increase;

  *region = dst->flat()->Data() + in_use;
  *size = size_increase;
  return true;
}
471
// Returns a writable region at the end of this cord, growing the
// representation if necessary. When `has_length` is true the region is
// exactly `length` bytes; otherwise it is at least 1 byte and at most the
// available capacity. The region address is written to `*region` and its
// size to `*size`.
template <bool has_length>
void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
                                      size_t length) {
  auto constexpr method = CordzUpdateTracker::kGetAppendRegion;

  CordRep* root = tree();
  size_t sz = root ? root->length : inline_size();
  if (root == nullptr) {
    // Try to satisfy the request from the inline buffer first.
    size_t available = kMaxInline - sz;
    if (available >= (has_length ? length : 1)) {
      *region = data_.as_chars() + sz;
      *size = has_length ? length : available;
      set_inline_size(has_length ? sz + length : kMaxInline);
      return;
    }
  }

  size_t extra = has_length ? length : (std::max)(sz, kMinFlatLength);
  CordRep* rep = root ? root : MakeFlatWithExtraCapacity(extra);
  CordzUpdateScope scope(root ? data_.cordz_info() : nullptr, method);
  // Reuse spare capacity at the rightmost flat node if there is any.
  if (PrepareAppendRegion(rep, region, size, length)) {
    CommitTree(root, rep, scope, method);
    return;
  }

  // Allocate new node.
  CordRepFlat* new_node = CordRepFlat::New(extra);
  new_node->length = std::min(new_node->Capacity(), length);
  *region = new_node->Data();
  *size = new_node->length;

  if (cord_ring_enabled()) {
    rep = CordRepRing::Append(ForceRing(rep, 1), new_node);
  } else {
    rep = Concat(rep, new_node);
  }
  CommitTree(root, rep, scope, method);
}
510
// If the rep is a leaf, this will increment the value at total_mem_usage and
// will return true. Returns false (and adds nothing) for interior node
// types, which the caller is expected to recurse into.
static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
  if (rep->tag >= FLAT) {
    *total_mem_usage += rep->flat()->AllocatedSize();
    return true;
  }
  if (rep->tag == EXTERNAL) {
    // We don't know anything about the embedded / bound data, but we can safely
    // assume it is 'at least' a word / pointer to data. In the future we may
    // choose to use the 'data' byte as a tag to identify the types of some
    // well-known externals, such as a std::string instance.
    *total_mem_usage +=
        sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length;
    return true;
  }
  return false;
}
529
// Slow-path copy assignment: used when at least one of `*this` / `src`
// holds a tree.
void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  assert(&src != this);
  assert(is_tree() || src.is_tree());
  auto constexpr method = CordzUpdateTracker::kAssignCord;
  if (ABSL_PREDICT_TRUE(!is_tree())) {
    // Destination is inline: simply adopt a new reference to src's tree.
    EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method);
    return;
  }

  CordRep* tree = as_tree();
  if (CordRep* src_tree = src.tree()) {
    // Leave any existing `cordz_info` in place, and let MaybeTrackCord()
    // decide if this cord should be (or remains to be) sampled or not.
    data_.set_tree(CordRep::Ref(src_tree));
    CordzInfo::MaybeTrackCord(data_, src.data_, method);
  } else {
    CordzInfo::MaybeUntrackCord(data_.cordz_info());
    data_ = src.data_;
  }
  // Release the reference to the previously held tree last.
  CordRep::Unref(tree);
}
551
UnrefTree()552 void Cord::InlineRep::UnrefTree() {
553 if (is_tree()) {
554 CordzInfo::MaybeUntrackCord(data_.cordz_info());
555 CordRep::Unref(tree());
556 }
557 }
558
559 // --------------------------------------------------------------------
560 // Constructors and destructors
561
// Constructs a cord holding a copy of `src`, storing the bytes inline when
// they fit (size <= kMaxInline), otherwise building a tree.
Cord::Cord(absl::string_view src, MethodIdentifier method)
    : contents_(InlineData::kDefaultInit) {
  const size_t n = src.size();
  if (n <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), n, true);
  } else {
    CordRep* rep = NewTree(src.data(), n, 0);
    contents_.EmplaceTree(rep, method);
  }
}
572
// Constructs a cord from a string-like rvalue. Small strings are copied
// inline; larger ones go through CordRepFromString, which may move the
// string's allocation into an external rep.
template <typename T, Cord::EnableIfString<T>>
Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) {
  if (src.size() <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), src.size(), true);
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
  }
}

// Explicit instantiation for std::string&&.
template Cord::Cord(std::string&& src);
584
// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
// Releases the tree reference and any cordz sampling state.
void Cord::DestroyCordSlow() {
  assert(contents_.is_tree());
  CordzInfo::MaybeUntrackCord(contents_.cordz_info());
  CordRep::Unref(VerifyTree(contents_.as_tree()));
}
592
593 // --------------------------------------------------------------------
594 // Mutators
595
Clear()596 void Cord::Clear() {
597 if (CordRep* tree = contents_.clear()) {
598 CordRep::Unref(tree);
599 }
600 }
601
// Assigns a string too large for the copy fast path (size >
// kMaxBytesToCopy), possibly moving its allocation into an external rep
// via CordRepFromString.
Cord& Cord::AssignLargeString(std::string&& src) {
  auto constexpr method = CordzUpdateTracker::kAssignString;
  assert(src.size() > kMaxBytesToCopy);
  CordRep* rep = CordRepFromString(std::move(src));
  if (CordRep* tree = contents_.tree()) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    contents_.SetTree(rep, scope);
    CordRep::Unref(tree);
  } else {
    contents_.EmplaceTree(rep, method);
  }
  return *this;
}
615
// Replaces this cord's contents with a copy of `src`, reusing an existing
// unshared flat node when possible.
Cord& Cord::operator=(absl::string_view src) {
  auto constexpr method = CordzUpdateTracker::kAssignString;
  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_, which is somewhat subtle:
    // - MaybeUntrackCord must be called before Unref(tree).
    // - MaybeUntrackCord must be called before set_data() clobbers cordz_info.
    // - set_data() must be called before Unref(tree) as it may reference tree.
    if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info());
    contents_.set_data(data, length, true);
    if (tree != nullptr) CordRep::Unref(tree);
    return *this;
  }
  if (tree != nullptr) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    if (tree->tag >= FLAT && tree->flat()->Capacity() >= length &&
        tree->refcount.IsOne()) {
      // Copy in place if the existing FLAT node is reusable.
      // memmove (not memcpy): presumably `src` may alias this cord's own
      // flat buffer — memmove is safe either way.
      memmove(tree->flat()->Data(), data, length);
      tree->length = length;
      VerifyTree(tree);
      return *this;
    }
    contents_.SetTree(NewTree(data, length, 0), scope);
    CordRep::Unref(tree);
  } else {
    contents_.EmplaceTree(NewTree(data, length, 0), method);
  }
  return *this;
}
648
// TODO(sanjay): Move to Cord::InlineRep section of file. For now,
// we keep it here to make diffs easier.

// Appends `src` bytes, preferring in order: copying into spare capacity at
// the tree's rightmost flat, copying into the inline buffer, and finally
// growing the representation with newly allocated node(s).
void Cord::InlineRep::AppendArray(absl::string_view src,
                                  MethodIdentifier method) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.

  size_t appended = 0;
  CordRep* rep = tree();
  const CordRep* const root = rep;
  // Only an existing tree can carry cordz sampling state.
  CordzUpdateScope scope(root ? cordz_info() : nullptr, method);
  if (root != nullptr) {
    char* region;
    if (PrepareAppendRegion(rep, &region, &appended, src.size())) {
      memcpy(region, src.data(), appended);
    }
  } else {
    // Try to fit in the inline buffer if possible.
    size_t inline_length = inline_size();
    if (src.size() <= kMaxInline - inline_length) {
      // Append new data to embedded array
      memcpy(data_.as_chars() + inline_length, src.data(), src.size());
      set_inline_size(inline_length + src.size());
      return;
    }

    // Note: we don't concern ourselves if src aliases data stored in the
    // inlined data of 'this', as we update the InlineData only at the end.
    // We are going from an inline size to beyond inline size. Make the new size
    // either double the inlined size, or the added size + 10%.
    const size_t size1 = inline_length * 2 + src.size();
    // NOTE(review): size2 computes inline_length + 10% of src.size(); the
    // comment above says "the added size + 10%" — confirm which growth
    // policy is intended (size1 dominates in most cases anyway).
    const size_t size2 = inline_length + src.size() / 10;
    rep = CordRepFlat::New(std::max<size_t>(size1, size2));
    appended = std::min(src.size(), rep->flat()->Capacity() - inline_length);
    memcpy(rep->flat()->Data(), data_.as_chars(), inline_length);
    memcpy(rep->flat()->Data() + inline_length, src.data(), appended);
    rep->length = inline_length + appended;
  }

  src.remove_prefix(appended);
  if (src.empty()) {
    CommitTree(root, rep, scope, method);
    return;
  }

  if (cord_ring_enabled()) {
    // Ring mode: ensure a ring root with room for the remaining chunks.
    rep = ForceRing(rep, (src.size() - 1) / kMaxFlatLength + 1);
    rep = CordRepRing::Append(rep->ring(), src);
  } else {
    // Use new block(s) for any remaining bytes that were not handled above.
    // Alloc extra memory only if the right child of the root of the new tree
    // is going to be a FLAT node, which will permit further inplace appends.
    size_t length = src.size();
    if (src.size() < kMaxFlatLength) {
      // The new length is either
      // - old size + 10%
      // - old_size + src.size()
      // This will cause a reasonable conservative step-up in size that is
      // still large enough to avoid excessive amounts of small fragments
      // being added.
      length = std::max<size_t>(rep->length / 10, src.size());
    }
    rep = Concat(rep, NewTree(src.data(), src.size(), length - src.size()));
  }
  CommitTree(root, rep, scope, method);
}
714
// Returns this cord's tree with an added reference (lvalue overload);
// the cord itself is left unchanged.
inline CordRep* Cord::TakeRep() const& {
  return CordRep::Ref(contents_.tree());
}

// Steals this cord's tree without touching the refcount (rvalue overload);
// leaves the cord empty.
inline CordRep* Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}
724
// Shared implementation behind Append(const Cord&) and Append(Cord&&).
// `C` deduces to `const Cord&` or `Cord`; an rvalue source may have its
// tree stolen via the rvalue TakeRep overload.
template <typename C>
inline void Cord::AppendImpl(C&& src) {
  auto constexpr method = CordzUpdateTracker::kAppendCord;
  if (empty()) {
    // Since destination is empty, we can avoid allocating a node,
    if (src.contents_.is_tree()) {
      // by taking the tree directly
      CordRep* rep = std::forward<C>(src).TakeRep();
      contents_.EmplaceTree(rep, method);
    } else {
      // or copying over inline data
      contents_.data_ = src.contents_.data_;
    }
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray({src.contents_.data(), src_size}, method);
      return;
    }
    if (src_tree->tag >= FLAT) {
      // src tree just has one flat node.
      contents_.AppendArray({src_tree->flat()->Data(), src_size}, method);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize)
  CordRep* rep = std::forward<C>(src).TakeRep();
  contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord);
}
771
// Copy-append: shares src's tree or copies its bytes as appropriate.
void Cord::Append(const Cord& src) { AppendImpl(src); }

// Move-append: may steal src's tree (see the rvalue TakeRep overload).
void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
775
// Appends a string-like rvalue: small strings are copied; larger ones may
// have their allocation moved into an external rep.
template <typename T, Cord::EnableIfString<T>>
void Cord::Append(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Append(absl::string_view(src));
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.AppendTree(rep, CordzUpdateTracker::kAppendString);
  }
}

// Explicit instantiation for std::string&&.
template void Cord::Append(std::string&& src);
787
// Prepends `src` to this cord, sharing src's tree (with an added
// reference) when it has one, otherwise prepending its inline bytes.
void Cord::Prepend(const Cord& src) {
  CordRep* src_tree = src.contents_.tree();
  if (src_tree != nullptr) {
    CordRep::Ref(src_tree);
    contents_.PrependTree(src_tree, CordzUpdateTracker::kPrependCord);
    return;
  }

  // `src` cord is inlined.
  absl::string_view src_contents(src.contents_.data(), src.contents_.size());
  return Prepend(src_contents);
}
800
// Prepends `src` bytes, using the inline buffer when the result still fits,
// otherwise prepending a newly built tree.
void Cord::Prepend(absl::string_view src) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
  if (!contents_.is_tree()) {
    size_t cur_size = contents_.inline_size();
    if (cur_size + src.size() <= InlineRep::kMaxInline) {
      // Use embedded storage.
      // Assemble the result in a zero-initialized scratch buffer first,
      // avoiding overlapping copies within the inline storage.
      char data[InlineRep::kMaxInline + 1] = {0};
      memcpy(data, src.data(), src.size());
      memcpy(data + src.size(), contents_.data(), cur_size);
      memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1);
      contents_.set_inline_size(cur_size + src.size());
      return;
    }
  }
  CordRep* rep = NewTree(src.data(), src.size(), 0);
  contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
}
818
// Prepends a string-like rvalue: small strings are copied; larger ones may
// have their allocation moved into an external rep.
template <typename T, Cord::EnableIfString<T>>
inline void Cord::Prepend(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Prepend(absl::string_view(src));
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
  }
}

// Explicit instantiation for std::string&&.
template void Cord::Prepend(std::string&& src);
830
// Returns a tree equal to `node` with its first `n` bytes removed, or
// nullptr if nothing remains. Does not consume the caller's reference on
// `node`; the returned tree holds its own references.
static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return CordRep::Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;

  // Walk down the tree, keeping fully retained right subtrees on a stack
  // and discarding fully removed left subtrees.
  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->left->length) {
      // Push right to stack, descend left.
      rhs_stack.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Drop left, descend right.
      n -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }
  assert(n <= node->length);

  if (n == 0) {
    CordRep::Ref(node);
  } else {
    size_t start = n;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Consider in-place update of node, similar to in RemoveSuffixFrom().
      start += node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(CordRep::Ref(node), start, len);
  }
  // Reassemble: concat the partial leaf with the retained right subtrees.
  while (!rhs_stack.empty()) {
    node = Concat(node, CordRep::Ref(rhs_stack.back()));
    rhs_stack.pop_back();
  }
  return node;
}
868
// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
// exception that removing a suffix has an optimization where a node may be
// edited in place iff that node and all its ancestors have a refcount of 1.
static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return CordRep::Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
  bool inplace_ok = node->refcount.IsOne();

  // Walk down the tree, keeping fully retained left subtrees on a stack
  // and discarding fully removed right subtrees. Track whether every node
  // on the path is uniquely owned (allowing the in-place edit below).
  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->right->length) {
      // Push left to stack, descend right.
      lhs_stack.push_back(node->concat()->left);
      node = node->concat()->right;
    } else {
      // Drop right, descend left.
      n -= node->concat()->right->length;
      node = node->concat()->left;
    }
    inplace_ok = inplace_ok && node->refcount.IsOne();
  }
  assert(n <= node->length);

  if (n == 0) {
    CordRep::Ref(node);
  } else if (inplace_ok && node->tag != EXTERNAL) {
    // Consider making a new buffer if the current node capacity is much
    // larger than the new length.
    CordRep::Ref(node);
    node->length -= n;
  } else {
    size_t start = 0;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      start = node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(CordRep::Ref(node), start, len);
  }
  // Reassemble: concat the retained left subtrees with the partial leaf.
  while (!lhs_stack.empty()) {
    node = Concat(CordRep::Ref(lhs_stack.back()), node);
    lhs_stack.pop_back();
  }
  return node;
}
915
// Removes the first `n` bytes of the Cord. CHECK-fails if n > size().
void Cord::RemovePrefix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested prefix size ", n,
                                   " exceeds Cord's size ", size()));
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // Inline representation: shift the inline bytes directly.
    contents_.remove_prefix(n);
  } else {
    auto constexpr method = CordzUpdateTracker::kRemovePrefix;
    CordzUpdateScope scope(contents_.cordz_info(), method);
    if (tree->tag == RING) {
      tree = CordRepRing::RemovePrefix(tree->ring(), n);
    } else {
      CordRep* newrep = RemovePrefixFrom(tree, n);
      // RemovePrefixFrom() returned its own reference; release ours on the
      // old tree.
      CordRep::Unref(tree);
      tree = VerifyTree(newrep);
    }
    // `tree` may be nullptr here when the entire contents were removed.
    contents_.SetTreeOrEmpty(tree, scope);
  }
}
936
// Removes the last `n` bytes of the Cord. CHECK-fails if n > size().
void Cord::RemoveSuffix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested suffix size ", n,
                                   " exceeds Cord's size ", size()));
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // Inline representation: just shrink the recorded size.
    contents_.reduce_size(n);
  } else {
    auto constexpr method = CordzUpdateTracker::kRemoveSuffix;
    CordzUpdateScope scope(contents_.cordz_info(), method);
    if (tree->tag == RING) {
      tree = CordRepRing::RemoveSuffix(tree->ring(), n);
    } else {
      CordRep* newrep = RemoveSuffixFrom(tree, n);
      // RemoveSuffixFrom() returned its own reference; release ours on the
      // old tree.
      CordRep::Unref(tree);
      tree = VerifyTree(newrep);
    }
    // `tree` may be nullptr here when the entire contents were removed.
    contents_.SetTreeOrEmpty(tree, scope);
  }
}
957
// Work item for NewSubRange().
struct SubRange {
  SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
      : node(a_node), pos(a_pos), n(a_n) {}
  CordRep* node;  // nullptr means concat last 2 results.
  size_t pos;     // Byte offset of the subrange within `node`.
  size_t n;       // Length in bytes of the subrange.
};
966
// Returns a new tree referencing the byte range [pos, pos + n) of `node`.
// Implemented with an explicit work list instead of recursion; a work item
// with a null node is a marker meaning "concatenate the last two results".
static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
  absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
  absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
  todo.push_back(SubRange(node, pos, n));
  do {
    const SubRange& sr = todo.back();
    node = sr.node;
    pos = sr.pos;
    n = sr.n;
    todo.pop_back();

    if (node == nullptr) {
      // Marker item: combine the two most recent partial results.
      assert(results.size() >= 2);
      CordRep* right = results.back();
      results.pop_back();
      CordRep* left = results.back();
      results.pop_back();
      results.push_back(Concat(left, right));
    } else if (pos == 0 && n == node->length) {
      // Range covers this whole subtree; share it.
      results.push_back(CordRep::Ref(node));
    } else if (node->tag != CONCAT) {
      if (node->tag == SUBSTRING) {
        // Collapse nested substrings by addressing the underlying child.
        pos += node->substring()->start;
        node = node->substring()->child;
      }
      results.push_back(NewSubstring(CordRep::Ref(node), pos, n));
    } else if (pos + n <= node->concat()->left->length) {
      // Range lies entirely within the left child.
      todo.push_back(SubRange(node->concat()->left, pos, n));
    } else if (pos >= node->concat()->left->length) {
      // Range lies entirely within the right child.
      pos -= node->concat()->left->length;
      todo.push_back(SubRange(node->concat()->right, pos, n));
    } else {
      // Range straddles both children: split it and schedule a concat of
      // the two halves (items pop in LIFO order: left, right, marker).
      size_t left_n = node->concat()->left->length - pos;
      todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
      todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
      todo.push_back(SubRange(node->concat()->left, pos, left_n));
    }
  } while (!todo.empty());
  assert(results.size() == 1);
  return results[0];
}
1008
// Returns a Cord holding the byte range [pos, pos + new_size) of *this.
// Out-of-range `pos`/`new_size` are clamped to the Cord's bounds rather
// than being an error.
Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  if (new_size == 0) return sub_cord;

  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // sub_cord is newly constructed, no need to re-zero-out the tail of
    // contents_ memory.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
    return sub_cord;
  }

  if (new_size <= InlineRep::kMaxInline) {
    // Result fits inline: copy the bytes chunk by chunk instead of
    // building a tree that shares the source.
    char* dest = sub_cord.contents_.data_.as_chars();
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    sub_cord.contents_.set_inline_size(new_size);
    return sub_cord;
  }

  // Large result: build a tree that references the source's nodes.
  if (tree->tag == RING) {
    CordRepRing* ring = CordRep::Ref(tree)->ring();
    tree = CordRepRing::SubRing(ring, pos, new_size);
  } else {
    tree = NewSubRange(tree, pos, new_size);
  }
  sub_cord.contents_.EmplaceTree(tree, contents_.data_,
                                 CordzUpdateTracker::kSubCord);
  return sub_cord;
}
1050
1051 // --------------------------------------------------------------------
1052 // Balancing
1053
// Helper for rebalancing a concat tree: Build() decomposes a tree into a
// "forest" of subtrees bucketed by length (buckets defined by the
// min_length table / kMinLengthSize, declared elsewhere in this file), and
// ConcatNodes() concatenates the buckets back into one balanced tree.
class CordForest {
 public:
  explicit CordForest(size_t length)
      : root_length_(length), trees_(kMinLengthSize, nullptr) {}

  // Tears down the tree rooted at `cord_root`, adding each leaf or
  // already-balanced concat subtree to the forest. Uniquely-owned concat
  // nodes that get split are kept on concat_freelist_ for reuse.
  void Build(CordRep* cord_root) {
    std::vector<CordRep*> pending = {cord_root};

    while (!pending.empty()) {
      CordRep* node = pending.back();
      pending.pop_back();
      CheckNode(node);
      if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
        AddNode(node);
        continue;
      }

      CordRepConcat* concat_node = node->concat();
      if (concat_node->depth() >= kMinLengthSize ||
          concat_node->length < min_length[concat_node->depth()]) {
        // Unbalanced concat: split it and process both children.
        pending.push_back(concat_node->right);
        pending.push_back(concat_node->left);

        if (concat_node->refcount.IsOne()) {
          // Sole owner: recycle the node via the freelist. Its `left`
          // pointer is repurposed as the freelist link.
          concat_node->left = concat_freelist_;
          concat_freelist_ = concat_node;
        } else {
          // Shared node: take references on the children before dropping
          // our reference on the concat itself.
          CordRep::Ref(concat_node->right);
          CordRep::Ref(concat_node->left);
          CordRep::Unref(concat_node);
        }
      } else {
        AddNode(node);
      }
    }
  }

  // Concatenates the buckets (smallest lengths first) into one tree.
  CordRep* ConcatNodes() {
    CordRep* sum = nullptr;
    for (auto* node : trees_) {
      if (node == nullptr) continue;

      sum = PrependNode(node, sum);
      root_length_ -= node->length;
      if (root_length_ == 0) break;
    }
    ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
    return VerifyTree(sum);
  }

 private:
  // Returns `sum` with `node` appended; `sum` may be null.
  CordRep* AppendNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(sum, node);
  }

  // Returns `sum` with `node` prepended; `sum` may be null.
  CordRep* PrependNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(node, sum);
  }

  // Merges `node` into the forest: combine with all smaller occupied
  // buckets, then cascade the combined tree into larger buckets (similar
  // to carrying in a binomial heap).
  void AddNode(CordRep* node) {
    CordRep* sum = nullptr;

    // Collect together everything with which we will merge with node
    int i = 0;
    for (; node->length > min_length[i + 1]; ++i) {
      auto& tree_at_i = trees_[i];

      if (tree_at_i == nullptr) continue;
      sum = PrependNode(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    sum = AppendNode(node, sum);

    // Insert sum into appropriate place in the forest
    for (; sum->length >= min_length[i]; ++i) {
      auto& tree_at_i = trees_[i];
      if (tree_at_i == nullptr) continue;

      sum = MakeConcat(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    // min_length[0] == 1, which means sum->length >= min_length[0]
    assert(i > 0);
    trees_[i - 1] = sum;
  }

  // Make concat node trying to reuse existing CordRepConcat nodes we
  // already collected in the concat_freelist_.
  CordRep* MakeConcat(CordRep* left, CordRep* right) {
    if (concat_freelist_ == nullptr) return RawConcat(left, right);

    CordRepConcat* rep = concat_freelist_;
    if (concat_freelist_->left == nullptr) {
      concat_freelist_ = nullptr;
    } else {
      concat_freelist_ = concat_freelist_->left->concat();
    }
    SetConcatChildren(rep, left, right);

    return rep;
  }

  // Validates node invariants: non-zero length; concat nodes have both
  // children and a length equal to the sum of the children's lengths.
  static void CheckNode(CordRep* node) {
    ABSL_INTERNAL_CHECK(node->length != 0u, "");
    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
      ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
                                           node->concat()->right->length),
                          "");
    }
  }

  // Bytes of the original tree not yet consumed by ConcatNodes().
  size_t root_length_;

  // use an inlined vector instead of a flat array to get bounds checking
  absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;

  // List of concat nodes we can re-use for Cord balancing.
  CordRepConcat* concat_freelist_ = nullptr;
};
1177
Rebalance(CordRep * node)1178 static CordRep* Rebalance(CordRep* node) {
1179 VerifyTree(node);
1180 assert(node->tag == CONCAT);
1181
1182 if (node->length == 0) {
1183 return nullptr;
1184 }
1185
1186 CordForest forest(node->length);
1187 forest.Build(node);
1188 return forest.ConcatNodes();
1189 }
1190
1191 // --------------------------------------------------------------------
1192 // Comparators
1193
1194 namespace {
1195
// Maps an arbitrary memcmp() result onto the canonical {-1, 0, 1} values.
int ClampResult(int memcmp_res) {
  if (memcmp_res < 0) return -1;
  if (memcmp_res > 0) return 1;
  return 0;
}
1199
CompareChunks(absl::string_view * lhs,absl::string_view * rhs,size_t * size_to_compare)1200 int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
1201 size_t* size_to_compare) {
1202 size_t compared_size = std::min(lhs->size(), rhs->size());
1203 assert(*size_to_compare >= compared_size);
1204 *size_to_compare -= compared_size;
1205
1206 int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
1207 if (memcmp_res != 0) return memcmp_res;
1208
1209 lhs->remove_prefix(compared_size);
1210 rhs->remove_prefix(compared_size);
1211
1212 return 0;
1213 }
1214
// This overload set computes comparison results from memcmp result. This
// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp the result to the
// {-1, 0, 1} set. For bool we are just interested in "value == 0".
template <typename ResultType>
ResultType ComputeCompareResult(int memcmp_res) {
  return ClampResult(memcmp_res);
}
template <>
bool ComputeCompareResult<bool>(int memcmp_res) {
  return memcmp_res == 0;
}
1227
1228 } // namespace
1229
// Helper routine. Locates the first flat chunk of the Cord without
// initializing the iterator.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  if (!is_tree()) {
    // Inline representation: the bytes live directly in data_.
    return absl::string_view(data_.as_chars(), data_.inline_size());
  }

  CordRep* node = tree();
  if (node->tag >= FLAT) {
    return absl::string_view(node->flat()->Data(), node->length);
  }

  if (node->tag == EXTERNAL) {
    return absl::string_view(node->external()->base, node->length);
  }

  if (node->tag == RING) {
    return node->ring()->entry_data(node->ring()->head());
  }

  // Walk down the left branches until we hit a non-CONCAT node.
  while (node->tag == CONCAT) {
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->tag >= FLAT) {
    return absl::string_view(node->flat()->Data() + offset, length);
  }

  assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}
1273
// Continues a comparison against `rhs` after GenericCompare() has already
// matched the first `compared_size` bytes; compares at most
// `size_to_compare` bytes in total. Returns <0, 0, or >0.
inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills `chunk` from the iterator when it is exhausted; returns false
  // once the Cord has no bytes left.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  // One side ran out of bytes first: the side with bytes left is greater.
  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}
1303
// As the string_view overload above, but walking both Cords chunk by chunk.
inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills `chunk` from the iterator when it is exhausted; returns false
  // once that Cord has no bytes left.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  // One side ran out of bytes first: the side with bytes left is greater.
  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}
1337
// Returns the first contiguous chunk of `c` without creating an iterator.
inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
  return c.contents_.FindFlatStartPiece();
}
// A string_view is a single chunk: it is its own "first chunk".
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
  return sv;
}
1344
1345 // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
1346 // that 'size_to_compare' is greater that size of smallest of first chunks.
1347 template <typename ResultType, typename RHS>
GenericCompare(const Cord & lhs,const RHS & rhs,size_t size_to_compare)1348 ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
1349 size_t size_to_compare) {
1350 absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
1351 absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);
1352
1353 size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
1354 assert(size_to_compare >= compared_size);
1355 int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
1356 if (compared_size == size_to_compare || memcmp_res != 0) {
1357 return ComputeCompareResult<ResultType>(memcmp_res);
1358 }
1359
1360 return ComputeCompareResult<ResultType>(
1361 lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
1362 }
1363
// Returns true iff the first `size_to_compare` bytes of *this equal `rhs`.
bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}
1367
// Returns true iff the first `size_to_compare` bytes of *this equal `rhs`.
bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}
1371
1372 template <typename RHS>
SharedCompareImpl(const Cord & lhs,const RHS & rhs)1373 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1374 size_t lhs_size = lhs.size();
1375 size_t rhs_size = rhs.size();
1376 if (lhs_size == rhs_size) {
1377 return GenericCompare<int>(lhs, rhs, lhs_size);
1378 }
1379 if (lhs_size < rhs_size) {
1380 auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1381 return data_comp_res == 0 ? -1 : data_comp_res;
1382 }
1383
1384 auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1385 return data_comp_res == 0 ? +1 : data_comp_res;
1386 }
1387
// Three-way comparison of *this against a string_view.
int Cord::Compare(absl::string_view rhs) const {
  return SharedCompareImpl(*this, rhs);
}
1391
// Three-way comparison of *this against another Cord.
int Cord::CompareImpl(const Cord& rhs) const {
  return SharedCompareImpl(*this, rhs);
}
1395
EndsWith(absl::string_view rhs) const1396 bool Cord::EndsWith(absl::string_view rhs) const {
1397 size_t my_size = size();
1398 size_t rhs_size = rhs.size();
1399
1400 if (my_size < rhs_size) return false;
1401
1402 Cord tmp(*this);
1403 tmp.RemovePrefix(my_size - rhs_size);
1404 return tmp.EqualsImpl(rhs, rhs_size);
1405 }
1406
EndsWith(const Cord & rhs) const1407 bool Cord::EndsWith(const Cord& rhs) const {
1408 size_t my_size = size();
1409 size_t rhs_size = rhs.size();
1410
1411 if (my_size < rhs_size) return false;
1412
1413 Cord tmp(*this);
1414 tmp.RemovePrefix(my_size - rhs_size);
1415 return tmp.EqualsImpl(rhs, rhs_size);
1416 }
1417
1418 // --------------------------------------------------------------------
1419 // Misc.
1420
operator std::string() const1421 Cord::operator std::string() const {
1422 std::string s;
1423 absl::CopyCordToString(*this, &s);
1424 return s;
1425 }
1426
CopyCordToString(const Cord & src,std::string * dst)1427 void CopyCordToString(const Cord& src, std::string* dst) {
1428 if (!src.contents_.is_tree()) {
1429 src.contents_.CopyTo(dst);
1430 } else {
1431 absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1432 src.CopyToArraySlowPath(&(*dst)[0]);
1433 }
1434 }
1435
CopyToArraySlowPath(char * dst) const1436 void Cord::CopyToArraySlowPath(char* dst) const {
1437 assert(contents_.is_tree());
1438 absl::string_view fragment;
1439 if (GetFlatAux(contents_.tree(), &fragment)) {
1440 memcpy(dst, fragment.data(), fragment.size());
1441 return;
1442 }
1443 for (absl::string_view chunk : Chunks()) {
1444 memcpy(dst, chunk.data(), chunk.size());
1445 dst += chunk.size();
1446 }
1447 }
1448
// Advances the iterator to the next leaf chunk using the stack of pending
// right children (concat-tree traversal). Leaves the iterator unchanged
// when the Cord is exhausted.
Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() {
  auto& stack_of_right_children = stack_of_right_children_;
  if (stack_of_right_children.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children.back();
  stack_of_right_children.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    stack_of_right_children.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Only FLAT and EXTERNAL leaves carry bytes directly.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length != 0);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}
1484
// Reads the next `n` bytes into a new Cord and advances the iterator past
// them. Small reads are flattened inline; larger reads build a tree that
// shares the source's nodes. Asserts (hardened) that n bytes remain.
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                        "Attempted to iterate past `end()`");
  Cord subcord;
  auto constexpr method = CordzUpdateTracker::kCordReader;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    // Copy whole chunks while the request spans more than the current one.
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      // Read stops inside the current chunk.
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      // Read consumed the chunk exactly; move to the next one.
      ++*this;
    }
    return subcord;
  }

  if (ring_reader_) {
    // Ring-buffer representation: either copy a small in-chunk range or
    // share a sub-ring of the source.
    size_t chunk_size = current_chunk_.size();
    if (n <= chunk_size && n <= kMaxBytesToCopy) {
      subcord = Cord(current_chunk_.substr(0, n), method);
    } else {
      auto* ring = CordRep::Ref(ring_reader_.ring())->ring();
      size_t offset = ring_reader_.length() - bytes_remaining_;
      CordRep* rep = CordRepRing::SubRing(ring, offset, n);
      subcord.contents_.EmplaceTree(rep, method);
    }
    if (n < chunk_size) {
      bytes_remaining_ -= n;
      current_chunk_.remove_prefix(n);
    } else {
      AdvanceBytesRing(n);
    }
    return subcord;
  }

  // Concat-tree representation.
  auto& stack_of_right_children = stack_of_right_children_;
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = CordRep::Ref(current_leaf_);
    const char* data = subnode->tag == EXTERNAL ? subnode->external()->base
                                                : subnode->flat()->Data();
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = CordRep::Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    // Only part of the leaf is still ahead of the iterator; wrap it.
    const char* data = subnode->tag == EXTERNAL ? subnode->external()->base
                                                : subnode->flat()->Data();
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children.empty()) {
    node = stack_of_right_children.back();
    stack_of_right_children.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, CordRep::Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, CordRep::Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) {
    subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n));
  }
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
  return subcord;
}
1622
// Advances the iterator by `n` bytes, where `n` extends at least to the end
// of the current chunk (the fast path handles in-chunk advances). Skips
// whole subtrees from the stack where possible.
void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  // Consume the rest of the current chunk first.
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  auto& stack_of_right_children = stack_of_right_children_;
  while (!stack_of_right_children.empty()) {
    node = stack_of_right_children.back();
    stack_of_right_children.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // The landing leaf must carry bytes directly, and `n` must fall inside it.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}
1687
// Returns the i-th byte of the Cord by walking the tree; O(depth) for
// concat trees, O(1) for inline/flat/external data.
char Cord::operator[](size_t i) const {
  ABSL_HARDENING_ASSERT(i < size());
  size_t offset = i;
  const CordRep* rep = contents_.tree();
  if (rep == nullptr) {
    return contents_.data()[i];
  }
  while (true) {
    assert(rep != nullptr);
    assert(offset < rep->length);
    if (rep->tag >= FLAT) {
      // Get the "i"th character directly from the flat array.
      return rep->flat()->Data()[offset];
    } else if (rep->tag == RING) {
      return rep->ring()->GetCharacter(offset);
    } else if (rep->tag == EXTERNAL) {
      // Get the "i"th character from the external array.
      return rep->external()->base[offset];
    } else if (rep->tag == CONCAT) {
      // Recursively branch to the side of the concatenation that the "i"th
      // character is on.
      size_t left_length = rep->concat()->left->length;
      if (offset < left_length) {
        rep = rep->concat()->left;
      } else {
        offset -= left_length;
        rep = rep->concat()->right;
      }
    } else {
      // This must be a substring node, so bypass it to get to the child.
      assert(rep->tag == SUBSTRING);
      offset += rep->substring()->start;
      rep = rep->substring()->child;
    }
  }
}
1724
// Replaces the Cord's tree with a single contiguous node (flat if it fits,
// external otherwise) and returns a view of the flattened bytes.
absl::string_view Cord::FlattenSlowPath() {
  assert(contents_.is_tree());
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = CordRepFlat::New(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->flat()->Data();
    CopyToArraySlowPath(new_buffer);
  } else {
    // External rep: the releaser deallocates the buffer when the last
    // reference goes away.
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten);
  CordRep::Unref(contents_.as_tree());
  contents_.SetTree(new_rep, scope);
  return absl::string_view(new_buffer, total_size);
}
1752
// If the tree rooted at `rep` is a single contiguous piece of memory (flat,
// external, single-entry ring, or a substring of one of those), stores a
// view of it in *fragment and returns true; otherwise returns false.
/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
  assert(rep != nullptr);
  if (rep->tag >= FLAT) {
    *fragment = absl::string_view(rep->flat()->Data(), rep->length);
    return true;
  } else if (rep->tag == EXTERNAL) {
    *fragment = absl::string_view(rep->external()->base, rep->length);
    return true;
  } else if (rep->tag == RING) {
    return rep->ring()->IsFlat(fragment);
  } else if (rep->tag == SUBSTRING) {
    // A substring is flat iff its child's backing range is contiguous.
    CordRep* child = rep->substring()->child;
    if (child->tag >= FLAT) {
      *fragment = absl::string_view(
          child->flat()->Data() + rep->substring()->start, rep->length);
      return true;
    } else if (child->tag == EXTERNAL) {
      *fragment = absl::string_view(
          child->external()->base + rep->substring()->start, rep->length);
      return true;
    } else if (child->tag == RING) {
      return child->ring()->IsFlat(rep->substring()->start, rep->length,
                                   fragment);
    }
  }
  return false;
}
1780
ForEachChunkAux(absl::cord_internal::CordRep * rep,absl::FunctionRef<void (absl::string_view)> callback)1781 /* static */ void Cord::ForEachChunkAux(
1782 absl::cord_internal::CordRep* rep,
1783 absl::FunctionRef<void(absl::string_view)> callback) {
1784 if (rep->tag == RING) {
1785 ChunkIterator it(rep), end;
1786 while (it != end) {
1787 callback(*it);
1788 ++it;
1789 }
1790 return;
1791 }
1792
1793 assert(rep != nullptr);
1794 int stack_pos = 0;
1795 constexpr int stack_max = 128;
1796 // Stack of right branches for tree traversal
1797 absl::cord_internal::CordRep* stack[stack_max];
1798 absl::cord_internal::CordRep* current_node = rep;
1799 while (true) {
1800 if (current_node->tag == CONCAT) {
1801 if (stack_pos == stack_max) {
1802 // There's no more room on our stack array to add another right branch,
1803 // and the idea is to avoid allocations, so call this function
1804 // recursively to navigate this subtree further. (This is not something
1805 // we expect to happen in practice).
1806 ForEachChunkAux(current_node, callback);
1807
1808 // Pop the next right branch and iterate.
1809 current_node = stack[--stack_pos];
1810 continue;
1811 } else {
1812 // Save the right branch for later traversal and continue down the left
1813 // branch.
1814 stack[stack_pos++] = current_node->concat()->right;
1815 current_node = current_node->concat()->left;
1816 continue;
1817 }
1818 }
1819 // This is a leaf node, so invoke our callback.
1820 absl::string_view chunk;
1821 bool success = GetFlatAux(current_node, &chunk);
1822 assert(success);
1823 if (success) {
1824 callback(chunk);
1825 }
1826 if (stack_pos == 0) {
1827 // end of traversal
1828 return;
1829 }
1830 current_node = stack[--stack_pos];
1831 }
1832 }
1833
// Writes a human-readable dump of the tree rooted at `rep` to `*os`, one
// node per line (refcount, length, balanced/unbalanced flag, node kind).
// `include_data` additionally prints node addresses and escaped leaf
// contents. Concat right branches are deferred on an explicit stack (with
// their indents saved in a parallel vector); ring children recurse.
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
                     int indent) {
  const int kIndentStep = 1;
  // `stack` holds pending right branches; `indents` holds, in lockstep, the
  // indent level to restore when each of those branches is resumed.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    *os << " " << std::setw(indent) << "";
    if (rep->tag == CONCAT) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->tag == SUBSTRING) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf or ring
      if (rep->tag == EXTERNAL) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else if (rep->tag >= FLAT) {
        *os << "FLAT cap=" << rep->flat()->Capacity() << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length));
        *os << "]\n";
      } else {
        assert(rep->tag == RING);
        // Ring entries are dumped recursively, one level deeper.
        auto* ring = rep->ring();
        *os << "RING, entries = " << ring->entries() << "\n";
        CordRepRing::index_type head = ring->head();
        do {
          DumpNode(ring->entry_child(head), include_data, os,
                   indent + kIndentStep);
          head = ring->advance(head);
        } while (head != ring->tail());
      }
      if (stack.empty()) break;
      // Resume the most recently deferred right branch at its saved indent.
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}
1888
ReportError(CordRep * root,CordRep * node)1889 static std::string ReportError(CordRep* root, CordRep* node) {
1890 std::ostringstream buf;
1891 buf << "Error at node " << node << " in:";
1892 DumpNode(root, true, &buf);
1893 return buf.str();
1894 }
1895
VerifyNode(CordRep * root,CordRep * start_node,bool full_validation)1896 static bool VerifyNode(CordRep* root, CordRep* start_node,
1897 bool full_validation) {
1898 absl::InlinedVector<CordRep*, 2> worklist;
1899 worklist.push_back(start_node);
1900 do {
1901 CordRep* node = worklist.back();
1902 worklist.pop_back();
1903
1904 ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
1905 if (node != root) {
1906 ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
1907 }
1908
1909 if (node->tag == CONCAT) {
1910 ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
1911 ReportError(root, node));
1912 ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
1913 ReportError(root, node));
1914 ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
1915 node->concat()->right->length),
1916 ReportError(root, node));
1917 if (full_validation) {
1918 worklist.push_back(node->concat()->right);
1919 worklist.push_back(node->concat()->left);
1920 }
1921 } else if (node->tag >= FLAT) {
1922 ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(),
1923 ReportError(root, node));
1924 } else if (node->tag == EXTERNAL) {
1925 ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
1926 ReportError(root, node));
1927 } else if (node->tag == SUBSTRING) {
1928 ABSL_INTERNAL_CHECK(
1929 node->substring()->start < node->substring()->child->length,
1930 ReportError(root, node));
1931 ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
1932 node->substring()->child->length,
1933 ReportError(root, node));
1934 }
1935 } while (!worklist.empty());
1936 return true;
1937 }
1938
// Traverses the tree and computes the total memory allocated.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  // RepMemoryUsageLeaf returns true (and accumulates the node's size into
  // *total_mem_usage) iff `rep` is a leaf node.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    // The next non-leaf node to descend into, if any.
    const CordRep* next_node = nullptr;

    if (cur_node->tag == CONCAT) {
      total_mem_usage += sizeof(CordRepConcat);
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        // Both children are non-leaf: defer the left (already in next_node)
        // and descend into the right.
        if (next_node) {
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else if (cur_node->tag == RING) {
      // Count the ring's own allocation, then each of its (leaf) entries.
      total_mem_usage += CordRepRing::AllocSize(cur_node->ring()->capacity());
      const CordRepRing* ring = cur_node->ring();
      CordRepRing::index_type pos = ring->head(), tail = ring->tail();
      do {
        CordRep* node = ring->entry_child(pos);
        assert(node->tag >= FLAT || node->tag == EXTERNAL);
        RepMemoryUsageLeaf(node, &total_mem_usage);
      } while ((pos = ring->advance(pos)) != tail);
    } else {
      // Since cur_node is not a leaf or a concat node it must be a substring.
      assert(cur_node->tag == SUBSTRING);
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        next_node = nullptr;
      }
    }

    if (!next_node) {
      // Nothing to descend into; resume a deferred subtree or finish.
      if (tree_stack.empty()) {
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}
1999
operator <<(std::ostream & out,const Cord & cord)2000 std::ostream& operator<<(std::ostream& out, const Cord& cord) {
2001 for (absl::string_view chunk : cord.Chunks()) {
2002 out.write(chunk.data(), chunk.size());
2003 }
2004 return out;
2005 }
2006
2007 namespace strings_internal {
FlatOverhead()2008 size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; }
MaxFlatLength()2009 size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; }
FlatTagToLength(uint8_t tag)2010 size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
2011 return cord_internal::TagToLength(tag);
2012 }
LengthToTag(size_t s)2013 uint8_t CordTestAccess::LengthToTag(size_t s) {
2014 ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
2015 return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead);
2016 }
SizeofCordRepConcat()2017 size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
SizeofCordRepExternal()2018 size_t CordTestAccess::SizeofCordRepExternal() {
2019 return sizeof(CordRepExternal);
2020 }
SizeofCordRepSubstring()2021 size_t CordTestAccess::SizeofCordRepSubstring() {
2022 return sizeof(CordRepSubstring);
2023 }
2024 } // namespace strings_internal
2025 ABSL_NAMESPACE_END
2026 } // namespace absl
2027