1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/cord.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cstddef>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <iomanip>
23 #include <iostream>
24 #include <limits>
25 #include <ostream>
26 #include <sstream>
27 #include <type_traits>
28 #include <unordered_set>
29 #include <vector>
30 
31 #include "absl/base/casts.h"
32 #include "absl/base/internal/raw_logging.h"
33 #include "absl/base/macros.h"
34 #include "absl/base/port.h"
35 #include "absl/container/fixed_array.h"
36 #include "absl/container/inlined_vector.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/internal/cord_internal.h"
39 #include "absl/strings/internal/cord_rep_flat.h"
40 #include "absl/strings/internal/cord_rep_ring.h"
41 #include "absl/strings/internal/cordz_statistics.h"
42 #include "absl/strings/internal/cordz_update_scope.h"
43 #include "absl/strings/internal/cordz_update_tracker.h"
44 #include "absl/strings/internal/resize_uninitialized.h"
45 #include "absl/strings/str_cat.h"
46 #include "absl/strings/str_format.h"
47 #include "absl/strings/str_join.h"
48 #include "absl/strings/string_view.h"
49 
50 namespace absl {
51 ABSL_NAMESPACE_BEGIN
52 
53 using ::absl::cord_internal::CordRep;
54 using ::absl::cord_internal::CordRepConcat;
55 using ::absl::cord_internal::CordRepExternal;
56 using ::absl::cord_internal::CordRepFlat;
57 using ::absl::cord_internal::CordRepRing;
58 using ::absl::cord_internal::CordRepSubstring;
59 using ::absl::cord_internal::CordzUpdateTracker;
60 using ::absl::cord_internal::InlineData;
61 using ::absl::cord_internal::kMaxFlatLength;
62 using ::absl::cord_internal::kMinFlatLength;
63 
64 using ::absl::cord_internal::CONCAT;
65 using ::absl::cord_internal::EXTERNAL;
66 using ::absl::cord_internal::FLAT;
67 using ::absl::cord_internal::RING;
68 using ::absl::cord_internal::SUBSTRING;
69 
70 using ::absl::cord_internal::kInlinedVectorSize;
71 using ::absl::cord_internal::kMaxBytesToCopy;
72 
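// Computes the n-th Fibonacci number (F(0) = 0, F(1) = 1) via a constexpr
// accumulator recursion; these values seed the min_length table below that
// drives the tree-balancing heuristic.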
73 constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
74   return n == 0 ? a : Fibonacci(n - 1, b, a + b);
75 }
76 
77 static_assert(Fibonacci(63) == 6557470319842,
78               "Fibonacci values computed incorrectly");
79 
80 // Minimum length required for a tree of a given depth -- a tree is considered
81 // balanced if
82 //      length(t) >= min_length[depth(t)]
83 // The root node depth is allowed to become twice as large to reduce rebalancing
84 // for larger strings (see IsRootBalanced).
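// For example, min_length[4] == Fibonacci(6) == 8, so a tree of depth 4 is
// considered balanced only if it holds at least 8 bytes.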
85 static constexpr uint64_t min_length[] = {
86     Fibonacci(2),          Fibonacci(3),  Fibonacci(4),  Fibonacci(5),
87     Fibonacci(6),          Fibonacci(7),  Fibonacci(8),  Fibonacci(9),
88     Fibonacci(10),         Fibonacci(11), Fibonacci(12), Fibonacci(13),
89     Fibonacci(14),         Fibonacci(15), Fibonacci(16), Fibonacci(17),
90     Fibonacci(18),         Fibonacci(19), Fibonacci(20), Fibonacci(21),
91     Fibonacci(22),         Fibonacci(23), Fibonacci(24), Fibonacci(25),
92     Fibonacci(26),         Fibonacci(27), Fibonacci(28), Fibonacci(29),
93     Fibonacci(30),         Fibonacci(31), Fibonacci(32), Fibonacci(33),
94     Fibonacci(34),         Fibonacci(35), Fibonacci(36), Fibonacci(37),
95     Fibonacci(38),         Fibonacci(39), Fibonacci(40), Fibonacci(41),
96     Fibonacci(42),         Fibonacci(43), Fibonacci(44), Fibonacci(45),
97     Fibonacci(46),         Fibonacci(47),
98     0xffffffffffffffffull,  // Avoid overflow
99 };
100 
101 static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
102 
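// Returns true if the CordRepRing (ring buffer) representation is enabled;
// reads the cord_ring_buffer_enabled flag with relaxed memory ordering.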
103 static inline bool cord_ring_enabled() {
104   return cord_internal::cord_ring_buffer_enabled.load(
105       std::memory_order_relaxed);
106 }
107 
108 static inline bool IsRootBalanced(CordRep* node) {
109   if (node->tag != CONCAT) {
110     return true;
111   } else if (node->concat()->depth() <= 15) {
112     return true;
113   } else if (node->concat()->depth() > kMinLengthSize) {
114     return false;
115   } else {
116     // Allow depth to become twice as large as implied by fibonacci rule to
117     // reduce rebalancing for larger strings.
118     return (node->length >= min_length[node->concat()->depth() / 2]);
119   }
120 }
121 
122 static CordRep* Rebalance(CordRep* node);
123 static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
124                      int indent = 0);
125 static bool VerifyNode(CordRep* root, CordRep* start_node,
126                        bool full_validation);
127 
128 static inline CordRep* VerifyTree(CordRep* node) {
129   // Verification is expensive, so only do it in debug mode.
130   // Even in debug mode we normally do only light validation.
131   // If you are debugging Cord itself, you should define the
132   // macro EXTRA_CORD_VALIDATION, e.g. by adding
133   // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
134 #ifdef EXTRA_CORD_VALIDATION
135   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
136 #else   // EXTRA_CORD_VALIDATION
137   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
138 #endif  // EXTRA_CORD_VALIDATION
139   static_cast<void>(&VerifyNode);
140 
141   return node;
142 }
143 
144 // Return the depth of a node
145 static int Depth(const CordRep* rep) {
146   if (rep->tag == CONCAT) {
147     return rep->concat()->depth();
148   } else {
149     return 0;
150   }
151 }
152 
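// Sets the children of `concat` and updates its length and depth accordingly.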
153 static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
154                               CordRep* right) {
155   concat->left = left;
156   concat->right = right;
157 
158   concat->length = left->length + right->length;
159   concat->set_depth(1 + std::max(Depth(left), Depth(right)));
160 }
161 
162 // Create a concatenation of the specified nodes.
163 // Does not change the refcounts of "left" and "right".
164 // The returned node has a refcount of 1.
165 static CordRep* RawConcat(CordRep* left, CordRep* right) {
166   // Avoid making degenerate concat nodes (one child is empty)
167   if (left == nullptr) return right;
168   if (right == nullptr) return left;
169   if (left->length == 0) {
170     CordRep::Unref(left);
171     return right;
172   }
173   if (right->length == 0) {
174     CordRep::Unref(right);
175     return left;
176   }
177 
178   CordRepConcat* rep = new CordRepConcat();
179   rep->tag = CONCAT;
180   SetConcatChildren(rep, left, right);
181 
182   return rep;
183 }
184 
185 static CordRep* Concat(CordRep* left, CordRep* right) {
186   CordRep* rep = RawConcat(left, right);
187   if (rep != nullptr && !IsRootBalanced(rep)) {
188     rep = Rebalance(rep);
189   }
190   return VerifyTree(rep);
191 }
192 
193 // Make a balanced tree out of an array of leaf nodes.
194 static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
195   // Make repeated passes over the array, merging adjacent pairs
196   // until we are left with just a single node.
197   while (n > 1) {
198     size_t dst = 0;
199     for (size_t src = 0; src < n; src += 2) {
200       if (src + 1 < n) {
201         reps[dst] = Concat(reps[src], reps[src + 1]);
202       } else {
203         reps[dst] = reps[src];
204       }
205       dst++;
206     }
207     n = dst;
208   }
209 
210   return reps[0];
211 }
212 
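// Creates a single flat node with capacity for at least `length + alloc_hint`
// bytes and copies `length` bytes of `data` into it.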
213 static CordRepFlat* CreateFlat(const char* data, size_t length,
214                                size_t alloc_hint) {
215   CordRepFlat* flat = CordRepFlat::New(length + alloc_hint);
216   flat->length = length;
217   memcpy(flat->Data(), data, length);
218   return flat;
219 }
220 
221 // Creates a new flat node or ring buffer out of the specified array.
222 // The returned node has a refcount of 1.
223 static CordRep* RingNewTree(const char* data, size_t length,
224                             size_t alloc_hint) {
225   if (length <= kMaxFlatLength) {
226     return CreateFlat(data, length, alloc_hint);
227   }
228   CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0);
229   data += kMaxFlatLength;
230   length -= kMaxFlatLength;
231   size_t extra = (length - 1) / kMaxFlatLength + 1;
232   auto* root = CordRepRing::Create(flat, extra);
233   return CordRepRing::Append(root, {data, length}, alloc_hint);
234 }
235 
236 // Create a new tree out of the specified array.
237 // The returned node has a refcount of 1.
238 static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) {
239   if (length == 0) return nullptr;
240   if (cord_ring_enabled()) {
241     return RingNewTree(data, length, alloc_hint);
242   }
243   absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
244   size_t n = 0;
245   do {
246     const size_t len = std::min(length, kMaxFlatLength);
247     CordRepFlat* rep = CordRepFlat::New(len + alloc_hint);
248     rep->length = len;
249     memcpy(rep->Data(), data, len);
250     reps[n++] = VerifyTree(rep);
251     data += len;
252     length -= len;
253   } while (length != 0);
254   return MakeBalancedTree(reps.data(), n);
255 }
256 
257 namespace cord_internal {
258 
259 void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
260   assert(!data.empty());
261   rep->length = data.size();
262   rep->tag = EXTERNAL;
263   rep->base = data.data();
264   VerifyTree(rep);
265 }
266 
267 }  // namespace cord_internal
268 
269 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
270   // Never create empty substring nodes
271   if (length == 0) {
272     CordRep::Unref(child);
273     return nullptr;
274   } else {
275     CordRepSubstring* rep = new CordRepSubstring();
276     assert((offset + length) <= child->length);
277     rep->length = length;
278     rep->tag = SUBSTRING;
279     rep->start = offset;
280     rep->child = child;
281     return VerifyTree(rep);
282   }
283 }
284 
285 // Creates a CordRep from the provided string. If the string is large enough,
286 // and not wasteful, we move the string into an external cord rep, preserving
287 // the already allocated string contents.
288 // Requires the provided string length to be larger than `kMaxInline`.
289 static CordRep* CordRepFromString(std::string&& src) {
290   assert(src.length() > cord_internal::kMaxInline);
291   if (
292       // String is short: copy data to avoid external block overhead.
293       src.size() <= kMaxBytesToCopy ||
294       // String is wasteful: copy data to avoid pinning too much unused memory.
295       src.size() < src.capacity() / 2
296   ) {
297     return NewTree(src.data(), src.size(), 0);
298   }
299 
300   struct StringReleaser {
301     void operator()(absl::string_view /* data */) {}
302     std::string data;
303   };
304   const absl::string_view original_data = src;
305   auto* rep =
306       static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
307           absl::cord_internal::NewExternalRep(original_data,
308                                               StringReleaser{std::move(src)}));
309   // Moving src may have invalidated its data pointer, so adjust it.
310   rep->base = rep->template get<0>().data.data();
311   return rep;
312 }
313 
314 // --------------------------------------------------------------------
315 // Cord::InlineRep functions
316 
317 constexpr unsigned char Cord::InlineRep::kMaxInline;
318 
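// Copies `n` bytes of `data` into the inline buffer, optionally zero-filling
// the remaining tail, and records the new inline size.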
319 inline void Cord::InlineRep::set_data(const char* data, size_t n,
320                                       bool nullify_tail) {
321   static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
322 
323   cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail);
324   set_inline_size(n);
325 }
326 
327 inline char* Cord::InlineRep::set_data(size_t n) {
328   assert(n <= kMaxInline);
329   ResetToEmpty();
330   set_inline_size(n);
331   return data_.as_chars();
332 }
333 
334 inline void Cord::InlineRep::reduce_size(size_t n) {
335   size_t tag = inline_size();
336   assert(tag <= kMaxInline);
337   assert(tag >= n);
338   tag -= n;
339   memset(data_.as_chars() + tag, 0, n);
340   set_inline_size(static_cast<char>(tag));
341 }
342 
343 inline void Cord::InlineRep::remove_prefix(size_t n) {
344   cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n,
345                               inline_size() - n);
346   reduce_size(n);
347 }
348 
349 // Returns `rep` converted into a CordRepRing.
350 // Directly returns `rep` if `rep` is already a CordRepRing.
351 static CordRepRing* ForceRing(CordRep* rep, size_t extra) {
352   return (rep->tag == RING) ? rep->ring() : CordRepRing::Create(rep, extra);
353 }
354 
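// Appends `tree` to a cord whose data is currently stored inline: any
// existing inline data is first promoted to a flat node, and the result is
// combined with `tree` as either a ring buffer or a concat tree, depending on
// cord_ring_enabled().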
355 void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
356                                           MethodIdentifier method) {
357   assert(!is_tree());
358   if (!data_.is_empty()) {
359     CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
360     if (cord_ring_enabled()) {
361       tree = CordRepRing::Append(CordRepRing::Create(flat, 1), tree);
362     } else {
363       tree = Concat(flat, tree);
364     }
365   }
366   EmplaceTree(tree, method);
367 }
368 
369 void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) {
370   assert(is_tree());
371   const CordzUpdateScope scope(data_.cordz_info(), method);
372   if (cord_ring_enabled()) {
373     tree = CordRepRing::Append(ForceRing(data_.as_tree(), 1), tree);
374   } else {
375     tree = Concat(data_.as_tree(), tree);
376   }
377   SetTree(tree, scope);
378 }
379 
380 void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) {
381   if (tree == nullptr) return;
382   if (data_.is_tree()) {
383     AppendTreeToTree(tree, method);
384   } else {
385     AppendTreeToInlined(tree, method);
386   }
387 }
388 
389 void Cord::InlineRep::PrependTreeToInlined(CordRep* tree,
390                                            MethodIdentifier method) {
391   assert(!is_tree());
392   if (!data_.is_empty()) {
393     CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
394     if (cord_ring_enabled()) {
395       tree = CordRepRing::Prepend(CordRepRing::Create(flat, 1), tree);
396     } else {
397       tree = Concat(tree, flat);
398     }
399   }
400   EmplaceTree(tree, method);
401 }
402 
403 void Cord::InlineRep::PrependTreeToTree(CordRep* tree,
404                                         MethodIdentifier method) {
405   assert(is_tree());
406   const CordzUpdateScope scope(data_.cordz_info(), method);
407   if (cord_ring_enabled()) {
408     tree = CordRepRing::Prepend(ForceRing(data_.as_tree(), 1), tree);
409   } else {
410     tree = Concat(tree, data_.as_tree());
411   }
412   SetTree(tree, scope);
413 }
414 
415 void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) {
416   assert(tree != nullptr);
417   if (data_.is_tree()) {
418     PrependTreeToTree(tree, method);
419   } else {
420     PrependTreeToInlined(tree, method);
421   }
422 }
423 
424 // Searches for a non-full flat node at the rightmost leaf of the tree. If a
425 // suitable leaf is found, the function will update the length field for all
426 // nodes to account for the size increase. The append region address will be
427 // written to region and the actual size increase will be written to size.
428 static inline bool PrepareAppendRegion(CordRep* root, char** region,
429                                        size_t* size, size_t max_length) {
430   if (root->tag == RING && root->refcount.IsOne()) {
431     Span<char> span = root->ring()->GetAppendBuffer(max_length);
432     if (!span.empty()) {
433       *region = span.data();
434       *size = span.size();
435       return true;
436     }
437   }
438 
439   // Search down the right-hand path for a non-full FLAT node.
440   CordRep* dst = root;
441   while (dst->tag == CONCAT && dst->refcount.IsOne()) {
442     dst = dst->concat()->right;
443   }
444 
445   if (dst->tag < FLAT || !dst->refcount.IsOne()) {
446     *region = nullptr;
447     *size = 0;
448     return false;
449   }
450 
451   const size_t in_use = dst->length;
452   const size_t capacity = dst->flat()->Capacity();
453   if (in_use == capacity) {
454     *region = nullptr;
455     *size = 0;
456     return false;
457   }
458 
459   size_t size_increase = std::min(capacity - in_use, max_length);
460 
461   // We need to update the length fields for all nodes, including the leaf node.
462   for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
463     rep->length += size_increase;
464   }
465   dst->length += size_increase;
466 
467   *region = dst->flat()->Data() + in_use;
468   *size = size_increase;
469   return true;
470 }
471 
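// Slow path for obtaining a writable append region. If the cord is still
// inline and has enough room, the region is carved out of the inline buffer;
// otherwise a non-full flat node at the tail of the tree is reused when
// possible, or a new flat node is appended to the tree.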
472 template <bool has_length>
473 void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
474                                       size_t length) {
475   auto constexpr method = CordzUpdateTracker::kGetAppendRegion;
476 
477   CordRep* root = tree();
478   size_t sz = root ? root->length : inline_size();
479   if (root == nullptr) {
480     size_t available = kMaxInline - sz;
481     if (available >= (has_length ? length : 1)) {
482       *region = data_.as_chars() + sz;
483       *size = has_length ? length : available;
484       set_inline_size(has_length ? sz + length : kMaxInline);
485       return;
486     }
487   }
488 
489   size_t extra = has_length ? length : (std::max)(sz, kMinFlatLength);
490   CordRep* rep = root ? root : MakeFlatWithExtraCapacity(extra);
491   CordzUpdateScope scope(root ? data_.cordz_info() : nullptr, method);
492   if (PrepareAppendRegion(rep, region, size, length)) {
493     CommitTree(root, rep, scope, method);
494     return;
495   }
496 
497   // Allocate new node.
498   CordRepFlat* new_node = CordRepFlat::New(extra);
499   new_node->length = std::min(new_node->Capacity(), length);
500   *region = new_node->Data();
501   *size = new_node->length;
502 
503   if (cord_ring_enabled()) {
504     rep = CordRepRing::Append(ForceRing(rep, 1), new_node);
505   } else {
506     rep = Concat(rep, new_node);
507   }
508   CommitTree(root, rep, scope, method);
509 }
510 
511 // If the rep is a leaf, this will increment the value at total_mem_usage and
512 // will return true.
513 static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
514   if (rep->tag >= FLAT) {
515     *total_mem_usage += rep->flat()->AllocatedSize();
516     return true;
517   }
518   if (rep->tag == EXTERNAL) {
519     // We don't know anything about the embedded / bound data, but we can safely
520     // assume it is 'at least' a word / pointer to data. In the future we may
521     // choose to use the 'data' byte as a tag to identify the types of some
522     // well-known externals, such as a std::string instance.
523     *total_mem_usage +=
524         sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length;
525     return true;
526   }
527   return false;
528 }
529 
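// Slow path for copy assignment, used when either side owns a tree: adopts a
// new reference to the source tree (or copies the source's inline data),
// updates Cordz sampling state, and releases any tree previously held by the
// destination.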
530 void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
531   assert(&src != this);
532   assert(is_tree() || src.is_tree());
533   auto constexpr method = CordzUpdateTracker::kAssignCord;
534   if (ABSL_PREDICT_TRUE(!is_tree())) {
535     EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method);
536     return;
537   }
538 
539   CordRep* tree = as_tree();
540   if (CordRep* src_tree = src.tree()) {
541     // Leave any existing `cordz_info` in place, and let MaybeTrackCord()
542     // decide if this cord should be (or remains to be) sampled or not.
543     data_.set_tree(CordRep::Ref(src_tree));
544     CordzInfo::MaybeTrackCord(data_, src.data_, method);
545   } else {
546     CordzInfo::MaybeUntrackCord(data_.cordz_info());
547     data_ = src.data_;
548   }
549   CordRep::Unref(tree);
550 }
551 
552 void Cord::InlineRep::UnrefTree() {
553   if (is_tree()) {
554     CordzInfo::MaybeUntrackCord(data_.cordz_info());
555     CordRep::Unref(tree());
556   }
557 }
558 
559 // --------------------------------------------------------------------
560 // Constructors and destructors
561 
562 Cord::Cord(absl::string_view src, MethodIdentifier method)
563     : contents_(InlineData::kDefaultInit) {
564   const size_t n = src.size();
565   if (n <= InlineRep::kMaxInline) {
566     contents_.set_data(src.data(), n, true);
567   } else {
568     CordRep* rep = NewTree(src.data(), n, 0);
569     contents_.EmplaceTree(rep, method);
570   }
571 }
572 
573 template <typename T, Cord::EnableIfString<T>>
574 Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) {
575   if (src.size() <= InlineRep::kMaxInline) {
576     contents_.set_data(src.data(), src.size(), true);
577   } else {
578     CordRep* rep = CordRepFromString(std::forward<T>(src));
579     contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
580   }
581 }
582 
583 template Cord::Cord(std::string&& src);
584 
585 // The destruction code is separate so that the compiler can determine
586 // that it does not need to call the destructor on a moved-from Cord.
587 void Cord::DestroyCordSlow() {
588   assert(contents_.is_tree());
589   CordzInfo::MaybeUntrackCord(contents_.cordz_info());
590   CordRep::Unref(VerifyTree(contents_.as_tree()));
591 }
592 
593 // --------------------------------------------------------------------
594 // Mutators
595 
596 void Cord::Clear() {
597   if (CordRep* tree = contents_.clear()) {
598     CordRep::Unref(tree);
599   }
600 }
601 
602 Cord& Cord::AssignLargeString(std::string&& src) {
603   auto constexpr method = CordzUpdateTracker::kAssignString;
604   assert(src.size() > kMaxBytesToCopy);
605   CordRep* rep = CordRepFromString(std::move(src));
606   if (CordRep* tree = contents_.tree()) {
607     CordzUpdateScope scope(contents_.cordz_info(), method);
608     contents_.SetTree(rep, scope);
609     CordRep::Unref(tree);
610   } else {
611     contents_.EmplaceTree(rep, method);
612   }
613   return *this;
614 }
615 
616 Cord& Cord::operator=(absl::string_view src) {
617   auto constexpr method = CordzUpdateTracker::kAssignString;
618   const char* data = src.data();
619   size_t length = src.size();
620   CordRep* tree = contents_.tree();
621   if (length <= InlineRep::kMaxInline) {
622     // Embed into this->contents_, which is somewhat subtle:
623     // - MaybeUntrackCord must be called before Unref(tree).
624     // - MaybeUntrackCord must be called before set_data() clobbers cordz_info.
625     // - set_data() must be called before Unref(tree) as it may reference tree.
626     if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info());
627     contents_.set_data(data, length, true);
628     if (tree != nullptr) CordRep::Unref(tree);
629     return *this;
630   }
631   if (tree != nullptr) {
632     CordzUpdateScope scope(contents_.cordz_info(), method);
633     if (tree->tag >= FLAT && tree->flat()->Capacity() >= length &&
634         tree->refcount.IsOne()) {
635       // Copy in place if the existing FLAT node is reusable.
636       memmove(tree->flat()->Data(), data, length);
637       tree->length = length;
638       VerifyTree(tree);
639       return *this;
640     }
641     contents_.SetTree(NewTree(data, length, 0), scope);
642     CordRep::Unref(tree);
643   } else {
644     contents_.EmplaceTree(NewTree(data, length, 0), method);
645   }
646   return *this;
647 }
648 
649 // TODO(sanjay): Move to Cord::InlineRep section of file.  For now,
650 // we keep it here to make diffs easier.
651 void Cord::InlineRep::AppendArray(absl::string_view src,
652                                   MethodIdentifier method) {
653   if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
654 
655   size_t appended = 0;
656   CordRep* rep = tree();
657   const CordRep* const root = rep;
658   CordzUpdateScope scope(root ? cordz_info() : nullptr, method);
659   if (root != nullptr) {
660     char* region;
661     if (PrepareAppendRegion(rep, &region, &appended, src.size())) {
662       memcpy(region, src.data(), appended);
663     }
664   } else {
665     // Try to fit in the inline buffer if possible.
666     size_t inline_length = inline_size();
667     if (src.size() <= kMaxInline - inline_length) {
668       // Append new data to embedded array
669       memcpy(data_.as_chars() + inline_length, src.data(), src.size());
670       set_inline_size(inline_length + src.size());
671       return;
672     }
673 
674     // Note: we don't concern ourselves with whether src aliases data stored in
675     // the inlined data of 'this', as we update the InlineData only at the end.
676     // We are going from an inline size to beyond inline size. Make the new size
677     // either double the inlined size plus the added size, or the inlined size plus 10% of the added size.
678     const size_t size1 = inline_length * 2 + src.size();
679     const size_t size2 = inline_length + src.size() / 10;
680     rep = CordRepFlat::New(std::max<size_t>(size1, size2));
681     appended = std::min(src.size(), rep->flat()->Capacity() - inline_length);
682     memcpy(rep->flat()->Data(), data_.as_chars(), inline_length);
683     memcpy(rep->flat()->Data() + inline_length, src.data(), appended);
684     rep->length = inline_length + appended;
685   }
686 
687   src.remove_prefix(appended);
688   if (src.empty()) {
689     CommitTree(root, rep, scope, method);
690     return;
691   }
692 
693   if (cord_ring_enabled()) {
694     rep = ForceRing(rep, (src.size() - 1) / kMaxFlatLength + 1);
695     rep = CordRepRing::Append(rep->ring(), src);
696   } else {
697     // Use new block(s) for any remaining bytes that were not handled above.
698     // Alloc extra memory only if the right child of the root of the new tree
699     // is going to be a FLAT node, which will permit further inplace appends.
700     size_t length = src.size();
701     if (src.size() < kMaxFlatLength) {
702       // The new length is either
703       // - old size + 10%
704       // - old_size + src.size()
705       // This will cause a reasonable conservative step-up in size that is
706       // still large enough to avoid excessive amounts of small fragments
707       // being added.
708       length = std::max<size_t>(rep->length / 10, src.size());
709     }
710     rep = Concat(rep, NewTree(src.data(), src.size(), length - src.size()));
711   }
712   CommitTree(root, rep, scope, method);
713 }
714 
715 inline CordRep* Cord::TakeRep() const& {
716   return CordRep::Ref(contents_.tree());
717 }
718 
719 inline CordRep* Cord::TakeRep() && {
720   CordRep* rep = contents_.tree();
721   contents_.clear();
722   return rep;
723 }
724 
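// Shared implementation of Append(const Cord&) and Append(Cord&&): `C` is
// either `const Cord&` or `Cord&&`, so `std::forward<C>(src).TakeRep()` below
// either adds a reference to the source tree or steals it from `src`.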
725 template <typename C>
726 inline void Cord::AppendImpl(C&& src) {
727   auto constexpr method = CordzUpdateTracker::kAppendCord;
728   if (empty()) {
729     // Since destination is empty, we can avoid allocating a node,
730     if (src.contents_.is_tree()) {
731       // by taking the tree directly
732       CordRep* rep = std::forward<C>(src).TakeRep();
733       contents_.EmplaceTree(rep, method);
734     } else {
735       // or copying over inline data
736       contents_.data_ = src.contents_.data_;
737     }
738     return;
739   }
740 
741   // For short cords, it is faster to copy data if there is room in dst.
742   const size_t src_size = src.contents_.size();
743   if (src_size <= kMaxBytesToCopy) {
744     CordRep* src_tree = src.contents_.tree();
745     if (src_tree == nullptr) {
746       // src has embedded data.
747       contents_.AppendArray({src.contents_.data(), src_size}, method);
748       return;
749     }
750     if (src_tree->tag >= FLAT) {
751       // src tree just has one flat node.
752       contents_.AppendArray({src_tree->flat()->Data(), src_size}, method);
753       return;
754     }
755     if (&src == this) {
756       // ChunkIterator below assumes that src is not modified during traversal.
757       Append(Cord(src));
758       return;
759     }
760     // TODO(mec): Should we only do this if "dst" has space?
761     for (absl::string_view chunk : src.Chunks()) {
762       Append(chunk);
763     }
764     return;
765   }
766 
767   // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize)
768   CordRep* rep = std::forward<C>(src).TakeRep();
769   contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord);
770 }
771 
772 void Cord::Append(const Cord& src) { AppendImpl(src); }
773 
774 void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
775 
776 template <typename T, Cord::EnableIfString<T>>
777 void Cord::Append(T&& src) {
778   if (src.size() <= kMaxBytesToCopy) {
779     Append(absl::string_view(src));
780   } else {
781     CordRep* rep = CordRepFromString(std::forward<T>(src));
782     contents_.AppendTree(rep, CordzUpdateTracker::kAppendString);
783   }
784 }
785 
786 template void Cord::Append(std::string&& src);
787 
788 void Cord::Prepend(const Cord& src) {
789   CordRep* src_tree = src.contents_.tree();
790   if (src_tree != nullptr) {
791     CordRep::Ref(src_tree);
792     contents_.PrependTree(src_tree, CordzUpdateTracker::kPrependCord);
793     return;
794   }
795 
796   // `src` cord is inlined.
797   absl::string_view src_contents(src.contents_.data(), src.contents_.size());
798   return Prepend(src_contents);
799 }
800 
801 void Cord::Prepend(absl::string_view src) {
802   if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
803   if (!contents_.is_tree()) {
804     size_t cur_size = contents_.inline_size();
805     if (cur_size + src.size() <= InlineRep::kMaxInline) {
806       // Use embedded storage.
807       char data[InlineRep::kMaxInline + 1] = {0};
808       memcpy(data, src.data(), src.size());
809       memcpy(data + src.size(), contents_.data(), cur_size);
810       memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1);
811       contents_.set_inline_size(cur_size + src.size());
812       return;
813     }
814   }
815   CordRep* rep = NewTree(src.data(), src.size(), 0);
816   contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
817 }
818 
819 template <typename T, Cord::EnableIfString<T>>
820 inline void Cord::Prepend(T&& src) {
821   if (src.size() <= kMaxBytesToCopy) {
822     Prepend(absl::string_view(src));
823   } else {
824     CordRep* rep = CordRepFromString(std::forward<T>(src));
825     contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
826   }
827 }
828 
829 template void Cord::Prepend(std::string&& src);
830 
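// Returns a tree equal to `node` with its first `n` bytes removed. Descends
// the left spine, collecting the right subtrees that must be kept, and then
// re-concatenates them; the input tree itself is not modified.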
831 static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
832   if (n >= node->length) return nullptr;
833   if (n == 0) return CordRep::Ref(node);
834   absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;
835 
836   while (node->tag == CONCAT) {
837     assert(n <= node->length);
838     if (n < node->concat()->left->length) {
839       // Push right to stack, descend left.
840       rhs_stack.push_back(node->concat()->right);
841       node = node->concat()->left;
842     } else {
843       // Drop left, descend right.
844       n -= node->concat()->left->length;
845       node = node->concat()->right;
846     }
847   }
848   assert(n <= node->length);
849 
850   if (n == 0) {
851     CordRep::Ref(node);
852   } else {
853     size_t start = n;
854     size_t len = node->length - n;
855     if (node->tag == SUBSTRING) {
856       // Consider an in-place update of node, similar to the one in RemoveSuffixFrom().
857       start += node->substring()->start;
858       node = node->substring()->child;
859     }
860     node = NewSubstring(CordRep::Ref(node), start, len);
861   }
862   while (!rhs_stack.empty()) {
863     node = Concat(node, CordRep::Ref(rhs_stack.back()));
864     rhs_stack.pop_back();
865   }
866   return node;
867 }
868 
869 // RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
870 // exception that removing a suffix has an optimization where a node may be
871 // edited in place iff that node and all its ancestors have a refcount of 1.
872 static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
873   if (n >= node->length) return nullptr;
874   if (n == 0) return CordRep::Ref(node);
875   absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
876   bool inplace_ok = node->refcount.IsOne();
877 
878   while (node->tag == CONCAT) {
879     assert(n <= node->length);
880     if (n < node->concat()->right->length) {
881       // Push left to stack, descend right.
882       lhs_stack.push_back(node->concat()->left);
883       node = node->concat()->right;
884     } else {
885       // Drop right, descend left.
886       n -= node->concat()->right->length;
887       node = node->concat()->left;
888     }
889     inplace_ok = inplace_ok && node->refcount.IsOne();
890   }
891   assert(n <= node->length);
892 
893   if (n == 0) {
894     CordRep::Ref(node);
895   } else if (inplace_ok && node->tag != EXTERNAL) {
896     // Consider making a new buffer if the current node capacity is much
897     // larger than the new length.
898     CordRep::Ref(node);
899     node->length -= n;
900   } else {
901     size_t start = 0;
902     size_t len = node->length - n;
903     if (node->tag == SUBSTRING) {
904       start = node->substring()->start;
905       node = node->substring()->child;
906     }
907     node = NewSubstring(CordRep::Ref(node), start, len);
908   }
909   while (!lhs_stack.empty()) {
910     node = Concat(CordRep::Ref(lhs_stack.back()), node);
911     lhs_stack.pop_back();
912   }
913   return node;
914 }
915 
916 void Cord::RemovePrefix(size_t n) {
917   ABSL_INTERNAL_CHECK(n <= size(),
918                       absl::StrCat("Requested prefix size ", n,
919                                    " exceeds Cord's size ", size()));
920   CordRep* tree = contents_.tree();
921   if (tree == nullptr) {
922     contents_.remove_prefix(n);
923   } else {
924     auto constexpr method = CordzUpdateTracker::kRemovePrefix;
925     CordzUpdateScope scope(contents_.cordz_info(), method);
926     if (tree->tag == RING) {
927       tree = CordRepRing::RemovePrefix(tree->ring(), n);
928     } else {
929       CordRep* newrep = RemovePrefixFrom(tree, n);
930       CordRep::Unref(tree);
931       tree = VerifyTree(newrep);
932     }
933     contents_.SetTreeOrEmpty(tree, scope);
934   }
935 }
936 
937 void Cord::RemoveSuffix(size_t n) {
938   ABSL_INTERNAL_CHECK(n <= size(),
939                       absl::StrCat("Requested suffix size ", n,
940                                    " exceeds Cord's size ", size()));
941   CordRep* tree = contents_.tree();
942   if (tree == nullptr) {
943     contents_.reduce_size(n);
944   } else {
945     auto constexpr method = CordzUpdateTracker::kRemoveSuffix;
946     CordzUpdateScope scope(contents_.cordz_info(), method);
947     if (tree->tag == RING) {
948       tree = CordRepRing::RemoveSuffix(tree->ring(), n);
949     } else {
950       CordRep* newrep = RemoveSuffixFrom(tree, n);
951       CordRep::Unref(tree);
952       tree = VerifyTree(newrep);
953     }
954     contents_.SetTreeOrEmpty(tree, scope);
955   }
956 }
957 
958 // Work item for NewSubRange().
959 struct SubRange {
960   SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
961       : node(a_node), pos(a_pos), n(a_n) {}
962   CordRep* node;  // nullptr means concat last 2 results.
963   size_t pos;
964   size_t n;
965 };
966 
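// Builds a tree for the byte range [pos, pos + n) of `node`, using an
// explicit work stack instead of recursion. A work item with a null node is a
// marker meaning "concatenate the last two results".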
967 static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
968   absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
969   absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
970   todo.push_back(SubRange(node, pos, n));
971   do {
972     const SubRange& sr = todo.back();
973     node = sr.node;
974     pos = sr.pos;
975     n = sr.n;
976     todo.pop_back();
977 
978     if (node == nullptr) {
979       assert(results.size() >= 2);
980       CordRep* right = results.back();
981       results.pop_back();
982       CordRep* left = results.back();
983       results.pop_back();
984       results.push_back(Concat(left, right));
985     } else if (pos == 0 && n == node->length) {
986       results.push_back(CordRep::Ref(node));
987     } else if (node->tag != CONCAT) {
988       if (node->tag == SUBSTRING) {
989         pos += node->substring()->start;
990         node = node->substring()->child;
991       }
992       results.push_back(NewSubstring(CordRep::Ref(node), pos, n));
993     } else if (pos + n <= node->concat()->left->length) {
994       todo.push_back(SubRange(node->concat()->left, pos, n));
995     } else if (pos >= node->concat()->left->length) {
996       pos -= node->concat()->left->length;
997       todo.push_back(SubRange(node->concat()->right, pos, n));
998     } else {
999       size_t left_n = node->concat()->left->length - pos;
1000       todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
1001       todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
1002       todo.push_back(SubRange(node->concat()->left, pos, left_n));
1003     }
1004   } while (!todo.empty());
1005   assert(results.size() == 1);
1006   return results[0];
1007 }
1008 
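// Returns the sub-range [pos, pos + new_size) of this cord, with both
// arguments clamped to the cord's size. Small results are stored inline;
// larger results share (reference) nodes of the source tree.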
1009 Cord Cord::Subcord(size_t pos, size_t new_size) const {
1010   Cord sub_cord;
1011   size_t length = size();
1012   if (pos > length) pos = length;
1013   if (new_size > length - pos) new_size = length - pos;
1014   if (new_size == 0) return sub_cord;
1015 
1016   CordRep* tree = contents_.tree();
1017   if (tree == nullptr) {
1018     // sub_cord is newly constructed, no need to re-zero-out the tail of
1019     // contents_ memory.
1020     sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
1021     return sub_cord;
1022   }
1023 
1024   if (new_size <= InlineRep::kMaxInline) {
1025     char* dest = sub_cord.contents_.data_.as_chars();
1026     Cord::ChunkIterator it = chunk_begin();
1027     it.AdvanceBytes(pos);
1028     size_t remaining_size = new_size;
1029     while (remaining_size > it->size()) {
1030       cord_internal::SmallMemmove(dest, it->data(), it->size());
1031       remaining_size -= it->size();
1032       dest += it->size();
1033       ++it;
1034     }
1035     cord_internal::SmallMemmove(dest, it->data(), remaining_size);
1036     sub_cord.contents_.set_inline_size(new_size);
1037     return sub_cord;
1038   }
1039 
1040   if (tree->tag == RING) {
1041     CordRepRing* ring = CordRep::Ref(tree)->ring();
1042     tree = CordRepRing::SubRing(ring, pos, new_size);
1043   } else {
1044     tree = NewSubRange(tree, pos, new_size);
1045   }
1046   sub_cord.contents_.EmplaceTree(tree, contents_.data_,
1047                                  CordzUpdateTracker::kSubCord);
1048   return sub_cord;
1049 }
1050 
1051 // --------------------------------------------------------------------
1052 // Balancing
1053 
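// CordForest implements rebalancing: Build() tears an unbalanced concat tree
// apart into leaves and sufficiently balanced subtrees, AddNode() merges them
// into a forest of trees bucketed by the Fibonacci-based min_length table,
// and ConcatNodes() concatenates the forest back into a single tree.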
1054 class CordForest {
1055  public:
1056   explicit CordForest(size_t length)
1057       : root_length_(length), trees_(kMinLengthSize, nullptr) {}
1058 
1059   void Build(CordRep* cord_root) {
1060     std::vector<CordRep*> pending = {cord_root};
1061 
1062     while (!pending.empty()) {
1063       CordRep* node = pending.back();
1064       pending.pop_back();
1065       CheckNode(node);
1066       if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
1067         AddNode(node);
1068         continue;
1069       }
1070 
1071       CordRepConcat* concat_node = node->concat();
1072       if (concat_node->depth() >= kMinLengthSize ||
1073           concat_node->length < min_length[concat_node->depth()]) {
1074         pending.push_back(concat_node->right);
1075         pending.push_back(concat_node->left);
1076 
1077         if (concat_node->refcount.IsOne()) {
1078           concat_node->left = concat_freelist_;
1079           concat_freelist_ = concat_node;
1080         } else {
1081           CordRep::Ref(concat_node->right);
1082           CordRep::Ref(concat_node->left);
1083           CordRep::Unref(concat_node);
1084         }
1085       } else {
1086         AddNode(node);
1087       }
1088     }
1089   }
1090 
1091   CordRep* ConcatNodes() {
1092     CordRep* sum = nullptr;
1093     for (auto* node : trees_) {
1094       if (node == nullptr) continue;
1095 
1096       sum = PrependNode(node, sum);
1097       root_length_ -= node->length;
1098       if (root_length_ == 0) break;
1099     }
1100     ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
1101     return VerifyTree(sum);
1102   }
1103 
1104  private:
1105   CordRep* AppendNode(CordRep* node, CordRep* sum) {
1106     return (sum == nullptr) ? node : MakeConcat(sum, node);
1107   }
1108 
1109   CordRep* PrependNode(CordRep* node, CordRep* sum) {
1110     return (sum == nullptr) ? node : MakeConcat(node, sum);
1111   }
1112 
1113   void AddNode(CordRep* node) {
1114     CordRep* sum = nullptr;
1115 
1116     // Collect together everything that we will merge with node.
1117     int i = 0;
1118     for (; node->length > min_length[i + 1]; ++i) {
1119       auto& tree_at_i = trees_[i];
1120 
1121       if (tree_at_i == nullptr) continue;
1122       sum = PrependNode(tree_at_i, sum);
1123       tree_at_i = nullptr;
1124     }
1125 
1126     sum = AppendNode(node, sum);
1127 
1128     // Insert sum into appropriate place in the forest
1129     for (; sum->length >= min_length[i]; ++i) {
1130       auto& tree_at_i = trees_[i];
1131       if (tree_at_i == nullptr) continue;
1132 
1133       sum = MakeConcat(tree_at_i, sum);
1134       tree_at_i = nullptr;
1135     }
1136 
1137     // min_length[0] == 1, which means sum->length >= min_length[0]
1138     assert(i > 0);
1139     trees_[i - 1] = sum;
1140   }
1141 
1142   // Makes a concat node, trying to reuse existing CordRepConcat nodes we
1143   // already collected in the concat_freelist_.
1144   CordRep* MakeConcat(CordRep* left, CordRep* right) {
1145     if (concat_freelist_ == nullptr) return RawConcat(left, right);
1146 
1147     CordRepConcat* rep = concat_freelist_;
1148     if (concat_freelist_->left == nullptr) {
1149       concat_freelist_ = nullptr;
1150     } else {
1151       concat_freelist_ = concat_freelist_->left->concat();
1152     }
1153     SetConcatChildren(rep, left, right);
1154 
1155     return rep;
1156   }
1157 
1158   static void CheckNode(CordRep* node) {
1159     ABSL_INTERNAL_CHECK(node->length != 0u, "");
1160     if (node->tag == CONCAT) {
1161       ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
1162       ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
1163       ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
1164                                            node->concat()->right->length),
1165                           "");
1166     }
1167   }
1168 
1169   size_t root_length_;
1170 
1171   // use an inlined vector instead of a flat array to get bounds checking
1172   absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;
1173 
1174   // List of concat nodes we can re-use for Cord balancing.
1175   CordRepConcat* concat_freelist_ = nullptr;
1176 };
1177 
1178 static CordRep* Rebalance(CordRep* node) {
1179   VerifyTree(node);
1180   assert(node->tag == CONCAT);
1181 
1182   if (node->length == 0) {
1183     return nullptr;
1184   }
1185 
1186   CordForest forest(node->length);
1187   forest.Build(node);
1188   return forest.ConcatNodes();
1189 }
1190 
1191 // --------------------------------------------------------------------
1192 // Comparators
1193 
1194 namespace {
1195 
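// Maps a raw memcmp() result onto {-1, 0, +1}.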
1196 int ClampResult(int memcmp_res) {
1197   return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
1198 }
1199 
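// Compares the common-length prefix of *lhs and *rhs, consuming it from both
// views and from *size_to_compare, and returns the memcmp() result for that
// prefix (0 if they match).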
1200 int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
1201                   size_t* size_to_compare) {
1202   size_t compared_size = std::min(lhs->size(), rhs->size());
1203   assert(*size_to_compare >= compared_size);
1204   *size_to_compare -= compared_size;
1205 
1206   int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
1207   if (memcmp_res != 0) return memcmp_res;
1208 
1209   lhs->remove_prefix(compared_size);
1210   rhs->remove_prefix(compared_size);
1211 
1212   return 0;
1213 }
1214 
1215 // This overload set computes comparison results from a memcmp result. This
1216 // interface is used inside GenericCompare below. Different implementations
1217 // are specialized for int and bool. For int we clamp the result to the
1218 // {-1, 0, 1} set. For bool we are just interested in whether "value == 0".
1219 template <typename ResultType>
1220 ResultType ComputeCompareResult(int memcmp_res) {
1221   return ClampResult(memcmp_res);
1222 }
1223 template <>
1224 bool ComputeCompareResult<bool>(int memcmp_res) {
1225   return memcmp_res == 0;
1226 }
1227 
1228 }  // namespace
1229 
1230 // Helper routine. Locates the first flat chunk of the Cord without
1231 // initializing the iterator.
1232 inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
1233   if (!is_tree()) {
1234     return absl::string_view(data_.as_chars(), data_.inline_size());
1235   }
1236 
1237   CordRep* node = tree();
1238   if (node->tag >= FLAT) {
1239     return absl::string_view(node->flat()->Data(), node->length);
1240   }
1241 
1242   if (node->tag == EXTERNAL) {
1243     return absl::string_view(node->external()->base, node->length);
1244   }
1245 
1246   if (node->tag == RING) {
1247     return node->ring()->entry_data(node->ring()->head());
1248   }
1249 
1250   // Walk down the left branches until we hit a non-CONCAT node.
1251   while (node->tag == CONCAT) {
1252     node = node->concat()->left;
1253   }
1254 
1255   // Get the child node if we encounter a SUBSTRING.
1256   size_t offset = 0;
1257   size_t length = node->length;
1258   assert(length != 0);
1259 
1260   if (node->tag == SUBSTRING) {
1261     offset = node->substring()->start;
1262     node = node->substring()->child;
1263   }
1264 
1265   if (node->tag >= FLAT) {
1266     return absl::string_view(node->flat()->Data() + offset, length);
1267   }
1268 
1269   assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");
1270 
1271   return absl::string_view(node->external()->base + offset, length);
1272 }
1273 
1274 inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
1275                                  size_t size_to_compare) const {
1276   auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
1277     if (!chunk->empty()) return true;
1278     ++*it;
1279     if (it->bytes_remaining_ == 0) return false;
1280     *chunk = **it;
1281     return true;
1282   };
1283 
1284   Cord::ChunkIterator lhs_it = chunk_begin();
1285 
1286   // compared_size is inside first chunk.
1287   absl::string_view lhs_chunk =
1288       (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
1289   assert(compared_size <= lhs_chunk.size());
1290   assert(compared_size <= rhs.size());
1291   lhs_chunk.remove_prefix(compared_size);
1292   rhs.remove_prefix(compared_size);
1293   size_to_compare -= compared_size;  // skip already compared size.
1294 
1295   while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
1296     int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
1297     if (comparison_result != 0) return comparison_result;
1298     if (size_to_compare == 0) return 0;
1299   }
1300 
1301   return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
1302 }
1303 
1304 inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
1305                                  size_t size_to_compare) const {
1306   auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
1307     if (!chunk->empty()) return true;
1308     ++*it;
1309     if (it->bytes_remaining_ == 0) return false;
1310     *chunk = **it;
1311     return true;
1312   };
1313 
1314   Cord::ChunkIterator lhs_it = chunk_begin();
1315   Cord::ChunkIterator rhs_it = rhs.chunk_begin();
1316 
1317   // compared_size is inside both first chunks.
1318   absl::string_view lhs_chunk =
1319       (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
1320   absl::string_view rhs_chunk =
1321       (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
1322   assert(compared_size <= lhs_chunk.size());
1323   assert(compared_size <= rhs_chunk.size());
1324   lhs_chunk.remove_prefix(compared_size);
1325   rhs_chunk.remove_prefix(compared_size);
1326   size_to_compare -= compared_size;  // skip already compared size.
1327 
1328   while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
1329     int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
1330     if (memcmp_res != 0) return memcmp_res;
1331     if (size_to_compare == 0) return 0;
1332   }
1333 
1334   return static_cast<int>(rhs_chunk.empty()) -
1335          static_cast<int>(lhs_chunk.empty());
1336 }
1337 
1338 inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
1339   return c.contents_.FindFlatStartPiece();
1340 }
1341 inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
1342   return sv;
1343 }
1344 
1345 // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
1346 // that 'size_to_compare' is greater than the size of the smaller of the first chunks.
1347 template <typename ResultType, typename RHS>
1348 ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
1349                           size_t size_to_compare) {
1350   absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
1351   absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);
1352 
1353   size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
1354   assert(size_to_compare >= compared_size);
1355   int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
1356   if (compared_size == size_to_compare || memcmp_res != 0) {
1357     return ComputeCompareResult<ResultType>(memcmp_res);
1358   }
1359 
1360   return ComputeCompareResult<ResultType>(
1361       lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
1362 }
1363 
1364 bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
1365   return GenericCompare<bool>(*this, rhs, size_to_compare);
1366 }
1367 
1368 bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
1369   return GenericCompare<bool>(*this, rhs, size_to_compare);
1370 }
1371 
1372 template <typename RHS>
1373 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1374   size_t lhs_size = lhs.size();
1375   size_t rhs_size = rhs.size();
1376   if (lhs_size == rhs_size) {
1377     return GenericCompare<int>(lhs, rhs, lhs_size);
1378   }
1379   if (lhs_size < rhs_size) {
1380     auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1381     return data_comp_res == 0 ? -1 : data_comp_res;
1382   }
1383 
1384   auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1385   return data_comp_res == 0 ? +1 : data_comp_res;
1386 }
1387 
1388 int Cord::Compare(absl::string_view rhs) const {
1389   return SharedCompareImpl(*this, rhs);
1390 }
1391 
1392 int Cord::CompareImpl(const Cord& rhs) const {
1393   return SharedCompareImpl(*this, rhs);
1394 }
1395 
1396 bool Cord::EndsWith(absl::string_view rhs) const {
1397   size_t my_size = size();
1398   size_t rhs_size = rhs.size();
1399 
1400   if (my_size < rhs_size) return false;
1401 
1402   Cord tmp(*this);
1403   tmp.RemovePrefix(my_size - rhs_size);
1404   return tmp.EqualsImpl(rhs, rhs_size);
1405 }
1406 
1407 bool Cord::EndsWith(const Cord& rhs) const {
1408   size_t my_size = size();
1409   size_t rhs_size = rhs.size();
1410 
1411   if (my_size < rhs_size) return false;
1412 
1413   Cord tmp(*this);
1414   tmp.RemovePrefix(my_size - rhs_size);
1415   return tmp.EqualsImpl(rhs, rhs_size);
1416 }
1417 
1418 // --------------------------------------------------------------------
1419 // Misc.
1420 
1421 Cord::operator std::string() const {
1422   std::string s;
1423   absl::CopyCordToString(*this, &s);
1424   return s;
1425 }
1426 
1427 void CopyCordToString(const Cord& src, std::string* dst) {
1428   if (!src.contents_.is_tree()) {
1429     src.contents_.CopyTo(dst);
1430   } else {
1431     absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1432     src.CopyToArraySlowPath(&(*dst)[0]);
1433   }
1434 }
1435 
1436 void Cord::CopyToArraySlowPath(char* dst) const {
1437   assert(contents_.is_tree());
1438   absl::string_view fragment;
1439   if (GetFlatAux(contents_.tree(), &fragment)) {
1440     memcpy(dst, fragment.data(), fragment.size());
1441     return;
1442   }
1443   for (absl::string_view chunk : Chunks()) {
1444     memcpy(dst, chunk.data(), chunk.size());
1445     dst += chunk.size();
1446   }
1447 }
1448 
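// Advances the iterator to the next leaf chunk using the stack of pending
// right children: pops the next subtree, descends its left spine (pushing
// right children), and unwraps a SUBSTRING node if present.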
1449 Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() {
1450   auto& stack_of_right_children = stack_of_right_children_;
1451   if (stack_of_right_children.empty()) {
1452     assert(!current_chunk_.empty());  // Called on invalid iterator.
1453     // We have reached the end of the Cord.
1454     return *this;
1455   }
1456 
1457   // Process the next node on the stack.
1458   CordRep* node = stack_of_right_children.back();
1459   stack_of_right_children.pop_back();
1460 
1461   // Walk down the left branches until we hit a non-CONCAT node. Save the
1462   // right children to the stack for subsequent traversal.
1463   while (node->tag == CONCAT) {
1464     stack_of_right_children.push_back(node->concat()->right);
1465     node = node->concat()->left;
1466   }
1467 
1468   // Get the child node if we encounter a SUBSTRING.
1469   size_t offset = 0;
1470   size_t length = node->length;
1471   if (node->tag == SUBSTRING) {
1472     offset = node->substring()->start;
1473     node = node->substring()->child;
1474   }
1475 
1476   assert(node->tag == EXTERNAL || node->tag >= FLAT);
1477   assert(length != 0);
1478   const char* data =
1479       node->tag == EXTERNAL ? node->external()->base : node->flat()->Data();
1480   current_chunk_ = absl::string_view(data + offset, length);
1481   current_leaf_ = node;
1482   return *this;
1483 }
1484 
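// Returns a Cord containing the next `n` bytes and advances the iterator past
// them. Small reads are flattened into inline storage, reads from a RING
// representation either copy a small prefix or take a SubRing of the
// underlying ring, and reads from a CONCAT tree are assembled by referencing
// (not copying) the covered subtrees.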
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                        "Attempted to iterate past `end()`");
  Cord subcord;
  auto constexpr method = CordzUpdateTracker::kCordReader;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }

  if (ring_reader_) {
    size_t chunk_size = current_chunk_.size();
    if (n <= chunk_size && n <= kMaxBytesToCopy) {
      subcord = Cord(current_chunk_.substr(0, n), method);
    } else {
      auto* ring = CordRep::Ref(ring_reader_.ring())->ring();
      size_t offset = ring_reader_.length() - bytes_remaining_;
      CordRep* rep = CordRepRing::SubRing(ring, offset, n);
      subcord.contents_.EmplaceTree(rep, method);
    }
    if (n < chunk_size) {
      bytes_remaining_ -= n;
      current_chunk_.remove_prefix(n);
    } else {
      AdvanceBytesRing(n);
    }
    return subcord;
  }

  auto& stack_of_right_children = stack_of_right_children_;
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = CordRep::Ref(current_leaf_);
    const char* data = subnode->tag == EXTERNAL ? subnode->external()->base
                                                : subnode->flat()->Data();
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = CordRep::Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    const char* data = subnode->tag == EXTERNAL ? subnode->external()->base
                                                : subnode->flat()->Data();
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children.empty()) {
    node = stack_of_right_children.back();
    stack_of_right_children.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, CordRep::Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, CordRep::Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) {
    subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n));
  }
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
  return subcord;
}

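// Skips `n` bytes when the skip crosses the current chunk boundary: discards
// whole pending subtrees while their length fits in `n`, then descends into
// the subtree containing the target byte and repositions current_chunk_ at
// the remaining offset.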
void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  auto& stack_of_right_children = stack_of_right_children_;
  while (!stack_of_right_children.empty()) {
    node = stack_of_right_children.back();
    stack_of_right_children.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}

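// Random access into the cord. For tree-backed cords this walks from the root
// toward the leaf containing byte `i`, so the cost is proportional to the
// depth of the tree rather than constant.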
char Cord::operator[](size_t i) const {
  ABSL_HARDENING_ASSERT(i < size());
  size_t offset = i;
  const CordRep* rep = contents_.tree();
  if (rep == nullptr) {
    return contents_.data()[i];
  }
  while (true) {
    assert(rep != nullptr);
    assert(offset < rep->length);
    if (rep->tag >= FLAT) {
      // Get the "i"th character directly from the flat array.
      return rep->flat()->Data()[offset];
    } else if (rep->tag == RING) {
      return rep->ring()->GetCharacter(offset);
    } else if (rep->tag == EXTERNAL) {
      // Get the "i"th character from the external array.
      return rep->external()->base[offset];
    } else if (rep->tag == CONCAT) {
      // Recursively branch to the side of the concatenation that the "i"th
      // character is on.
      size_t left_length = rep->concat()->left->length;
      if (offset < left_length) {
        rep = rep->concat()->left;
      } else {
        offset -= left_length;
        rep = rep->concat()->right;
      }
    } else {
      // This must be a substring node, so bypass it to get to the child.
      assert(rep->tag == SUBSTRING);
      offset += rep->substring()->start;
      rep = rep->substring()->child;
    }
  }
}

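// Flattens a tree-backed cord into a single contiguous buffer and returns a
// view of it. Contents that fit in the largest flat node are copied into a
// new flat rep; larger contents go into a heap buffer wrapped in an external
// rep. The cord is then repointed at the new representation.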
absl::string_view Cord::FlattenSlowPath() {
  assert(contents_.is_tree());
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = CordRepFlat::New(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->flat()->Data();
    CopyToArraySlowPath(new_buffer);
  } else {
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten);
  CordRep::Unref(contents_.as_tree());
  contents_.SetTree(new_rep, scope);
  return absl::string_view(new_buffer, total_size);
}

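// If `rep` is backed by a single contiguous region of memory (a flat or
// external node, possibly reached through a substring or a flat ring), sets
// `*fragment` to that region and returns true. Returns false when the data is
// split across multiple nodes.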
/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
  assert(rep != nullptr);
  if (rep->tag >= FLAT) {
    *fragment = absl::string_view(rep->flat()->Data(), rep->length);
    return true;
  } else if (rep->tag == EXTERNAL) {
    *fragment = absl::string_view(rep->external()->base, rep->length);
    return true;
  } else if (rep->tag == RING) {
    return rep->ring()->IsFlat(fragment);
  } else if (rep->tag == SUBSTRING) {
    CordRep* child = rep->substring()->child;
    if (child->tag >= FLAT) {
      *fragment = absl::string_view(
          child->flat()->Data() + rep->substring()->start, rep->length);
      return true;
    } else if (child->tag == EXTERNAL) {
      *fragment = absl::string_view(
          child->external()->base + rep->substring()->start, rep->length);
      return true;
    } else if (child->tag == RING) {
      return child->ring()->IsFlat(rep->substring()->start, rep->length,
                                   fragment);
    }
  }
  return false;
}

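// Invokes `callback` once per leaf chunk, in order. RING reps are walked with
// a ChunkIterator; CONCAT trees are traversed iteratively with a fixed-size
// stack of pending right children, falling back to recursion only if that
// stack fills up.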
/* static */ void Cord::ForEachChunkAux(
    absl::cord_internal::CordRep* rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  if (rep->tag == RING) {
    ChunkIterator it(rep), end;
    while (it != end) {
      callback(*it);
      ++it;
    }
    return;
  }

  assert(rep != nullptr);
  int stack_pos = 0;
  constexpr int stack_max = 128;
  // Stack of right branches for tree traversal
  absl::cord_internal::CordRep* stack[stack_max];
  absl::cord_internal::CordRep* current_node = rep;
  while (true) {
    if (current_node->tag == CONCAT) {
      if (stack_pos == stack_max) {
        // There's no more room on our stack array to add another right branch,
        // and the idea is to avoid allocations, so call this function
        // recursively to navigate this subtree further.  (This is not something
        // we expect to happen in practice).
        ForEachChunkAux(current_node, callback);

        // Pop the next right branch and iterate.
        current_node = stack[--stack_pos];
        continue;
      } else {
        // Save the right branch for later traversal and continue down the left
        // branch.
        stack[stack_pos++] = current_node->concat()->right;
        current_node = current_node->concat()->left;
        continue;
      }
    }
    // This is a leaf node, so invoke our callback.
    absl::string_view chunk;
    bool success = GetFlatAux(current_node, &chunk);
    assert(success);
    if (success) {
      callback(chunk);
    }
    if (stack_pos == 0) {
      // end of traversal
      return;
    }
    current_node = stack[--stack_pos];
  }
}

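// Writes a human-readable dump of the tree rooted at `rep` to `*os`, one node
// per line with its refcount, length, balance flag, and tag-specific details.
// Intended for debugging output only.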
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
                     int indent) {
  const int kIndentStep = 1;
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    *os << " " << std::setw(indent) << "";
    if (rep->tag == CONCAT) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->tag == SUBSTRING) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf or ring
      if (rep->tag == EXTERNAL) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else if (rep->tag >= FLAT) {
        *os << "FLAT cap=" << rep->flat()->Capacity() << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length));
        *os << "]\n";
      } else {
        assert(rep->tag == RING);
        auto* ring = rep->ring();
        *os << "RING, entries = " << ring->entries() << "\n";
        CordRepRing::index_type head = ring->head();
        do {
          DumpNode(ring->entry_child(head), include_data, os,
                   indent + kIndentStep);
          head = ring->advance(head);
        } while (head != ring->tail());
      }
      if (stack.empty()) break;
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}

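// Builds an error message identifying `node` within the tree rooted at
// `root`, including a full dump of the tree for context.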
static std::string ReportError(CordRep* root, CordRep* node) {
  std::ostringstream buf;
  buf << "Error at node " << node << " in:";
  DumpNode(root, true, &buf);
  return buf.str();
}

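// Checks structural invariants of the tree reachable from `start_node`:
// non-null children and consistent lengths for CONCAT nodes, capacity bounds
// for FLAT nodes, non-null bases for EXTERNAL nodes, and in-bounds offsets
// for SUBSTRING nodes. With `full_validation` the check recurses into concat
// children; otherwise only the start node itself is examined. CHECK-fails
// with a tree dump on any violation and returns true otherwise.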
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation) {
  absl::InlinedVector<CordRep*, 2> worklist;
  worklist.push_back(start_node);
  do {
    CordRep* node = worklist.back();
    worklist.pop_back();

    ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
    if (node != root) {
      ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
    }

    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
                                               node->concat()->right->length),
                          ReportError(root, node));
      if (full_validation) {
        worklist.push_back(node->concat()->right);
        worklist.push_back(node->concat()->left);
      }
    } else if (node->tag >= FLAT) {
      ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(),
                          ReportError(root, node));
    } else if (node->tag == EXTERNAL) {
      ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
                          ReportError(root, node));
    } else if (node->tag == SUBSTRING) {
      ABSL_INTERNAL_CHECK(
          node->substring()->start < node->substring()->child->length,
          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
                              node->substring()->child->length,
                          ReportError(root, node));
    }
  } while (!worklist.empty());
  return true;
}

// Traverses the tree and computes the total memory allocated.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    const CordRep* next_node = nullptr;

    if (cur_node->tag == CONCAT) {
      total_mem_usage += sizeof(CordRepConcat);
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        if (next_node) {
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else if (cur_node->tag == RING) {
      total_mem_usage += CordRepRing::AllocSize(cur_node->ring()->capacity());
      const CordRepRing* ring = cur_node->ring();
      CordRepRing::index_type pos = ring->head(), tail = ring->tail();
      do {
        CordRep* node = ring->entry_child(pos);
        assert(node->tag >= FLAT || node->tag == EXTERNAL);
        RepMemoryUsageLeaf(node, &total_mem_usage);
      } while ((pos = ring->advance(pos)) != tail);
    } else {
      // Since cur_node is not a leaf or a concat node it must be a substring.
      assert(cur_node->tag == SUBSTRING);
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        next_node = nullptr;
      }
    }

    if (!next_node) {
      if (tree_stack.empty()) {
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}

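// Streams the cord chunk by chunk, without flattening it first.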
std::ostream& operator<<(std::ostream& out, const Cord& cord) {
  for (absl::string_view chunk : cord.Chunks()) {
    out.write(chunk.data(), chunk.size());
  }
  return out;
}

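// Accessors for internal constants and layout details, exposed for tests.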
namespace strings_internal {
size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; }
size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; }
size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  return cord_internal::TagToLength(tag);
}
uint8_t CordTestAccess::LengthToTag(size_t s) {
  ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
  return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead);
}
size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
size_t CordTestAccess::SizeofCordRepExternal() {
  return sizeof(CordRepExternal);
}
size_t CordTestAccess::SizeofCordRepSubstring() {
  return sizeof(CordRepSubstring);
}
}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl