1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/cord.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cstddef>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <iomanip>
23 #include <iostream>
24 #include <limits>
25 #include <ostream>
26 #include <sstream>
27 #include <type_traits>
28 #include <unordered_set>
29 #include <vector>
30 
31 #include "absl/base/casts.h"
32 #include "absl/base/internal/raw_logging.h"
33 #include "absl/base/macros.h"
34 #include "absl/base/port.h"
35 #include "absl/container/fixed_array.h"
36 #include "absl/container/inlined_vector.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/internal/cord_internal.h"
39 #include "absl/strings/internal/cord_rep_btree.h"
40 #include "absl/strings/internal/cord_rep_flat.h"
41 #include "absl/strings/internal/cordz_statistics.h"
42 #include "absl/strings/internal/cordz_update_scope.h"
43 #include "absl/strings/internal/cordz_update_tracker.h"
44 #include "absl/strings/internal/resize_uninitialized.h"
45 #include "absl/strings/str_cat.h"
46 #include "absl/strings/str_format.h"
47 #include "absl/strings/str_join.h"
48 #include "absl/strings/string_view.h"
49 
50 namespace absl {
51 ABSL_NAMESPACE_BEGIN
52 
53 using ::absl::cord_internal::CordRep;
54 using ::absl::cord_internal::CordRepBtree;
55 using ::absl::cord_internal::CordRepConcat;
56 using ::absl::cord_internal::CordRepExternal;
57 using ::absl::cord_internal::CordRepFlat;
58 using ::absl::cord_internal::CordRepSubstring;
59 using ::absl::cord_internal::CordzUpdateTracker;
60 using ::absl::cord_internal::InlineData;
61 using ::absl::cord_internal::kMaxFlatLength;
62 using ::absl::cord_internal::kMinFlatLength;
63 
64 using ::absl::cord_internal::kInlinedVectorSize;
65 using ::absl::cord_internal::kMaxBytesToCopy;
66 
67 constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
68   return n == 0 ? a : Fibonacci(n - 1, b, a + b);
69 }
70 
71 static_assert(Fibonacci(63) == 6557470319842,
72               "Fibonacci values computed incorrectly");
73 
74 // Minimum length required for a tree of a given depth -- a tree is considered
75 // balanced if
76 //      length(t) >= min_length[depth(t)]
77 // The root node depth is allowed to become twice as large to reduce rebalancing
78 // for larger strings (see IsRootBalanced).
79 static constexpr uint64_t min_length[] = {
80     Fibonacci(2),          Fibonacci(3),  Fibonacci(4),  Fibonacci(5),
81     Fibonacci(6),          Fibonacci(7),  Fibonacci(8),  Fibonacci(9),
82     Fibonacci(10),         Fibonacci(11), Fibonacci(12), Fibonacci(13),
83     Fibonacci(14),         Fibonacci(15), Fibonacci(16), Fibonacci(17),
84     Fibonacci(18),         Fibonacci(19), Fibonacci(20), Fibonacci(21),
85     Fibonacci(22),         Fibonacci(23), Fibonacci(24), Fibonacci(25),
86     Fibonacci(26),         Fibonacci(27), Fibonacci(28), Fibonacci(29),
87     Fibonacci(30),         Fibonacci(31), Fibonacci(32), Fibonacci(33),
88     Fibonacci(34),         Fibonacci(35), Fibonacci(36), Fibonacci(37),
89     Fibonacci(38),         Fibonacci(39), Fibonacci(40), Fibonacci(41),
90     Fibonacci(42),         Fibonacci(43), Fibonacci(44), Fibonacci(45),
91     Fibonacci(46),         Fibonacci(47),
92     0xffffffffffffffffull,  // Avoid overflow
93 };
94 
95 static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
96 
97 static inline bool btree_enabled() {
98   return cord_internal::cord_btree_enabled.load(
99       std::memory_order_relaxed);
100 }
101 
102 static inline bool IsRootBalanced(CordRep* node) {
103   if (!node->IsConcat()) {
104     return true;
105   } else if (node->concat()->depth() <= 15) {
106     return true;
107   } else if (node->concat()->depth() > kMinLengthSize) {
108     return false;
109   } else {
110     // Allow depth to become twice as large as implied by fibonacci rule to
111     // reduce rebalancing for larger strings.
112     return (node->length >= min_length[node->concat()->depth() / 2]);
113   }
114 }
115 
116 static CordRep* Rebalance(CordRep* node);
117 static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
118                      int indent = 0);
119 static bool VerifyNode(CordRep* root, CordRep* start_node,
120                        bool full_validation);
121 
122 static inline CordRep* VerifyTree(CordRep* node) {
123   // Verification is expensive, so only do it in debug mode.
124   // Even in debug mode we normally do only light validation.
125   // If you are debugging Cord itself, you should define the
126   // macro EXTRA_CORD_VALIDATION, e.g. by adding
127   // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
128 #ifdef EXTRA_CORD_VALIDATION
129   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
130 #else   // EXTRA_CORD_VALIDATION
131   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
132 #endif  // EXTRA_CORD_VALIDATION
133   static_cast<void>(&VerifyNode);
134 
135   return node;
136 }
137 
138 // Return the depth of a node
139 static int Depth(const CordRep* rep) {
140   if (rep->IsConcat()) {
141     return rep->concat()->depth();
142   } else {
143     return 0;
144   }
145 }
146 
147 static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
148                               CordRep* right) {
149   concat->left = left;
150   concat->right = right;
151 
152   concat->length = left->length + right->length;
153   concat->set_depth(1 + std::max(Depth(left), Depth(right)));
154 }
155 
156 // Create a concatenation of the specified nodes.
157 // Does not change the refcounts of "left" and "right".
158 // The returned node has a refcount of 1.
159 static CordRep* RawConcat(CordRep* left, CordRep* right) {
160   // Avoid making degenerate concat nodes (one child is empty)
161   if (left == nullptr) return right;
162   if (right == nullptr) return left;
163   if (left->length == 0) {
164     CordRep::Unref(left);
165     return right;
166   }
167   if (right->length == 0) {
168     CordRep::Unref(right);
169     return left;
170   }
171 
172   CordRepConcat* rep = new CordRepConcat();
173   rep->tag = cord_internal::CONCAT;
174   SetConcatChildren(rep, left, right);
175 
176   return rep;
177 }
178 
179 static CordRep* Concat(CordRep* left, CordRep* right) {
180   CordRep* rep = RawConcat(left, right);
181   if (rep != nullptr && !IsRootBalanced(rep)) {
182     rep = Rebalance(rep);
183   }
184   return VerifyTree(rep);
185 }
186 
187 // Make a balanced tree out of an array of leaf nodes.
188 static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
189   // Make repeated passes over the array, merging adjacent pairs
190   // until we are left with just a single node.
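  // For example, leaves {A, B, C, D, E} become {AB, CD, E} after the first
  // pass, then {ABCD, E}, and finally a single {ABCDE} node.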
191   while (n > 1) {
192     size_t dst = 0;
193     for (size_t src = 0; src < n; src += 2) {
194       if (src + 1 < n) {
195         reps[dst] = Concat(reps[src], reps[src + 1]);
196       } else {
197         reps[dst] = reps[src];
198       }
199       dst++;
200     }
201     n = dst;
202   }
203 
204   return reps[0];
205 }
206 
207 static CordRepFlat* CreateFlat(const char* data, size_t length,
208                                size_t alloc_hint) {
209   CordRepFlat* flat = CordRepFlat::New(length + alloc_hint);
210   flat->length = length;
211   memcpy(flat->Data(), data, length);
212   return flat;
213 }
214 
215 // Creates a new flat or Btree out of the specified array.
216 // The returned node has a refcount of 1.
217 static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) {
218   if (length <= kMaxFlatLength) {
219     return CreateFlat(data, length, alloc_hint);
220   }
221   CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0);
222   data += kMaxFlatLength;
223   length -= kMaxFlatLength;
224   auto* root = CordRepBtree::Create(flat);
225   return CordRepBtree::Append(root, {data, length}, alloc_hint);
226 }
227 
228 // Create a new tree out of the specified array.
229 // The returned node has a refcount of 1.
230 static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) {
231   if (length == 0) return nullptr;
232   if (btree_enabled()) {
233     return NewBtree(data, length, alloc_hint);
234   }
235   absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
236   size_t n = 0;
237   do {
238     const size_t len = std::min(length, kMaxFlatLength);
239     CordRepFlat* rep = CordRepFlat::New(len + alloc_hint);
240     rep->length = len;
241     memcpy(rep->Data(), data, len);
242     reps[n++] = VerifyTree(rep);
243     data += len;
244     length -= len;
245   } while (length != 0);
246   return MakeBalancedTree(reps.data(), n);
247 }
248 
249 namespace cord_internal {
250 
251 void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
252   assert(!data.empty());
253   rep->length = data.size();
254   rep->tag = EXTERNAL;
255   rep->base = data.data();
256   VerifyTree(rep);
257 }
258 
259 }  // namespace cord_internal
260 
261 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
262   // Never create empty substring nodes
263   if (length == 0) {
264     CordRep::Unref(child);
265     return nullptr;
266   } else {
267     CordRepSubstring* rep = new CordRepSubstring();
268     assert((offset + length) <= child->length);
269     rep->length = length;
270     rep->tag = cord_internal::SUBSTRING;
271     rep->start = offset;
272     rep->child = child;
273     return VerifyTree(rep);
274   }
275 }
276 
277 // Creates a CordRep from the provided string. If the string is large enough,
278 // and not wasteful, we move the string into an external cord rep, preserving
279 // the already allocated string contents.
280 // Requires the provided string length to be larger than `kMaxInline`.
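// For example, a 1 MiB string filling most of its capacity is moved into an
// external rep, while a 1 MiB string sitting in a 4 MiB buffer is copied so
// that the unused ~3 MiB is not pinned (assuming kMaxBytesToCopy is well
// below 1 MiB).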
281 static CordRep* CordRepFromString(std::string&& src) {
282   assert(src.length() > cord_internal::kMaxInline);
283   if (
284       // String is short: copy data to avoid external block overhead.
285       src.size() <= kMaxBytesToCopy ||
286       // String is wasteful: copy data to avoid pinning too much unused memory.
287       src.size() < src.capacity() / 2
288   ) {
289     return NewTree(src.data(), src.size(), 0);
290   }
291 
292   struct StringReleaser {
293     void operator()(absl::string_view /* data */) {}
294     std::string data;
295   };
296   const absl::string_view original_data = src;
297   auto* rep =
298       static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
299           absl::cord_internal::NewExternalRep(original_data,
300                                               StringReleaser{std::move(src)}));
301   // Moving src may have invalidated its data pointer, so adjust it.
302   rep->base = rep->template get<0>().data.data();
303   return rep;
304 }
305 
306 // --------------------------------------------------------------------
307 // Cord::InlineRep functions
308 
309 constexpr unsigned char Cord::InlineRep::kMaxInline;
310 
311 inline void Cord::InlineRep::set_data(const char* data, size_t n,
312                                       bool nullify_tail) {
313   static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
314 
315   cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail);
316   set_inline_size(n);
317 }
318 
319 inline char* Cord::InlineRep::set_data(size_t n) {
320   assert(n <= kMaxInline);
321   ResetToEmpty();
322   set_inline_size(n);
323   return data_.as_chars();
324 }
325 
326 inline void Cord::InlineRep::reduce_size(size_t n) {
327   size_t tag = inline_size();
328   assert(tag <= kMaxInline);
329   assert(tag >= n);
330   tag -= n;
331   memset(data_.as_chars() + tag, 0, n);
332   set_inline_size(static_cast<char>(tag));
333 }
334 
335 inline void Cord::InlineRep::remove_prefix(size_t n) {
336   cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n,
337                               inline_size() - n);
338   reduce_size(n);
339 }
340 
341 // Returns `rep` converted into a CordRepBtree.
342 // Directly returns `rep` if `rep` is already a CordRepBtree.
343 static CordRepBtree* ForceBtree(CordRep* rep) {
344   return rep->IsBtree() ? rep->btree() : CordRepBtree::Create(rep);
345 }
346 
347 void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
348                                           MethodIdentifier method) {
349   assert(!is_tree());
350   if (!data_.is_empty()) {
351     CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
352     if (btree_enabled()) {
353       tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree);
354     } else {
355       tree = Concat(flat, tree);
356     }
357   }
358   EmplaceTree(tree, method);
359 }
360 
361 void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) {
362   assert(is_tree());
363   const CordzUpdateScope scope(data_.cordz_info(), method);
364   if (btree_enabled()) {
365     tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree);
366   } else {
367     tree = Concat(data_.as_tree(), tree);
368   }
369   SetTree(tree, scope);
370 }
371 
372 void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) {
373   if (tree == nullptr) return;
374   if (data_.is_tree()) {
375     AppendTreeToTree(tree, method);
376   } else {
377     AppendTreeToInlined(tree, method);
378   }
379 }
380 
381 void Cord::InlineRep::PrependTreeToInlined(CordRep* tree,
382                                            MethodIdentifier method) {
383   assert(!is_tree());
384   if (!data_.is_empty()) {
385     CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
386     if (btree_enabled()) {
387       tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree);
388     } else {
389       tree = Concat(tree, flat);
390     }
391   }
392   EmplaceTree(tree, method);
393 }
394 
395 void Cord::InlineRep::PrependTreeToTree(CordRep* tree,
396                                         MethodIdentifier method) {
397   assert(is_tree());
398   const CordzUpdateScope scope(data_.cordz_info(), method);
399   if (btree_enabled()) {
400     tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree);
401   } else {
402     tree = Concat(tree, data_.as_tree());
403   }
404   SetTree(tree, scope);
405 }
406 
407 void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) {
408   assert(tree != nullptr);
409   if (data_.is_tree()) {
410     PrependTreeToTree(tree, method);
411   } else {
412     PrependTreeToInlined(tree, method);
413   }
414 }
415 
416 // Searches for a non-full flat node at the rightmost leaf of the tree. If a
417 // suitable leaf is found, the function will update the length field for all
418 // nodes to account for the size increase. The append region address will be
419 // written to region and the actual size increase will be written to size.
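// For example, if the rightmost FLAT holds 10 bytes but has capacity for 64
// and max_length is 100, *region points at byte 10, *size is set to 54, and
// every node on the right spine grows in length by 54.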
420 static inline bool PrepareAppendRegion(CordRep* root, char** region,
421                                        size_t* size, size_t max_length) {
422   if (root->IsBtree() && root->refcount.IsOne()) {
423     Span<char> span = root->btree()->GetAppendBuffer(max_length);
424     if (!span.empty()) {
425       *region = span.data();
426       *size = span.size();
427       return true;
428     }
429   }
430 
431   // Search down the right-hand path for a non-full FLAT node.
432   CordRep* dst = root;
433   while (dst->IsConcat() && dst->refcount.IsOne()) {
434     dst = dst->concat()->right;
435   }
436 
437   if (!dst->IsFlat() || !dst->refcount.IsOne()) {
438     *region = nullptr;
439     *size = 0;
440     return false;
441   }
442 
443   const size_t in_use = dst->length;
444   const size_t capacity = dst->flat()->Capacity();
445   if (in_use == capacity) {
446     *region = nullptr;
447     *size = 0;
448     return false;
449   }
450 
451   size_t size_increase = std::min(capacity - in_use, max_length);
452 
453   // We need to update the length fields for all nodes, including the leaf node.
454   for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
455     rep->length += size_increase;
456   }
457   dst->length += size_increase;
458 
459   *region = dst->flat()->Data() + in_use;
460   *size = size_increase;
461   return true;
462 }
463 
464 template <bool has_length>
465 void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
466                                       size_t length) {
467   auto constexpr method = CordzUpdateTracker::kGetAppendRegion;
468 
469   CordRep* root = tree();
470   size_t sz = root ? root->length : inline_size();
471   if (root == nullptr) {
472     size_t available = kMaxInline - sz;
473     if (available >= (has_length ? length : 1)) {
474       *region = data_.as_chars() + sz;
475       *size = has_length ? length : available;
476       set_inline_size(has_length ? sz + length : kMaxInline);
477       return;
478     }
479   }
480 
481   size_t extra = has_length ? length : (std::max)(sz, kMinFlatLength);
482   CordRep* rep = root ? root : MakeFlatWithExtraCapacity(extra);
483   CordzUpdateScope scope(root ? data_.cordz_info() : nullptr, method);
484   if (PrepareAppendRegion(rep, region, size, length)) {
485     CommitTree(root, rep, scope, method);
486     return;
487   }
488 
489   // Allocate new node.
490   CordRepFlat* new_node = CordRepFlat::New(extra);
491   new_node->length = std::min(new_node->Capacity(), length);
492   *region = new_node->Data();
493   *size = new_node->length;
494 
495   if (btree_enabled()) {
496     rep = CordRepBtree::Append(ForceBtree(rep), new_node);
497   } else {
498     rep = Concat(rep, new_node);
499   }
500   CommitTree(root, rep, scope, method);
501 }
502 
503 // Computes the memory size of the provided edge, which must be a valid data edge
504 // for a btree, i.e., a FLAT, EXTERNAL or SUBSTRING of a FLAT or EXTERNAL node.
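// For example, a SUBSTRING over a FLAT contributes sizeof(CordRepSubstring)
// plus the flat's full AllocatedSize(), regardless of the substring length.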
505 static bool RepMemoryUsageDataEdge(const CordRep* rep,
506                                    size_t* total_mem_usage) {
507   size_t maybe_sub_size = 0;
508   if (ABSL_PREDICT_FALSE(rep->IsSubstring())) {
509     maybe_sub_size = sizeof(cord_internal::CordRepSubstring);
510     rep = rep->substring()->child;
511   }
512   if (rep->IsFlat()) {
513     *total_mem_usage += maybe_sub_size + rep->flat()->AllocatedSize();
514     return true;
515   }
516   if (rep->IsExternal()) {
517     // We don't know anything about the embedded / bound data, but we can safely
518     // assume it is 'at least' a word / pointer to data. In the future we may
519     // choose to use the 'data' byte as a tag to identify the types of some
520     // well-known externals, such as a std::string instance.
521     *total_mem_usage += maybe_sub_size +
522                         sizeof(cord_internal::CordRepExternalImpl<intptr_t>) +
523                         rep->length;
524     return true;
525   }
526   return false;
527 }
528 
529 // If the rep is a leaf, this will increment the value at total_mem_usage and
530 // will return true.
531 static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
532   if (rep->IsFlat()) {
533     *total_mem_usage += rep->flat()->AllocatedSize();
534     return true;
535   }
536   if (rep->IsExternal()) {
537     // We don't know anything about the embedded / bound data, but we can safely
538     // assume it is 'at least' a word / pointer to data. In the future we may
539     // choose to use the 'data' byte as a tag to identify the types of some
540     // well-known externals, such as a std::string instance.
541     *total_mem_usage +=
542         sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length;
543     return true;
544   }
545   return false;
546 }
547 
548 void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
549   assert(&src != this);
550   assert(is_tree() || src.is_tree());
551   auto constexpr method = CordzUpdateTracker::kAssignCord;
552   if (ABSL_PREDICT_TRUE(!is_tree())) {
553     EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method);
554     return;
555   }
556 
557   CordRep* tree = as_tree();
558   if (CordRep* src_tree = src.tree()) {
559     // Leave any existing `cordz_info` in place, and let MaybeTrackCord()
560     // decide if this cord should be (or remains to be) sampled or not.
561     data_.set_tree(CordRep::Ref(src_tree));
562     CordzInfo::MaybeTrackCord(data_, src.data_, method);
563   } else {
564     CordzInfo::MaybeUntrackCord(data_.cordz_info());
565     data_ = src.data_;
566   }
567   CordRep::Unref(tree);
568 }
569 
570 void Cord::InlineRep::UnrefTree() {
571   if (is_tree()) {
572     CordzInfo::MaybeUntrackCord(data_.cordz_info());
573     CordRep::Unref(tree());
574   }
575 }
576 
577 // --------------------------------------------------------------------
578 // Constructors and destructors
579 
580 Cord::Cord(absl::string_view src, MethodIdentifier method)
581     : contents_(InlineData::kDefaultInit) {
582   const size_t n = src.size();
583   if (n <= InlineRep::kMaxInline) {
584     contents_.set_data(src.data(), n, true);
585   } else {
586     CordRep* rep = NewTree(src.data(), n, 0);
587     contents_.EmplaceTree(rep, method);
588   }
589 }
590 
591 template <typename T, Cord::EnableIfString<T>>
592 Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) {
593   if (src.size() <= InlineRep::kMaxInline) {
594     contents_.set_data(src.data(), src.size(), true);
595   } else {
596     CordRep* rep = CordRepFromString(std::forward<T>(src));
597     contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
598   }
599 }
600 
601 template Cord::Cord(std::string&& src);
602 
603 // The destruction code is separate so that the compiler can determine
604 // that it does not need to call the destructor on a moved-from Cord.
605 void Cord::DestroyCordSlow() {
606   assert(contents_.is_tree());
607   CordzInfo::MaybeUntrackCord(contents_.cordz_info());
608   CordRep::Unref(VerifyTree(contents_.as_tree()));
609 }
610 
611 // --------------------------------------------------------------------
612 // Mutators
613 
614 void Cord::Clear() {
615   if (CordRep* tree = contents_.clear()) {
616     CordRep::Unref(tree);
617   }
618 }
619 
620 Cord& Cord::AssignLargeString(std::string&& src) {
621   auto constexpr method = CordzUpdateTracker::kAssignString;
622   assert(src.size() > kMaxBytesToCopy);
623   CordRep* rep = CordRepFromString(std::move(src));
624   if (CordRep* tree = contents_.tree()) {
625     CordzUpdateScope scope(contents_.cordz_info(), method);
626     contents_.SetTree(rep, scope);
627     CordRep::Unref(tree);
628   } else {
629     contents_.EmplaceTree(rep, method);
630   }
631   return *this;
632 }
633 
634 Cord& Cord::operator=(absl::string_view src) {
635   auto constexpr method = CordzUpdateTracker::kAssignString;
636   const char* data = src.data();
637   size_t length = src.size();
638   CordRep* tree = contents_.tree();
639   if (length <= InlineRep::kMaxInline) {
640     // Embed into this->contents_, which is somewhat subtle:
641     // - MaybeUntrackCord must be called before Unref(tree).
642     // - MaybeUntrackCord must be called before set_data() clobbers cordz_info.
643     // - set_data() must be called before Unref(tree) as it may reference tree.
644     if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info());
645     contents_.set_data(data, length, true);
646     if (tree != nullptr) CordRep::Unref(tree);
647     return *this;
648   }
649   if (tree != nullptr) {
650     CordzUpdateScope scope(contents_.cordz_info(), method);
651     if (tree->IsFlat() && tree->flat()->Capacity() >= length &&
652         tree->refcount.IsOne()) {
653       // Copy in place if the existing FLAT node is reusable.
654       memmove(tree->flat()->Data(), data, length);
655       tree->length = length;
656       VerifyTree(tree);
657       return *this;
658     }
659     contents_.SetTree(NewTree(data, length, 0), scope);
660     CordRep::Unref(tree);
661   } else {
662     contents_.EmplaceTree(NewTree(data, length, 0), method);
663   }
664   return *this;
665 }
666 
667 // TODO(sanjay): Move to Cord::InlineRep section of file.  For now,
668 // we keep it here to make diffs easier.
669 void Cord::InlineRep::AppendArray(absl::string_view src,
670                                   MethodIdentifier method) {
671   if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
672 
673   size_t appended = 0;
674   CordRep* rep = tree();
675   const CordRep* const root = rep;
676   CordzUpdateScope scope(root ? cordz_info() : nullptr, method);
677   if (root != nullptr) {
678     char* region;
679     if (PrepareAppendRegion(rep, &region, &appended, src.size())) {
680       memcpy(region, src.data(), appended);
681     }
682   } else {
683     // Try to fit in the inline buffer if possible.
684     size_t inline_length = inline_size();
685     if (src.size() <= kMaxInline - inline_length) {
686       // Append new data to embedded array
687       memcpy(data_.as_chars() + inline_length, src.data(), src.size());
688       set_inline_size(inline_length + src.size());
689       return;
690     }
691 
692     // Note: we don't concern ourselves with whether src aliases data stored in
693     // the inlined data of 'this', as we update the InlineData only at the end.
694     // We are going from an inline size to beyond the inline size. The new size
695     // is max(2 * inlined size + added size, inlined size + added size / 10).
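    // For example, appending 30 bytes to 10 inlined bytes requests a flat of
    // at least max(10 * 2 + 30, 10 + 30 / 10) = 50 bytes.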
696     const size_t size1 = inline_length * 2 + src.size();
697     const size_t size2 = inline_length + src.size() / 10;
698     rep = CordRepFlat::New(std::max<size_t>(size1, size2));
699     appended = std::min(src.size(), rep->flat()->Capacity() - inline_length);
700     memcpy(rep->flat()->Data(), data_.as_chars(), inline_length);
701     memcpy(rep->flat()->Data() + inline_length, src.data(), appended);
702     rep->length = inline_length + appended;
703   }
704 
705   src.remove_prefix(appended);
706   if (src.empty()) {
707     CommitTree(root, rep, scope, method);
708     return;
709   }
710 
711   if (btree_enabled()) {
712     // TODO(b/192061034): keep legacy 10% growth rate: consider other rates.
713     rep = ForceBtree(rep);
714     const size_t alloc_hint = (std::min)(kMaxFlatLength, rep->length / 10);
715     rep = CordRepBtree::Append(rep->btree(), src, alloc_hint);
716   } else {
717     // Use new block(s) for any remaining bytes that were not handled above.
718     // Alloc extra memory only if the right child of the root of the new tree
719     // is going to be a FLAT node, which will permit further inplace appends.
720     size_t length = src.size();
721     if (src.size() < kMaxFlatLength) {
722       // The new length is either
723       // - old size + 10% of the old size, or
724       // - old size + src.size(), whichever is larger.
725       // This will cause a reasonable conservative step-up in size that is
726       // still large enough to avoid excessive amounts of small fragments
727       // being added.
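      // For example, appending 100 bytes to a 4000-byte cord yields
      // length = max(400, 100) = 400, i.e. an alloc hint of 300 extra bytes.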
728       length = std::max<size_t>(rep->length / 10, src.size());
729     }
730     rep = Concat(rep, NewTree(src.data(), src.size(), length - src.size()));
731   }
732   CommitTree(root, rep, scope, method);
733 }
734 
735 inline CordRep* Cord::TakeRep() const& {
736   return CordRep::Ref(contents_.tree());
737 }
738 
739 inline CordRep* Cord::TakeRep() && {
740   CordRep* rep = contents_.tree();
741   contents_.clear();
742   return rep;
743 }
744 
745 template <typename C>
746 inline void Cord::AppendImpl(C&& src) {
747   auto constexpr method = CordzUpdateTracker::kAppendCord;
748   if (empty()) {
749     // Since destination is empty, we can avoid allocating a node,
750     if (src.contents_.is_tree()) {
751       // by taking the tree directly
752       CordRep* rep = std::forward<C>(src).TakeRep();
753       contents_.EmplaceTree(rep, method);
754     } else {
755       // or copying over inline data
756       contents_.data_ = src.contents_.data_;
757     }
758     return;
759   }
760 
761   // For short cords, it is faster to copy data if there is room in dst.
762   const size_t src_size = src.contents_.size();
763   if (src_size <= kMaxBytesToCopy) {
764     CordRep* src_tree = src.contents_.tree();
765     if (src_tree == nullptr) {
766       // src has embedded data.
767       contents_.AppendArray({src.contents_.data(), src_size}, method);
768       return;
769     }
770     if (src_tree->IsFlat()) {
771       // src tree just has one flat node.
772       contents_.AppendArray({src_tree->flat()->Data(), src_size}, method);
773       return;
774     }
775     if (&src == this) {
776       // ChunkIterator below assumes that src is not modified during traversal.
777       Append(Cord(src));
778       return;
779     }
780     // TODO(mec): Should we only do this if "dst" has space?
781     for (absl::string_view chunk : src.Chunks()) {
782       Append(chunk);
783     }
784     return;
785   }
786 
787   // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize)
788   CordRep* rep = std::forward<C>(src).TakeRep();
789   contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord);
790 }
791 
792 void Cord::Append(const Cord& src) {
793   AppendImpl(src);
794 }
795 
796 void Cord::Append(Cord&& src) {
797   AppendImpl(std::move(src));
798 }
799 
800 template <typename T, Cord::EnableIfString<T>>
801 void Cord::Append(T&& src) {
802   if (src.size() <= kMaxBytesToCopy) {
803     Append(absl::string_view(src));
804   } else {
805     CordRep* rep = CordRepFromString(std::forward<T>(src));
806     contents_.AppendTree(rep, CordzUpdateTracker::kAppendString);
807   }
808 }
809 
810 template void Cord::Append(std::string&& src);
811 
812 void Cord::Prepend(const Cord& src) {
813   CordRep* src_tree = src.contents_.tree();
814   if (src_tree != nullptr) {
815     CordRep::Ref(src_tree);
816     contents_.PrependTree(src_tree, CordzUpdateTracker::kPrependCord);
817     return;
818   }
819 
820   // `src` cord is inlined.
821   absl::string_view src_contents(src.contents_.data(), src.contents_.size());
822   return Prepend(src_contents);
823 }
824 
825 void Cord::Prepend(absl::string_view src) {
826   if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
827   if (!contents_.is_tree()) {
828     size_t cur_size = contents_.inline_size();
829     if (cur_size + src.size() <= InlineRep::kMaxInline) {
830       // Use embedded storage.
831       char data[InlineRep::kMaxInline + 1] = {0};
832       memcpy(data, src.data(), src.size());
833       memcpy(data + src.size(), contents_.data(), cur_size);
834       memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1);
835       contents_.set_inline_size(cur_size + src.size());
836       return;
837     }
838   }
839   CordRep* rep = NewTree(src.data(), src.size(), 0);
840   contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
841 }
842 
843 template <typename T, Cord::EnableIfString<T>>
844 inline void Cord::Prepend(T&& src) {
845   if (src.size() <= kMaxBytesToCopy) {
846     Prepend(absl::string_view(src));
847   } else {
848     CordRep* rep = CordRepFromString(std::forward<T>(src));
849     contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
850   }
851 }
852 
853 template void Cord::Prepend(std::string&& src);
854 
855 static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
856   if (n >= node->length) return nullptr;
857   if (n == 0) return CordRep::Ref(node);
858   absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;
859 
860   while (node->IsConcat()) {
861     assert(n <= node->length);
862     if (n < node->concat()->left->length) {
863       // Push right to stack, descend left.
864       rhs_stack.push_back(node->concat()->right);
865       node = node->concat()->left;
866     } else {
867       // Drop left, descend right.
868       n -= node->concat()->left->length;
869       node = node->concat()->right;
870     }
871   }
872   assert(n <= node->length);
873 
874   if (n == 0) {
875     CordRep::Ref(node);
876   } else {
877     size_t start = n;
878     size_t len = node->length - n;
879     if (node->IsSubstring()) {
880       // Consider an in-place update of node, similar to RemoveSuffixFrom().
881       start += node->substring()->start;
882       node = node->substring()->child;
883     }
884     node = NewSubstring(CordRep::Ref(node), start, len);
885   }
886   while (!rhs_stack.empty()) {
887     node = Concat(node, CordRep::Ref(rhs_stack.back()));
888     rhs_stack.pop_back();
889   }
890   return node;
891 }
892 
893 // RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
894 // exception that removing a suffix has an optimization where a node may be
895 // edited in place iff that node and all its ancestors have a refcount of 1.
896 static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
897   if (n >= node->length) return nullptr;
898   if (n == 0) return CordRep::Ref(node);
899   absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
900   bool inplace_ok = node->refcount.IsOne();
901 
902   while (node->IsConcat()) {
903     assert(n <= node->length);
904     if (n < node->concat()->right->length) {
905       // Push left to stack, descend right.
906       lhs_stack.push_back(node->concat()->left);
907       node = node->concat()->right;
908     } else {
909       // Drop right, descend left.
910       n -= node->concat()->right->length;
911       node = node->concat()->left;
912     }
913     inplace_ok = inplace_ok && node->refcount.IsOne();
914   }
915   assert(n <= node->length);
916 
917   if (n == 0) {
918     CordRep::Ref(node);
919   } else if (inplace_ok && !node->IsExternal()) {
920     // Consider making a new buffer if the current node capacity is much
921     // larger than the new length.
922     CordRep::Ref(node);
923     node->length -= n;
924   } else {
925     size_t start = 0;
926     size_t len = node->length - n;
927     if (node->IsSubstring()) {
928       start = node->substring()->start;
929       node = node->substring()->child;
930     }
931     node = NewSubstring(CordRep::Ref(node), start, len);
932   }
933   while (!lhs_stack.empty()) {
934     node = Concat(CordRep::Ref(lhs_stack.back()), node);
935     lhs_stack.pop_back();
936   }
937   return node;
938 }
939 
940 void Cord::RemovePrefix(size_t n) {
941   ABSL_INTERNAL_CHECK(n <= size(),
942                       absl::StrCat("Requested prefix size ", n,
943                                    " exceeds Cord's size ", size()));
944   CordRep* tree = contents_.tree();
945   if (tree == nullptr) {
946     contents_.remove_prefix(n);
947   } else {
948     auto constexpr method = CordzUpdateTracker::kRemovePrefix;
949     CordzUpdateScope scope(contents_.cordz_info(), method);
950     if (tree->IsBtree()) {
951       CordRep* old = tree;
952       tree = tree->btree()->SubTree(n, tree->length - n);
953       CordRep::Unref(old);
954     } else {
955       CordRep* newrep = RemovePrefixFrom(tree, n);
956       CordRep::Unref(tree);
957       tree = VerifyTree(newrep);
958     }
959     contents_.SetTreeOrEmpty(tree, scope);
960   }
961 }
962 
963 void Cord::RemoveSuffix(size_t n) {
964   ABSL_INTERNAL_CHECK(n <= size(),
965                       absl::StrCat("Requested suffix size ", n,
966                                    " exceeds Cord's size ", size()));
967   CordRep* tree = contents_.tree();
968   if (tree == nullptr) {
969     contents_.reduce_size(n);
970   } else {
971     auto constexpr method = CordzUpdateTracker::kRemoveSuffix;
972     CordzUpdateScope scope(contents_.cordz_info(), method);
973     if (tree->IsBtree()) {
974       CordRep* old = tree;
975       tree = tree->btree()->SubTree(0, tree->length - n);
976       CordRep::Unref(old);
977     } else {
978       CordRep* newrep = RemoveSuffixFrom(tree, n);
979       CordRep::Unref(tree);
980       tree = VerifyTree(newrep);
981     }
982     contents_.SetTreeOrEmpty(tree, scope);
983   }
984 }
985 
986 // Work item for NewSubRange().
987 struct SubRange {
988   SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
989       : node(a_node), pos(a_pos), n(a_n) {}
990   CordRep* node;  // nullptr means concat last 2 results.
991   size_t pos;
992   size_t n;
993 };
994 
995 static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
996   absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
997   absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
998   todo.push_back(SubRange(node, pos, n));
999   do {
1000     const SubRange& sr = todo.back();
1001     node = sr.node;
1002     pos = sr.pos;
1003     n = sr.n;
1004     todo.pop_back();
1005 
1006     if (node == nullptr) {
1007       assert(results.size() >= 2);
1008       CordRep* right = results.back();
1009       results.pop_back();
1010       CordRep* left = results.back();
1011       results.pop_back();
1012       results.push_back(Concat(left, right));
1013     } else if (pos == 0 && n == node->length) {
1014       results.push_back(CordRep::Ref(node));
1015     } else if (!node->IsConcat()) {
1016       if (node->IsSubstring()) {
1017         pos += node->substring()->start;
1018         node = node->substring()->child;
1019       }
1020       results.push_back(NewSubstring(CordRep::Ref(node), pos, n));
1021     } else if (pos + n <= node->concat()->left->length) {
1022       todo.push_back(SubRange(node->concat()->left, pos, n));
1023     } else if (pos >= node->concat()->left->length) {
1024       pos -= node->concat()->left->length;
1025       todo.push_back(SubRange(node->concat()->right, pos, n));
1026     } else {
1027       size_t left_n = node->concat()->left->length - pos;
1028       todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
1029       todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
1030       todo.push_back(SubRange(node->concat()->left, pos, left_n));
1031     }
1032   } while (!todo.empty());
1033   assert(results.size() == 1);
1034   return results[0];
1035 }
1036 
1037 Cord Cord::Subcord(size_t pos, size_t new_size) const {
1038   Cord sub_cord;
1039   size_t length = size();
1040   if (pos > length) pos = length;
1041   if (new_size > length - pos) new_size = length - pos;
1042   if (new_size == 0) return sub_cord;
1043 
1044   CordRep* tree = contents_.tree();
1045   if (tree == nullptr) {
1046     // sub_cord is newly constructed, no need to re-zero-out the tail of
1047     // contents_ memory.
1048     sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
1049     return sub_cord;
1050   }
1051 
1052   if (new_size <= InlineRep::kMaxInline) {
1053     char* dest = sub_cord.contents_.data_.as_chars();
1054     Cord::ChunkIterator it = chunk_begin();
1055     it.AdvanceBytes(pos);
1056     size_t remaining_size = new_size;
1057     while (remaining_size > it->size()) {
1058       cord_internal::SmallMemmove(dest, it->data(), it->size());
1059       remaining_size -= it->size();
1060       dest += it->size();
1061       ++it;
1062     }
1063     cord_internal::SmallMemmove(dest, it->data(), remaining_size);
1064     sub_cord.contents_.set_inline_size(new_size);
1065     return sub_cord;
1066   }
1067 
1068   if (tree->IsBtree()) {
1069     tree = tree->btree()->SubTree(pos, new_size);
1070   } else {
1071     tree = NewSubRange(tree, pos, new_size);
1072   }
1073   sub_cord.contents_.EmplaceTree(tree, contents_.data_,
1074                                  CordzUpdateTracker::kSubCord);
1075   return sub_cord;
1076 }
1077 
1078 // --------------------------------------------------------------------
1079 // Balancing
1080 
1081 class CordForest {
1082  public:
1083   explicit CordForest(size_t length)
1084       : root_length_(length), trees_(kMinLengthSize, nullptr) {}
1085 
1086   void Build(CordRep* cord_root) {
1087     std::vector<CordRep*> pending = {cord_root};
1088 
1089     while (!pending.empty()) {
1090       CordRep* node = pending.back();
1091       pending.pop_back();
1092       CheckNode(node);
1093       if (ABSL_PREDICT_FALSE(!node->IsConcat())) {
1094         AddNode(node);
1095         continue;
1096       }
1097 
1098       CordRepConcat* concat_node = node->concat();
1099       if (concat_node->depth() >= kMinLengthSize ||
1100           concat_node->length < min_length[concat_node->depth()]) {
1101         pending.push_back(concat_node->right);
1102         pending.push_back(concat_node->left);
1103 
1104         if (concat_node->refcount.IsOne()) {
1105           concat_node->left = concat_freelist_;
1106           concat_freelist_ = concat_node;
1107         } else {
1108           CordRep::Ref(concat_node->right);
1109           CordRep::Ref(concat_node->left);
1110           CordRep::Unref(concat_node);
1111         }
1112       } else {
1113         AddNode(node);
1114       }
1115     }
1116   }
1117 
1118   CordRep* ConcatNodes() {
1119     CordRep* sum = nullptr;
1120     for (auto* node : trees_) {
1121       if (node == nullptr) continue;
1122 
1123       sum = PrependNode(node, sum);
1124       root_length_ -= node->length;
1125       if (root_length_ == 0) break;
1126     }
1127     ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
1128     return VerifyTree(sum);
1129   }
1130 
1131  private:
1132   CordRep* AppendNode(CordRep* node, CordRep* sum) {
1133     return (sum == nullptr) ? node : MakeConcat(sum, node);
1134   }
1135 
1136   CordRep* PrependNode(CordRep* node, CordRep* sum) {
1137     return (sum == nullptr) ? node : MakeConcat(node, sum);
1138   }
1139 
1140   void AddNode(CordRep* node) {
1141     CordRep* sum = nullptr;
1142 
1143     // Collect together everything that we will merge with node
1144     int i = 0;
1145     for (; node->length > min_length[i + 1]; ++i) {
1146       auto& tree_at_i = trees_[i];
1147 
1148       if (tree_at_i == nullptr) continue;
1149       sum = PrependNode(tree_at_i, sum);
1150       tree_at_i = nullptr;
1151     }
1152 
1153     sum = AppendNode(node, sum);
1154 
1155     // Insert sum into appropriate place in the forest
1156     for (; sum->length >= min_length[i]; ++i) {
1157       auto& tree_at_i = trees_[i];
1158       if (tree_at_i == nullptr) continue;
1159 
1160       sum = MakeConcat(tree_at_i, sum);
1161       tree_at_i = nullptr;
1162     }
1163 
1164     // min_length[0] == 1, which means sum->length >= min_length[0]
1165     assert(i > 0);
1166     trees_[i - 1] = sum;
1167   }
1168 
1169   // Make a concat node, trying to reuse existing CordRepConcat nodes we
1170   // already collected in the concat_freelist_.
1171   CordRep* MakeConcat(CordRep* left, CordRep* right) {
1172     if (concat_freelist_ == nullptr) return RawConcat(left, right);
1173 
1174     CordRepConcat* rep = concat_freelist_;
1175     if (concat_freelist_->left == nullptr) {
1176       concat_freelist_ = nullptr;
1177     } else {
1178       concat_freelist_ = concat_freelist_->left->concat();
1179     }
1180     SetConcatChildren(rep, left, right);
1181 
1182     return rep;
1183   }
1184 
1185   static void CheckNode(CordRep* node) {
1186     ABSL_INTERNAL_CHECK(node->length != 0u, "");
1187     if (node->IsConcat()) {
1188       ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
1189       ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
1190       ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
1191                                            node->concat()->right->length),
1192                           "");
1193     }
1194   }
1195 
1196   size_t root_length_;
1197 
1198   // use an inlined vector instead of a flat array to get bounds checking
1199   absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;
1200 
1201   // List of concat nodes we can re-use for Cord balancing.
1202   CordRepConcat* concat_freelist_ = nullptr;
1203 };
1204 
1205 static CordRep* Rebalance(CordRep* node) {
1206   VerifyTree(node);
1207   assert(node->IsConcat());
1208 
1209   if (node->length == 0) {
1210     return nullptr;
1211   }
1212 
1213   CordForest forest(node->length);
1214   forest.Build(node);
1215   return forest.ConcatNodes();
1216 }
1217 
1218 // --------------------------------------------------------------------
1219 // Comparators
1220 
1221 namespace {
1222 
1223 int ClampResult(int memcmp_res) {
1224   return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
1225 }
1226 
1227 int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
1228                   size_t* size_to_compare) {
1229   size_t compared_size = std::min(lhs->size(), rhs->size());
1230   assert(*size_to_compare >= compared_size);
1231   *size_to_compare -= compared_size;
1232 
1233   int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
1234   if (memcmp_res != 0) return memcmp_res;
1235 
1236   lhs->remove_prefix(compared_size);
1237   rhs->remove_prefix(compared_size);
1238 
1239   return 0;
1240 }
1241 
1242 // This overload set computes comparison results from memcmp results. This
1243 // interface is used inside GenericCompare below. Different implementations
1244 // are specialized for int and bool. For int we clamp the result to the
1245 // {-1, 0, 1} set. For bool we are just interested in whether "value == 0".
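// For example, ComputeCompareResult<int>(-17) == -1 and
// ComputeCompareResult<bool>(0) == true.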
1246 template <typename ResultType>
1247 ResultType ComputeCompareResult(int memcmp_res) {
1248   return ClampResult(memcmp_res);
1249 }
1250 template <>
1251 bool ComputeCompareResult<bool>(int memcmp_res) {
1252   return memcmp_res == 0;
1253 }
1254 
1255 }  // namespace
1256 
1257 // Helper routine. Locates the first flat or external chunk of the Cord without
1258 // initializing the iterator, and returns a string_view referencing the data.
1259 inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
1260   if (!is_tree()) {
1261     return absl::string_view(data_.as_chars(), data_.inline_size());
1262   }
1263 
1264   CordRep* node = tree();
1265   if (node->IsFlat()) {
1266     return absl::string_view(node->flat()->Data(), node->length);
1267   }
1268 
1269   if (node->IsExternal()) {
1270     return absl::string_view(node->external()->base, node->length);
1271   }
1272 
1273   if (node->IsBtree()) {
1274     CordRepBtree* tree = node->btree();
1275     int height = tree->height();
1276     while (--height >= 0) {
1277       tree = tree->Edge(CordRepBtree::kFront)->btree();
1278     }
1279     return tree->Data(tree->begin());
1280   }
1281 
1282   // Walk down the left branches until we hit a non-CONCAT node.
1283   while (node->IsConcat()) {
1284     node = node->concat()->left;
1285   }
1286 
1287   // Get the child node if we encounter a SUBSTRING.
1288   size_t offset = 0;
1289   size_t length = node->length;
1290   assert(length != 0);
1291 
1292   if (node->IsSubstring()) {
1293     offset = node->substring()->start;
1294     node = node->substring()->child;
1295   }
1296 
1297   if (node->IsFlat()) {
1298     return absl::string_view(node->flat()->Data() + offset, length);
1299   }
1300 
1301   assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here");
1302 
1303   return absl::string_view(node->external()->base + offset, length);
1304 }
1305 
1306 inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
1307                                  size_t size_to_compare) const {
1308   auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
1309     if (!chunk->empty()) return true;
1310     ++*it;
1311     if (it->bytes_remaining_ == 0) return false;
1312     *chunk = **it;
1313     return true;
1314   };
1315 
1316   Cord::ChunkIterator lhs_it = chunk_begin();
1317 
1318   // compared_size is inside first chunk.
1319   absl::string_view lhs_chunk =
1320       (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
1321   assert(compared_size <= lhs_chunk.size());
1322   assert(compared_size <= rhs.size());
1323   lhs_chunk.remove_prefix(compared_size);
1324   rhs.remove_prefix(compared_size);
1325   size_to_compare -= compared_size;  // skip already compared size.
1326 
1327   while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
1328     int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
1329     if (comparison_result != 0) return comparison_result;
1330     if (size_to_compare == 0) return 0;
1331   }
1332 
1333   return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
1334 }
1335 
1336 inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
1337                                  size_t size_to_compare) const {
1338   auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
1339     if (!chunk->empty()) return true;
1340     ++*it;
1341     if (it->bytes_remaining_ == 0) return false;
1342     *chunk = **it;
1343     return true;
1344   };
1345 
1346   Cord::ChunkIterator lhs_it = chunk_begin();
1347   Cord::ChunkIterator rhs_it = rhs.chunk_begin();
1348 
1349   // compared_size is inside both first chunks.
1350   absl::string_view lhs_chunk =
1351       (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
1352   absl::string_view rhs_chunk =
1353       (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
1354   assert(compared_size <= lhs_chunk.size());
1355   assert(compared_size <= rhs_chunk.size());
1356   lhs_chunk.remove_prefix(compared_size);
1357   rhs_chunk.remove_prefix(compared_size);
1358   size_to_compare -= compared_size;  // skip already compared size.
1359 
1360   while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
1361     int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
1362     if (memcmp_res != 0) return memcmp_res;
1363     if (size_to_compare == 0) return 0;
1364   }
1365 
1366   return static_cast<int>(rhs_chunk.empty()) -
1367          static_cast<int>(lhs_chunk.empty());
1368 }
1369 
1370 inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
1371   return c.contents_.FindFlatStartPiece();
1372 }
1373 inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
1374   return sv;
1375 }
1376 
1377 // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
1378 // that 'size_to_compare' is greater than the size of the smaller first chunk.
1379 template <typename ResultType, typename RHS>
1380 ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
1381                           size_t size_to_compare) {
1382   absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
1383   absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);
1384 
1385   size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
1386   assert(size_to_compare >= compared_size);
1387   int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
1388   if (compared_size == size_to_compare || memcmp_res != 0) {
1389     return ComputeCompareResult<ResultType>(memcmp_res);
1390   }
1391 
1392   return ComputeCompareResult<ResultType>(
1393       lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
1394 }
1395 
1396 bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
1397   return GenericCompare<bool>(*this, rhs, size_to_compare);
1398 }
1399 
1400 bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
1401   return GenericCompare<bool>(*this, rhs, size_to_compare);
1402 }
1403 
1404 template <typename RHS>
1405 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1406   size_t lhs_size = lhs.size();
1407   size_t rhs_size = rhs.size();
1408   if (lhs_size == rhs_size) {
1409     return GenericCompare<int>(lhs, rhs, lhs_size);
1410   }
1411   if (lhs_size < rhs_size) {
1412     auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1413     return data_comp_res == 0 ? -1 : data_comp_res;
1414   }
1415 
1416   auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1417   return data_comp_res == 0 ? +1 : data_comp_res;
1418 }
1419 
1420 int Cord::Compare(absl::string_view rhs) const {
1421   return SharedCompareImpl(*this, rhs);
1422 }
1423 
1424 int Cord::CompareImpl(const Cord& rhs) const {
1425   return SharedCompareImpl(*this, rhs);
1426 }
1427 
1428 bool Cord::EndsWith(absl::string_view rhs) const {
1429   size_t my_size = size();
1430   size_t rhs_size = rhs.size();
1431 
1432   if (my_size < rhs_size) return false;
1433 
1434   Cord tmp(*this);
1435   tmp.RemovePrefix(my_size - rhs_size);
1436   return tmp.EqualsImpl(rhs, rhs_size);
1437 }
1438 
1439 bool Cord::EndsWith(const Cord& rhs) const {
1440   size_t my_size = size();
1441   size_t rhs_size = rhs.size();
1442 
1443   if (my_size < rhs_size) return false;
1444 
1445   Cord tmp(*this);
1446   tmp.RemovePrefix(my_size - rhs_size);
1447   return tmp.EqualsImpl(rhs, rhs_size);
1448 }
1449 
1450 // --------------------------------------------------------------------
1451 // Misc.
1452 
1453 Cord::operator std::string() const {
1454   std::string s;
1455   absl::CopyCordToString(*this, &s);
1456   return s;
1457 }
1458 
1459 void CopyCordToString(const Cord& src, std::string* dst) {
1460   if (!src.contents_.is_tree()) {
1461     src.contents_.CopyTo(dst);
1462   } else {
1463     absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1464     src.CopyToArraySlowPath(&(*dst)[0]);
1465   }
1466 }
1467 
CopyToArraySlowPath(char * dst) const1468 void Cord::CopyToArraySlowPath(char* dst) const {
1469   assert(contents_.is_tree());
1470   absl::string_view fragment;
1471   if (GetFlatAux(contents_.tree(), &fragment)) {
1472     memcpy(dst, fragment.data(), fragment.size());
1473     return;
1474   }
1475   for (absl::string_view chunk : Chunks()) {
1476     memcpy(dst, chunk.data(), chunk.size());
1477     dst += chunk.size();
1478   }
1479 }
1480 
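// Advances the iterator over a Concat-based tree: pops the next pending
// right child off 'stack_of_right_children_' and descends its left spine
// until reaching the next leaf (FLAT or EXTERNAL) chunk.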
Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() {
  auto& stack_of_right_children = stack_of_right_children_;
  if (stack_of_right_children.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children.back();
  stack_of_right_children.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->IsConcat()) {
    stack_of_right_children.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->IsSubstring()) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->IsExternal() || node->IsFlat());
  assert(length != 0);
  const char* data =
      node->IsExternal() ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}

Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                        "Attempted to iterate past `end()`");
  Cord subcord;
  auto constexpr method = CordzUpdateTracker::kCordReader;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }

  if (btree_reader_) {
    size_t chunk_size = current_chunk_.size();
    if (n <= chunk_size && n <= kMaxBytesToCopy) {
      subcord = Cord(current_chunk_.substr(0, n), method);
      if (n < chunk_size) {
        current_chunk_.remove_prefix(n);
      } else {
        current_chunk_ = btree_reader_.Next();
      }
    } else {
      CordRep* rep;
      current_chunk_ = btree_reader_.Read(n, chunk_size, rep);
      subcord.contents_.EmplaceTree(rep, method);
    }
    bytes_remaining_ -= n;
    return subcord;
  }

  auto& stack_of_right_children = stack_of_right_children_;
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = CordRep::Ref(current_leaf_);
    const char* data = subnode->IsExternal() ? subnode->external()->base
                                             : subnode->flat()->Data();
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = CordRep::Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    const char* data = subnode->IsExternal() ? subnode->external()->base
                                             : subnode->flat()->Data();
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children.empty()) {
    node = stack_of_right_children.back();
    stack_of_right_children.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, CordRep::Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->IsConcat()) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, CordRep::Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->IsSubstring()) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->IsExternal() || node->IsFlat());
  assert(length > n);
  if (n > 0) {
    subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n));
  }
  const char* data =
      node->IsExternal() ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
  return subcord;
}

void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  auto& stack_of_right_children = stack_of_right_children_;
  while (!stack_of_right_children.empty()) {
    node = stack_of_right_children.back();
    stack_of_right_children.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->IsConcat()) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->IsSubstring()) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->IsExternal() || node->IsFlat());
  assert(length > n);
  const char* data =
      node->IsExternal() ? node->external()->base : node->flat()->Data();
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}

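// Returns the character at index 'i'. Each call walks the tree from the
// root, so this is O(tree depth) per character; sequential reads are better
// served by chunk iteration (Cord::Chunks()).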
char Cord::operator[](size_t i) const {
  ABSL_HARDENING_ASSERT(i < size());
  size_t offset = i;
  const CordRep* rep = contents_.tree();
  if (rep == nullptr) {
    return contents_.data()[i];
  }
  while (true) {
    assert(rep != nullptr);
    assert(offset < rep->length);
    if (rep->IsFlat()) {
      // Get the "i"th character directly from the flat array.
      return rep->flat()->Data()[offset];
    } else if (rep->IsBtree()) {
      return rep->btree()->GetCharacter(offset);
    } else if (rep->IsExternal()) {
      // Get the "i"th character from the external array.
      return rep->external()->base[offset];
    } else if (rep->IsConcat()) {
      // Recursively branch to the side of the concatenation that the "i"th
      // character is on.
      size_t left_length = rep->concat()->left->length;
      if (offset < left_length) {
        rep = rep->concat()->left;
      } else {
        offset -= left_length;
        rep = rep->concat()->right;
      }
    } else {
      // This must be a substring node, so bypass it to get to the child.
      assert(rep->IsSubstring());
      offset += rep->substring()->start;
      rep = rep->substring()->child;
    }
  }
}

absl::string_view Cord::FlattenSlowPath() {
  assert(contents_.is_tree());
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = CordRepFlat::New(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->flat()->Data();
    CopyToArraySlowPath(new_buffer);
  } else {
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten);
  CordRep::Unref(contents_.as_tree());
  contents_.SetTree(new_rep, scope);
  return absl::string_view(new_buffer, total_size);
}

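// Sets '*fragment' to a string_view spanning all of 'rep' and returns true
// when the rep's data is contiguous in memory (a FLAT, an EXTERNAL, a
// single-chunk btree, or a SUBSTRING of one of those); returns false
// otherwise.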
/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
  assert(rep != nullptr);
  if (rep->IsFlat()) {
    *fragment = absl::string_view(rep->flat()->Data(), rep->length);
    return true;
  } else if (rep->IsExternal()) {
    *fragment = absl::string_view(rep->external()->base, rep->length);
    return true;
  } else if (rep->IsBtree()) {
    return rep->btree()->IsFlat(fragment);
  } else if (rep->IsSubstring()) {
    CordRep* child = rep->substring()->child;
    if (child->IsFlat()) {
      *fragment = absl::string_view(
          child->flat()->Data() + rep->substring()->start, rep->length);
      return true;
    } else if (child->IsExternal()) {
      *fragment = absl::string_view(
          child->external()->base + rep->substring()->start, rep->length);
      return true;
    } else if (child->IsBtree()) {
      return child->btree()->IsFlat(rep->substring()->start, rep->length,
                                    fragment);
    }
  }
  return false;
}

/* static */ void Cord::ForEachChunkAux(
    absl::cord_internal::CordRep* rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  if (rep->IsBtree()) {
    ChunkIterator it(rep), end;
    while (it != end) {
      callback(*it);
      ++it;
    }
    return;
  }

  assert(rep != nullptr);
  int stack_pos = 0;
  constexpr int stack_max = 128;
  // Stack of right branches for tree traversal
  absl::cord_internal::CordRep* stack[stack_max];
  absl::cord_internal::CordRep* current_node = rep;
  while (true) {
    if (current_node->IsConcat()) {
      if (stack_pos == stack_max) {
        // There's no more room on our stack array to add another right branch,
        // and the idea is to avoid allocations, so call this function
        // recursively to navigate this subtree further.  (This is not something
        // we expect to happen in practice).
        ForEachChunkAux(current_node, callback);

        // Pop the next right branch and iterate.
        current_node = stack[--stack_pos];
        continue;
      } else {
        // Save the right branch for later traversal and continue down the left
        // branch.
        stack[stack_pos++] = current_node->concat()->right;
        current_node = current_node->concat()->left;
        continue;
      }
    }
    // This is a leaf node, so invoke our callback.
    absl::string_view chunk;
    bool success = GetFlatAux(current_node, &chunk);
    assert(success);
    if (success) {
      callback(chunk);
    }
    if (stack_pos == 0) {
      // end of traversal
      return;
    }
    current_node = stack[--stack_pos];
  }
}

static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
                     int indent) {
  const int kIndentStep = 1;
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    *os << " " << std::setw(indent) << "";
    if (rep->IsConcat()) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->IsSubstring()) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf or btree
      if (rep->IsExternal()) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else if (rep->IsFlat()) {
        *os << "FLAT cap=" << rep->flat()->Capacity() << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length));
        *os << "]\n";
      } else {
        CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os);
      }
      if (stack.empty()) break;
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}

static std::string ReportError(CordRep* root, CordRep* node) {
  std::ostringstream buf;
  buf << "Error at node " << node << " in:";
  DumpNode(root, true, &buf);
  return buf.str();
}

static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation) {
  absl::InlinedVector<CordRep*, 2> worklist;
  worklist.push_back(start_node);
  do {
    CordRep* node = worklist.back();
    worklist.pop_back();

    ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
    if (node != root) {
      ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
    }

    if (node->IsConcat()) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
                                               node->concat()->right->length),
                          ReportError(root, node));
      if (full_validation) {
        worklist.push_back(node->concat()->right);
        worklist.push_back(node->concat()->left);
      }
    } else if (node->IsFlat()) {
      ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(),
                          ReportError(root, node));
    } else if (node->IsExternal()) {
      ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
                          ReportError(root, node));
    } else if (node->IsSubstring()) {
      ABSL_INTERNAL_CHECK(
          node->substring()->start < node->substring()->child->length,
          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
                              node->substring()->child->length,
                          ReportError(root, node));
    }
  } while (!worklist.empty());
  return true;
}

// Traverses the tree and computes the total memory allocated.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    const CordRep* next_node = nullptr;

    if (cur_node->IsConcat()) {
      total_mem_usage += sizeof(CordRepConcat);
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        if (next_node) {
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else if (cur_node->IsBtree()) {
      total_mem_usage += sizeof(CordRepBtree);
      const CordRepBtree* node = cur_node->btree();
      if (node->height() == 0) {
        for (const CordRep* edge : node->Edges()) {
          RepMemoryUsageDataEdge(edge, &total_mem_usage);
        }
      } else {
        for (const CordRep* edge : node->Edges()) {
          tree_stack.push_back(edge);
        }
      }
    } else {
      // Since cur_node is not a leaf, concat, or btree node, it must be a
      // substring.
      assert(cur_node->IsSubstring());
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        next_node = nullptr;
      }
    }

    if (!next_node) {
      if (tree_stack.empty()) {
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}

std::ostream& operator<<(std::ostream& out, const Cord& cord) {
  for (absl::string_view chunk : cord.Chunks()) {
    out.write(chunk.data(), chunk.size());
  }
  return out;
}

namespace strings_internal {
size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; }
size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; }
size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  return cord_internal::TagToLength(tag);
}
uint8_t CordTestAccess::LengthToTag(size_t s) {
  ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
  return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead);
}
size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
size_t CordTestAccess::SizeofCordRepExternal() {
  return sizeof(CordRepExternal);
}
size_t CordTestAccess::SizeofCordRepSubstring() {
  return sizeof(CordRepSubstring);
}
}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl