// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/cord.h"

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iomanip>
#include <limits>
#include <ostream>
#include <sstream>
#include <type_traits>
#include <unordered_set>
#include <vector>

#include "absl/base/casts.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

using ::absl::cord_internal::CordRep;
using ::absl::cord_internal::CordRepConcat;
using ::absl::cord_internal::CordRepExternal;
using ::absl::cord_internal::CordRepSubstring;

// Various representations that we allow
enum CordRepKind {
  CONCAT        = 0,
  EXTERNAL      = 1,
  SUBSTRING     = 2,

  // We have different tags for different sized flat arrays,
  // starting with FLAT
  FLAT          = 3,
};

namespace {

// Type used with std::allocator for allocating and deallocating
// `CordRepExternal`. std::allocator is used because it opaquely handles the
// different new / delete overloads available on a given platform.
struct alignas(absl::cord_internal::ExternalRepAlignment()) ExternalAllocType {
  unsigned char value[absl::cord_internal::ExternalRepAlignment()];
};

// Returns the number of objects to pass in to std::allocator<ExternalAllocType>
// allocate() and deallocate() to create enough room for `CordRepExternal` with
// `releaser_size` bytes on the end.
constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) {
  // Be sure to round up since `releaser_size` could be smaller than
  // `sizeof(ExternalAllocType)`.
  return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) -
          1) /
         sizeof(ExternalAllocType);
}
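
// Illustrative example (hypothetical sizes, not guaranteed by the layout
// above): if sizeof(CordRepExternal) were 24 and sizeof(ExternalAllocType)
// were 8, a 4-byte releaser would need ceil((24 + 4) / 8) = 4 objects, i.e.
// 32 bytes, leaving 4 bytes of padding after the releaser.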

// Allocates enough memory for `CordRepExternal` and a releaser with size
// `releaser_size` bytes.
void* AllocateExternal(size_t releaser_size) {
  return std::allocator<ExternalAllocType>().allocate(
      GetExternalAllocNumObjects(releaser_size));
}

// Deallocates the memory for a `CordRepExternal` assuming it was allocated with
// a releaser of given size and alignment.
void DeallocateExternal(CordRepExternal* p, size_t releaser_size) {
  std::allocator<ExternalAllocType>().deallocate(
      reinterpret_cast<ExternalAllocType*>(p),
      GetExternalAllocNumObjects(releaser_size));
}

// Returns a pointer to the type erased releaser for the given CordRepExternal.
void* GetExternalReleaser(CordRepExternal* rep) {
  return rep + 1;
}

}  // namespace

namespace cord_internal {

inline CordRepConcat* CordRep::concat() {
  assert(tag == CONCAT);
  return static_cast<CordRepConcat*>(this);
}

inline const CordRepConcat* CordRep::concat() const {
  assert(tag == CONCAT);
  return static_cast<const CordRepConcat*>(this);
}

inline CordRepSubstring* CordRep::substring() {
  assert(tag == SUBSTRING);
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(tag == SUBSTRING);
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(tag == EXTERNAL);
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(tag == EXTERNAL);
  return static_cast<const CordRepExternal*>(this);
}

}  // namespace cord_internal

static const size_t kFlatOverhead = offsetof(CordRep, data);

static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13");

// Largest and smallest flat node lengths we are willing to allocate.
// Flat allocation size is stored in the tag, which currently can encode sizes
// up to 4K, encoded as a multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
static constexpr size_t kMaxFlatSize = 4096;
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;

// Prefer copying blocks of at most this size, otherwise reference count.
static const size_t kMaxBytesToCopy = 511;

// Helper functions for rounded div, and rounding to exact sizes.
static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; }
static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; }

// Rounds the size up to the nearest equal or larger value that can be
// expressed exactly as a tag value.
static size_t RoundUpForTag(size_t size) {
  return RoundUp(size, (size <= 1024) ? 8 : 32);
}

// Converts the allocated size to a tag, rounding down if the size
// does not exactly match a 'tag expressible' size value. The result is
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
static uint8_t AllocatedSizeToTag(size_t size) {
  const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32;
  assert(tag <= std::numeric_limits<uint8_t>::max());
  return tag;
}

// Converts the provided tag to the corresponding allocated size
static constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32);
}

// Converts the provided tag to the corresponding available data length
static constexpr size_t TagToLength(uint8_t tag) {
  return TagToAllocatedSize(tag) - kFlatOverhead;
}

// Enforce that kMaxFlatSize maps to a well-known exact tag value.
static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");
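
// Worked example of the tag encoding above: kMaxFlatSize (4096) is over
// 1024, so it is encoded as tag 128 + 4096 / 32 - 32 == 224; decoding gives
// TagToAllocatedSize(224) == 1024 + 96 * 32 == 4096, of which
// TagToLength(224) == 4096 - 13 == 4083 bytes (kMaxFlatLength) hold data.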

constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
  return n == 0 ? a : Fibonacci(n - 1, b, a + b);
}

static_assert(Fibonacci(63) == 6557470319842,
              "Fibonacci values computed incorrectly");

// Minimum length required for a given depth tree -- a tree is considered
// balanced if
//      length(t) >= min_length[depth(t)]
// The root node depth is allowed to become twice as large to reduce rebalancing
// for larger strings (see IsRootBalanced).
static constexpr uint64_t min_length[] = {
    Fibonacci(2),
    Fibonacci(3),
    Fibonacci(4),
    Fibonacci(5),
    Fibonacci(6),
    Fibonacci(7),
    Fibonacci(8),
    Fibonacci(9),
    Fibonacci(10),
    Fibonacci(11),
    Fibonacci(12),
    Fibonacci(13),
    Fibonacci(14),
    Fibonacci(15),
    Fibonacci(16),
    Fibonacci(17),
    Fibonacci(18),
    Fibonacci(19),
    Fibonacci(20),
    Fibonacci(21),
    Fibonacci(22),
    Fibonacci(23),
    Fibonacci(24),
    Fibonacci(25),
    Fibonacci(26),
    Fibonacci(27),
    Fibonacci(28),
    Fibonacci(29),
    Fibonacci(30),
    Fibonacci(31),
    Fibonacci(32),
    Fibonacci(33),
    Fibonacci(34),
    Fibonacci(35),
    Fibonacci(36),
    Fibonacci(37),
    Fibonacci(38),
    Fibonacci(39),
    Fibonacci(40),
    Fibonacci(41),
    Fibonacci(42),
    Fibonacci(43),
    Fibonacci(44),
    Fibonacci(45),
    Fibonacci(46),
    Fibonacci(47),
    0xffffffffffffffffull,  // Avoid overflow
};

static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
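
// For intuition: min_length[d] == Fibonacci(d + 2), so a tree of depth 4 is
// balanced only if it holds at least Fibonacci(6) == 8 bytes. This is the
// usual rope balance condition; the final max-value entry keeps the table
// lookup safe for trees deeper than Fibonacci(47) can cover.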

// The inlined size to use with absl::InlinedVector.
//
// Note: The InlinedVectors in this file (and in cord.h) do not need to use
// the same value for their inlined size. The fact that they do is historical.
// It may be desirable for each to use a different inlined size optimized for
// that InlinedVector's usage.
//
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
static const int kInlinedVectorSize = 47;

static inline bool IsRootBalanced(CordRep* node) {
  if (node->tag != CONCAT) {
    return true;
  } else if (node->concat()->depth() <= 15) {
    return true;
  } else if (node->concat()->depth() > kMinLengthSize) {
    return false;
  } else {
    // Allow depth to become twice as large as implied by fibonacci rule to
    // reduce rebalancing for larger strings.
    return (node->length >= min_length[node->concat()->depth() / 2]);
  }
}

static CordRep* Rebalance(CordRep* node);
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os);
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation);

static inline CordRep* VerifyTree(CordRep* node) {
  // Verification is expensive, so only do it in debug mode.
  // Even in debug mode we normally do only light validation.
  // If you are debugging Cord itself, you should define the
  // macro EXTRA_CORD_VALIDATION, e.g. by adding
  // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
#ifdef EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
#else   // EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
#endif  // EXTRA_CORD_VALIDATION
  static_cast<void>(&VerifyNode);

  return node;
}

// --------------------------------------------------------------------
// Memory management

inline CordRep* Ref(CordRep* rep) {
  if (rep != nullptr) {
    rep->refcount.Increment();
  }
  return rep;
}

// This internal routine is called from the cold path of Unref below. Keeping it
// in a separate routine allows good inlining of Unref into many profitable call
// sites. However, the call to this function can be highly disruptive to the
// register pressure in those callers. To minimize the cost to callers, we use
// a special LLVM calling convention that preserves most registers. This allows
// the call to this routine in cold paths to not disrupt the caller's register
// pressure. This calling convention is not available on all platforms; we
// intentionally allow LLVM to ignore the attribute rather than attempting to
// hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
__attribute__((preserve_most))
#pragma clang diagnostic pop
#endif
static void UnrefInternal(CordRep* rep) {
  assert(rep != nullptr);

  absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
  while (true) {
    if (rep->tag == CONCAT) {
      CordRepConcat* rep_concat = rep->concat();
      CordRep* right = rep_concat->right;
      if (!right->refcount.Decrement()) {
        pending.push_back(right);
      }
      CordRep* left = rep_concat->left;
      delete rep_concat;
      rep = nullptr;
      if (!left->refcount.Decrement()) {
        rep = left;
        continue;
      }
    } else if (rep->tag == EXTERNAL) {
      CordRepExternal* rep_external = rep->external();
      absl::string_view data(rep_external->base, rep->length);
      void* releaser = GetExternalReleaser(rep_external);
      size_t releaser_size = rep_external->releaser_invoker(releaser, data);
      rep_external->~CordRepExternal();
      DeallocateExternal(rep_external, releaser_size);
      rep = nullptr;
    } else if (rep->tag == SUBSTRING) {
      CordRepSubstring* rep_substring = rep->substring();
      CordRep* child = rep_substring->child;
      delete rep_substring;
      rep = nullptr;
      if (!child->refcount.Decrement()) {
        rep = child;
        continue;
      }
    } else {
      // Flat CordReps are allocated and constructed with raw ::operator new
      // and placement new, and must be destructed and deallocated
      // accordingly.
#if defined(__cpp_sized_deallocation)
      size_t size = TagToAllocatedSize(rep->tag);
      rep->~CordRep();
      ::operator delete(rep, size);
#else
      rep->~CordRep();
      ::operator delete(rep);
#endif
      rep = nullptr;
    }

    if (!pending.empty()) {
      rep = pending.back();
      pending.pop_back();
    } else {
      break;
    }
  }
}

inline void Unref(CordRep* rep) {
  // Fast-path for two common, hot cases: a null rep and a shared root.
  if (ABSL_PREDICT_TRUE(rep == nullptr ||
                        rep->refcount.DecrementExpectHighRefcount())) {
    return;
  }

  UnrefInternal(rep);
}
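
// Net effect of the split above, as a sketch: a caller's Unref(rep) inlines
// to a single predicted-true refcount decrement; only when the count really
// hits zero does control enter UnrefInternal(), which frees the tree
// iteratively with an explicit `pending` stack so deep CONCAT chains cannot
// overflow the call stack.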

// Return the depth of a node
static int Depth(const CordRep* rep) {
  if (rep->tag == CONCAT) {
    return rep->concat()->depth();
  } else {
    return 0;
  }
}

static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
                              CordRep* right) {
  concat->left = left;
  concat->right = right;

  concat->length = left->length + right->length;
  concat->set_depth(1 + std::max(Depth(left), Depth(right)));
}

// Create a concatenation of the specified nodes.
// Does not change the refcounts of "left" and "right".
// The returned node has a refcount of 1.
static CordRep* RawConcat(CordRep* left, CordRep* right) {
  // Avoid making degenerate concat nodes (one child is empty)
  if (left == nullptr || left->length == 0) {
    Unref(left);
    return right;
  }
  if (right == nullptr || right->length == 0) {
    Unref(right);
    return left;
  }

  CordRepConcat* rep = new CordRepConcat();
  rep->tag = CONCAT;
  SetConcatChildren(rep, left, right);

  return rep;
}

static CordRep* Concat(CordRep* left, CordRep* right) {
  CordRep* rep = RawConcat(left, right);
  if (rep != nullptr && !IsRootBalanced(rep)) {
    rep = Rebalance(rep);
  }
  return VerifyTree(rep);
}

// Make a balanced tree out of an array of leaf nodes.
static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
  // Make repeated passes over the array, merging adjacent pairs
  // until we are left with just a single node.
  while (n > 1) {
    size_t dst = 0;
    for (size_t src = 0; src < n; src += 2) {
      if (src + 1 < n) {
        reps[dst] = Concat(reps[src], reps[src + 1]);
      } else {
        reps[dst] = reps[src];
      }
      dst++;
    }
    n = dst;
  }

  return reps[0];
}
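
// For example, five leaves A B C D E merge pairwise per pass:
//   pass 1: (AB) (CD) E    -> n = 3
//   pass 2: ((AB)(CD)) E   -> n = 2
//   pass 3: (((AB)(CD))E)  -> n = 1
// so the result has depth O(log n) in the number of leaves.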

// Create a new flat node.
static CordRep* NewFlat(size_t length_hint) {
  if (length_hint <= kMinFlatLength) {
    length_hint = kMinFlatLength;
  } else if (length_hint > kMaxFlatLength) {
    length_hint = kMaxFlatLength;
  }

  // Round size up so it matches a size we can exactly express in a tag.
  const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
  void* const raw_rep = ::operator new(size);
  CordRep* rep = new (raw_rep) CordRep();
  rep->tag = AllocatedSizeToTag(size);
  return VerifyTree(rep);
}

// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
static CordRep* NewTree(const char* data,
                        size_t length,
                        size_t alloc_hint) {
  if (length == 0) return nullptr;
  absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
  size_t n = 0;
  do {
    const size_t len = std::min(length, kMaxFlatLength);
    CordRep* rep = NewFlat(len + alloc_hint);
    rep->length = len;
    memcpy(rep->data, data, len);
    reps[n++] = VerifyTree(rep);
    data += len;
    length -= len;
  } while (length != 0);
  return MakeBalancedTree(reps.data(), n);
}

namespace cord_internal {

ExternalRepReleaserPair NewExternalWithUninitializedReleaser(
    absl::string_view data, ExternalReleaserInvoker invoker,
    size_t releaser_size) {
  assert(!data.empty());

  void* raw_rep = AllocateExternal(releaser_size);
  auto* rep = new (raw_rep) CordRepExternal();
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  rep->releaser_invoker = invoker;
  return {VerifyTree(rep), GetExternalReleaser(rep)};
}

}  // namespace cord_internal

static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
  // Never create empty substring nodes
  if (length == 0) {
    Unref(child);
    return nullptr;
  } else {
    CordRepSubstring* rep = new CordRepSubstring();
    assert((offset + length) <= child->length);
    rep->length = length;
    rep->tag = SUBSTRING;
    rep->start = offset;
    rep->child = child;
    return VerifyTree(rep);
  }
}

// --------------------------------------------------------------------
// Cord::InlineRep functions

// This will trigger LNK2005 in MSVC.
#ifndef COMPILER_MSVC
const unsigned char Cord::InlineRep::kMaxInline;
#endif  // COMPILER_MSVC

inline void Cord::InlineRep::set_data(const char* data, size_t n,
                                      bool nullify_tail) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");

  cord_internal::SmallMemmove(data_, data, n, nullify_tail);
  data_[kMaxInline] = static_cast<char>(n);
}

inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  memset(data_, 0, sizeof(data_));
  data_[kMaxInline] = static_cast<char>(n);
  return data_;
}

inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
  size_t len = data_[kMaxInline];
  CordRep* result;
  if (len > kMaxInline) {
    memcpy(&result, data_, sizeof(result));
  } else {
    result = NewFlat(len + extra_hint);
    result->length = len;
    memcpy(result->data, data_, len);
    set_tree(result);
  }
  return result;
}

inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = data_[kMaxInline];
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_ + tag, 0, n);
  data_[kMaxInline] = static_cast<char>(tag);
}

inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n);
  reduce_size(n);
}

void Cord::InlineRep::AppendTree(CordRep* tree) {
  if (tree == nullptr) return;
  size_t len = data_[kMaxInline];
  if (len == 0) {
    set_tree(tree);
  } else {
    set_tree(Concat(force_tree(0), tree));
  }
}

void Cord::InlineRep::PrependTree(CordRep* tree) {
  if (tree == nullptr) return;
  size_t len = data_[kMaxInline];
  if (len == 0) {
    set_tree(tree);
  } else {
    set_tree(Concat(tree, force_tree(0)));
  }
}

// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Search down the right-hand path for a non-full FLAT node.
  CordRep* dst = root;
  while (dst->tag == CONCAT && dst->refcount.IsOne()) {
    dst = dst->concat()->right;
  }

  if (dst->tag < FLAT || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = TagToLength(dst->tag);
  if (in_use == capacity) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  size_t size_increase = std::min(capacity - in_use, max_length);

  // We need to update the length fields for all nodes, including the leaf node.
  for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
    rep->length += size_increase;
  }
  dst->length += size_increase;

  *region = dst->data + in_use;
  *size = size_increase;
  return true;
}
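
// Example: for a uniquely owned tree CONCAT(A, FLAT[length 10, capacity 19]),
// PrepareAppendRegion(root, &region, &size, 100) reaches the rightmost flat,
// bumps both lengths by min(19 - 10, 100) == 9, and returns a 9-byte region;
// a shared or full rightmost leaf makes it return false instead, and the
// caller allocates a fresh node.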

void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
                                      size_t max_length) {
  if (max_length == 0) {
    *region = nullptr;
    *size = 0;
    return;
  }

  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) {
    *region = data_ + inline_length;
    *size = max_length;
    data_[kMaxInline] = static_cast<char>(inline_length + max_length);
    return;
  }

  CordRep* root = force_tree(max_length);

  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node =
      NewFlat(std::max(static_cast<size_t>(root->length), max_length));
  new_node->length =
      std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}

void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) {
  const size_t max_length = std::numeric_limits<size_t>::max();

  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline) {
    *region = data_ + inline_length;
    *size = kMaxInline - inline_length;
    data_[kMaxInline] = kMaxInline;
    return;
  }

  CordRep* root = force_tree(max_length);

  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node = NewFlat(root->length);
  new_node->length = TagToLength(new_node->tag);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}

// If the rep is a leaf, this will increment the value at total_mem_usage and
// will return true.
static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
  if (rep->tag >= FLAT) {
    *total_mem_usage += TagToAllocatedSize(rep->tag);
    return true;
  }
  if (rep->tag == EXTERNAL) {
    *total_mem_usage += sizeof(CordRepConcat) + rep->length;
    return true;
  }
  return false;
}

void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  ClearSlow();

  memcpy(data_, src.data_, sizeof(data_));
  if (is_tree()) {
    Ref(tree());
  }
}

void Cord::InlineRep::ClearSlow() {
  if (is_tree()) {
    Unref(tree());
  }
  memset(data_, 0, sizeof(data_));
}

// --------------------------------------------------------------------
// Constructors and destructors

Cord::Cord(const Cord& src) : contents_(src.contents_) {
  Ref(contents_.tree());  // Does nothing if contents_ has embedded data
}

Cord::Cord(absl::string_view src) {
  const size_t n = src.size();
  if (n <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), n, false);
  } else {
    contents_.set_tree(NewTree(src.data(), n, 0));
  }
}

// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
void Cord::DestroyCordSlow() {
  Unref(VerifyTree(contents_.tree()));
}

// --------------------------------------------------------------------
// Mutators

void Cord::Clear() {
  Unref(contents_.clear());
}

Cord& Cord::operator=(absl::string_view src) {
  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_
    contents_.set_data(data, length, true);
    Unref(tree);
    return *this;
  }
  if (tree != nullptr && tree->tag >= FLAT &&
      TagToLength(tree->tag) >= length && tree->refcount.IsOne()) {
    // Copy in place if the existing FLAT node is reusable.
    memmove(tree->data, data, length);
    tree->length = length;
    VerifyTree(tree);
    return *this;
  }
  contents_.set_tree(NewTree(data, length, 0));
  Unref(tree);
  return *this;
}

// TODO(sanjay): Move to Cord::InlineRep section of file.  For now,
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) {
  if (src_size == 0) return;  // memcpy(_, nullptr, 0) is undefined.
  // Try to fit in the inline buffer if possible.
  size_t inline_length = data_[kMaxInline];
  if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) {
    // Append new data to embedded array
    data_[kMaxInline] = static_cast<char>(inline_length + src_size);
    memcpy(data_ + inline_length, src_data, src_size);
    return;
  }

  CordRep* root = tree();

  size_t appended = 0;
  if (root) {
    char* region;
    if (PrepareAppendRegion(root, &region, &appended, src_size)) {
      memcpy(region, src_data, appended);
    }
  } else {
    // It is possible that src_data == data_, but when we transition from an
    // InlineRep to a tree we need to assign data_ = root via set_tree. To
    // avoid corrupting the source data before we copy it, delay calling
    // set_tree until after we've copied data.
    // We are going from an inline size to beyond inline size. Make the new
    // size either double the inlined size, or the added size + 10%.
    const size_t size1 = inline_length * 2 + src_size;
    const size_t size2 = inline_length + src_size / 10;
    root = NewFlat(std::max<size_t>(size1, size2));
    appended = std::min(src_size, TagToLength(root->tag) - inline_length);
    memcpy(root->data, data_, inline_length);
    memcpy(root->data + inline_length, src_data, appended);
    root->length = inline_length + appended;
    set_tree(root);
  }

  src_data += appended;
  src_size -= appended;
  if (src_size == 0) {
    return;
  }

  // Use new block(s) for any remaining bytes that were not handled above.
  // Alloc extra memory only if the right child of the root of the new tree is
  // going to be a FLAT node, which will permit further in-place appends.
  size_t length = src_size;
  if (src_size < kMaxFlatLength) {
    // The new length is either
    // - old size + 10%
    // - old_size + src_size
    // This will cause a reasonable conservative step-up in size that is still
    // large enough to avoid excessive amounts of small fragments being added.
    length = std::max<size_t>(root->length / 10, src_size);
  }
  set_tree(Concat(root, NewTree(src_data, src_size, length - src_size)));
}
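
// Illustrative trace (sizes follow from the tag math earlier in this file):
// appending 30 bytes to a full 15-byte inline rep computes
// size1 = 15 * 2 + 30 == 60, so NewFlat(60) allocates an 80-byte flat with
// 67 usable bytes; all 15 + 30 bytes fit, and the cord becomes one FLAT
// node with 22 bytes of spare capacity for future in-place appends.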

inline CordRep* Cord::TakeRep() const& {
  return Ref(contents_.tree());
}

inline CordRep* Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}

template <typename C>
inline void Cord::AppendImpl(C&& src) {
  if (empty()) {
    // If the destination is empty, avoid allocating a new node and copying
    // data; just adopt the source.
    *this = std::forward<C>(src);
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray(src.contents_.data(), src_size);
      return;
    }
    if (src_tree->tag >= FLAT) {
      // src tree just has one flat node.
      contents_.AppendArray(src_tree->data, src_size);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  contents_.AppendTree(std::forward<C>(src).TakeRep());
}

void Cord::Append(const Cord& src) { AppendImpl(src); }

void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }

void Cord::Prepend(const Cord& src) {
  CordRep* src_tree = src.contents_.tree();
  if (src_tree != nullptr) {
    Ref(src_tree);
    contents_.PrependTree(src_tree);
    return;
  }

  // `src` cord is inlined.
  absl::string_view src_contents(src.contents_.data(), src.contents_.size());
  return Prepend(src_contents);
}

void Cord::Prepend(absl::string_view src) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
  size_t cur_size = contents_.size();
  if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) {
    // Use embedded storage.
    char data[InlineRep::kMaxInline + 1] = {0};
    data[InlineRep::kMaxInline] = cur_size + src.size();  // set size
    memcpy(data, src.data(), src.size());
    memcpy(data + src.size(), contents_.data(), cur_size);
    memcpy(reinterpret_cast<void*>(&contents_), data,
           InlineRep::kMaxInline + 1);
  } else {
    contents_.PrependTree(NewTree(src.data(), src.size(), 0));
  }
}

static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->left->length) {
      // Push right to stack, descend left.
      rhs_stack.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Drop left, descend right.
      n -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else {
    size_t start = n;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Consider in-place update of node, similar to that in
      // RemoveSuffixFrom().
      start += node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  while (!rhs_stack.empty()) {
    node = Concat(node, Ref(rhs_stack.back()));
    rhs_stack.pop_back();
  }
  return node;
}

// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
// exception that removing a suffix has an optimization where a node may be
// edited in place iff that node and all its ancestors have a refcount of 1.
static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
  bool inplace_ok = node->refcount.IsOne();

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->right->length) {
      // Push left to stack, descend right.
      lhs_stack.push_back(node->concat()->left);
      node = node->concat()->right;
    } else {
      // Drop right, descend left.
      n -= node->concat()->right->length;
      node = node->concat()->left;
    }
    inplace_ok = inplace_ok && node->refcount.IsOne();
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else if (inplace_ok && node->tag != EXTERNAL) {
    // Consider making a new buffer if the current node capacity is much
    // larger than the new length.
    Ref(node);
    node->length -= n;
  } else {
    size_t start = 0;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      start = node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  while (!lhs_stack.empty()) {
    node = Concat(Ref(lhs_stack.back()), node);
    lhs_stack.pop_back();
  }
  return node;
}

void Cord::RemovePrefix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested prefix size ", n,
                                   " exceeds Cord's size ", size()));
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.remove_prefix(n);
  } else {
    CordRep* newrep = RemovePrefixFrom(tree, n);
    Unref(tree);
    contents_.replace_tree(VerifyTree(newrep));
  }
}

void Cord::RemoveSuffix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested suffix size ", n,
                                   " exceeds Cord's size ", size()));
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.reduce_size(n);
  } else {
    CordRep* newrep = RemoveSuffixFrom(tree, n);
    Unref(tree);
    contents_.replace_tree(VerifyTree(newrep));
  }
}

// Work item for NewSubRange().
struct SubRange {
  SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
      : node(a_node), pos(a_pos), n(a_n) {}
  CordRep* node;  // nullptr means concat last 2 results.
  size_t pos;
  size_t n;
};

static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
  absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
  absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
  todo.push_back(SubRange(node, pos, n));
  do {
    const SubRange& sr = todo.back();
    node = sr.node;
    pos = sr.pos;
    n = sr.n;
    todo.pop_back();

    if (node == nullptr) {
      assert(results.size() >= 2);
      CordRep* right = results.back();
      results.pop_back();
      CordRep* left = results.back();
      results.pop_back();
      results.push_back(Concat(left, right));
    } else if (pos == 0 && n == node->length) {
      results.push_back(Ref(node));
    } else if (node->tag != CONCAT) {
      if (node->tag == SUBSTRING) {
        pos += node->substring()->start;
        node = node->substring()->child;
      }
      results.push_back(NewSubstring(Ref(node), pos, n));
    } else if (pos + n <= node->concat()->left->length) {
      todo.push_back(SubRange(node->concat()->left, pos, n));
    } else if (pos >= node->concat()->left->length) {
      pos -= node->concat()->left->length;
      todo.push_back(SubRange(node->concat()->right, pos, n));
    } else {
      size_t left_n = node->concat()->left->length - pos;
      todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
      todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
      todo.push_back(SubRange(node->concat()->left, pos, left_n));
    }
  } while (!todo.empty());
  assert(results.size() == 1);
  return results[0];
}
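
// Sketch of a split: extracting [pos, pos + n) that straddles a CONCAT's
// children pushes three work items -- the left piece, the right piece, and
// a nullptr marker that later concatenates the two partial results -- so
// the whole extraction runs with an explicit stack instead of recursion.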

Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // sub_cord is newly constructed, no need to re-zero-out the tail of
    // contents_ memory.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
  } else if (new_size == 0) {
    // We want to return empty subcord, so nothing to do.
  } else if (new_size <= InlineRep::kMaxInline) {
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    char* dest = sub_cord.contents_.data_;
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size;
  } else {
    sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size));
  }
  return sub_cord;
}

// --------------------------------------------------------------------
// Balancing

class CordForest {
 public:
  explicit CordForest(size_t length)
      : root_length_(length), trees_(kMinLengthSize, nullptr) {}

  void Build(CordRep* cord_root) {
    std::vector<CordRep*> pending = {cord_root};

    while (!pending.empty()) {
      CordRep* node = pending.back();
      pending.pop_back();
      CheckNode(node);
      if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
        AddNode(node);
        continue;
      }

      CordRepConcat* concat_node = node->concat();
      if (concat_node->depth() >= kMinLengthSize ||
          concat_node->length < min_length[concat_node->depth()]) {
        pending.push_back(concat_node->right);
        pending.push_back(concat_node->left);

        if (concat_node->refcount.IsOne()) {
          concat_node->left = concat_freelist_;
          concat_freelist_ = concat_node;
        } else {
          Ref(concat_node->right);
          Ref(concat_node->left);
          Unref(concat_node);
        }
      } else {
        AddNode(node);
      }
    }
  }

  CordRep* ConcatNodes() {
    CordRep* sum = nullptr;
    for (auto* node : trees_) {
      if (node == nullptr) continue;

      sum = PrependNode(node, sum);
      root_length_ -= node->length;
      if (root_length_ == 0) break;
    }
    ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
    return VerifyTree(sum);
  }

 private:
  CordRep* AppendNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(sum, node);
  }

  CordRep* PrependNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(node, sum);
  }

  void AddNode(CordRep* node) {
    CordRep* sum = nullptr;

    // Collect together everything with which we will merge node
    int i = 0;
    for (; node->length > min_length[i + 1]; ++i) {
      auto& tree_at_i = trees_[i];

      if (tree_at_i == nullptr) continue;
      sum = PrependNode(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    sum = AppendNode(node, sum);

    // Insert sum into appropriate place in the forest
    for (; sum->length >= min_length[i]; ++i) {
      auto& tree_at_i = trees_[i];
      if (tree_at_i == nullptr) continue;

      sum = MakeConcat(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    // min_length[0] == 1, which means sum->length >= min_length[0]
    assert(i > 0);
    trees_[i - 1] = sum;
  }

  // Make a concat node, trying to reuse existing CordRepConcat nodes we
  // already collected in the concat_freelist_.
  CordRep* MakeConcat(CordRep* left, CordRep* right) {
    if (concat_freelist_ == nullptr) return RawConcat(left, right);

    CordRepConcat* rep = concat_freelist_;
    if (concat_freelist_->left == nullptr) {
      concat_freelist_ = nullptr;
    } else {
      concat_freelist_ = concat_freelist_->left->concat();
    }
    SetConcatChildren(rep, left, right);

    return rep;
  }

  static void CheckNode(CordRep* node) {
    ABSL_INTERNAL_CHECK(node->length != 0u, "");
    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
      ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
                                           node->concat()->right->length),
                          "");
    }
  }

  size_t root_length_;

  // Use an inlined vector instead of a flat array to get bounds checking.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;

  // List of concat nodes we can re-use for Cord balancing.
  CordRepConcat* concat_freelist_ = nullptr;
};

static CordRep* Rebalance(CordRep* node) {
  VerifyTree(node);
  assert(node->tag == CONCAT);

  if (node->length == 0) {
    return nullptr;
  }

  CordForest forest(node->length);
  forest.Build(node);
  return forest.ConcatNodes();
}
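
// The forest mirrors the rope rebalancing scheme of Boehm et al.: trees_[i]
// holds at most one subtree with length in [min_length[i], min_length[i+1]),
// so AddNode() behaves much like incrementing a Fibonacci-base counter, and
// ConcatNodes() folds the buckets from smallest to largest into one
// balanced tree.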

// --------------------------------------------------------------------
// Comparators

namespace {

int ClampResult(int memcmp_res) {
  return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
}

int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
                  size_t* size_to_compare) {
  size_t compared_size = std::min(lhs->size(), rhs->size());
  assert(*size_to_compare >= compared_size);
  *size_to_compare -= compared_size;

  int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
  if (memcmp_res != 0) return memcmp_res;

  lhs->remove_prefix(compared_size);
  rhs->remove_prefix(compared_size);

  return 0;
}

// This overload set computes comparison results from a memcmp result. This
// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp the result to the
// set {-1, 0, 1}. For bool we are only interested in whether the value is 0.
template <typename ResultType>
ResultType ComputeCompareResult(int memcmp_res) {
  return ClampResult(memcmp_res);
}
template <>
bool ComputeCompareResult<bool>(int memcmp_res) {
  return memcmp_res == 0;
}

}  // namespace

// Helper routine. Locates the first flat chunk of the Cord without
// initializing the iterator.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  size_t n = data_[kMaxInline];
  if (n <= kMaxInline) {
    return absl::string_view(data_, n);
  }

  CordRep* node = tree();
  if (node->tag >= FLAT) {
    return absl::string_view(node->data, node->length);
  }

  if (node->tag == EXTERNAL) {
    return absl::string_view(node->external()->base, node->length);
  }

  // Walk down the left branches until we hit a non-CONCAT node.
  while (node->tag == CONCAT) {
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->tag >= FLAT) {
    return absl::string_view(node->data + offset, length);
  }

  assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}

inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}

inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}

inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
  return c.contents_.FindFlatStartPiece();
}
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
  return sv;
}

// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
// that 'size_to_compare' is at least the size of the smaller of the two
// first chunks.
template <typename ResultType, typename RHS>
ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
                          size_t size_to_compare) {
  absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
  absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);

  size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
  assert(size_to_compare >= compared_size);
  int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
  if (compared_size == size_to_compare || memcmp_res != 0) {
    return ComputeCompareResult<ResultType>(memcmp_res);
  }

  return ComputeCompareResult<ResultType>(
      lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
}

bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

template <typename RHS>
inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
  size_t lhs_size = lhs.size();
  size_t rhs_size = rhs.size();
  if (lhs_size == rhs_size) {
    return GenericCompare<int>(lhs, rhs, lhs_size);
  }
  if (lhs_size < rhs_size) {
    auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
    return data_comp_res == 0 ? -1 : data_comp_res;
  }

  auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
  return data_comp_res == 0 ? +1 : data_comp_res;
}
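
// Semantics, by example: comparing "abc" with "abcd" first compares the
// common 3-byte prefix; since that is equal and lhs is shorter, the shorter
// cord orders first and the result is -1. Equal-length unequal cords take
// their sign from the first differing byte via ClampResult().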
1446 
Compare(absl::string_view rhs) const1447 int Cord::Compare(absl::string_view rhs) const {
1448   return SharedCompareImpl(*this, rhs);
1449 }
1450 
CompareImpl(const Cord & rhs) const1451 int Cord::CompareImpl(const Cord& rhs) const {
1452   return SharedCompareImpl(*this, rhs);
1453 }
1454 
EndsWith(absl::string_view rhs) const1455 bool Cord::EndsWith(absl::string_view rhs) const {
1456   size_t my_size = size();
1457   size_t rhs_size = rhs.size();
1458 
1459   if (my_size < rhs_size) return false;
1460 
1461   Cord tmp(*this);
1462   tmp.RemovePrefix(my_size - rhs_size);
1463   return tmp.EqualsImpl(rhs, rhs_size);
1464 }
1465 
EndsWith(const Cord & rhs) const1466 bool Cord::EndsWith(const Cord& rhs) const {
1467   size_t my_size = size();
1468   size_t rhs_size = rhs.size();
1469 
1470   if (my_size < rhs_size) return false;
1471 
1472   Cord tmp(*this);
1473   tmp.RemovePrefix(my_size - rhs_size);
1474   return tmp.EqualsImpl(rhs, rhs_size);
1475 }
1476 
1477 // --------------------------------------------------------------------
1478 // Misc.
1479 
operator std::string() const1480 Cord::operator std::string() const {
1481   std::string s;
1482   absl::CopyCordToString(*this, &s);
1483   return s;
1484 }
1485 
CopyCordToString(const Cord & src,std::string * dst)1486 void CopyCordToString(const Cord& src, std::string* dst) {
1487   if (!src.contents_.is_tree()) {
1488     src.contents_.CopyTo(dst);
1489   } else {
1490     absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1491     src.CopyToArraySlowPath(&(*dst)[0]);
1492   }
1493 }
1494 
CopyToArraySlowPath(char * dst) const1495 void Cord::CopyToArraySlowPath(char* dst) const {
1496   assert(contents_.is_tree());
1497   absl::string_view fragment;
1498   if (GetFlatAux(contents_.tree(), &fragment)) {
1499     memcpy(dst, fragment.data(), fragment.size());
1500     return;
1501   }
1502   for (absl::string_view chunk : Chunks()) {
1503     memcpy(dst, chunk.data(), chunk.size());
1504     dst += chunk.size();
1505   }
1506 }

Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
  assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`");
  assert(bytes_remaining_ >= current_chunk_.size());
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children_.back();
  stack_of_right_children_.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    stack_of_right_children_.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length != 0);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}
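
// This increment is what a range-for over `Cord::Chunks()` drives; each step
// surfaces one leaf of the concat tree as a contiguous `string_view`:
//
//   for (absl::string_view chunk : cord.Chunks()) {
//     // `chunk` aliases the cord's storage; no bytes are copied.
//   }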

Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  Cord subcord;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = Ref(current_leaf_);
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.set_tree(VerifyTree(subnode));
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf_, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.set_tree(VerifyTree(subnode));
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n));
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.set_tree(VerifyTree(subnode));
  return subcord;
}
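
// AdvanceAndReadBytes backs the public `Cord::AdvanceAndRead(CharIterator*,
// size_t)` helper (assuming this version's cord.h, where CharIterator wraps
// a ChunkIterator). A sketch:
//
//   absl::Cord cord("some fairly long contents");
//   absl::Cord::CharIterator it = cord.char_begin();
//   absl::Cord prefix = absl::Cord::AdvanceAndRead(&it, 4);  // Reads "some".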

void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}
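
// The skip-only counterpart of AdvanceAndReadBytes; it backs the public
// `Cord::Advance(CharIterator*, size_t)` helper (again assuming this
// version's cord.h). Whole right-child subtrees are skipped by length, so
// advancing need not touch the bytes in between:
//
//   absl::Cord::CharIterator it = cord.char_begin();
//   absl::Cord::Advance(&it, 100);  // Skips 100 bytes without reading them.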

char Cord::operator[](size_t i) const {
  assert(i < size());
  size_t offset = i;
  const CordRep* rep = contents_.tree();
  if (rep == nullptr) {
    return contents_.data()[i];
  }
  while (true) {
    assert(rep != nullptr);
    assert(offset < rep->length);
    if (rep->tag >= FLAT) {
      // Get the "i"th character directly from the flat array.
      return rep->data[offset];
    } else if (rep->tag == EXTERNAL) {
      // Get the "i"th character from the external array.
      return rep->external()->base[offset];
    } else if (rep->tag == CONCAT) {
      // Recursively branch to the side of the concatenation that the "i"th
      // character is on.
      size_t left_length = rep->concat()->left->length;
      if (offset < left_length) {
        rep = rep->concat()->left;
      } else {
        offset -= left_length;
        rep = rep->concat()->right;
      }
    } else {
      // This must be a SUBSTRING node, so bypass it to get to the child.
      assert(rep->tag == SUBSTRING);
      offset += rep->substring()->start;
      rep = rep->substring()->child;
    }
  }
}
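
// Unlike std::string::operator[], each access walks the tree from the root,
// costing O(tree depth) rather than O(1). A sketch:
//
//   absl::Cord cord("abc");
//   char c = cord[1];  // 'b'; fine for occasional random access.
//   // For full scans, prefer iterating cord.Chunks() over indexing 0..size().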

absl::string_view Cord::FlattenSlowPath() {
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = NewFlat(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->data;
    CopyToArraySlowPath(new_buffer);
  } else {
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  Unref(contents_.tree());
  contents_.set_tree(new_rep);
  return absl::string_view(new_buffer, total_size);
}
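
// Callers reach this through the public `Cord::Flatten()`, which returns the
// entire cord as one contiguous view, copying only when the cord is not
// already flat. A sketch:
//
//   absl::string_view flat = cord.Flatten();
//   // `flat` remains valid while `cord` is alive and unmodified.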

/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
  assert(rep != nullptr);
  if (rep->tag >= FLAT) {
    *fragment = absl::string_view(rep->data, rep->length);
    return true;
  } else if (rep->tag == EXTERNAL) {
    *fragment = absl::string_view(rep->external()->base, rep->length);
    return true;
  } else if (rep->tag == SUBSTRING) {
    CordRep* child = rep->substring()->child;
    if (child->tag >= FLAT) {
      *fragment =
          absl::string_view(child->data + rep->substring()->start, rep->length);
      return true;
    } else if (child->tag == EXTERNAL) {
      *fragment = absl::string_view(
          child->external()->base + rep->substring()->start, rep->length);
      return true;
    }
  }
  return false;
}

/* static */ void Cord::ForEachChunkAux(
    absl::cord_internal::CordRep* rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  assert(rep != nullptr);
  int stack_pos = 0;
  constexpr int stack_max = 128;
  // Stack of right branches for tree traversal
  absl::cord_internal::CordRep* stack[stack_max];
  absl::cord_internal::CordRep* current_node = rep;
  while (true) {
    if (current_node->tag == CONCAT) {
      if (stack_pos == stack_max) {
        // There's no more room on our stack array to add another right branch,
        // and the idea is to avoid allocations, so call this function
        // recursively to navigate this subtree further.  (This is not something
        // we expect to happen in practice).
        ForEachChunkAux(current_node, callback);

        // Pop the next right branch and iterate.
        current_node = stack[--stack_pos];
        continue;
      } else {
        // Save the right branch for later traversal and continue down the left
        // branch.
        stack[stack_pos++] = current_node->concat()->right;
        current_node = current_node->concat()->left;
        continue;
      }
    }
    // This is a leaf node, so invoke our callback.
    absl::string_view chunk;
    bool success = GetFlatAux(current_node, &chunk);
    assert(success);
    if (success) {
      callback(chunk);
    }
    if (stack_pos == 0) {
      // end of traversal
      return;
    }
    current_node = stack[--stack_pos];
  }
}
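
// Design note: the fixed 128-entry array keeps the traversal allocation-free;
// only a tree holding more than 128 pending right branches falls back to
// recursion. Conceptually, the callback form visits the same leaves, in the
// same order, as chunk iteration:
//
//   Cord::ForEachChunkAux(rep, [](absl::string_view chunk) {
//     // Equivalent to the chunks produced by `cord.Chunks()`.
//   });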

static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) {
  const int kIndentStep = 1;
  int indent = 0;
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    *os << " " << std::setw(indent) << "";
    if (rep->tag == CONCAT) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->tag == SUBSTRING) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf
      if (rep->tag == EXTERNAL) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else {
        *os << "FLAT cap=" << TagToLength(rep->tag) << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->data, rep->length));
        *os << "]\n";
      }
      if (stack.empty()) break;
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}

static std::string ReportError(CordRep* root, CordRep* node) {
  std::ostringstream buf;
  buf << "Error at node " << node << " in:";
  DumpNode(root, true, &buf);
  return buf.str();
}

static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation) {
  absl::InlinedVector<CordRep*, 2> worklist;
  worklist.push_back(start_node);
  do {
    CordRep* node = worklist.back();
    worklist.pop_back();

    ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
    if (node != root) {
      ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
    }

    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
                          ReportError(root, node));
      ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
                                               node->concat()->right->length),
                          ReportError(root, node));
      if (full_validation) {
        worklist.push_back(node->concat()->right);
        worklist.push_back(node->concat()->left);
      }
    } else if (node->tag >= FLAT) {
      ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag),
                          ReportError(root, node));
    } else if (node->tag == EXTERNAL) {
      ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
                          ReportError(root, node));
    } else if (node->tag == SUBSTRING) {
      ABSL_INTERNAL_CHECK(
          node->substring()->start < node->substring()->child->length,
          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
                              node->substring()->child->length,
                          ReportError(root, node));
    }
  } while (!worklist.empty());
  return true;
}

// Traverses the tree and computes the total memory allocated.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    const CordRep* next_node = nullptr;

    if (cur_node->tag == CONCAT) {
      total_mem_usage += sizeof(CordRepConcat);
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        if (next_node) {
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else {
      // Since cur_node is not a leaf or a concat node, it must be a substring.
      assert(cur_node->tag == SUBSTRING);
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        next_node = nullptr;
      }
    }

    if (!next_node) {
      if (tree_stack.empty()) {
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}
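
// MemoryUsageAux is the tree-walking half of the public
// `Cord::EstimatedMemoryUsage()`, which (assuming this version's cord.h)
// adds sizeof(Cord) itself before including the tree total:
//
//   size_t bytes = cord.EstimatedMemoryUsage();  // Nodes + data + the Cord.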

std::ostream& operator<<(std::ostream& out, const Cord& cord) {
  for (absl::string_view chunk : cord.Chunks()) {
    out.write(chunk.data(), chunk.size());
  }
  return out;
}
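
// Streaming writes chunk by chunk, so even a large fragmented cord is printed
// without flattening or copying:
//
//   absl::Cord cord("hello, world");
//   std::cout << cord << "\n";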

namespace strings_internal {
size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; }
size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; }
size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  return TagToLength(tag);
}
uint8_t CordTestAccess::LengthToTag(size_t s) {
  ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
  return AllocatedSizeToTag(s + kFlatOverhead);
}
size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
size_t CordTestAccess::SizeofCordRepExternal() {
  return sizeof(CordRepExternal);
}
size_t CordTestAccess::SizeofCordRepSubstring() {
  return sizeof(CordRepSubstring);
}
}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl