• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
16 #define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
17 
18 #include <atomic>
19 #include <cassert>
20 #include <cstddef>
21 #include <cstdint>
22 #include <type_traits>
23 
24 #include "absl/base/attributes.h"
25 #include "absl/base/config.h"
26 #include "absl/base/internal/endian.h"
27 #include "absl/base/internal/invoke.h"
28 #include "absl/base/optimization.h"
29 #include "absl/container/internal/compressed_tuple.h"
30 #include "absl/container/internal/container_memory.h"
31 #include "absl/meta/type_traits.h"
32 #include "absl/strings/string_view.h"
33 
34 // We can only add poisoning if we can detect consteval executions.
35 #if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
36     (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
37      defined(ABSL_HAVE_MEMORY_SANITIZER))
38 #define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
39 #endif
40 
41 #define ABSL_CORD_INTERNAL_NO_SANITIZE \
42   ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
43 
44 namespace absl {
45 ABSL_NAMESPACE_BEGIN
46 namespace cord_internal {
47 
48 // The overhead of a vtable is too much for Cord, so we roll our own subclasses
49 // using only a single byte to differentiate classes from each other - the "tag"
50 // byte.  Define the subclasses first so we can provide downcasting helper
51 // functions in the base class.
52 struct CordRep;
53 struct CordRepConcat;
54 struct CordRepExternal;
55 struct CordRepFlat;
56 struct CordRepSubstring;
57 struct CordRepCrc;
58 class CordRepBtree;
59 
60 class CordzInfo;
61 
62 // Default feature enable states for cord ring buffers
63 enum CordFeatureDefaults { kCordShallowSubcordsDefault = false };
64 
65 extern std::atomic<bool> shallow_subcords_enabled;
66 
enable_shallow_subcords(bool enable)67 inline void enable_shallow_subcords(bool enable) {
68   shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
69 }
70 
// Tuning constants shared across the cord implementation.
enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};
86 
87 // Emits a fatal error "Unexpected node type: xyz" and aborts the program.
88 ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);
89 
// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    // Load both (possibly overlapping) 8-byte halves of `src` before any
    // store to `dst`: the regions may overlap, so all reads must come first.
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    // GCC 12 has a false-positive -Wstringop-overflow warning here.
    // The extra #ifdef keeps this header parseable even when the helper macro
    // from absl/base/config.h is unavailable (e.g. standalone tooling); a
    // function-like macro inside #if is a hard error when undefined.
#ifdef ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
#endif
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
#ifdef ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic pop
#endif
#endif
  } else if (n >= 4) {
    // Same read-then-write discipline with 4-byte halves.
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      // Byte-wise copy for n in [1, 3]; the three stores cover all cases
      // (for n == 1 and n == 2 some of them alias).
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      // Two overlapping 8-byte memsets zero bytes [n, 15).
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
137 
// Compact class for tracking the reference count and state flags for CordRep
// instances.  Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  // Tag type selecting the 'immortal' constructor below.
  struct Immortal {};
  // Immortal instances start at kImmortalFlag. Because all updates are in
  // units of kRefIncrement, the count stays odd forever and the flag sticks.
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.  Always returns true when the immortal bit is set: immortal counts
  // are always odd (they start at kImmortalFlag and change in units of
  // kRefIncrement) so they can never equal kRefIncrement, which means immortal
  // reps are never destroyed.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalFlag);
    // Fast path: a refcount of exactly one reference skips the RMW entirely.
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object. Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bit for flag.
  // kImmortalBit indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  enum Flags {
    kNumFlags = 1,

    kImmortalFlag = 0x1,
    kRefIncrement = (1 << kNumFlags),
  };

  std::atomic<int32_t> count_;
};
210 
// Various representations that we allow
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  UNUSED_4 = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an
  // allocated range of 32 bytes to 256 KB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32 - 512]
  // - 64 byte granularity for flat sizes in (512 - 8KiB]
  // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
  // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
  // represents the minimum flat allocation size. (32 bytes as of now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
239 
240 struct CordRep {
241   // Result from an `extract edge` operation. Contains the (possibly changed)
242   // tree node as well as the extracted edge, or {tree, nullptr} if no edge
243   // could be extracted.
244   // On success, the returned `tree` value is null if `extracted` was the only
245   // data edge inside the tree, a data edge if there were only two data edges in
246   // the tree, or the (possibly new / smaller) remaining tree with the extracted
247   // data edge removed.
248   struct ExtractResult {
249     CordRep* tree;
250     CordRep* extracted;
251   };
252 
253   CordRep() = default;
CordRepCordRep254   constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
255       : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
256 
257   // The following three fields have to be less than 32 bytes since
258   // that is the smallest supported flat node size. Some code optimizations rely
259   // on the specific layout of these fields. Notably: the non-trivial field
260   // `refcount` being preceded by `length`, and being tailed by POD data
261   // members only.
262   // # LINT.IfChange
263   size_t length;
264   RefcountAndFlags refcount;
265   // If tag < FLAT, it represents CordRepKind and indicates the type of node.
266   // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
267   uint8_t tag;
268 
269   // `storage` provides two main purposes:
270   // - the starting point for FlatCordRep.Data() [flexible-array-member]
271   // - 3 bytes of additional storage for use by derived classes.
272   // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores
273   // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
274   // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
275   // allocate room for these in the derived class, as not all compilers reuse
276   // padding space from the base class (clang and gcc do, MSVC does not, etc)
277   uint8_t storage[3];
278   // # LINT.ThenChange(cord_rep_btree.h:copy_raw)
279 
280   // Returns true if this instance's tag matches the requested type.
IsSubstringCordRep281   constexpr bool IsSubstring() const { return tag == SUBSTRING; }
IsCrcCordRep282   constexpr bool IsCrc() const { return tag == CRC; }
IsExternalCordRep283   constexpr bool IsExternal() const { return tag == EXTERNAL; }
IsFlatCordRep284   constexpr bool IsFlat() const { return tag >= FLAT; }
IsBtreeCordRep285   constexpr bool IsBtree() const { return tag == BTREE; }
286 
287   inline CordRepSubstring* substring();
288   inline const CordRepSubstring* substring() const;
289   inline CordRepCrc* crc();
290   inline const CordRepCrc* crc() const;
291   inline CordRepExternal* external();
292   inline const CordRepExternal* external() const;
293   inline CordRepFlat* flat();
294   inline const CordRepFlat* flat() const;
295   inline CordRepBtree* btree();
296   inline const CordRepBtree* btree() const;
297 
298   // --------------------------------------------------------------------
299   // Memory management
300 
301   // Destroys the provided `rep`.
302   static void Destroy(CordRep* rep);
303 
304   // Increments the reference count of `rep`.
305   // Requires `rep` to be a non-null pointer value.
306   static inline CordRep* Ref(CordRep* rep);
307 
308   // Decrements the reference count of `rep`. Destroys rep if count reaches
309   // zero. Requires `rep` to be a non-null pointer value.
310   static inline void Unref(CordRep* rep);
311 };
312 
313 struct CordRepSubstring : public CordRep {
314   size_t start;  // Starting offset of substring in child
315   CordRep* child;
316 
317   // Creates a substring on `child`, adopting a reference on `child`.
318   // Requires `child` to be either a flat or external node, and `pos` and `n` to
319   // form a non-empty partial sub range of `'child`, i.e.:
320   // `n > 0 && n < length && n + pos <= length`
321   static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);
322 
323   // Creates a substring of `rep`. Does not adopt a reference on `rep`.
324   // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
325   // If `n == rep->length` then this method returns `CordRep::Ref(rep)`
326   // If `rep` is a substring of a flat or external node, then this method will
327   // return a new substring of that flat or external node with `pos` adjusted
328   // with the original `start` position.
329   static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
330 };
331 
332 // Type for function pointer that will invoke the releaser function and also
333 // delete the `CordRepExternalImpl` corresponding to the passed in
334 // `CordRepExternal`.
335 using ExternalReleaserInvoker = void (*)(CordRepExternal*);
336 
337 // External CordReps are allocated together with a type erased releaser. The
338 // releaser is stored in the memory directly following the CordRepExternal.
339 struct CordRepExternal : public CordRep {
340   CordRepExternal() = default;
CordRepExternalCordRepExternal341   explicit constexpr CordRepExternal(absl::string_view str)
342       : CordRep(RefcountAndFlags::Immortal{}, str.size()),
343         base(str.data()),
344         releaser_invoker(nullptr) {}
345 
346   const char* base;
347   // Pointer to function that knows how to call and destroy the releaser.
348   ExternalReleaserInvoker releaser_invoker;
349 
350   // Deletes (releases) the external rep.
351   // Requires rep != nullptr and rep->IsExternal()
352   static void Delete(CordRep* rep);
353 };
354 
355 struct Rank1 {};
356 struct Rank0 : Rank1 {};
357 
358 template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
359                                  Releaser, absl::string_view>>
InvokeReleaser(Rank0,Releaser && releaser,absl::string_view data)360 void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
361   ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
362 }
363 
364 template <typename Releaser,
365           typename = ::absl::base_internal::invoke_result_t<Releaser>>
InvokeReleaser(Rank1,Releaser && releaser,absl::string_view)366 void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
367   ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
368 }
369 
370 // We use CompressedTuple so that we can benefit from EBCO.
371 template <typename Releaser>
372 struct CordRepExternalImpl
373     : public CordRepExternal,
374       public ::absl::container_internal::CompressedTuple<Releaser> {
375   // The extra int arg is so that we can avoid interfering with copy/move
376   // constructors while still benefitting from perfect forwarding.
377   template <typename T>
CordRepExternalImplCordRepExternalImpl378   CordRepExternalImpl(T&& releaser, int)
379       : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
380     this->releaser_invoker = &Release;
381   }
382 
~CordRepExternalImplCordRepExternalImpl383   ~CordRepExternalImpl() {
384     InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
385                    absl::string_view(base, length));
386   }
387 
ReleaseCordRepExternalImpl388   static void Release(CordRepExternal* rep) {
389     delete static_cast<CordRepExternalImpl*>(rep);
390   }
391 };
392 
Create(CordRep * child,size_t pos,size_t n)393 inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
394                                                   size_t n) {
395   assert(child != nullptr);
396   assert(n > 0);
397   assert(n < child->length);
398   assert(pos < child->length);
399   assert(n <= child->length - pos);
400 
401   // TODO(b/217376272): Harden internal logic.
402   // Move to strategical places inside the Cord logic and make this an assert.
403   if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
404     LogFatalNodeType(child);
405   }
406 
407   CordRepSubstring* rep = new CordRepSubstring();
408   rep->length = n;
409   rep->tag = SUBSTRING;
410   rep->start = pos;
411   rep->child = child;
412   return rep;
413 }
414 
Substring(CordRep * rep,size_t pos,size_t n)415 inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
416                                             size_t n) {
417   assert(rep != nullptr);
418   assert(n != 0);
419   assert(pos < rep->length);
420   assert(n <= rep->length - pos);
421   if (n == rep->length) return CordRep::Ref(rep);
422   if (rep->IsSubstring()) {
423     pos += rep->substring()->start;
424     rep = rep->substring()->child;
425   }
426   CordRepSubstring* substr = new CordRepSubstring();
427   substr->length = n;
428   substr->tag = SUBSTRING;
429   substr->start = pos;
430   substr->child = CordRep::Ref(rep);
431   return substr;
432 }
433 
Delete(CordRep * rep)434 inline void CordRepExternal::Delete(CordRep* rep) {
435   assert(rep != nullptr && rep->IsExternal());
436   auto* rep_external = static_cast<CordRepExternal*>(rep);
437   assert(rep_external->releaser_invoker != nullptr);
438   rep_external->releaser_invoker(rep_external);
439 }
440 
441 template <typename Str>
442 struct ConstInitExternalStorage {
443   ABSL_CONST_INIT static CordRepExternal value;
444 };
445 
446 template <typename Str>
447 ABSL_CONST_INIT CordRepExternal
448     ConstInitExternalStorage<Str>::value(Str::value);
449 
450 enum {
451   kMaxInline = 15,
452 };
453 
GetOrNull(absl::string_view data,size_t pos)454 constexpr char GetOrNull(absl::string_view data, size_t pos) {
455   return pos < data.size() ? data[pos] : '\0';
456 }
457 
458 // We store cordz_info as 64 bit pointer value in little endian format. This
459 // guarantees that the least significant byte of cordz_info matches the first
460 // byte of the inline data representation in `data`, which holds the inlined
461 // size or the 'is_tree' bit.
462 using cordz_info_t = int64_t;
463 
464 // Assert that the `cordz_info` pointer value perfectly overlaps the last half
465 // of `data` and can hold a pointer value.
466 static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
467 static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
468 
469 // LittleEndianByte() creates a little endian representation of 'value', i.e.:
470 // a little endian value where the first byte in the host's representation
471 // holds 'value`, with all other bytes being 0.
LittleEndianByte(unsigned char value)472 static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
473 #if defined(ABSL_IS_BIG_ENDIAN)
474   return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
475 #else
476   return value;
477 #endif
478 }
479 
480 class InlineData {
481  public:
482   // DefaultInitType forces the use of the default initialization constructor.
483   enum DefaultInitType { kDefaultInit };
484 
485   // kNullCordzInfo holds the little endian representation of intptr_t(1)
486   // This is the 'null' / initial value of 'cordz_info'. The null value
487   // is specifically big endian 1 as with 64-bit pointers, the last
488   // byte of cordz_info overlaps with the last byte holding the tag.
489   static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
490 
491   // kTagOffset contains the offset of the control byte / tag. This constant is
492   // intended mostly for debugging purposes: do not remove this constant as it
493   // is actively inspected and used by gdb pretty printing code.
494   static constexpr size_t kTagOffset = 0;
495 
496   // Implement `~InlineData()` conditionally: we only need this destructor to
497   // unpoison poisoned instances under *SAN, and it will only compile correctly
498   // if the current compiler supports `absl::is_constant_evaluated()`.
499 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
~InlineData()500   ~InlineData() noexcept { unpoison(); }
501 #endif
502 
InlineData()503   constexpr InlineData() noexcept { poison_this(); }
504 
InlineData(DefaultInitType)505   explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
506     poison_this();
507   }
508 
InlineData(CordRep * rep)509   explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
510     ABSL_ASSERT(rep != nullptr);
511   }
512 
513   // Explicit constexpr constructor to create a constexpr InlineData
514   // value. Creates an inlined SSO value if `rep` is null, otherwise
515   // creates a tree instance value.
InlineData(absl::string_view sv,CordRep * rep)516   constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
517       : rep_(rep ? Rep(rep) : Rep(sv)) {
518     poison();
519   }
520 
521   constexpr InlineData(const InlineData& rhs) noexcept;
522   InlineData& operator=(const InlineData& rhs) noexcept;
523 
524   friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
525 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
526     const Rep l = lhs.rep_.SanitizerSafeCopy();
527     const Rep r = rhs.rep_.SanitizerSafeCopy();
528     return memcmp(&l, &r, sizeof(l)) == 0;
529 #else
530     return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
531 #endif
532   }
533   friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
534     return !operator==(lhs, rhs);
535   }
536 
537   // Poisons the unused inlined SSO data if the current instance
538   // is inlined, else un-poisons the entire instance.
539   constexpr void poison();
540 
541   // Un-poisons this instance.
542   constexpr void unpoison();
543 
544   // Poisons the current instance. This is used on default initialization.
545   constexpr void poison_this();
546 
547   // Returns true if the current instance is empty.
548   // The 'empty value' is an inlined data value of zero length.
is_empty()549   bool is_empty() const { return rep_.tag() == 0; }
550 
551   // Returns true if the current instance holds a tree value.
is_tree()552   bool is_tree() const { return (rep_.tag() & 1) != 0; }
553 
554   // Returns true if the current instance holds a cordz_info value.
555   // Requires the current instance to hold a tree value.
is_profiled()556   bool is_profiled() const {
557     assert(is_tree());
558     return rep_.cordz_info() != kNullCordzInfo;
559   }
560 
561   // Returns true if either of the provided instances hold a cordz_info value.
562   // This method is more efficient than the equivalent `data1.is_profiled() ||
563   // data2.is_profiled()`. Requires both arguments to hold a tree.
is_either_profiled(const InlineData & data1,const InlineData & data2)564   static bool is_either_profiled(const InlineData& data1,
565                                  const InlineData& data2) {
566     assert(data1.is_tree() && data2.is_tree());
567     return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
568            kNullCordzInfo;
569   }
570 
571   // Returns the cordz_info sampling instance for this instance, or nullptr
572   // if the current instance is not sampled and does not have CordzInfo data.
573   // Requires the current instance to hold a tree value.
cordz_info()574   CordzInfo* cordz_info() const {
575     assert(is_tree());
576     intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
577         static_cast<uint64_t>(rep_.cordz_info())));
578     assert(info & 1);
579     return reinterpret_cast<CordzInfo*>(info - 1);
580   }
581 
582   // Sets the current cordz_info sampling instance for this instance, or nullptr
583   // if the current instance is not sampled and does not have CordzInfo data.
584   // Requires the current instance to hold a tree value.
set_cordz_info(CordzInfo * cordz_info)585   void set_cordz_info(CordzInfo* cordz_info) {
586     assert(is_tree());
587     uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
588     rep_.set_cordz_info(
589         static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
590   }
591 
592   // Resets the current cordz_info to null / empty.
clear_cordz_info()593   void clear_cordz_info() {
594     assert(is_tree());
595     rep_.set_cordz_info(kNullCordzInfo);
596   }
597 
598   // Returns a read only pointer to the character data inside this instance.
599   // Requires the current instance to hold inline data.
as_chars()600   const char* as_chars() const {
601     assert(!is_tree());
602     return rep_.as_chars();
603   }
604 
605   // Returns a mutable pointer to the character data inside this instance.
606   // Should be used for 'write only' operations setting an inlined value.
607   // Applications can set the value of inlined data either before or after
608   // setting the inlined size, i.e., both of the below are valid:
609   //
610   //   // Set inlined data and inline size
611   //   memcpy(data_.as_chars(), data, size);
612   //   data_.set_inline_size(size);
613   //
614   //   // Set inlined size and inline data
615   //   data_.set_inline_size(size);
616   //   memcpy(data_.as_chars(), data, size);
617   //
618   // It's an error to read from the returned pointer without a preceding write
619   // if the current instance does not hold inline data, i.e.: is_tree() == true.
as_chars()620   char* as_chars() { return rep_.as_chars(); }
621 
622   // Returns the tree value of this value.
623   // Requires the current instance to hold a tree value.
as_tree()624   CordRep* as_tree() const {
625     assert(is_tree());
626     return rep_.tree();
627   }
628 
set_inline_data(const char * data,size_t n)629   void set_inline_data(const char* data, size_t n) {
630     ABSL_ASSERT(n <= kMaxInline);
631     unpoison();
632     rep_.set_tag(static_cast<int8_t>(n << 1));
633     SmallMemmove<true>(rep_.as_chars(), data, n);
634     poison();
635   }
636 
copy_max_inline_to(char * dst)637   void copy_max_inline_to(char* dst) const {
638     assert(!is_tree());
639     memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
640   }
641 
642   // Initialize this instance to holding the tree value `rep`,
643   // initializing the cordz_info to null, i.e.: 'not profiled'.
make_tree(CordRep * rep)644   void make_tree(CordRep* rep) {
645     unpoison();
646     rep_.make_tree(rep);
647   }
648 
649   // Set the tree value of this instance to 'rep`.
650   // Requires the current instance to already hold a tree value.
651   // Does not affect the value of cordz_info.
set_tree(CordRep * rep)652   void set_tree(CordRep* rep) {
653     assert(is_tree());
654     rep_.set_tree(rep);
655   }
656 
657   // Returns the size of the inlined character data inside this instance.
658   // Requires the current instance to hold inline data.
inline_size()659   size_t inline_size() const { return rep_.inline_size(); }
660 
661   // Sets the size of the inlined character data inside this instance.
662   // Requires `size` to be <= kMaxInline.
663   // See the documentation on 'as_chars()' for more information and examples.
set_inline_size(size_t size)664   void set_inline_size(size_t size) {
665     unpoison();
666     rep_.set_inline_size(size);
667     poison();
668   }
669 
670   // Compares 'this' inlined data  with rhs. The comparison is a straightforward
671   // lexicographic comparison. `Compare()` returns values as follows:
672   //
673   //   -1  'this' InlineData instance is smaller
674   //    0  the InlineData instances are equal
675   //    1  'this' InlineData instance larger
Compare(const InlineData & rhs)676   int Compare(const InlineData& rhs) const {
677     return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
678   }
679 
680  private:
681   struct Rep {
682     // See cordz_info_t for forced alignment and size of `cordz_info` details.
683     struct AsTree {
AsTreeRep::AsTree684       explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
685           : rep(tree) {}
686       cordz_info_t cordz_info = kNullCordzInfo;
687       absl::cord_internal::CordRep* rep;
688     };
689 
RepRep690     explicit Rep(DefaultInitType) {}
RepRep691     constexpr Rep() : data{0} {}
692     constexpr Rep(const Rep&) = default;
693     constexpr Rep& operator=(const Rep&) = default;
694 
RepRep695     explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}
696 
RepRep697     explicit constexpr Rep(absl::string_view chars)
698         : data{static_cast<char>((chars.size() << 1)),
699                GetOrNull(chars, 0),
700                GetOrNull(chars, 1),
701                GetOrNull(chars, 2),
702                GetOrNull(chars, 3),
703                GetOrNull(chars, 4),
704                GetOrNull(chars, 5),
705                GetOrNull(chars, 6),
706                GetOrNull(chars, 7),
707                GetOrNull(chars, 8),
708                GetOrNull(chars, 9),
709                GetOrNull(chars, 10),
710                GetOrNull(chars, 11),
711                GetOrNull(chars, 12),
712                GetOrNull(chars, 13),
713                GetOrNull(chars, 14)} {}
714 
715     // Disable sanitizer as we must always be able to read `tag`.
716     ABSL_CORD_INTERNAL_NO_SANITIZE
tagRep717     int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
set_tagRep718     void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
719 
as_charsRep720     char* as_chars() { return data + 1; }
as_charsRep721     const char* as_chars() const { return data + 1; }
722 
is_treeRep723     bool is_tree() const { return (tag() & 1) != 0; }
724 
inline_sizeRep725     size_t inline_size() const {
726       ABSL_ASSERT(!is_tree());
727       return static_cast<size_t>(tag()) >> 1;
728     }
729 
set_inline_sizeRep730     void set_inline_size(size_t size) {
731       ABSL_ASSERT(size <= kMaxInline);
732       set_tag(static_cast<int8_t>(size << 1));
733     }
734 
treeRep735     CordRep* tree() const { return as_tree.rep; }
set_treeRep736     void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
737 
cordz_infoRep738     cordz_info_t cordz_info() const { return as_tree.cordz_info; }
set_cordz_infoRep739     void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }
740 
make_treeRep741     void make_tree(CordRep* tree) {
742       as_tree.rep = tree;
743       as_tree.cordz_info = kNullCordzInfo;
744     }
745 
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Returns a copy of this Rep that can be made while trailing inline
    // bytes may be poisoned: at runtime only the tag and the inline_size()
    // valid payload bytes are copied, never the poisoned tail. During
    // constant evaluation (and for tree reps) a plain copy is used, since
    // no poisoning can be active there.
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          // Copy only bytes known to be unpoisoned.
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    // Without sanitizers a full copy is always safe, so no copy is needed.
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif
764 
    // Storage: if the payload has length <= kMaxInline it is stored inline;
    // data[0] holds the size shifted left by one (bit 0 clear) and the
    // payload itself lives at data + 1. Otherwise the cord is a tree and
    // `as_tree.rep` holds a pointer tagged so that `tag() & 1` is non-zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };
773   };
774 
  // Private implementation of `Compare()`. Compares the inline payloads of
  // `lhs` and `rhs` using two overlapping 8-byte loads per side (payload
  // bytes [0, 8) and [7, 15)), breaking full-payload ties by inline size.
  // The overlapping windows cover the whole inline capacity — assumes
  // kMaxInline == 15 (the static_assert on sizeof(InlineData) below pins
  // the layout).
  // NOTE(review): fixed 8-byte windows are read regardless of inline_size(),
  // so this assumes unused trailing payload bytes compare equal (e.g. are
  // kept zeroed) — confirm against the code paths that write `data`.
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      // First 8 payload bytes are equal: compare the remaining window, then
      // fall back to the shorter-is-smaller rule when everything matches.
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    // Byte-swap to big-endian so integer comparison matches byte order.
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }
792 
793   Rep rep_;
794 };
795 
796 static_assert(sizeof(InlineData) == kMaxInline + 1, "");
797 
798 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
799 
// Copy constructor (sanitizer build): copies via SanitizerSafeCopy() so the
// poisoned trailing bytes of `rhs` are never read, then re-poisons the
// unused bytes of the newly constructed object.
constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}
804 
// Copy assignment (sanitizer build): the destination must be fully
// unpoisoned before being overwritten, and re-poisoned afterwards to match
// the size of its new contents.
inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}
811 
poison_this()812 constexpr void InlineData::poison_this() {
813   if (!absl::is_constant_evaluated()) {
814     container_internal::SanitizerPoisonObject(this);
815   }
816 }
817 
unpoison()818 constexpr void InlineData::unpoison() {
819   if (!absl::is_constant_evaluated()) {
820     container_internal::SanitizerUnpoisonObject(this);
821   }
822 }
823 
poison()824 constexpr void InlineData::poison() {
825   if (!absl::is_constant_evaluated()) {
826     if (is_tree()) {
827       container_internal::SanitizerUnpoisonObject(this);
828     } else if (const size_t size = inline_size()) {
829       if (size < kMaxInline) {
830         const char* end = rep_.as_chars() + size;
831         container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
832       }
833     } else {
834       container_internal::SanitizerPoisonObject(this);
835     }
836   }
837 }
838 
839 #else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
840 
// Non-sanitizer build: copies are trivially safe and the poison/unpoison
// hooks compile away to nothing.
constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}
847 
848 #endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
849 
// Downcasts to CordRepSubstring; the caller must ensure IsSubstring().
inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}
854 
// Const overload of the CordRepSubstring downcast helper.
inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}
859 
// Downcasts to CordRepExternal; the caller must ensure IsExternal().
inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}
864 
// Const overload of the CordRepExternal downcast helper.
inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}
869 
// Increments the refcount of `rep` and returns it. `rep` must not be null.
inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}
877 
// Decrements the refcount of `rep`, destroying it when this was the last
// reference. `rep` must not be null.
inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect refcount to be 0. Avoiding the cost of an atomic decrement should
  // typically outweigh the cost of an extra branch checking for ref == 1.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}
886 
887 }  // namespace cord_internal
888 
889 ABSL_NAMESPACE_END
890 }  // namespace absl
891 #endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
892