// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>  // memcpy, memset, memcmp
#include <type_traits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte.  Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepRing;
class CordRepBtree;

class CordzInfo;

// Default feature enable states for cord ring buffers.
enum CordFeatureDefaults {
  kCordEnableRingBufferDefault = false,
  kCordShallowSubcordsDefault = false
};

extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;

inline void enable_cord_ring_buffer(bool enable) {
  cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
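
// Illustrative usage (editor's sketch, not part of this header): copying a
// short value into a 15-byte inline buffer with zero padding. Note that the
// destination must have room for all 15 bytes when nullify_tail is true:
//
//   char buf[15];
//   absl::string_view s = "hello";  // n == 5, takes the `n >= 4` branch
//   SmallMemmove</*nullify_tail=*/true>(buf, s.data(), s.size());
//   // buf now holds "hello" followed by ten '\0' bytes.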

// Compact class for tracking the reference count and state flags for CordRep
// instances.  Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.  Always returns true when the immortal bit is set.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
            kHighRefcountMask) != 0;
  }

  // Same as Decrement, but expects the refcount to be greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
    return (refcount & kHighRefcountMask) != 0;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.  Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
           kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bits for flags.
  // kImmortalFlag indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  // kReservedFlag is reserved for future use.
  enum Flags {
    kNumFlags = 2,

    kImmortalFlag = 0x1,
    kReservedFlag = 0x2,
    kRefIncrement = (1 << kNumFlags),

    // Bitmask to use when checking refcount by equality.  This masks out
    // all flags except kImmortalFlag, which is part of the refcount for
    // purposes of equality.  (A refcount of 0 or 1 does not count as 0 or 1
    // if the immortal bit is set.)
    kRefcountMask = ~kReservedFlag,

    // Bitmask to use when checking if refcount is equal to 1 and not
    // immortal when decrementing the refcount. This masks out kRefIncrement
    // and all flags except kImmortalFlag. If the masked RefcountAndFlags is 0,
    // we assume the refcount is equal to 1, since we know it's not immortal
    // and not greater than 1. If the masked RefcountAndFlags is not 0, we can
    // assume the refcount is not equal to 1 since either a higher bit in the
    // refcount is set, or kImmortalFlag is set.
    kHighRefcountMask = kRefcountMask & ~kRefIncrement,
  };

  std::atomic<int32_t> count_;
};
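
// Illustrative lifetime (editor's sketch, not part of this header):
//
//   RefcountAndFlags refcount;          // count == 1 on construction
//   refcount.Increment();               // count == 2: a second owner
//   assert(!refcount.IsOne());
//   bool alive = refcount.Decrement();  // true: one reference remains
//   alive = refcount.Decrement();       // false: last reference dropped,
//                                       // the owner may now destroy the rep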

// Various representations that we allow.
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  RING = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays, starting with
  // FLAT, and limited to MAX_FLAT_TAG. The below values map to an allocated
  // range of 32 bytes to 256KiB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32, 512]
  // - 64 byte granularity for flat sizes in (512, 8KiB]
  // - 4KiB granularity for flat sizes in (8KiB, 256KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG'
  // should be adjusted, as well as the Tag <---> Size mapping logic, so that
  // FLAT still represents the minimum flat allocation size (32 bytes as of
  // now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'.
// Likewise, there are some locations where we check for 'ring or
// external/flat', so RING is kept adjacent to EXTERNAL as well.
// Note that we can leave this optimization to the compiler: it will DTRT when
// it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
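
// For example (editor's illustration; the real helpers live elsewhere in the
// Cord implementation), the 'plain data edge' check compiles to a single
// comparison thanks to the tag ordering asserted above:
//
//   inline bool IsFlatOrExternal(const CordRep* rep) {  // hypothetical helper
//     return rep->tag >= EXTERNAL;  // true for EXTERNAL and every FLAT tag
//   }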

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges
  // in the tree, or the (possibly new / smaller) remaining tree with the
  // extracted data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to fit in less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations
  // rely on the specific layout of these fields. Notably: the non-trivial
  // field `refcount` being preceded by `length`, and being followed by POD
  // data members only.
  // # LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` serves two main purposes:
  // - the starting point for FlatCordRep.Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
  // allocate room for these in the derived class, as not all compilers reuse
  // padding space from the base class (clang and gcc do, MSVC does not, etc).
  uint8_t storage[3];
  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsRing() const { return tag == RING; }
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  inline CordRepRing* ring();
  inline const CordRepRing* ring() const;
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n`
  // to form a non-empty partial sub range of `child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // by the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

struct Rank1 {};
struct Rank0 : Rank1 {};

template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}
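
// Dispatch sketch (editor's illustration): `Rank0` derives from `Rank1`, so a
// call passing `Rank0{}` prefers the first overload when the releaser is
// invocable with an absl::string_view, and SFINAE (via invoke_result_t)
// falls back to the nullary overload otherwise:
//
//   auto r1 = [](absl::string_view data) { /* release `data` */ };
//   auto r2 = [] { /* release captured state */ };
//   InvokeReleaser(Rank0{}, r1, absl::string_view("abc"));  // calls r1("abc")
//   InvokeReleaser(Rank0{}, r2, absl::string_view("abc"));  // calls r2()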

// We use CompressedTuple so that we can benefit from EBCO (the empty base
// class optimization) for a potentially stateless releaser.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // TODO(b/217376272): Harden internal logic.
  // Move to strategic places inside the Cord logic and make this an assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  if (n == rep->length) return CordRep::Ref(rep);
  if (rep->IsSubstring()) {
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}
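
// Illustrative (editor's sketch): substrings of substrings are flattened, so
// the result always points directly at the underlying flat or external node.
//
//   CordRep* flat = /* some flat node with length 100 */;
//   CordRep* s1 = CordRepSubstring::Substring(flat, 10, 50);  // range [10, 60)
//   CordRep* s2 = CordRepSubstring::Substring(s1, 5, 20);     // range [15, 35)
//   // s2->substring()->child == flat and s2->substring()->start == 15;
//   // s2 holds its own reference on `flat`, not on `s1`.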

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as a 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the first half
// of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// LittleEndianByte() creates a little endian representation of 'value', i.e.:
// a little endian value where the first byte in the host's representation
// holds 'value', with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}
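
// For example (editor's illustration): on a little endian host
// LittleEndianByte(1) == cordz_info_t{1}; on a big endian host it is
// cordz_info_t{1} << 56. In both cases the byte stored at the lowest address
// is 1, which is the byte that overlaps the InlineData tag byte below:
//
//   // On a little endian host:
//   static_assert(LittleEndianByte(1) == 1, "");
//   // On a big endian host:
//   static_assert(LittleEndianByte(1) == cordz_info_t{1} << 56, "");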

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo holds the little endian representation of intptr_t(1).
  // This is the 'null' / initial value of 'cordz_info'. The null value is
  // specifically a little endian 1 since, with 64-bit pointers, the first
  // byte of cordz_info overlaps with the first byte holding the tag: the low
  // bit of the tag (the 'is_tree' bit) must remain set for tree instances.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances holds a cordz_info value.
  // This method is more efficient than the equivalent `data1.is_profiled() ||
  // data2.is_profiled()`. Requires both arguments to hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the current cordz_info sampling instance for this instance, or
  // nullptr if the current instance is not sampled and does not have CordzInfo
  // data. Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() == true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this value.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initializes this instance to hold the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Sets the tree value of this instance to `rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance is larger
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }

    char* as_chars() { return data + 1; }
    const char* as_chars() const { return data + 1; }

    bool is_tree() const { return (tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!is_tree());
      return static_cast<size_t>(tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return as_tree.rep; }
    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      as_tree.rep = tree;
      as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, with the
    // size stored in the first char of `data` shifted left by one (the low
    // bit is the 'is_tree' bit, which is zero for inlined data). Otherwise we
    // store a pointer to the tree in `as_tree.rep`, with the overlapping
    // `as_tree.cordz_info` value tagged so that `tag() & 1` is non zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };
  };

  // Private implementation of `Compare()`.
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");
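
// Tag encoding sketch (editor's illustration, assuming `some_rep` is a valid
// CordRep pointer):
//
//   InlineData d;
//   d.set_inline_data("abc", 3);  // tag byte == 3 << 1 == 6, low bit clear
//   assert(!d.is_tree() && d.inline_size() == 3);
//
//   InlineData t(some_rep);       // low bit of the first byte set via
//   assert(t.is_tree());          // kNullCordzInfo: is_tree() is true
//   assert(!t.is_profiled());     // null cordz_info: not sampled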

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect the refcount to be greater than 1: DecrementExpectHighRefcount()
  // skips the extra branch that Decrement() uses to avoid the atomic
  // decrement for a refcount of 1, which typically pays off when destruction
  // is rare.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}
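
// Typical ownership pattern (editor's sketch, assuming `rep` is a valid
// CordRep holding one reference):
//
//   CordRep* shared = CordRep::Ref(rep);  // refcount == 2
//   CordRep::Unref(shared);               // refcount == 1
//   CordRep::Unref(rep);                  // refcount reaches 0: Destroy(rep)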

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_