// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the
// "tag" byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepRing;
class CordRepBtree;

class CordzInfo;

// Default feature enable states for cord ring buffers
enum CordFeatureDefaults {
  kCordEnableRingBufferDefault = false,
  kCordShallowSubcordsDefault = false
};

extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;

inline void enable_cord_ring_buffer(bool enable) {
  cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
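
// Illustrative usage sketch (editorial, not part of this header's API): with
// `nullify_tail = true`, the copy zero-pads the destination so that all of
// the first 15 bytes are deterministic, e.g.:
//
//   char dst[16];
//   SmallMemmove<true>(dst, "abc", 3);
//   // dst[0..2] == "abc", dst[3..14] == '\0' (dst[15] is untouched).
//
// The overlapping 8/4-byte loads and stores make this branch-light compared
// to a byte-by-byte loop, at the cost of a few redundant writes.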

// Compact class for tracking the reference count and state flags for CordRep
// instances. Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false. Always returns true when the immortal bit is set, so an immortal
  // instance is never reported as destroyable.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
            kHighRefcountMask) != 0;
  }

  // Same as Decrement, but expects the refcount to be greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
    return (refcount & kHighRefcountMask) != 0;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object. Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
           kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bits for flags.
  // kImmortalFlag indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  // kReservedFlag is reserved for future use.
  enum Flags {
    kNumFlags = 2,

    kImmortalFlag = 0x1,
    kReservedFlag = 0x2,
    kRefIncrement = (1 << kNumFlags),

    // Bitmask to use when checking refcount by equality. This masks out
    // all flags except kImmortalFlag, which is part of the refcount for
    // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1
    // if the immortal bit is set.)
    kRefcountMask = ~kReservedFlag,

    // Bitmask to use when checking if the refcount is equal to 1 and not
    // immortal when decrementing the refcount. This masks out kRefIncrement
    // and all flags except kImmortalFlag. If the masked RefcountAndFlags is 0,
    // we assume the refcount is equal to 1, since we know it's not immortal
    // and not greater than 1. If the masked RefcountAndFlags is not 0, we can
    // assume the refcount is not equal to 1 since either a higher bit in the
    // refcount is set, or kImmortalFlag is set.
    kHighRefcountMask = kRefcountMask & ~kRefIncrement,
  };

  std::atomic<int32_t> count_;
};
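
// Worked arithmetic (editorial sketch, derived from the Flags enum above):
// with kNumFlags == 2 the logical refcount lives in bits 2 and up, so a
// freshly constructed RefcountAndFlags holds count_ == kRefIncrement == 4
// (refcount 1). After one Increment(), count_ == 8 (refcount 2); Decrement()
// then observes (8 & kHighRefcountMask) != 0 and returns true, while a second
// Decrement() loads count_ == kRefIncrement and returns false without ever
// issuing the atomic subtraction, signalling the caller to destroy the
// object. An immortal instance starts at count_ == kImmortalFlag == 1, and
// kImmortalFlag survives every masked comparison, so it never reads as
// destroyable.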

// Various representations that we allow
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  RING = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to
  // an allocated range of 32 bytes to 256 KiB. The current granularity is:
  //   - 8 byte granularity for flat sizes in [32, 512]
  //   - 64 byte granularity for flat sizes in (512, 8KiB]
  //   - 4KiB granularity for flat sizes in (8KiB, 256KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG'
  // should be adjusted, as well as the Tag <---> Size mapping logic, so that
  // FLAT still represents the minimum flat allocation size (32 bytes as of
  // now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};
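
// Editorial note (worked arithmetic, not from the original source; the actual
// Tag <---> Size mapping lives in cord_rep_flat.h): the granularities above
// account for every tag value in [FLAT, MAX_FLAT_TAG]:
//   (512 - 32) / 8 + 1      =  61 tags for sizes 32, 40, ..., 512
//   (8192 - 512) / 64       = 120 tags for sizes 576, 640, ..., 8192
//   (262144 - 8192) / 4096  =  62 tags for sizes 12288, ..., 262144
// which totals 243 tags, exactly MAX_FLAT_TAG - FLAT + 1 == 248 - 6 + 1.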

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'.
// Likewise, we have some locations where we check for 'ring or external/flat',
// so likewise align RING to EXTERNAL.
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges
  // in the tree, or the (possibly new / smaller) remaining tree with the
  // extracted data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations
  // rely on the specific layout of these fields. Notably: the non-trivial
  // field `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // # LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` serves two main purposes:
  // - the starting point for FlatCordRep.Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and the CordRepBtree class stores
  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
  // allocate room for these in the derived class, as not all compilers reuse
  // padding space from the base class (clang and gcc do, MSVC does not, etc.)
  uint8_t storage[3];
  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsRing() const { return tag == RING; }
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  inline CordRepRing* ring();
  inline const CordRepRing* ring() const;
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n`
  // to form a non-empty partial sub range of `child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // by the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

// Rank0 is preferred over its Rank1 base during overload resolution, so the
// overload taking a releaser invocable with `absl::string_view` is selected
// when viable, falling back to a releaser invocable without arguments.
struct Rank1 {};
struct Rank0 : Rank1 {};

template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}
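
// Illustrative sketch (editorial, not part of this header): both of the
// following releaser shapes are accepted by the overload pair above.
//
//   auto with_data = [](absl::string_view data) { /* release `data` */ };
//   auto without_data = [] { /* release */ };
//   InvokeReleaser(Rank0{}, with_data, absl::string_view("abc"));     // 1st
//   InvokeReleaser(Rank0{}, without_data, absl::string_view("abc"));  // 2nd
//
// Passing Rank0{} lets overload resolution prefer the string_view form, and
// SFINAE on the default template argument removes it for nullary releasers,
// which then bind via the Rank1 base class conversion.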

// We use CompressedTuple so that we can benefit from EBCO (the empty base
// class optimization): an empty releaser adds no size to the allocation.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};
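
// Illustrative sketch (editorial; the actual construction code lives
// elsewhere in the Cord implementation): an external rep for user-owned data
// could be wired up roughly as
//
//   auto* rep = new CordRepExternalImpl<MyReleaser>(std::move(releaser), 0);
//   rep->length = data.size();
//   rep->tag = EXTERNAL;
//   rep->base = data.data();
//
// where `MyReleaser` and `data` are hypothetical. CordRepExternal::Delete()
// later routes through `releaser_invoker` to run the releaser and delete the
// impl in a single type-erased call.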

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // TODO(b/217376272): Harden internal logic.
  // Move to strategic places inside the Cord logic and make this an assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  if (n == rep->length) return CordRep::Ref(rep);
  if (rep->IsSubstring()) {
    // Flatten substring-of-substring: point directly at the underlying flat
    // or external node, shifting `pos` by the existing substring's offset.
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}
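
// Illustrative example (editorial): given a flat node `f` holding "abcdef",
// `Substring(f, 1, 4)` yields a substring {start: 1, length: 4} over `f`.
// Calling `Substring` on that result with `pos == 1, n == 2` does not nest:
// it yields a new substring {start: 2, length: 2} referencing `f` directly,
// so substring chains never grow deeper than one level.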

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as a 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the first
// half of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// LittleEndianByte() creates a little endian representation of `value`, i.e.:
// a little endian value where the first byte in the host's representation
// holds `value`, with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}
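
// Illustrative example (editorial): LittleEndianByte(3) always produces a
// value whose in-memory representation starts with the byte 0x03:
//
//   little endian host: value 0x0000000000000003 -> bytes 03 00 00 00 ...
//   big endian host:    value 0x0300000000000000 -> bytes 03 00 00 00 ...
//
// Either way, byte 0 of cordz_info (which overlaps the tag byte) holds 3.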

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo holds the little endian representation of intptr_t(1).
  // This is the 'null' / initial value of 'cordz_info'. The value is chosen
  // such that the byte of cordz_info overlapping the tag byte holds 1,
  // keeping the 'is_tree' bit of the tag set.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances holds a cordz_info
  // value. This method is more efficient than the equivalent
  // `data1.is_profiled() || data2.is_profiled()`. Requires both arguments to
  // hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the current cordz_info sampling instance for this instance, or
  // nullptr if the current instance is not sampled and does not have CordzInfo
  // data. Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    // Tag the pointer with the low 'is_tree' bit before storing it in little
    // endian order; cordz_info() reverses this by subtracting 1.
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() ==
  // true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this value.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initialize this instance to holding the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Set the tree value of this instance to `rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance is larger
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }

    char* as_chars() { return data + 1; }
    const char* as_chars() const { return data + 1; }

    bool is_tree() const { return (tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!is_tree());
      return static_cast<size_t>(tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return as_tree.rep; }
    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      as_tree.rep = tree;
      as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, with the
    // size stored in the first char of `data` shifted left by 1 (leaving the
    // low 'is_tree' bit zero). Else we store a pointer to the tree in
    // `as_tree.rep`, and the always-odd first byte of `as_tree.cordz_info`
    // overlapping the tag makes `tag() & 1` non zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };
  };

  // Private implementation of `Compare()`. Compares all 15 potential inline
  // data bytes as two overlapping 8 byte loads covering chars [0, 8) and
  // [7, 15), converted to big endian so an integer compare is lexicographic.
  // Relies on unused trailing bytes being '\0' padded (see set_inline_data).
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");
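
// Illustrative layout (editorial sketch): an InlineData holding the inlined
// string "hi" occupies 16 bytes as
//
//   byte 0:      tag == 4 (size 2 << 1, low 'is_tree' bit clear)
//   byte 1:      'h'
//   byte 2:      'i'
//   bytes 3..15: '\0'
//
// while a tree instance overlays the same 16 bytes with the 8 byte
// `cordz_info` (always odd in byte 0) followed by the CordRep pointer.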

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end,
                                                        kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // The refcount is not expected to reach 0 here (hence ABSL_PREDICT_FALSE).
  // DecrementExpectHighRefcount() skips Decrement()'s extra load-and-branch
  // for the refcount == 1 case: for shared reps, the unconditional atomic
  // decrement is typically cheaper than that extra check.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_