// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/macros.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the
// "tag" byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepBtree;

class CordzInfo;

// Default feature enable state for shallow subcords.
enum CordFeatureDefaults { kCordShallowSubcordsDefault = false };

extern std::atomic<bool> shallow_subcords_enabled;

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is
  // historical. It may be desirable for each to use a different inlined size
  // optimized for that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size; otherwise, share them by
  // reference counting.
  kMaxBytesToCopy = 511
};
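
// Illustrative sketch (not actual Cord logic) of how `kMaxBytesToCopy` is
// meant to be applied: blocks up to this size are copied outright, larger
// blocks are shared by bumping a reference count. `CopyToNewFlat` is a
// hypothetical helper named only for this example.
//
//   CordRep* Share(CordRep* rep) {
//     if (rep->length <= kMaxBytesToCopy) return CopyToNewFlat(rep);
//     return CordRep::Ref(rep);
//   }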

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    // GCC 12 has a false-positive -Wstringop-overflow warning here.
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic pop
#endif
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
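
// Example (illustrative): copying a short string into a 16-byte buffer while
// zero-filling the tail, as done when storing inlined cord data. With
// `nullify_tail == true`, bytes [n, 15) of `dst` are guaranteed to be zero:
//
//   char buf[16];
//   SmallMemmove</*nullify_tail=*/true>(buf, "hello", 5);
//   // buf[0..4] == "hello", buf[5..14] == '\0', buf[15] untouched.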

// Compact class for tracking the reference count and state flags for CordRep
// instances. Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false. Always returns true when the immortal bit is set, as immortal
  // instances must never be destroyed.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement(), but expects the refcount to be greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object. Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bit for a flag: kImmortalFlag indicates that this
  // entity should never be collected. It is used by the StringConstant
  // constructor to avoid collecting immutable constant cords.
  enum Flags {
    kNumFlags = 1,

    kImmortalFlag = 0x1,
    kRefIncrement = (1 << kNumFlags),
  };

  std::atomic<int32_t> count_;
};
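
// Illustrative sketch (compare `CordRep::Unref()` further below) of the
// unref pattern these primitives support. The acquire/release barriers in
// `Decrement()` ensure that writes made while holding a reference are
// visible to the thread that ends up destroying the object.
//
//   void UnrefSketch(CordRep* rep) {   // hypothetical helper
//     if (!rep->refcount.Decrement()) {
//       CordRep::Destroy(rep);         // we held the last reference
//     }
//   }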

// Various representations that we allow
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  UNUSED_4 = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to
  // an allocated range of 32 bytes to 256 KiB. The current granularity is:
  //   - 8 byte granularity for flat sizes in [32, 512]
  //   - 64 byte granularity for flat sizes in (512, 8 KiB]
  //   - 4 KiB granularity for flat sizes in (8 KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG'
  // should be adjusted, as well as the Tag <---> Size mapping logic, so that
  // FLAT still represents the minimum flat allocation size (32 bytes as of
  // now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'.
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
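
// For example, a 'plain data edge' test reduces to a single comparison; the
// helper below is a hypothetical sketch of the pattern this enables:
//
//   bool IsPlainDataEdge(const CordRep* rep) {
//     return rep->tag >= EXTERNAL;  // matches EXTERNAL and all FLAT tags
//   }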

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges
  // in the tree, or the (possibly new / smaller) remaining tree with the
  // extracted data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations
  // rely on the specific layout of these fields. Notably: the non-trivial
  // field `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // # LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` serves two purposes:
  // - the starting point for CordRepFlat::Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat
  // stores a 'depth' value in storage[0], and CordRepBtree stores `height`,
  // `begin` and `end` in the 3 entries. Otherwise we would need to allocate
  // room for these in the derived classes, as not all compilers reuse padding
  // space from the base class (clang and gcc do, MSVC does not, etc).
  uint8_t storage[3];
  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};
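
// Example (illustrative) of the tag-based dispatch that replaces a vtable:
// callers test the tag through the Is*() predicates, then use the matching
// checked downcast helper.
//
//   if (rep->IsFlat()) {
//     CordRepFlat* flat = rep->flat();  // tag also encodes the flat's size
//     ...
//   } else if (rep->IsSubstring()) {
//     CordRepSubstring* substring = rep->substring();
//     ...
//   }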

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n`
  // to form a non-empty partial subrange of `child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // by the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

struct Rank1 {};
struct Rank0 : Rank1 {};

template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}
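
// Both releaser shapes below are accepted. Overload resolution prefers the
// Rank0 overload when the releaser is invocable with the data; otherwise
// SFINAE removes it and the Rank1 overload (invoked without arguments) is
// selected. Illustrative sketch:
//
//   auto with_data = [](absl::string_view data) { /* release `data` */ };
//   auto without_data = [] { /* release storage tracked elsewhere */ };
//   InvokeReleaser(Rank0{}, with_data, absl::string_view("abc"));     // Rank0
//   InvokeReleaser(Rank0{}, without_data, absl::string_view("abc"));  // Rank1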

// We use CompressedTuple so that we can benefit from EBCO (empty base class
// optimization).
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // TODO(b/217376272): Harden internal logic.
  // Move this check to strategic places inside the Cord logic and make it an
  // assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  if (n == rep->length) return CordRep::Ref(rep);
  if (rep->IsSubstring()) {
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);
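
// Example (sketch): any type exposing a static `value` string (such as
// Cord's internal StringConstant helper) can be bound to a process-lifetime,
// immortal external rep without a heap allocation. `HelloTag` is a
// hypothetical example type:
//
//   struct HelloTag {
//     static constexpr absl::string_view value = "hello";
//   };
//   CordRep* rep = &ConstInitExternalStorage<HelloTag>::value;
//   // `rep` is immortal: Unref() will never destroy it.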

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as a 64-bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the first
// half of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
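
// Worked example, assuming 64-bit pointers and a little endian machine: the
// InlineData union below overlays `char data[16]` with `AsTree`, whose
// `cordz_info` field occupies bytes [0, 8) and therefore shares byte 0 with
// the tag byte, while the `rep` pointer occupies bytes [8, 16):
//
//   byte:    0      1 .. 7            8 .. 15
//   inline:  tag    chars[0..6]       chars[7..14]
//   tree:    <--- cordz_info --->     <------ rep ------>
//
// Storing `kNullCordzInfo` (LittleEndianByte(1)) thus writes 1 into the tag
// byte, keeping the low 'is_tree' bit set for an unsampled tree value.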

// LittleEndianByte() creates a little endian representation of 'value', i.e.:
// a little endian value where the first byte in the host's representation
// holds 'value', with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo holds the little endian representation of intptr_t(1).
  // This is the 'null' / initial value of 'cordz_info'. The null value is
  // specifically little endian 1: with 64-bit pointers, the first byte of
  // cordz_info overlaps with the byte holding the tag, so the tag's low
  // 'is_tree' bit remains set.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances holds a cordz_info
  // value. This method is more efficient than the equivalent
  // `data1.is_profiled() || data2.is_profiled()`. Requires both arguments to
  // hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the cordz_info sampling instance for this instance; pass nullptr to
  // mark the instance as not sampled / without CordzInfo data.
  // Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read-only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e. if
  // is_tree() == true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this instance.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initializes this instance to hold the tree value `rep`,
  // setting the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Sets the tree value of this instance to `rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance is larger
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }

    char* as_chars() { return data + 1; }
    const char* as_chars() const { return data + 1; }

    bool is_tree() const { return (tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!is_tree());
      return static_cast<size_t>(tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return as_tree.rep; }
    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      as_tree.rep = tree;
      as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, with the
    // size recorded in the first char of `data`, shifted left by one bit.
    // Else we store a pointer to the tree in `as_tree.rep`, with a tagged
    // `cordz_info` value that keeps `tag() & 1` non-zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };
  };

  // Private implementation of `Compare()`.
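  // The two 8-byte loads below deliberately overlap: bytes [0, 8) and
  // [7, 15) of the character data together cover all kMaxInline (15) inline
  // bytes, so two integer comparisons suffice. The big-endian conversion at
  // the end makes the integer comparison match lexicographic byte order.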
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // The refcount is expected to be greater than 1, so we decrement
  // unconditionally via DecrementExpectHighRefcount(), skipping Decrement()'s
  // extra branch that checks for ref == 1.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_