1 // Copyright 2019 The TCMalloc Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // This file documents extensions supported by TCMalloc. These extensions
16 // provide hooks for both surfacing telemetric data about TCMalloc's usage and
17 // tuning the internal implementation of TCMalloc. The internal implementation
18 // functions use weak linkage, allowing an application to link against the
19 // extensions without always linking against TCMalloc.
20 //
21 // Many of these APIs are also supported when built with sanitizers.
22
23 #ifndef TCMALLOC_MALLOC_EXTENSION_H_
24 #define TCMALLOC_MALLOC_EXTENSION_H_
25
26 #include <cstddef>
27 #include <cstdint>
28 #include <limits>
29 #include <map>
30 #include <memory>
31 #include <new>
32 #include <optional>
33 #include <string>
34 #include <utility>
35
36 #include "third_party/base/censushandle.h" // tcmalloc:google3-only(b/197108627)
37 #include "third_party/abseil-cpp/absl/base/attributes.h"
38 // tcmalloc:google3-begin(b/143069684)
39 #include "third_party/abseil-cpp/absl/base/internal/malloc_hook.h"
40 // tcmalloc:google3-end
41 #include "third_party/abseil-cpp/absl/base/macros.h"
42 #include "third_party/abseil-cpp/absl/functional/function_ref.h"
43 #include "third_party/abseil-cpp/absl/strings/numbers.h"
44 #include "third_party/abseil-cpp/absl/strings/str_cat.h"
45 #include "third_party/abseil-cpp/absl/strings/string_view.h"
46 #include "third_party/abseil-cpp/absl/time/time.h"
47 #include "third_party/abseil-cpp/absl/types/optional.h"
48 #include "third_party/abseil-cpp/absl/types/span.h"
49
50 // Not all versions of Abseil provide this macro.
51 // TODO(b/323943471): Remove on upgrading to version that provides the macro.
52 #ifndef ABSL_DEPRECATE_AND_INLINE
53 #define ABSL_DEPRECATE_AND_INLINE()
54 #endif
55
56 // Indicates how frequently accessed the allocation is expected to be.
57 // 0 - The allocation is rarely accessed.
58 // ...
59 // 255 - The allocation is accessed very frequently.
60 enum class __hot_cold_t : uint8_t;
61
62 // TODO(ckennelly): Lifetimes
63
64 namespace tcmalloc {
65
66 // Alias to the newer type in the global namespace, so that existing code works
67 // as is.
68 using hot_cold_t = __hot_cold_t;
69
70 constexpr hot_cold_t kDefaultMinHotAccessHint =
71 static_cast<tcmalloc::hot_cold_t>(2);
72
73 } // namespace tcmalloc
74
AbslParseFlag(absl::string_view text,tcmalloc::hot_cold_t * hotness,std::string *)75 inline bool AbslParseFlag(absl::string_view text, tcmalloc::hot_cold_t* hotness,
76 std::string* /* error */) {
77 uint32_t value;
78 if (!absl::SimpleAtoi(text, &value)) {
79 return false;
80 }
81 // hot_cold_t is a uint8_t, so make sure the flag is within the allowable
82 // range before casting.
83 if (value > std::numeric_limits<uint8_t>::max()) {
84 return false;
85 }
86 *hotness = static_cast<tcmalloc::hot_cold_t>(value);
87 return true;
88 }
89
AbslUnparseFlag(tcmalloc::hot_cold_t hotness)90 inline std::string AbslUnparseFlag(tcmalloc::hot_cold_t hotness) {
91 return absl::StrCat(hotness);
92 }
93
94 namespace tcmalloc {
95 namespace tcmalloc_internal {
96 class AllocationProfilingTokenAccessor;
97 class AllocationProfilingTokenBase;
98 class ProfileAccessor;
99 class ProfileBase;
100 } // namespace tcmalloc_internal
101
102 enum class ProfileType {
103 // Approximation of current heap usage
104 kHeap,
105
106 // Fragmentation report
107 kFragmentation,
108
109 // Sample of objects that were live at a recent peak of total heap usage. The
110 // specifics of when exactly this profile is collected are subject to change.
111 kPeakHeap,
112
113 // Sample of objects allocated from the start of allocation profiling until
114 // the profile was terminated with Stop().
115 kAllocations,
116
117 // Lifetimes of sampled objects that are live during the profiling session.
118 kLifetimes,
119
120 // Only present to prevent switch statements without a default clause so that
121 // we can extend this enumeration without breaking code.
122 kDoNotUse,
123 };
124
// Move-only wrapper around a profile collected by TCMalloc. Individual
// samples are visited via Iterate().
class Profile final {
 public:
  Profile() = default;
  Profile(Profile&&) = default;
  Profile(const Profile&) = delete;

  ~Profile();

  Profile& operator=(Profile&&) = default;
  Profile& operator=(const Profile&) = delete;

  // One sampled allocation site (or group of allocations sharing a stack)
  // reported by the profiler.
  struct Sample {
    // Maximum number of frames captured in `stack` below.
    static constexpr int kMaxStackDepth = 64;

    // Aggregate byte total attributed to this sample. NOTE(review): exact
    // semantics (e.g. unsampled/scaled bytes) are not visible here; confirm
    // against the profiler implementation.
    int64_t sum;
    // The reported count of samples, with possible rounding up for unsample.
    // A given sample typically corresponds to some allocated objects, and the
    // number of objects is the quotient of weight (number of bytes requested
    // between previous and current samples) divided by the requested size.
    int64_t count;

    // Allocation parameters observed when the sample was taken.
    size_t requested_size;
    size_t requested_alignment;
    // Bytes actually reserved by the allocator for the object.
    size_t allocated_size;

    // tcmalloc:google3-begin(b/197108627)
    CensusHandle census_handle;
    // tcmalloc:google3-end

    // tcmalloc:google3-begin(b/143069684)
    MallocHook::AllocHandle alloc_handle;
    // tcmalloc:google3-end

    // Return whether the allocation was returned with
    // tcmalloc_size_returning_operator_new or its variants.
    bool requested_size_returning;

    // tcmalloc:google3-begin(context)
    // TODO(b/265065949): Remove this field when we can use `census_handle` to
    // tell if the context is present. `census_handle` is available when Census
    // memory accounting is enabled. For now, all C++ binaries have enabled it.
    // However, for mixed binaries such as C++/Java (e.g. GWS) and C++/Go
    // (e.g. Raffia) it is disabled because the Java implementation does not
    // exist and the Go implementation has non-trivial overhead. We can remove
    // the field when mixed binaries enable Census memory accounting.
    enum class Context : uint8_t {
      Absent,
      Present,
    };

    Context context;
    // tcmalloc:google3-end

    // Hotness classification of the memory backing an allocation.
    enum class Access : uint8_t {
      Hot,
      Cold,

      // Only present to prevent switch statements without a default clause so
      // that we can extend this enumeration without breaking code.
      kDoNotUse,
    };
    // Hotness hint supplied by the caller at allocation time.
    hot_cold_t access_hint;
    // Hotness classification of where the object was actually placed.
    Access access_allocated;

    // Whether this sample captures allocations where the deallocation event
    // was not observed. Thus the measurements are censored in the statistical
    // sense, see https://en.wikipedia.org/wiki/Censoring_(statistics)#Types.
    bool is_censored = false;

    // Provide the status of GWP-ASAN guarding for a given sample.
    enum class GuardedStatus : int8_t {
      // Conditions which represent why a sample was not guarded:
      //
      // The requested_size of the allocation sample is larger than the
      // available pages which are guardable.
      LargerThanOnePage = -1,
      // By flag, the guarding of samples has been disabled.
      Disabled = -2,
      // Too many guards have been placed, any further guards will cause
      // unexpected load on binary.
      RateLimited = -3,
      // The requested_size of the allocation sample is too small (= 0) to be
      // guarded.
      TooSmall = -4,
      // Too many samples are already guarded.
      NoAvailableSlots = -5,
      // Perhaps the only true error, when the mprotect call fails.
      MProtectFailed = -6,
      // Used in an improved guarding selection algorithm.
      Filtered = -7,
      // An unexpected state, which represents that branch for selection was
      // missed.
      Unknown = -100,
      // When guarding is not even considered on a sample.
      NotAttempted = 0,
      // The following values do not represent final states, but rather intent
      // based on the applied algorithm for selecting guarded samples:
      //
      // Request guard: may still not be guarded for other reasons (see
      // above)
      Requested = 1,
      // Unused.
      Required = 2,
      // The result when a sample is actually guarded by GWP-ASAN.
      Guarded = 10,
    };
    GuardedStatus guarded_status = GuardedStatus::Unknown;

    // How the memory was allocated (new/malloc/etc.).
    enum class AllocationType : uint8_t {
      New,
      Malloc,
      AlignedMalloc,
    };

    AllocationType type;

    // tcmalloc:google3-begin(static)
    int static_initialization_depth;
    // tcmalloc:google3-end

    // Number of valid frames in `stack`, followed by the captured allocation
    // call stack. NOTE(review): frame ordering (innermost vs. outermost
    // first) is not established here; confirm before relying on it.
    int depth;
    void* stack[kMaxStackDepth];

    // The following vars are used by the lifetime (deallocation) profiler.
    uint64_t profile_id;

    // Timestamp of allocation.
    absl::Time allocation_time;

    // Aggregated lifetime statistics per callstack.
    absl::Duration avg_lifetime;
    absl::Duration stddev_lifetime;
    absl::Duration min_lifetime;
    absl::Duration max_lifetime;

    // For the *_matched vars below we use true = "same", false = "different".
    // When the value is unavailable the profile contains "none". For
    // right-censored observations, CPU and thread matched values are "none".
    std::optional<bool> allocator_deallocator_physical_cpu_matched;
    std::optional<bool> allocator_deallocator_virtual_cpu_matched;
    std::optional<bool> allocator_deallocator_l3_matched;
    std::optional<bool> allocator_deallocator_numa_matched;
    std::optional<bool> allocator_deallocator_thread_matched;
    // tcmalloc:google3-begin(context)
    std::optional<bool> allocator_deallocator_context_matched;
    // tcmalloc:google3-end

    // The start address of the sampled allocation, used to calculate the
    // residency info for the objects represented by this sampled allocation.
    void* span_start_address;
  };

  // Invokes f for each sample in the profile.
  void Iterate(absl::FunctionRef<void(const Sample&)> f) const;

  // The type of profile this object holds (see ProfileType).
  ProfileType Type() const;

  // Time stamp when the profile collection started. Returns std::nullopt if
  // this is not available.
  std::optional<absl::Time> StartTime() const;

  // The duration the profile was collected for. For instantaneous profiles
  // (heap, peakheap, etc.), this returns absl::ZeroDuration().
  absl::Duration Duration() const;

 private:
  // Wraps an internal profile implementation; constructed only via
  // tcmalloc_internal::ProfileAccessor.
  explicit Profile(std::unique_ptr<const tcmalloc_internal::ProfileBase>);

  std::unique_ptr<const tcmalloc_internal::ProfileBase> impl_;
  friend class tcmalloc_internal::ProfileAccessor;
};
296
297 // tcmalloc:google3-begin(coroner)
298 class MallocExtensionAsanWriter;
299 // tcmalloc:google3-end
300
// A contiguous range of address space from which an AddressRegionFactory
// implementation hands memory to TCMalloc. Per AddressRegionFactory::Create,
// instances returned to TCMalloc are never deleted by it.
class AddressRegion {
 public:
  // = default instead of an empty user-provided body (modernize-use-equals-default).
  AddressRegion() = default;
  virtual ~AddressRegion();

  // Allocates at least size bytes of memory from this region, aligned with
  // alignment. Returns a pair containing a pointer to the start of the
  // allocated memory and the actual size allocated. Returns {nullptr, 0} on
  // failure.
  //
  // Alloc must return memory located within the address range given in the call
  // to AddressRegionFactory::Create that created this AddressRegion.
  virtual std::pair<void*, size_t> Alloc(size_t size, size_t alignment) = 0;
};
314
315 // Interface to a pluggable address region allocator.
// Interface to a pluggable address region allocator.
class AddressRegionFactory {
 public:
  // Describes how TCMalloc intends to use a region so implementations can
  // pick appropriate backing (hugepages, prefaulting, NUMA binding, etc.).
  enum class UsageHint {
    kNormal,                // Normal usage.
    kInfrequentAllocation,  // TCMalloc allocates from these regions less
                            // frequently than normal regions.
    kInfrequent ABSL_DEPRECATED("Use kInfrequentAllocation") =
        kInfrequentAllocation,
    kMetadata,          // Metadata for TCMalloc not returned via new/malloc.
    kInfrequentAccess,  // TCMalloc places cold allocations in these regions.
    // Usage of the below implies numa_aware is enabled. tcmalloc will mbind the
    // address region to the hinted socket, but also passes the hint in case
    // mbind is not sufficient (e.g. when dealing with pre-faulted memory).
    kNormalNumaAwareS0,  // Normal usage intended for NUMA S0 under numa_aware.
    kNormalNumaAwareS1,  // Normal usage intended for NUMA S1 under numa_aware.
  };

  constexpr AddressRegionFactory() = default;
  virtual ~AddressRegionFactory() = default;

  // Returns an AddressRegion with the specified start address and size. hint
  // indicates how the caller intends to use the returned region (helpful for
  // deciding which regions to remap with hugepages, which regions should have
  // pages prefaulted, etc.). The returned AddressRegion must never be deleted.
  //
  // The caller must have reserved size bytes of address space starting at
  // start_addr with mmap(PROT_NONE) prior to calling this function (so it is
  // safe for Create() to mmap(MAP_FIXED) over the specified address range).
  // start_addr and size are always page-aligned.
  virtual AddressRegion* Create(void* start_addr, size_t size,
                                UsageHint hint) = 0;

  // Gets a human-readable description of the current state of the allocator.
  //
  // The state is stored in the provided buffer. The number of bytes used (or
  // would have been required, had the buffer been of sufficient size) is
  // returned.
  virtual size_t GetStats(absl::Span<char> buffer);

  // Gets a description of the current state of the allocator in pbtxt format.
  //
  // The state is stored in the provided buffer. The number of bytes used (or
  // would have been required, had the buffer been of sufficient size) is
  // returned.
  virtual size_t GetStatsInPbtxt(absl::Span<char> buffer);

  // Returns the total number of bytes allocated by MallocInternal().
  static size_t InternalBytesAllocated();

 protected:
  // Dynamically allocates memory for use by AddressRegionFactory. Particularly
  // useful for creating AddressRegions inside Create().
  //
  // This memory is never freed, so allocate sparingly.
  static void* MallocInternal(size_t size);
};
372
// Static interface for querying TCMalloc telemetry and tuning its internal
// behavior. All members are static; per the file-level comment, the internal
// implementation functions use weak linkage, so applications can link against
// these extensions without always linking against TCMalloc.
class MallocExtension final {
 public:
  // Gets a human readable description of the current state of the malloc data
  // structures.
  //
  // See g3doc/third_party/tcmalloc/g3doc/stats.md for how to interpret these
  // statistics.
  static std::string GetStats();

  // -------------------------------------------------------------------
  // Control operations for getting malloc implementation specific parameters.
  // Some currently useful properties:
  //
  // generic
  // -------
  // "generic.current_allocated_bytes"
  //      Number of bytes currently allocated by application
  //
  // "generic.heap_size"
  //      Number of bytes in the heap ==
  //      current_allocated_bytes +
  //      fragmentation +
  //      freed (but not released to OS) memory regions
  //
  // tcmalloc
  // --------
  // "tcmalloc.max_total_thread_cache_bytes"
  //      Upper limit on total number of bytes stored across all
  //      per-thread caches.  Default: 16MB.
  //
  // "tcmalloc.current_total_thread_cache_bytes"
  //      Number of bytes used across all thread caches.
  //
  // "tcmalloc.pageheap_free_bytes"
  //      Number of bytes in free, mapped pages in page heap.  These
  //      bytes can be used to fulfill allocation requests.  They
  //      always count towards virtual memory usage, and unless the
  //      underlying memory is swapped out by the OS, they also count
  //      towards physical memory usage.
  //
  // "tcmalloc.pageheap_unmapped_bytes"
  //      Number of bytes in free, unmapped pages in page heap.
  //      These are bytes that have been released back to the OS,
  //      possibly by one of the MallocExtension "Release" calls.
  //      They can be used to fulfill allocation requests, but
  //      typically incur a page fault.  They always count towards
  //      virtual memory usage, and depending on the OS, typically
  //      do not count towards physical memory usage.
  //
  // "tcmalloc.per_cpu_caches_active"
  //      Whether tcmalloc is using per-CPU caches (1 or 0 respectively).
  // -------------------------------------------------------------------

  // Gets the named property's value or a nullopt if the property is not valid.
  static std::optional<size_t> GetNumericProperty(absl::string_view property);

  // Marks the current thread as "idle".  This function may optionally be
  // called by threads as a hint to the malloc implementation that any
  // thread-specific resources should be released.  Note: this may be an
  // expensive function, so it should not be called too often.
  //
  // Also, if the code that calls this function will go to sleep for a while,
  // it should take care to not allocate anything between the call to this
  // function and the beginning of the sleep.
  static void MarkThreadIdle();

  // Marks the current thread as "busy".  This function should be called after
  // MarkThreadIdle() if the thread will now do more work.  If this method is
  // not called, performance may suffer.
  static void MarkThreadBusy();

  // Attempts to free any resources associated with cpu <cpu> (in the sense of
  // only being usable from that CPU.)  Returns the number of bytes previously
  // assigned to "cpu" that were freed.  Safe to call from any processor, not
  // just <cpu>.
  static size_t ReleaseCpuMemory(int cpu);

  // Gets the region factory used by the malloc extension instance. Returns
  // null for malloc implementations that do not support pluggable region
  // factories.
  static AddressRegionFactory* GetRegionFactory();

  // Sets the region factory to the specified.
  //
  // Users could register their own region factories by doing:
  //   factory = new MyOwnRegionFactory();
  //   MallocExtension::SetRegionFactory(factory);
  //
  // It's up to users whether to fall back (recommended) to the default region
  // factory (use GetRegionFactory() above) or not. The caller is responsible
  // for any necessary locking.
  static void SetRegionFactory(AddressRegionFactory* a);

  // Tries to release at least num_bytes of free memory back to the OS for
  // reuse.
  //
  // Depending on the state of the malloc implementation, more than num_bytes
  // of memory may be released to the OS.
  //
  // This request may not be completely honored if:
  // * The underlying malloc implementation does not support releasing memory
  //   to the OS.
  // * There are not at least num_bytes of free memory cached, or free memory
  //   is fragmented in ways that keep it from being returned to the OS.
  //
  // Returning memory to the OS can hurt performance in two ways:
  // * Parts of huge pages may be free and returning them to the OS requires
  //   breaking up the huge page they are located on.  This can slow accesses
  //   to still-allocated memory due to increased TLB pressure for the working
  //   set.
  // * If the memory is ultimately needed again, pages will need to be faulted
  //   back in.
  static void ReleaseMemoryToSystem(size_t num_bytes);

  // Which memory limit to read or set: soft limits are best-effort, hard
  // limits crash the process when they cannot be met (see SetMemoryLimit
  // below).
  enum class LimitKind { kSoft, kHard };

  // Make a best effort attempt to prevent more than limit bytes of memory
  // from being allocated by the system. In particular, if satisfying a given
  // malloc call would require passing this limit, release as much memory to
  // the OS as needed to stay under it if possible.
  //
  // If limit_kind == kHard, crash if returning memory is unable to get below
  // the limit.
  static size_t GetMemoryLimit(LimitKind limit_kind);
  static void SetMemoryLimit(size_t limit, LimitKind limit_kind);

  // Gets the sampling interval.  Returns a value < 0 if unknown.
  static int64_t GetProfileSamplingInterval();
  // Sets the sampling interval for heap profiles.  TCMalloc samples
  // approximately every interval bytes allocated.
  static void SetProfileSamplingInterval(int64_t interval);

  // Gets the guarded sampling rate.  Returns a value < 0 if unknown.
  static int64_t GetGuardedSamplingInterval();
  // Sets the guarded sampling interval for sampled allocations.  TCMalloc
  // samples approximately every interval bytes allocated, subject to
  // implementation limitations in GWP-ASan.
  //
  // Guarded samples provide probabilistic protections against buffer
  // underflow, overflow, and use-after-free when GWP-ASan is active (via
  // calling ActivateGuardedSampling).
  static void SetGuardedSamplingInterval(int64_t interval);

  // The old names to get and set profile sampling intervals used "rate" to
  // refer to intervals. Use of the below is deprecated to avoid confusion.
  ABSL_DEPRECATE_AND_INLINE()
  static int64_t GetProfileSamplingRate() {
    return GetProfileSamplingInterval();
  }
  ABSL_DEPRECATE_AND_INLINE()
  static void SetProfileSamplingRate(int64_t rate) {
    SetProfileSamplingInterval(rate);
  }
  ABSL_DEPRECATE_AND_INLINE()
  static int64_t GetGuardedSamplingRate() {
    return GetGuardedSamplingInterval();
  }
  ABSL_DEPRECATE_AND_INLINE()
  static void SetGuardedSamplingRate(int64_t rate) {
    SetGuardedSamplingInterval(rate);
  }

  // Switches TCMalloc to guard sampled allocations for underflow, overflow,
  // and use-after-free according to the guarded sample parameter value.
  static void ActivateGuardedSampling();

  // Gets whether TCMalloc is using per-CPU caches.
  static bool PerCpuCachesActive();

  // Gets the current maximum cache size per CPU cache.
  static int32_t GetMaxPerCpuCacheSize();
  // Sets the maximum cache size per CPU cache.  This is a per-core limit.
  static void SetMaxPerCpuCacheSize(int32_t value);

  // Gets the current maximum thread cache.
  static int64_t GetMaxTotalThreadCacheBytes();
  // Sets the maximum thread cache size.  This is a whole-process limit.
  static void SetMaxTotalThreadCacheBytes(int64_t value);

  // Enables or disables background processes.
  static bool GetBackgroundProcessActionsEnabled();
  static void SetBackgroundProcessActionsEnabled(bool value);

  // Gets and sets background process sleep time. This controls the interval
  // granularity at which the actions are invoked.
  static absl::Duration GetBackgroundProcessSleepInterval();
  static void SetBackgroundProcessSleepInterval(absl::Duration value);

  // Gets and sets intervals used for finding recent demand peak, short-term
  // demand fluctuation, and long-term demand trend. Zero duration means not
  // considering corresponding demand history for delayed subrelease. Delayed
  // subrelease is disabled if all intervals are zero.
  static absl::Duration GetSkipSubreleaseInterval();
  static void SetSkipSubreleaseInterval(absl::Duration value);
  static absl::Duration GetSkipSubreleaseShortInterval();
  static void SetSkipSubreleaseShortInterval(absl::Duration value);
  static absl::Duration GetSkipSubreleaseLongInterval();
  static void SetSkipSubreleaseLongInterval(absl::Duration value);

  // Gets and sets intervals used for finding the recent short-term demand
  // fluctuation and long-term demand trend in HugeCache. Zero duration means
  // not considering corresponding demand history for delayed (demand-based)
  // hugepage release. The feature is disabled if both intervals are zero.
  static absl::Duration GetCacheDemandReleaseShortInterval();
  static void SetCacheDemandReleaseShortInterval(absl::Duration value);
  static absl::Duration GetCacheDemandReleaseLongInterval();
  static void SetCacheDemandReleaseLongInterval(absl::Duration value);

  // tcmalloc:google3-begin(memfs malloc)
  // Parameters for the hugetlbfs-backed allocator
  struct MemfsMallocConfig final {
    // If no path is specified, the hugetlbfs allocator is inactive.
    std::optional<std::string> path;

    // Declare the ctor and dtor explicitly so that it is put in the
    // google_malloc section.
    MemfsMallocConfig() = default;
    ~MemfsMallocConfig() = default;
  };

  // Gets the current parameters for the hugetlbfs allocator.
  static MemfsMallocConfig GetMemfsMallocConfig();

  // Switches TCMalloc to using the hugetlbfs allocator with the specified
  // configuration.
  static void ActivateMemfsMalloc(const MemfsMallocConfig& config);
  // tcmalloc:google3-end

  // Returns the estimated number of bytes that will be allocated for a
  // request of "size" bytes.  This is an estimate: an allocation of "size"
  // bytes may reserve more bytes, but will never reserve fewer.
  static size_t GetEstimatedAllocatedSize(size_t size);

  // Returns the actual number N of bytes reserved by tcmalloc for the pointer
  // p.  This number may be equal to or greater than the number of bytes
  // requested when p was allocated.
  //
  // This function is just useful for statistics collection.  The client must
  // *not* read or write from the extra bytes that are indicated by this call.
  //
  // Example, suppose the client gets memory by calling
  //    p = malloc(10)
  // and GetAllocatedSize(p) returns 16.  The client must only use the first
  // 10 bytes p[0..9], and not attempt to read or write p[10..15].
  //
  // p must have been allocated by TCMalloc and must not be an interior
  // pointer -- that is, must be exactly the pointer returned by malloc() et
  // al., not some offset from that -- and should not have been freed yet.  p
  // may be null.
  static std::optional<size_t> GetAllocatedSize(const void* p);

  // Returns
  // * kOwned if TCMalloc allocated the memory pointed to by p, or
  // * kNotOwned if allocated elsewhere or p is null.
  //
  // REQUIRES: p must be a value returned from a previous call to malloc(),
  // calloc(), realloc(), memalign(), posix_memalign(), valloc(), pvalloc(),
  // new, or new[], and must refer to memory that is currently allocated (so,
  // for instance, you should not pass in a pointer after having called free()
  // on it).
  enum class Ownership { kUnknown = 0, kOwned, kNotOwned };
  static Ownership GetOwnership(const void* p);

  // Type used by GetProperties.  See comment on GetProperties.
  struct Property {
    size_t value;
  };

  // Returns detailed statistics about the state of TCMalloc.  The map is
  // keyed by the name of the statistic.
  //
  // Common across malloc implementations:
  //  generic.bytes_in_use_by_app  -- Bytes currently in use by application
  //  generic.physical_memory_used -- Overall (including malloc internals)
  //  generic.virtual_memory_used  -- Overall (including malloc internals)
  //
  // Tcmalloc specific properties
  //  tcmalloc.cpu_free            -- Bytes in per-cpu free-lists
  //  tcmalloc.thread_cache_free   -- Bytes in per-thread free-lists
  //  tcmalloc.transfer_cache      -- Bytes in cross-thread transfer caches
  //  tcmalloc.central_cache_free  -- Bytes in central cache
  //  tcmalloc.page_heap_free      -- Bytes in page heap
  //  tcmalloc.page_heap_unmapped  -- Bytes in page heap (no backing phys. mem)
  //  tcmalloc.metadata_bytes      -- Used by internal data structures
  //  tcmalloc.thread_cache_count  -- Number of thread caches in use
  //  tcmalloc.experiment.NAME     -- Experiment NAME is running if 1
  static std::map<std::string, Property> GetProperties();

  // Captures an instantaneous profile of the requested type (see ProfileType
  // for the available kinds).
  static Profile SnapshotCurrent(tcmalloc::ProfileType type);

  // AllocationProfilingToken tracks an active profiling session started with
  // StartAllocationProfiling.  Profiling continues until Stop() is called.
  class AllocationProfilingToken {
   public:
    AllocationProfilingToken() = default;
    AllocationProfilingToken(AllocationProfilingToken&&) = default;
    AllocationProfilingToken(const AllocationProfilingToken&) = delete;
    ~AllocationProfilingToken();

    AllocationProfilingToken& operator=(AllocationProfilingToken&&) = default;
    AllocationProfilingToken& operator=(const AllocationProfilingToken&) =
        delete;

    // Finish the recording started by the corresponding call to
    // StartAllocationProfile, and return samples of calls to each function.
    // If it is called more than once, subsequent calls will return an empty
    // profile.
    Profile Stop() &&;

   private:
    // Wraps an internal token implementation; constructed only via
    // tcmalloc_internal::AllocationProfilingTokenAccessor.
    explicit AllocationProfilingToken(
        std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase>);

    std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> impl_;
    friend class tcmalloc_internal::AllocationProfilingTokenAccessor;
  };

  // Start recording a sample of allocation and deallocation calls.  Returns
  // null if the implementation does not support profiling.
  static AllocationProfilingToken StartAllocationProfiling();

  // Start recording lifetimes of objects live during this profiling
  // session. Returns null if the implementation does not support profiling.
  static AllocationProfilingToken StartLifetimeProfiling();

  // Runs housekeeping actions for the allocator off of the main allocation
  // path of new/delete.  As of 2020, this includes:
  // * Inspecting the current CPU mask and releasing memory from inaccessible
  //   CPUs.
  // * Releasing GetBackgroundReleaseRate() bytes per second from the page
  //   heap, if that many bytes are free, via ReleaseMemoryToSystem().
  //
  // When linked against TCMalloc, this method does not return.
  static void ProcessBackgroundActions();

  // Return true if ProcessBackgroundActions should be called on this
  // platform.  Not all platforms need/support background actions.  As of 2021
  // this includes Apple and Emscripten.
  static bool NeedsProcessBackgroundActions();

  // Specifies a rate in bytes per second.
  //
  // The enum is used to provide strong-typing for the value.
  enum class BytesPerSecond : size_t {};

  // Gets the current release rate (in bytes per second) from the page heap.
  // Zero inhibits the release path.
  static BytesPerSecond GetBackgroundReleaseRate();
  // Specifies the release rate from the page heap.  ProcessBackgroundActions
  // must be called for this to be operative.
  static void SetBackgroundReleaseRate(BytesPerSecond rate);
};
723
724 // tcmalloc:google3-begin(coroner)
725 class MallocExtensionAsanWriter {
726 public:
~MallocExtensionAsanWriter()727 virtual ~MallocExtensionAsanWriter() {}
728
729 // Argument for WriteErrorType. Should match error values tracked by
730 // GuardedPageAllocator::ErrorType.
731 enum class ErrorType {
732 kUseAfterFree,
733 kUseAfterFreeRead,
734 kUseAfterFreeWrite,
735 kBufferUnderflow,
736 kBufferUnderflowRead,
737 kBufferUnderflowWrite,
738 kBufferOverflow,
739 kBufferOverflowRead,
740 kBufferOverflowWrite,
741 kDoubleFree,
742 kBufferOverflowOnDealloc,
743 kSizedDeleteMismatch,
744 kUnknown,
745 };
746
747 // Record that an ASAN error happened with the provided error.
748 virtual void WriteErrorType(ErrorType error_type) = 0;
749
750 // Record an ASAN debug string with additional information about the error.
751 virtual void WriteDebugString(absl::string_view message) = 0;
752
753 enum class StackType {
754 // Stack corresponds to the location of the ASAN allocation.
755 kAllocation = 0,
756 // Stack corresponds to the location of the ASAN deallocation.
757 kDeallocation,
758 };
759
760 // Store the provided stack for the ASAN error.
761 virtual void WriteStack(StackType type, void* const stack[], int depth) = 0;
762 };
763
// If the provided signal corresponds to an ASAN error, write ASAN crash
// metadata to the provided writer. `ptr` is the address of the faulting
// instruction from the caught signal. May be unimplemented for some variants of
// malloc.
// NOTE(review): `context` appears to be the signal context passed to the
// handler — confirm against the implementation in tcmalloc.cc.
extern "C" void MallocExtension_ExtractGWPAsan(
    const void* ptr, const void* context, MallocExtensionAsanWriter* writer);
770 // tcmalloc:google3-end
771
772 } // namespace tcmalloc
773
// The nallocx function allocates no memory, but it performs the same size
// computation as the malloc function, and returns the real size of the
// allocation that would result from the equivalent malloc function call.
// Default weak implementation returns size unchanged, but tcmalloc overrides it
// and returns rounded up size. See the following link for details:
// http://www.unix.com/man-page/freebsd/3/nallocx/
// NOTE: prefer using tcmalloc_size_returning_operator_new over nallocx.
// tcmalloc_size_returning_operator_new is more efficient and provides tcmalloc
// with better telemetry.
extern "C" size_t nallocx(size_t size, int flags) noexcept;
784
// The sdallocx function deallocates memory allocated by malloc or memalign. It
// takes a size parameter to pass the original allocation size.
//
// The default weak implementation calls free(), but TCMalloc overrides it and
// uses the size to improve deallocation performance.
//
// NOTE(review): `flags` presumably follows the jemalloc MALLOCX_* encoding
// (see MALLOCX_LG_ALIGN below) — confirm before relying on it.
extern "C" void sdallocx(void* ptr, size_t size, int flags) noexcept;
791
// Declared only when the C standard library (C23, per
// __STDC_VERSION_STDLIB_H__) does not already provide these.
#if !defined(__STDC_VERSION_STDLIB_H__) || __STDC_VERSION_STDLIB_H__ < 202311L
// Frees ptr allocated with malloc(size) introduced in C23.
extern "C" void free_sized(void* ptr, size_t size);

// Frees ptr allocated with aligned_alloc/posix_memalign with the specified size
// and alignment introduced in C23.
extern "C" void free_aligned_sized(void* ptr, size_t alignment, size_t size);
#endif  // !__STDC_VERSION_STDLIB_H__ || __STDC_VERSION_STDLIB_H__ < 202311L
800
// Define __sized_ptr_t in the global namespace so that it can be named by the
// __size_returning_new implementations defined in tcmalloc.cc.
// tcmalloc:google3-begin(b/283854704)
#if defined(__has_include) && __has_include(<__google3/new.h>)

// Use the __sized_ptr_t defined in __google3/new.h extension.
using __sized_ptr_t = std::__google3::__sized_ptr_t;

#else  // __has_include(<__google3/new.h>)

// Fallback definition: the allocated pointer plus the usable capacity of the
// allocation in bytes.
struct __sized_ptr_t {
  void* p;
  size_t n;
};

#endif  // __has_include(<__google3/new.h>)

// tcmalloc:oss-replace-begin
// struct __sized_ptr_t {
// void* p;
// size_t n;
// };
// tcmalloc:oss-replace-end

namespace tcmalloc {
// sized_ptr_t contains pointer / capacity information as returned
// by `tcmalloc_size_returning_operator_new()`.
// See `tcmalloc_size_returning_operator_new()` for more information.
using sized_ptr_t = __sized_ptr_t;
}  // namespace tcmalloc
831
// Allocates memory of at least the requested size.
//
// Returns a `sized_ptr_t` struct holding the allocated pointer, and the
// capacity of the allocated memory, which may be larger than the requested
// size.
//
// The returned pointer follows the alignment requirements of the standard new
// operator. This function will terminate on failure, except for the APIs
// accepting the std::nothrow parameter which will return {nullptr, 0} on
// failure.
//
// The returned pointer must be freed calling the matching ::operator delete.
//
// If a sized operator delete operator is invoked, then the 'size' parameter
// passed to delete must be greater or equal to the original requested size, and
// less than or equal to the capacity of the allocated memory as returned by the
// `tcmalloc_size_returning_operator_new` method.
//
// If neither the original size nor capacity is known, then the non-sized
// operator delete can be invoked, however, this should be avoided, as this is
// substantially less efficient.
//
// The default weak implementation allocates the memory using the corresponding
// (matching) ::operator new(size_t, ...).
//
// This is a prototype API for the extension to C++ "size feedback in operator
// new" proposal:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0901r5.html
extern "C" {
// The following declarations provide an alternative spelling which should be
// used so that the compiler can identify these as allocator functions.
__sized_ptr_t __size_returning_new(size_t size);
__sized_ptr_t __size_returning_new_hot_cold(size_t, __hot_cold_t);
__sized_ptr_t __size_returning_new_aligned(size_t, std::align_val_t);
__sized_ptr_t __size_returning_new_aligned_hot_cold(size_t, std::align_val_t,
                                                    __hot_cold_t);

// tcmalloc:google3-begin(b/283856455)
// Experiment for using __size_returning_new in libc++.
__sized_ptr_t __size_returning_new_experiment(size_t size);
__sized_ptr_t __size_returning_new_aligned_experiment(size_t, std::align_val_t);
// tcmalloc:google3-end

// Deprecated spelling; forwards to __size_returning_new.
ABSL_DEPRECATE_AND_INLINE()
inline __sized_ptr_t tcmalloc_size_returning_operator_new(size_t size) {
  return __size_returning_new(size);
}
__sized_ptr_t tcmalloc_size_returning_operator_new_nothrow(
    size_t size) noexcept;
// Deprecated spelling; forwards to __size_returning_new_hot_cold.
ABSL_DEPRECATE_AND_INLINE()
inline __sized_ptr_t tcmalloc_size_returning_operator_new_hot_cold(
    size_t size, tcmalloc::hot_cold_t hot_cold) {
  return __size_returning_new_hot_cold(size, hot_cold);
}
__sized_ptr_t tcmalloc_size_returning_operator_new_hot_cold_nothrow(
    size_t size, tcmalloc::hot_cold_t hot_cold) noexcept;

#if defined(__cpp_aligned_new)

// Identical to `tcmalloc_size_returning_operator_new` except that the returned
// memory is aligned according to the `alignment` argument.
ABSL_DEPRECATE_AND_INLINE()
inline __sized_ptr_t tcmalloc_size_returning_operator_new_aligned(
    size_t size, std::align_val_t alignment) {
  return __size_returning_new_aligned(size, alignment);
}
__sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow(
    size_t size, std::align_val_t alignment) noexcept;
// Deprecated spelling; forwards to __size_returning_new_aligned_hot_cold.
ABSL_DEPRECATE_AND_INLINE()
inline __sized_ptr_t tcmalloc_size_returning_operator_new_aligned_hot_cold(
    size_t size, std::align_val_t alignment, tcmalloc::hot_cold_t hot_cold) {
  return __size_returning_new_aligned_hot_cold(size, alignment, hot_cold);
}
__sized_ptr_t tcmalloc_size_returning_operator_new_aligned_hot_cold_nothrow(
    size_t size, std::align_val_t alignment,
    tcmalloc::hot_cold_t hot_cold) noexcept;

#endif  // __cpp_aligned_new

}  // extern "C"
912
// operator new/new[] overloads taking a tcmalloc::hot_cold_t argument.
// NOTE(review): presumably `hot_cold` hints at the expected access frequency
// of the allocation so the allocator can place hot and cold objects apart —
// confirm against the tcmalloc::hot_cold_t declaration.
void* operator new(size_t size, tcmalloc::hot_cold_t hot_cold) noexcept(false);
void* operator new(size_t size, const std::nothrow_t&,
                   tcmalloc::hot_cold_t hot_cold) noexcept;
void* operator new[](size_t size,
                     tcmalloc::hot_cold_t hot_cold) noexcept(false);
void* operator new[](size_t size, const std::nothrow_t&,
                     tcmalloc::hot_cold_t hot_cold) noexcept;

#ifdef __cpp_aligned_new
// Aligned variants, available only when the compiler supports C++17 aligned
// new (__cpp_aligned_new).
void* operator new(size_t size, std::align_val_t alignment,
                   tcmalloc::hot_cold_t hot_cold) noexcept(false);
void* operator new(size_t size, std::align_val_t alignment,
                   const std::nothrow_t&,
                   tcmalloc::hot_cold_t hot_cold) noexcept;
void* operator new[](size_t size, std::align_val_t alignment,
                     tcmalloc::hot_cold_t hot_cold) noexcept(false);
void* operator new[](size_t size, std::align_val_t alignment,
                     const std::nothrow_t&,
                     tcmalloc::hot_cold_t hot_cold) noexcept;
#endif  // __cpp_aligned_new
933
// Identity fallback for the MALLOCX_LG_ALIGN flag encoder used with the
// nallocx/sdallocx `flags` argument; defined only when a jemalloc-compatible
// header has not already provided it.
#ifndef MALLOCX_LG_ALIGN
#define MALLOCX_LG_ALIGN(la) (la)
#endif
937
938 namespace tcmalloc {
939 namespace tcmalloc_internal {
940
// AllocationProfilingTokenBase tracks an on-going profiling session of sampled
// allocations. The session ends when Stop() is called.
//
// This decouples the implementation details (of TCMalloc) from the interface,
// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided
// while allowing the library to compile and link.
class AllocationProfilingTokenBase {
 public:
  // Explicitly declare the ctor to put it in the google_malloc section.
  AllocationProfilingTokenBase() = default;

  virtual ~AllocationProfilingTokenBase() = default;

  // Finish recording started during construction of this object.
  //
  // After the first call, Stop() will return an empty profile.
  //
  // Rvalue-ref-qualified: may only be called on an rvalue (e.g.
  // std::move(token).Stop()), signalling that the token is consumed.
  virtual Profile Stop() && = 0;
};
959
// ProfileBase contains a profile of allocations.
//
// This decouples the implementation details (of TCMalloc) from the interface,
// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided
// while allowing the library to compile and link.
class ProfileBase {
 public:
  virtual ~ProfileBase() = default;

  // For each sample in the profile, Iterate invokes the callback f on the
  // sample.
  virtual void Iterate(
      absl::FunctionRef<void(const Profile::Sample&)> f) const = 0;

  // The type of profile (live objects, allocated, etc.).
  virtual ProfileType Type() const = 0;

  // The time at which profile collection started, if known.
  virtual std::optional<absl::Time> StartTime() const = 0;

  // The duration the profile was collected for. For instantaneous profiles
  // (heap, peakheap, etc.), this returns absl::ZeroDuration().
  virtual absl::Duration Duration() const = 0;
};
983
// Preference for the madvise(2) advice used when releasing memory.
// Values are bit flags: kFreeAndDontNeed (0x3) == kDontNeed | kFreeOnly.
enum class MadvisePreference {
  kNever = 0x0,
  kDontNeed = 0x1,
  kFreeAndDontNeed = 0x3,
  kFreeOnly = 0x2,
};
990
AbslParseFlag(absl::string_view text,MadvisePreference * preference,std::string *)991 inline bool AbslParseFlag(absl::string_view text, MadvisePreference* preference,
992 std::string* /* error */) {
993 if (text == "NEVER") {
994 *preference = MadvisePreference::kNever;
995 return true;
996 } else if (text == "DONTNEED") {
997 *preference = MadvisePreference::kDontNeed;
998 return true;
999 } else if (text == "FREE_AND_DONTNEED") {
1000 *preference = MadvisePreference::kFreeAndDontNeed;
1001 return true;
1002 } else if (text == "FREE_ONLY") {
1003 *preference = MadvisePreference::kFreeOnly;
1004 return true;
1005 } else {
1006 return false;
1007 }
1008 }
1009
AbslUnparseFlag(MadvisePreference preference)1010 inline std::string AbslUnparseFlag(MadvisePreference preference) {
1011 switch (preference) {
1012 case MadvisePreference::kNever:
1013 return "NEVER";
1014 case MadvisePreference::kDontNeed:
1015 return "DONTNEED";
1016 case MadvisePreference::kFreeAndDontNeed:
1017 return "FREE_AND_DONTNEED";
1018 case MadvisePreference::kFreeOnly:
1019 return "FREE_ONLY";
1020 }
1021
1022 ABSL_UNREACHABLE();
1023 return "";
1024 }
1025
1026 } // namespace tcmalloc_internal
1027 } // namespace tcmalloc
1028
#endif  // TCMALLOC_MALLOC_EXTENSION_H_