//===-- memprof_allocator.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler, a memory profiler.
//
// Implementation of MemProf's memory allocator, which uses the allocator
// from sanitizer_common.
//
//===----------------------------------------------------------------------===//

#include "memprof_allocator.h"
#include "memprof_mapping.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_list.h"
#include "sanitizer_common/sanitizer_stackdepot.h"

#include <sched.h>
#include <stdlib.h>
#include <time.h>

namespace __memprof {

static int GetCpuId(void) {
  // _memprof_preinit is called via the preinit_array, which subsequently calls
  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
  // will seg fault as the address of __vdso_getcpu will be null.
  if (!memprof_init_done)
    return -1;
  return sched_getcpu();
}

// Compute the timestamp in ms.
static int GetTimestamp(void) {
  // clock_gettime will segfault if called from dl_init
  if (!memprof_timestamp_inited) {
    // By returning 0, this will be effectively treated as being
    // timestamped at memprof init time (when memprof_init_timestamp_s
    // is initialized).
    return 0;
  }
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
}

static MemprofAllocator &get_allocator();

// The memory chunk allocated from the underlying allocator looks like this:
// H H U U U U U U
//   H -- ChunkHeader (32 bytes)
//   U -- user memory.

// If there is left padding before the ChunkHeader (due to use of memalign),
// we store a magic value in the first uptr word of the memory block and
// store the address of ChunkHeader in the next uptr.
// M B L L L L L L L L L  H H U U U U U U
//   |                    ^
//   ---------------------|
//   M -- magic value kAllocBegMagic
//   B -- address of ChunkHeader pointing to the first 'H'
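//
// This (M, B) pair is what the LargeChunkHeader class below encodes;
// GetMemprofChunk() reads it at the block start returned by the underlying
// allocator to recover the ChunkHeader when left padding was inserted.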

constexpr uptr kMaxAllowedMallocBits = 40;

// Should be no more than 32 bytes.
struct ChunkHeader {
  // 1-st 4 bytes.
  u32 alloc_context_id;
  // 2-nd 4 bytes
  u32 cpu_id;
  // 3-rd 4 bytes
  u32 timestamp_ms;
  // 4-th 4 bytes
  // Note only 1 bit is needed for this flag if we need space in the future for
  // more fields.
  u32 from_memalign;
  // 5-th and 6-th 4 bytes
  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
  // could be shrunk to kMaxAllowedMallocBits if we need space in the future for
  // more fields.
  atomic_uint64_t user_requested_size;
  // 23 bits available
  // 7-th and 8-th 4 bytes
  u64 data_type_id; // TODO: hash of type name
};

static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
COMPILER_CHECK(kChunkHeaderSize == 32);
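// The 32 bytes checked above break down as four u32 fields (16 bytes) plus the
// atomic u64 user_requested_size and the u64 data_type_id (8 bytes each).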

struct MemprofChunk : ChunkHeader {
  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
  uptr UsedSize() {
    return atomic_load(&user_requested_size, memory_order_relaxed);
  }
  void *AllocBeg() {
    if (from_memalign)
      return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
    return reinterpret_cast<void *>(this);
  }
};

class LargeChunkHeader {
  static constexpr uptr kAllocBegMagic =
      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
  atomic_uintptr_t magic;
  MemprofChunk *chunk_header;

public:
  MemprofChunk *Get() const {
    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
               ? chunk_header
               : nullptr;
  }

  void Set(MemprofChunk *p) {
    if (p) {
      chunk_header = p;
      atomic_store(&magic, kAllocBegMagic, memory_order_release);
      return;
    }

    uptr old = kAllocBegMagic;
    if (!atomic_compare_exchange_strong(&magic, &old, 0,
                                        memory_order_release)) {
      CHECK_EQ(old, kAllocBegMagic);
    }
  }
};

void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Since memprof's mapping is compacting, the shadow chunk may not be
  // page-aligned, so we only flush the page-aligned portion.
  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
}

void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.mmaps++;
  thread_stats.mmaped += size;
}
void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
  // We are about to unmap a chunk of user memory.
  // Mark the corresponding shadow memory as not needed.
  FlushUnneededMemProfShadowMemory(p, size);
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.munmaps++;
  thread_stats.munmaped += size;
}

AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
  CHECK(ms);
  return &ms->allocator_cache;
}

struct MemInfoBlock {
  u32 alloc_count;
  u64 total_access_count, min_access_count, max_access_count;
  u64 total_size;
  u32 min_size, max_size;
  u32 alloc_timestamp, dealloc_timestamp;
  u64 total_lifetime;
  u32 min_lifetime, max_lifetime;
  u32 alloc_cpu_id, dealloc_cpu_id;
  u32 num_migrated_cpu;

  // Currently only compared to the previously deallocated object.
  u32 num_lifetime_overlaps;
  u32 num_same_alloc_cpu;
  u32 num_same_dealloc_cpu;

  u64 data_type_id; // TODO: hash of type name

  MemInfoBlock() : alloc_count(0) {}

  MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp,
               u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu)
      : alloc_count(1), total_access_count(access_count),
        min_access_count(access_count), max_access_count(access_count),
        total_size(size), min_size(size), max_size(size),
        alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
        total_lifetime(dealloc_timestamp - alloc_timestamp),
        min_lifetime(total_lifetime), max_lifetime(total_lifetime),
        alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
        num_lifetime_overlaps(0), num_same_alloc_cpu(0),
        num_same_dealloc_cpu(0) {
    num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
  }

  void Print(u64 id) {
    u64 p;
    if (flags()->print_terse) {
      p = total_size * 100 / alloc_count;
      Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100,
             min_size, max_size);
      p = total_access_count * 100 / alloc_count;
      Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count,
             max_access_count);
      p = total_lifetime * 100 / alloc_count;
      Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime);
      Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps,
             num_same_alloc_cpu, num_same_dealloc_cpu);
    } else {
      p = total_size * 100 / alloc_count;
      Printf("Memory allocation stack id = %llu\n", id);
      Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n",
             alloc_count, p / 100, p % 100, min_size, max_size);
      p = total_access_count * 100 / alloc_count;
      Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100,
             p % 100, min_access_count, max_access_count);
      p = total_lifetime * 100 / alloc_count;
      Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100,
             min_lifetime, max_lifetime);
      Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
             "cpu: %u, num same dealloc_cpu: %u\n",
             num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu,
             num_same_dealloc_cpu);
    }
  }

  static void printHeader() {
    CHECK(flags()->print_terse);
    Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/"
           "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/"
           "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/"
           "NumSameDeallocCpu\n");
  }

  void Merge(MemInfoBlock &newMIB) {
    alloc_count += newMIB.alloc_count;

    total_access_count += newMIB.total_access_count;
    min_access_count = Min(min_access_count, newMIB.min_access_count);
    max_access_count = Max(max_access_count, newMIB.max_access_count);

    total_size += newMIB.total_size;
    min_size = Min(min_size, newMIB.min_size);
    max_size = Max(max_size, newMIB.max_size);

    total_lifetime += newMIB.total_lifetime;
    min_lifetime = Min(min_lifetime, newMIB.min_lifetime);
    max_lifetime = Max(max_lifetime, newMIB.max_lifetime);

    // We know newMIB was deallocated later, so we just need to check whether
    // it was allocated before the last one was deallocated.
    num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
    alloc_timestamp = newMIB.alloc_timestamp;
    dealloc_timestamp = newMIB.dealloc_timestamp;

    num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
    num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
    alloc_cpu_id = newMIB.alloc_cpu_id;
    dealloc_cpu_id = newMIB.dealloc_cpu_id;
  }
};

static u32 AccessCount = 0;
static u32 MissCount = 0;

struct SetEntry {
  SetEntry() : id(0), MIB() {}
  bool Empty() { return id == 0; }
  void Print() {
    CHECK(!Empty());
    MIB.Print(id);
  }
  // The stack id
  u64 id;
  MemInfoBlock MIB;
};

struct CacheSet {
  enum { kSetSize = 4 };

  void PrintAll() {
    for (int i = 0; i < kSetSize; i++) {
      if (Entries[i].Empty())
        continue;
      Entries[i].Print();
    }
  }
  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
    AccessCount++;
    SetAccessCount++;

    for (int i = 0; i < kSetSize; i++) {
      auto id = Entries[i].id;
      // Check if this is a hit or an empty entry. Since we always move any
      // filled locations to the front of the array (see below), we don't need
      // to look further after finding the first empty entry.
      if (id == new_id || !id) {
        if (id == 0) {
          Entries[i].id = new_id;
          Entries[i].MIB = newMIB;
        } else {
          Entries[i].MIB.Merge(newMIB);
        }
        // Assuming some id locality, we try to swap the matching entry
        // into the first set position.
        if (i != 0) {
          auto tmp = Entries[0];
          Entries[0] = Entries[i];
          Entries[i] = tmp;
        }
        return;
      }
    }

    // Miss
    MissCount++;
    SetMissCount++;

    // Find the entry with the lowest alloc count to evict:
    int min_idx = 0;
    u64 min_count = Entries[0].MIB.alloc_count;
    for (int i = 1; i < kSetSize; i++) {
      CHECK(!Entries[i].Empty());
      if (Entries[i].MIB.alloc_count < min_count) {
        min_idx = i;
        min_count = Entries[i].MIB.alloc_count;
      }
    }

    // Print the evicted entry profile information
    if (!flags()->print_terse)
      Printf("Evicted:\n");
    Entries[min_idx].Print();

    // Similar to the hit case, put new MIB in first set position.
    if (min_idx != 0)
      Entries[min_idx] = Entries[0];
    Entries[0].id = new_id;
    Entries[0].MIB = newMIB;
  }

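  // Miss rates are computed in fixed point: scaling by 10000 gives a
  // percentage with two decimal digits, printed as p / 100 "." p % 100,
  // without using floating point.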
  void PrintMissRate(int i) {
    u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0;
    Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount,
           SetAccessCount, p / 100, p % 100);
  }

  SetEntry Entries[kSetSize];
  u32 SetAccessCount = 0;
  u32 SetMissCount = 0;
};

struct MemInfoBlockCache {
  MemInfoBlockCache() {
    if (common_flags()->print_module_map)
      DumpProcessMap();
    if (flags()->print_terse)
      MemInfoBlock::printHeader();
    Sets =
        (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
    Constructed = true;
  }

  ~MemInfoBlockCache() { free(Sets); }

  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
    u64 hv = new_id;

    // Use the mod method; the number of entries should be a prime close to a
    // power of 2.
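    // A prime number of sets avoids systematic collisions when stack ids share
    // low-bit patterns, which a power-of-two modulus would fold onto the same
    // set.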
    hv %= flags()->mem_info_cache_entries;

    return Sets[hv].insertOrMerge(new_id, newMIB);
  }

  void PrintAll() {
    for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
      Sets[i].PrintAll();
    }
  }

  void PrintMissRate() {
    if (!flags()->print_mem_info_cache_miss_rate)
      return;
    u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
    Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount,
           p / 100, p % 100);
    if (flags()->print_mem_info_cache_miss_rate_details)
      for (int i = 0; i < flags()->mem_info_cache_entries; i++)
        Sets[i].PrintMissRate(i);
  }

  CacheSet *Sets;
  // Flag set when the Sets have been allocated, in case a deallocation is
  // called very early, before the static init of the Allocator (and therefore
  // of this table) has completed.
  bool Constructed = false;
};

// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
  u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
  u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}

// Clears the shadow counters (when memory is allocated).
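// Small unaligned head and tail pieces are memset to zero; once the range
// exceeds clear_shadow_mmap_threshold, the page-aligned middle is instead
// re-mapped via ReserveShadowMemoryRange, which zeroes it without touching
// every byte.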
void ClearShadow(uptr addr, uptr size) {
  CHECK(AddrIsAlignedByGranularity(addr));
  CHECK(AddrIsInMem(addr));
  CHECK(AddrIsAlignedByGranularity(addr + size));
  CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
  CHECK(REAL(memset));
  uptr shadow_beg = MEM_TO_SHADOW(addr);
  uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
  if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
  } else {
    uptr page_size = GetPageSizeCached();
    uptr page_beg = RoundUpTo(shadow_beg, page_size);
    uptr page_end = RoundDownTo(shadow_end, page_size);

    if (page_beg >= page_end) {
      REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
    } else {
      if (page_beg != shadow_beg) {
        REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
      }
      if (page_end != shadow_end) {
        REAL(memset)((void *)page_end, 0, shadow_end - page_end);
      }
      ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
    }
  }
}

struct Allocator {
  static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;

  MemprofAllocator allocator;
  StaticSpinMutex fallback_mutex;
  AllocatorCache fallback_allocator_cache;

  uptr max_user_defined_malloc_size;
  atomic_uint8_t rss_limit_exceeded;

  MemInfoBlockCache MemInfoBlockTable;
  bool destructing;

  // ------------------- Initialization ------------------------
  explicit Allocator(LinkerInitialized) : destructing(false) {}

  ~Allocator() { FinishAndPrint(); }

  void FinishAndPrint() {
    if (!flags()->print_terse)
      Printf("Live on exit:\n");
    allocator.ForceLock();
    allocator.ForEachChunk(
        [](uptr chunk, void *alloc) {
          u64 user_requested_size;
          MemprofChunk *m =
              ((Allocator *)alloc)
                  ->GetMemprofChunk((void *)chunk, user_requested_size);
          if (!m)
            return;
          uptr user_beg = ((uptr)m) + kChunkHeaderSize;
          u64 c = GetShadowCount(user_beg, user_requested_size);
          long curtime = GetTimestamp();
          MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                              m->cpu_id, GetCpuId());
          ((Allocator *)alloc)
              ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
        },
        this);
    allocator.ForceUnlock();

    destructing = true;
    MemInfoBlockTable.PrintMissRate();
    MemInfoBlockTable.PrintAll();
    StackDepotPrintAll();
  }

  void InitLinkerInitialized() {
    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
    allocator.InitLinkerInitialized(
        common_flags()->allocator_release_to_os_interval_ms);
    max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
                                       ? common_flags()->max_allocation_size_mb
                                             << 20
                                       : kMaxAllowedMallocSize;
  }

  bool RssLimitExceeded() {
    return atomic_load(&rss_limit_exceeded, memory_order_relaxed);
  }

  void SetRssLimitExceeded(bool limit_exceeded) {
    atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed);
  }

  // -------------------- Allocation/Deallocation routines ---------------
  void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
                 AllocType alloc_type) {
    if (UNLIKELY(!memprof_inited))
      MemprofInitFromRtl();
    if (RssLimitExceeded()) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportRssLimitExceeded(stack);
    }
    CHECK(stack);
    const uptr min_alignment = MEMPROF_ALIGNMENT;
    if (alignment < min_alignment)
      alignment = min_alignment;
    if (size == 0) {
      // We'd be happy to avoid allocating memory for zero-size requests, but
      // some programs/tests depend on this behavior and assume that malloc
      // would not return NULL even for zero-size allocations. Moreover, it
      // looks like operator new should never return NULL, and results of
      // consecutive "new" calls must be different even if the allocated size
      // is zero.
      size = 1;
    }
    CHECK(IsPowerOfTwo(alignment));
    uptr rounded_size = RoundUpTo(size, alignment);
    uptr needed_size = rounded_size + kChunkHeaderSize;
    if (alignment > min_alignment)
      needed_size += alignment;
    CHECK(IsAligned(needed_size, min_alignment));
    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
        size > max_user_defined_malloc_size) {
      if (AllocatorMayReturnNull()) {
        Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
        return nullptr;
      }
      uptr malloc_limit =
          Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
      ReportAllocationSizeTooBig(size, malloc_limit, stack);
    }

    MemprofThread *t = GetCurrentThread();
    void *allocated;
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocated = allocator.Allocate(cache, needed_size, 8);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocated = allocator.Allocate(cache, needed_size, 8);
    }
    if (UNLIKELY(!allocated)) {
      SetAllocatorOutOfMemory();
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportOutOfMemory(size, stack);
    }

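    // Carve the user region out of the raw block: the ChunkHeader sits
    // immediately before user_beg, and if alignment forces left padding, a
    // LargeChunkHeader written at alloc_beg records where that ChunkHeader is
    // (see the layout diagram near the top of this file).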
    uptr alloc_beg = reinterpret_cast<uptr>(allocated);
    uptr alloc_end = alloc_beg + needed_size;
    uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
    uptr user_beg = beg_plus_header;
    if (!IsAligned(user_beg, alignment))
      user_beg = RoundUpTo(user_beg, alignment);
    uptr user_end = user_beg + size;
    CHECK_LE(user_end, alloc_end);
    uptr chunk_beg = user_beg - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
    m->from_memalign = alloc_beg != chunk_beg;
    CHECK(size);

    m->cpu_id = GetCpuId();
    m->timestamp_ms = GetTimestamp();
    m->alloc_context_id = StackDepotPut(*stack);

    uptr size_rounded_down_to_granularity =
        RoundDownTo(size, SHADOW_GRANULARITY);
    if (size_rounded_down_to_granularity)
      ClearShadow(user_beg, size_rounded_down_to_granularity);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.mallocs++;
    thread_stats.malloced += size;
    thread_stats.malloced_overhead += needed_size - size;
    if (needed_size > SizeClassMap::kMaxSize)
      thread_stats.malloc_large++;
    else
      thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;

    void *res = reinterpret_cast<void *>(user_beg);
    atomic_store(&m->user_requested_size, size, memory_order_release);
    if (alloc_beg != chunk_beg) {
      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
    }
    MEMPROF_MALLOC_HOOK(res, size);
    return res;
  }

  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
                  BufferedStackTrace *stack, AllocType alloc_type) {
    uptr p = reinterpret_cast<uptr>(ptr);
    if (p == 0)
      return;

    MEMPROF_FREE_HOOK(ptr);

    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

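    // Atomically reset the recorded size to mark the chunk as freed;
    // GetMemprofChunk() treats a zero size as a dead chunk.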
    u64 user_requested_size =
        atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
    if (memprof_inited && memprof_init_done && !destructing &&
        MemInfoBlockTable.Constructed) {
      u64 c = GetShadowCount(p, user_requested_size);
      long curtime = GetTimestamp();

      MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                          m->cpu_id, GetCpuId());
      {
        SpinMutexLock l(&fallback_mutex);
        MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
      }
    }

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.frees++;
    thread_stats.freed += user_requested_size;

    void *alloc_beg = m->AllocBeg();
    if (alloc_beg != m) {
      // Clear the magic value, as allocator internals may overwrite the
      // contents of deallocated chunk, confusing GetMemprofChunk lookup.
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
    }

    MemprofThread *t = GetCurrentThread();
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocator.Deallocate(cache, alloc_beg);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocator.Deallocate(cache, alloc_beg);
    }
  }

  void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
    CHECK(old_ptr && new_size);
    uptr p = reinterpret_cast<uptr>(old_ptr);
    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.reallocs++;
    thread_stats.realloced += new_size;

    void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
    if (new_ptr) {
      CHECK_NE(REAL(memcpy), nullptr);
      uptr memcpy_size = Min(new_size, m->UsedSize());
      REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
    }
    return new_ptr;
  }

  void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
    if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportCallocOverflow(nmemb, size, stack);
    }
    void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
    // If the memory comes from the secondary allocator, there is no need to
    // clear it as it comes directly from mmap.
    if (ptr && allocator.FromPrimary(ptr))
      REAL(memset)(ptr, 0, nmemb * size);
    return ptr;
  }

  void CommitBack(MemprofThreadLocalMallocStorage *ms,
                  BufferedStackTrace *stack) {
    AllocatorCache *ac = GetAllocatorCache(ms);
    allocator.SwallowCache(ac);
  }

  // -------------------------- Chunk lookup ----------------------

  // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
  MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
    if (!alloc_beg)
      return nullptr;
    MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
    if (!p) {
      if (!allocator.FromPrimary(alloc_beg))
        return nullptr;
      p = reinterpret_cast<MemprofChunk *>(alloc_beg);
    }
    // The size is reset to 0 on deallocation (and a min of 1 on
    // allocation).
    user_requested_size =
        atomic_load(&p->user_requested_size, memory_order_acquire);
    if (user_requested_size)
      return p;
    return nullptr;
  }

  MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
    void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
    return GetMemprofChunk(alloc_beg, user_requested_size);
  }

  uptr AllocationSize(uptr p) {
    u64 user_requested_size;
    MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
    if (!m)
      return 0;
    if (m->Beg() != p)
      return 0;
    return user_requested_size;
  }

  void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); }

  void PrintStats() { allocator.PrintStats(); }

  void ForceLock() {
    allocator.ForceLock();
    fallback_mutex.Lock();
  }

  void ForceUnlock() {
    fallback_mutex.Unlock();
    allocator.ForceUnlock();
  }
};

static Allocator instance(LINKER_INITIALIZED);

static MemprofAllocator &get_allocator() { return instance.allocator; }

void InitializeAllocator() { instance.InitLinkerInitialized(); }

void MemprofThreadLocalMallocStorage::CommitBack() {
  GET_STACK_TRACE_MALLOC;
  instance.CommitBack(this, &stack);
}

void PrintInternalAllocatorStats() { instance.PrintStats(); }

void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
}

void memprof_delete(void *ptr, uptr size, uptr alignment,
                    BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
}

void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
}

void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
}

void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportReallocArrayOverflow(nmemb, size, stack);
  }
  return memprof_realloc(p, nmemb * size, stack);
}

void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
  if (!p)
    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
  if (size == 0) {
    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
      return nullptr;
    }
    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
    size = 1;
  }
  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
}

void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(
      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
}

void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
  uptr PageSize = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportPvallocOverflow(size, stack);
  }
  // pvalloc(0) should allocate one page.
  size = size ? RoundUpTo(size, PageSize) : PageSize;
  return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
}

void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                       AllocType alloc_type) {
  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAllocationAlignment(alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
}

void *memprof_aligned_alloc(uptr alignment, uptr size,
                            BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
}

int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
    if (AllocatorMayReturnNull())
      return errno_EINVAL;
    ReportInvalidPosixMemalignAlignment(alignment, stack);
  }
  void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
  if (UNLIKELY(!ptr))
    // OOM error is already taken care of by Allocate.
    return errno_ENOMEM;
  CHECK(IsAligned((uptr)ptr, alignment));
  *memptr = ptr;
  return 0;
}

uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) {
  if (!ptr)
    return 0;
  uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
  return usable_size;
}

void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) {
  instance.SetRssLimitExceeded(limit_exceeded);
}

} // namespace __memprof

// ---------------------- Interface ---------------- {{{1
using namespace __memprof;

#if !SANITIZER_SUPPORTS_WEAK_HOOKS
// Provide default (no-op) implementation of malloc hooks.
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr,
                             uptr size) {
  (void)ptr;
  (void)size;
}

SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
  (void)ptr;
}
#endif

uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }

int __sanitizer_get_ownership(const void *p) {
  return memprof_malloc_usable_size(p, 0, 0) != 0;
}

uptr __sanitizer_get_allocated_size(const void *p) {
  return memprof_malloc_usable_size(p, 0, 0);
}

int __memprof_profile_dump() {
  instance.FinishAndPrint();
  // In the future we may want to return non-zero if there are any errors
  // detected during the dumping process.
  return 0;
}