/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"


/* -----------------------------------------------------------
  Initialization.
  On Windows this initializes support for aligned allocation and
  large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
----------------------------------------------------------- */

static mi_os_mem_config_t mi_os_mem_config = {
  4096,   // page size
  0,      // large page size (usually 2MiB)
  4096,   // allocation granularity
  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
  false,  // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
};

bool _mi_os_has_overcommit(void) {
  return mi_os_mem_config.has_overcommit;
}

bool _mi_os_has_virtual_reserve(void) {
  return mi_os_mem_config.has_virtual_reserve;
}


// OS (small) page size
size_t _mi_os_page_size(void) {
  return mi_os_mem_config.page_size;
}

// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
size_t _mi_os_large_page_size(void) {
  return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}

bool _mi_os_use_large_page(size_t size, size_t alignment) {
  // if we have access, check the size and alignment requirements
  if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
  return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
}
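
// Illustrative example (not part of the original source): with a typical large page size
// of 2MiB and `mi_option_allow_large_os_pages` enabled, an allocation of 32MiB with 4MiB
// alignment qualifies (32MiB % 2MiB == 0 && 4MiB % 2MiB == 0), while a 3MiB allocation
// does not (3MiB % 2MiB != 0) and falls back to regular OS pages.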

// round to a good OS allocation size (bounded by max 12.5% waste)
size_t _mi_os_good_alloc_size(size_t size) {
  size_t align_size;
  if (size < 512*MI_KiB) align_size = _mi_os_page_size();
  else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
  else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
  else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
  else align_size = 4*MI_MiB;
  if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow?
  return _mi_align_up(size, align_size);
}
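
// Worked example (illustrative, not from the original source): each bucket's alignment is
// at most 1/8 of the bucket's lower bound, so the rounding waste stays below 12.5%.
// For instance, a request of 2MiB rounds up by at most 256KiB (256KiB/2MiB = 12.5%),
// and a request of 8MiB rounds up by at most 1MiB (1MiB/8MiB = 12.5%).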

void _mi_os_init(void) {
  _mi_prim_mem_init(&mi_os_mem_config);
}


/* -----------------------------------------------------------
  Util
-------------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);

static void* mi_align_up_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_up((uintptr_t)p, alignment);
}

static void* mi_align_down_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_down((uintptr_t)p, alignment);
}


/* -----------------------------------------------------------
  aligned hinting
-------------------------------------------------------------- */

// On 64-bit systems, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area for those allocations.
#if (MI_INTPTR_SIZE >= 8)
static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS determines the address, but on some OS's that address may
// not be properly aligned, which is more costly as it then needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
//  in the middle of the 2TiB - 6TiB address range (see issue #372))

#define MI_HINT_BASE ((uintptr_t)2 << 40)  // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40)  // up to 6TiB (since before win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX  ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)

void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
  size = _mi_align_up(size, MI_SEGMENT_SIZE);
  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
  #if (MI_SECURE>0)
  size += MI_SEGMENT_SIZE;  // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
  #endif

  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
  if (hint == 0 || hint > MI_HINT_MAX) {  // wrap or initialize
    uintptr_t init = MI_HINT_BASE;
    #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of aligned allocations unless in debug mode
    mi_heap_t* heap = mi_prim_get_default_heap();
    // gh-123022: default heap may not be initialized in CPython in background threads
    if (mi_heap_is_initialized(heap)) {
      uintptr_t r = _mi_heap_random_next(heap);
      init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (random 20 bits)*4MiB == 0 to 4TiB
    }
    #endif
    uintptr_t expected = hint + size;
    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
    hint = mi_atomic_add_acq_rel(&aligned_base, size);  // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
  }
  if (hint % try_alignment != 0) return NULL;
  return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
  MI_UNUSED(try_alignment); MI_UNUSED(size);
  return NULL;
}
#endif
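
// Illustrative example (not from the original source): with MI_SEGMENT_SIZE == 4MiB and a
// random value r whose bits 17..36 are 0x12345, the first hint becomes
//   MI_HINT_BASE + (4MiB * 0x12345) % MI_HINT_AREA  ==  2TiB + ~291GiB,
// and subsequent hints simply advance by the (segment-aligned) requested size until they
// pass MI_HINT_MAX (30TiB), at which point the base is re-randomized.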


/* -----------------------------------------------------------
  Free memory
-------------------------------------------------------------- */

static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);

static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_assert_internal((size % _mi_os_page_size()) == 0);
  if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
  int err = _mi_prim_free(addr, size);
  if (err != 0) {
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
  }
  mi_stats_t* stats = &_mi_stats_main;
  if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
  _mi_stat_decrease(&stats->reserved, size);
}

void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
  if (mi_memkind_is_os(memid.memkind)) {
    size_t csize = _mi_os_good_alloc_size(size);
    void* base = addr;
    // different base? (due to alignment)
    if (memid.mem.os.base != NULL) {
      mi_assert(memid.mem.os.base <= addr);
      mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
      base = memid.mem.os.base;
      csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
    }
    // free it
    if (memid.memkind == MI_MEM_OS_HUGE) {
      mi_assert(memid.is_pinned);
      mi_os_free_huge_os_pages(base, csize, tld_stats);
    }
    else {
      mi_os_prim_free(base, csize, still_committed, tld_stats);
    }
  }
  else {
    // nothing to do
    mi_assert(memid.memkind < MI_MEM_OS);
  }
}

void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
  _mi_os_free_ex(p, size, true, memid, tld_stats);
}


/* -----------------------------------------------------------
  Primitive allocation from the OS.
-------------------------------------------------------------- */

// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(is_large != NULL);
  if (size == 0) return NULL;
  if (!commit) { allow_large = false; }
  if (try_alignment == 0) { try_alignment = 1; }  // avoid 0 to ensure there will be no divide by zero when aligning

  *is_zero = false;
  void* p = NULL;
  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
  if (err != 0) {
    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
  }
  mi_stat_counter_increase(stats->mmap_calls, 1);
  if (p != NULL) {
    _mi_stat_increase(&stats->reserved, size);
    if (commit) {
      _mi_stat_increase(&stats->committed, size);
      // seems needed for asan (or `mimalloc-test-api` fails)
      #ifdef MI_TRACK_ASAN
      if (*is_zero) { mi_track_mem_defined(p,size); }
               else { mi_track_mem_undefined(p,size); }
      #endif
    }
  }
  return p;
}


// Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned.
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) {
  mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_large != NULL);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(base != NULL);
  if (!commit) allow_large = false;
  if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
  size = _mi_align_up(size, _mi_os_page_size());

  // try first with a hint (this will be aligned directly on Win 10+ or BSD)
  void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
  if (p == NULL) return NULL;

  // aligned already?
  if (((uintptr_t)p % alignment) == 0) {
    *base = p;
  }
  else {
    // if not aligned, free it, overallocate, and unmap around it
    // NOTE(sgross): this warning causes issues in Python tests
    // _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
    mi_os_prim_free(p, size, commit, stats);
    if (size >= (SIZE_MAX - alignment)) return NULL;  // overflow
    const size_t over_size = size + alignment;

    if (mi_os_mem_config.must_free_whole) {  // win32 VirtualAlloc cannot free parts of an allocated block
      // over-allocate uncommitted (virtual) memory
      p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // set p to the aligned part in the full region
      // note: this is dangerous on Windows as VirtualFree needs the actual base pointer;
      // this is handled though by the `base` field in the memid.
      *base = p; // remember the base
      p = mi_align_up_ptr(p, alignment);

      // explicitly commit only the aligned part
      if (commit) {
        _mi_os_commit(p, size, NULL, stats);
      }
    }
    else {  // mmap can free inside an allocation
      // overallocate...
      p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // and selectively unmap parts around the over-allocated area. (noop on sbrk)
      void* aligned_p = mi_align_up_ptr(p, alignment);
      size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
      size_t mid_size = _mi_align_up(size, _mi_os_page_size());
      size_t post_size = over_size - pre_size - mid_size;
      mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, commit, stats); }
      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
      // we can return the aligned pointer on `mmap` (and sbrk) systems
      p = aligned_p;
      *base = aligned_p;  // since we freed the pre part, `*base == p`.
    }
  }

  mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
  return p;
}
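
// Worked example (illustrative, not from the original source): suppose size = 1MiB,
// alignment = 4MiB, and the OS returns p = 0x7f0000100000 for the 5MiB over-allocation.
// Then aligned_p = 0x7f0000400000, pre_size = 3MiB, mid_size = 1MiB and post_size = 1MiB;
// the 3MiB before and the 1MiB after the aligned block are unmapped, leaving exactly the
// aligned 1MiB in place.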


/* -----------------------------------------------------------
  OS API: alloc and alloc_aligned
----------------------------------------------------------- */

void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  *memid = _mi_memid_none();
  mi_stats_t* stats = &_mi_stats_main;
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  bool os_is_large = false;
  bool os_is_zero  = false;
  void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
  if (p != NULL) {
    *memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
  }
  return p;
}

void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
{
  MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
  MI_UNUSED(tld_stats);
  *memid = _mi_memid_none();
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  alignment = _mi_align_up(alignment, _mi_os_page_size());

  bool os_is_large = false;
  bool os_is_zero  = false;
  void* os_base = NULL;
  void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ );
  if (p != NULL) {
    *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
    memid->mem.os.base = os_base;
    memid->mem.os.alignment = alignment;
  }
  return p;
}
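
/* Illustrative usage sketch (not part of the original source): the returned memid carries
   the real OS base and alignment, so a later free can release the whole mapping even when
   the user pointer was adjusted for alignment:

     mi_memid_t memid;
     void* p = _mi_os_alloc_aligned(8*MI_MiB, 4*MI_MiB, true, false, &memid, &_mi_stats_main);
     if (p != NULL) {
       // ... use the 4MiB-aligned block of (at least) 8MiB ...
       _mi_os_free(p, 8*MI_MiB, memid, &_mi_stats_main);  // frees from memid.mem.os.base
     }
*/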

/* -----------------------------------------------------------
  OS aligned allocation with an offset. This is used
  for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
  page where the object can be aligned at an offset from the start of the segment.
  As we may need to overallocate, such pointers must be freed using `mi_free_aligned`
  so that the actual start of the memory region is used.
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
  *memid = _mi_memid_none();
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
  }
  else {
    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
    if (start == NULL) return NULL;

    void* const p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
      _mi_os_decommit(start, extra, tld_stats);
    }
    return p;
  }
}
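
// Worked example (illustrative, not from the original source): with alignment = 4MiB and
// offset = 64KiB we get extra = align_up(64KiB, 4MiB) - 64KiB = 4MiB - 64KiB. The start of
// the over-allocation is 4MiB-aligned, so p = start + extra satisfies
// (p + offset) % 4MiB == 0, i.e. the object is aligned at the requested offset, and the
// unused `extra` prefix is decommitted again.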

/* -----------------------------------------------------------
  OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */

// OS page align within a given area, either conservative (pages inside the area only),
// or not (straddling pages outside the area is possible)
static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) {
  mi_assert(addr != NULL && size > 0);
  if (newsize != NULL) *newsize = 0;
  if (size == 0 || addr == NULL) return NULL;

  // page align conservatively within the range
  void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
                              : mi_align_down_ptr(addr, _mi_os_page_size()));
  void* end   = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
                              : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
  ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
  if (diff <= 0) return NULL;

  mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size));
  if (newsize != NULL) *newsize = (size_t)diff;
  return start;
}
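
// Worked example (illustrative, not from the original source): with 4KiB pages, an area
// at addr = 0x10100 of size 0x3000 aligns conservatively to [0x11000, 0x13000) (newsize
// 0x2000, only pages fully inside the area), and non-conservatively to [0x10000, 0x14000)
// (newsize 0x4000, straddling pages included).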

static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) {
  return mi_os_page_align_areax(true, addr, size, newsize);
}

bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  if (is_zero != NULL) { *is_zero = false; }
  _mi_stat_increase(&stats->committed, size);  // use size for precise commit vs. decommit
  _mi_stat_counter_increase(&stats->commit_calls, 1);

  // page align range
  size_t csize;
  void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
  if (csize == 0) return true;

  // commit
  bool os_is_zero = false;
  int err = _mi_prim_commit(start, csize, &os_is_zero);
  if (err != 0) {
    _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
    return false;
  }
  if (os_is_zero && is_zero != NULL) {
    *is_zero = true;
    mi_assert_expensive(mi_mem_is_zero(start, csize));
  }
  // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
  #ifdef MI_TRACK_ASAN
  if (os_is_zero) { mi_track_mem_defined(start,csize); }
             else { mi_track_mem_undefined(start,csize); }
  #endif
  return true;
}

static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal(needs_recommit != NULL);
  _mi_stat_decrease(&stats->committed, size);

  // page align
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;

  // decommit
  *needs_recommit = true;
  int err = _mi_prim_decommit(start,csize,needs_recommit);
  if (err != 0) {
    _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  mi_assert_internal(err == 0);
  return (err == 0);
}

bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
  bool needs_recommit;
  return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
}


// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
  // page align conservatively within the range
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;  // || _mi_os_is_huge_reserved(addr)
  _mi_stat_increase(&stats->reset, csize);
  _mi_stat_counter_increase(&stats->reset_calls, 1);

  #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
  memset(start, 0, csize);  // pretend it is eagerly reset
  #endif

  int err = _mi_prim_reset(start, csize);
  if (err != 0) {
    _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  return (err == 0);
}


// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
  _mi_stat_counter_increase(&stats->purge_calls, 1);
  _mi_stat_increase(&stats->purged, size);

  if (mi_option_is_enabled(mi_option_purge_decommits) &&  // should decommit?
      !_mi_preloading())                                  // don't decommit during preloading (unsafe)
  {
    bool needs_recommit = true;
    mi_os_decommit_ex(p, size, &needs_recommit, stats);
    return needs_recommit;
  }
  else {
    if (allow_reset) {  // this is sometimes not allowed if the range is not fully committed
      _mi_os_reset(p, size, stats);
    }
    return false;  // needs no recommit
  }
}
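
/* Illustrative caller sketch (not part of the original source): the boolean result tells
   the caller whether the purged range must be committed again before reuse.

     if (_mi_os_purge(p, size, stats)) {
       // the range was decommitted; commit it again before touching the memory
       _mi_os_commit(p, size, NULL, stats);
     }
     else {
       // the range was only reset (or purging is disabled); it is still committed
     }
*/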

// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) {
  return _mi_os_purge_ex(p, size, true, stats);
}

// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
  // page align conservatively within the range
  size_t csize = 0;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return false;
  /*
  if (_mi_os_is_huge_reserved(addr)) {
    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
  }
  */
  int err = _mi_prim_protect(start,csize,protect);
  if (err != 0) {
    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
  }
  return (err == 0);
}

bool _mi_os_protect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, true);
}

bool _mi_os_unprotect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, false);
}



/* ----------------------------------------------------------------------------
Support for allocating huge OS pages (1GiB) that are reserved up-front
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE  (MI_GiB)


#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0

// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  if (total_size != NULL) *total_size = 0;
  const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;

  uintptr_t start = 0;
  uintptr_t end = 0;
  uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
  do {
    start = huge_start;
    if (start == 0) {
      // Initialize the start address after the 32TiB area
      start = ((uintptr_t)32 << 40);  // 32TiB virtual start address
      #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
      mi_heap_t* heap = mi_prim_get_default_heap();
      // gh-123022: default heap may not be initialized in CPython in background threads
      if (mi_heap_is_initialized(heap)) {
        uintptr_t r = _mi_heap_random_next(heap);
        start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (random 12 bits)*1GiB == 0 to 4TiB
      }
      #endif
    }
    end = start + size;
    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));

  if (total_size != NULL) *total_size = size;
  return (uint8_t*)start;
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  MI_UNUSED(pages);
  if (total_size != NULL) *total_size = 0;
  return NULL;
}
#endif
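
// Illustrative example (not from the original source): a first claim of 4 huge pages
// starts at 32TiB plus a random multiple of 1GiB (up to 4TiB) and reserves
// 4 * MI_HUGE_OS_PAGE_SIZE = 4GiB of address space; a concurrent claim that loses the
// CAS race simply retries from the updated `mi_huge_start`, so claimed ranges never overlap.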

// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
  *memid = _mi_memid_none();
  if (psize != NULL) *psize = 0;
  if (pages_reserved != NULL) *pages_reserved = 0;
  size_t size = 0;
  uint8_t* start = mi_os_claim_huge_pages(pages, &size);
  if (start == NULL) return NULL;  // (always NULL on 32-bit systems)

  // Allocate one page at a time but try to place them contiguously.
  // We allocate one page at a time to be able to abort if it takes too long,
  // or to at least allocate as many as are available on the system.
  mi_msecs_t start_t = _mi_clock_start();
  size_t page = 0;
  bool all_zero = true;
  while (page < pages) {
    // allocate a page
    bool is_zero = false;
    void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
    void* p = NULL;
    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
    if (!is_zero) { all_zero = false; }
    if (err != 0) {
      _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
      break;
    }

    // Did we succeed at a contiguous address?
    if (p != addr) {
      // no success, issue a warning and break
      if (p != NULL) {
        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
      }
      break;
    }

    // success, record it
    page++;  // increase before timeout check (see issue #711)
    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);

    // check for timeout
    if (max_msecs > 0) {
      mi_msecs_t elapsed = _mi_clock_end(start_t);
      if (page >= 1) {
        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
        if (estimate > 2*max_msecs) {  // seems like we are going to timeout, break
          elapsed = max_msecs + 1;
        }
      }
      if (elapsed > max_msecs) {
        _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page);
        break;
      }
    }
  }
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
  if (pages_reserved != NULL) { *pages_reserved = page; }
  if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
  if (page != 0) {
    mi_assert(start != NULL);
    *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
    memid->memkind = MI_MEM_OS_HUGE;
    mi_assert(memid->is_pinned);
    #ifdef MI_TRACK_ASAN
    if (all_zero) { mi_track_mem_defined(start,size); }
    #endif
  }
  return (page == 0 ? NULL : start);
}
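
// Worked example of the timeout estimate (illustrative, not from the original source):
// when requesting pages = 8 with max_msecs = 1000 and the first page took elapsed = 300ms,
// the projected total is (300 / 2) * 8 = 1200ms; since 1200 <= 2*1000 the loop continues.
// Had the first page taken 600ms, the estimate (600/2)*8 = 2400ms would exceed 2*max_msecs
// and the loop stops early, reserving only the pages allocated so far.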

// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
  if (p==NULL || size==0) return;
  uint8_t* base = (uint8_t*)p;
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
    size -= MI_HUGE_OS_PAGE_SIZE;
    base += MI_HUGE_OS_PAGE_SIZE;
  }
}

/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/

_Atomic(size_t) _mi_numa_node_count; // = 0   // cache the node count

size_t _mi_os_numa_node_count_get(void) {
  size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
  if (count <= 0) {
    long ncount = mi_option_get(mi_option_use_numa_nodes);  // given explicitly?
    if (ncount > 0) {
      count = (size_t)ncount;
    }
    else {
      count = _mi_prim_numa_node_count();  // or detect dynamically
      if (count == 0) count = 1;
    }
    mi_atomic_store_release(&_mi_numa_node_count, count);  // save it
    _mi_verbose_message("using %zd numa regions\n", count);
  }
  return count;
}

int _mi_os_numa_node_get(mi_os_tld_t* tld) {
  MI_UNUSED(tld);
  size_t numa_count = _mi_os_numa_node_count();
  if (numa_count <= 1) return 0;  // optimize on single numa node systems: always node 0
  // never more than the node count and >= 0
  size_t numa_node = _mi_prim_numa_node();
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
  return (int)numa_node;
}