/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"


/* -----------------------------------------------------------
  Initialization.
  On Windows this initializes support for aligned allocation and
  large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
----------------------------------------------------------- */

static mi_os_mem_config_t mi_os_mem_config = {
  4096,   // page size
  0,      // large page size (usually 2MiB)
  4096,   // allocation granularity
  true,   // has overcommit?  (if true we use MAP_NORESERVE on mmap systems)
  false,  // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
};

bool _mi_os_has_overcommit(void) {
  return mi_os_mem_config.has_overcommit;
}

bool _mi_os_has_virtual_reserve(void) {
  return mi_os_mem_config.has_virtual_reserve;
}


// OS (small) page size
size_t _mi_os_page_size(void) {
  return mi_os_mem_config.page_size;
}

// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
size_t _mi_os_large_page_size(void) {
  return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}

bool _mi_os_use_large_page(size_t size, size_t alignment) {
  // if we have access, check the size and alignment requirements
  if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
  return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
}

// round to a good OS allocation size (bounded by max 12.5% waste)
size_t _mi_os_good_alloc_size(size_t size) {
  size_t align_size;
  if (size < 512*MI_KiB) align_size = _mi_os_page_size();
  else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
  else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
  else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
  else align_size = 4*MI_MiB;
  if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow?
  return _mi_align_up(size, align_size);
}
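// Worked example (illustrative, not from the upstream sources): assuming a 4KiB OS
// page size, `_mi_os_good_alloc_size` rounds as follows:
//   _mi_os_good_alloc_size(100*MI_KiB)   == 100*MI_KiB   // already 4KiB aligned
//   _mi_os_good_alloc_size(600*MI_KiB)   == 640*MI_KiB   // 64KiB granularity, ~6.7% waste
//   _mi_os_good_alloc_size(8*MI_MiB + 1) == 9*MI_MiB     // 1MiB granularity, worst case ~12.5% waste
// For the larger size classes the granularity is 1/8 of the class's lower bound,
// which is what bounds the waste at 12.5%.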

void _mi_os_init(void) {
  _mi_prim_mem_init(&mi_os_mem_config);
}


/* -----------------------------------------------------------
  Util
-------------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);

static void* mi_align_up_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_up((uintptr_t)p, alignment);
}

static void* mi_align_down_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_down((uintptr_t)p, alignment);
}


/* -----------------------------------------------------------
  aligned hinting
-------------------------------------------------------------- */

// On 64-bit systems, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area for those allocations.
#if (MI_INTPTR_SIZE >= 8)
static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
//  in the middle of the 2TiB - 6TiB address range (see issue #372))

#define MI_HINT_BASE ((uintptr_t)2 << 40)  // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40)  // up to 6TiB   (since before Win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX  ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
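// Worked example (illustrative, not from the upstream sources): the hint area starts
// at MI_HINT_BASE (2TiB) plus a randomized, segment-aligned offset below MI_HINT_AREA
// (4TiB), i.e. somewhere in [2TiB, 6TiB). Assuming a 4MiB MI_SEGMENT_SIZE (as the
// comment inside `_mi_os_get_aligned_hint` below suggests), three successive 4MiB
// requests from a base of, say, 0x2C000000000 yield the hints 0x2C000000000,
// 0x2C000400000, and 0x2C000800000; once the running counter passes MI_HINT_MAX
// (30TiB) it is re-initialized to a fresh randomized base.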

void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
  size = _mi_align_up(size, MI_SEGMENT_SIZE);
  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
  #if (MI_SECURE>0)
  size += MI_SEGMENT_SIZE;        // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
  #endif

  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
  if (hint == 0 || hint > MI_HINT_MAX) {   // wrap or initialize
    uintptr_t init = MI_HINT_BASE;
    #if (MI_SECURE>0 || MI_DEBUG==0)       // security: randomize start of aligned allocations unless in debug mode
    mi_heap_t* heap = mi_prim_get_default_heap();
    // gh-123022: default heap may not be initialized in CPython in background threads
    if (mi_heap_is_initialized(heap)) {
      uintptr_t r = _mi_heap_random_next(heap);
      init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
    }
    #endif
    uintptr_t expected = hint + size;
    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
    hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
  }
  if (hint%try_alignment != 0) return NULL;
  return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
  MI_UNUSED(try_alignment); MI_UNUSED(size);
  return NULL;
}
#endif


/* -----------------------------------------------------------
  Free memory
-------------------------------------------------------------- */

static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);

static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_assert_internal((size % _mi_os_page_size()) == 0);
  if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
  int err = _mi_prim_free(addr, size);
  if (err != 0) {
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
  }
  mi_stats_t* stats = &_mi_stats_main;
  if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
  _mi_stat_decrease(&stats->reserved, size);
}

void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
  if (mi_memkind_is_os(memid.memkind)) {
    size_t csize = _mi_os_good_alloc_size(size);
    void* base = addr;
    // different base? (due to alignment)
    if (memid.mem.os.base != NULL) {
      mi_assert(memid.mem.os.base <= addr);
      mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
      base = memid.mem.os.base;
      csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
    }
    // free it
    if (memid.memkind == MI_MEM_OS_HUGE) {
      mi_assert(memid.is_pinned);
      mi_os_free_huge_os_pages(base, csize, tld_stats);
    }
    else {
      mi_os_prim_free(base, csize, still_committed, tld_stats);
    }
  }
  else {
    // nothing to do
    mi_assert(memid.memkind < MI_MEM_OS);
  }
}
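// Worked example (illustrative, not from the upstream sources): suppose an aligned
// allocation was satisfied by over-allocating, so that
//   memid.mem.os.base == (uint8_t*)addr - 64*MI_KiB
// Then `_mi_os_free_ex(addr, size, ...)` frees from `memid.mem.os.base` with
// `_mi_os_good_alloc_size(size) + 64*MI_KiB` bytes, so the OS (in particular
// VirtualFree on Windows) is handed the exact base pointer and span it originally
// returned.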

void  _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
  _mi_os_free_ex(p, size, true, memid, tld_stats);
}


/* -----------------------------------------------------------
   Primitive allocation from the OS.
-------------------------------------------------------------- */

// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(is_large != NULL);
  if (size == 0) return NULL;
  if (!commit) { allow_large = false; }
  if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning

  *is_zero = false;
  void* p = NULL;
  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
  if (err != 0) {
    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
  }
  mi_stat_counter_increase(stats->mmap_calls, 1);
  if (p != NULL) {
    _mi_stat_increase(&stats->reserved, size);
    if (commit) {
      _mi_stat_increase(&stats->committed, size);
      // seems needed for asan (or `mimalloc-test-api` fails)
      #ifdef MI_TRACK_ASAN
      if (*is_zero) { mi_track_mem_defined(p,size); }
               else { mi_track_mem_undefined(p,size); }
      #endif
    }
  }
  return p;
}


// Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned.
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) {
  mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_large != NULL);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(base != NULL);
  if (!commit) allow_large = false;
  if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
  size = _mi_align_up(size, _mi_os_page_size());

  // try first with a hint (this will be aligned directly on Win 10+ or BSD)
  void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
  if (p == NULL) return NULL;

  // aligned already?
  if (((uintptr_t)p % alignment) == 0) {
    *base = p;
  }
  else {
    // if not aligned, free it, overallocate, and unmap around it
    // NOTE(sgross): this warning causes issues in Python tests
    // _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
    mi_os_prim_free(p, size, commit, stats);
    if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
    const size_t over_size = size + alignment;

    if (mi_os_mem_config.must_free_whole) {  // win32 VirtualAlloc cannot free parts of an allocated block
      // over-allocate uncommitted (virtual) memory
      p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // set p to the aligned part in the full region
      // note: this is dangerous on Windows as VirtualFree needs the actual base pointer,
      // but this is handled by keeping the `base` field in the memid
      *base = p; // remember the base
      p = mi_align_up_ptr(p, alignment);

      // explicitly commit only the aligned part
      if (commit) {
        _mi_os_commit(p, size, NULL, stats);
      }
    }
    else  { // mmap can free inside an allocation
      // overallocate...
      p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // and selectively unmap parts around the over-allocated area. (noop on sbrk)
      void* aligned_p = mi_align_up_ptr(p, alignment);
      size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
      size_t mid_size = _mi_align_up(size, _mi_os_page_size());
      size_t post_size = over_size - pre_size - mid_size;
      mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, commit, stats); }
      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
      // we can return the aligned pointer on `mmap` (and sbrk) systems
      p = aligned_p;
      *base = aligned_p; // since we freed the pre part, `*base == p`.
    }
  }

  mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
  return p;
}
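// Worked example (illustrative, not from the upstream sources) for the mmap-style
// fallback above: suppose size = alignment = 4MiB, the first (hinted) attempt came
// back unaligned, and the over-allocation of over_size = 8MiB returns p = 0x7F0000001000.
// The trim then computes:
//   aligned_p = 0x7F0000400000                           // align_up(p, 4MiB)
//   pre_size  = 0x3FF000                                 // unmapped in front
//   mid_size  = 0x400000                                 // the 4MiB that is returned
//   post_size = 0x800000 - 0x3FF000 - 0x400000 = 0x1000  // unmapped at the end
// On Windows (must_free_whole) the whole 8MiB reservation is kept instead; only the
// aligned 4MiB part is committed and the original base is remembered via `*base`.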

/* -----------------------------------------------------------
  OS API: alloc and alloc_aligned
----------------------------------------------------------- */

void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  *memid = _mi_memid_none();
  mi_stats_t* stats = &_mi_stats_main;
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  bool os_is_large = false;
  bool os_is_zero  = false;
  void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
  if (p != NULL) {
    *memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
  }
  return p;
}

void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
{
  MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
  MI_UNUSED(tld_stats);
  *memid = _mi_memid_none();
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  alignment = _mi_align_up(alignment, _mi_os_page_size());

  bool os_is_large = false;
  bool os_is_zero  = false;
  void* os_base = NULL;
  void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ );
  if (p != NULL) {
    *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
    memid->mem.os.base = os_base;
    memid->mem.os.alignment = alignment;
  }
  return p;
}
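// Usage sketch (illustrative only; inside mimalloc these calls are normally made by
// the arena/segment layer rather than directly):
//
//   mi_memid_t memid;
//   void* p = _mi_os_alloc_aligned(8*MI_MiB, 4*MI_MiB, true /* commit */,
//                                  false /* allow_large */, &memid, &_mi_stats_main);
//   if (p != NULL) {
//     // ... use the 4MiB-aligned, committed block ...
//     _mi_os_free(p, 8*MI_MiB, memid, &_mi_stats_main);  // memid carries the real OS base
//   }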

/* -----------------------------------------------------------
  OS aligned allocation with an offset. This is used
  for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
  page where the object can be aligned at an offset from the start of the segment.
  As we may need to overallocate, such pointers must be freed through `mi_free_aligned`
  so that the actual start of the memory region is used.
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
  *memid = _mi_memid_none();
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
  }
  else {
    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
    if (start == NULL) return NULL;

    void* const p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
      _mi_os_decommit(start, extra, tld_stats);
    }
    return p;
  }
}
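// Worked example (illustrative, not from the upstream sources): for alignment = 4*MI_MiB
// and offset = 64*MI_KiB,
//   extra    = _mi_align_up(offset, alignment) - offset = 4*MI_MiB - 64*MI_KiB
//   oversize = size + extra
// and the returned p = start + extra, so that p + offset == start + 4*MI_MiB is
// 4MiB-aligned, while the unused gap [start, start+extra) is decommitted again.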

/* -----------------------------------------------------------
  OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */

// OS page align within a given area, either conservative (pages inside the area only),
// or not (straddling pages outside the area is possible)
static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) {
  mi_assert(addr != NULL && size > 0);
  if (newsize != NULL) *newsize = 0;
  if (size == 0 || addr == NULL) return NULL;

  // page align conservatively within the range
  void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
    : mi_align_down_ptr(addr, _mi_os_page_size()));
  void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
    : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
  ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
  if (diff <= 0) return NULL;

  mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size));
  if (newsize != NULL) *newsize = (size_t)diff;
  return start;
}

static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) {
  return mi_os_page_align_areax(true, addr, size, newsize);
}
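// Worked example (illustrative, not from the upstream sources), assuming 4KiB pages,
// addr = 0x10400 and size = 0x2200 (so the range ends at 0x12600):
//   conservative: start = 0x11000, end = 0x12000, *newsize = 0x1000  (only pages fully inside)
//   liberal:      start = 0x10000, end = 0x13000, *newsize = 0x3000  (straddling pages included)
// Commit below uses the liberal variant, while decommit/reset/protect use the conservative
// one so that they never touch memory outside the given range.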

bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  if (is_zero != NULL) { *is_zero = false; }
  _mi_stat_increase(&stats->committed, size);  // use size for precise commit vs. decommit
  _mi_stat_counter_increase(&stats->commit_calls, 1);

  // page align range
  size_t csize;
  void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
  if (csize == 0) return true;

  // commit
  bool os_is_zero = false;
  int err = _mi_prim_commit(start, csize, &os_is_zero);
  if (err != 0) {
    _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
    return false;
  }
  if (os_is_zero && is_zero != NULL) {
    *is_zero = true;
    mi_assert_expensive(mi_mem_is_zero(start, csize));
  }
  // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
  #ifdef MI_TRACK_ASAN
  if (os_is_zero) { mi_track_mem_defined(start,csize); }
             else { mi_track_mem_undefined(start,csize); }
  #endif
  return true;
}

static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal(needs_recommit!=NULL);
  _mi_stat_decrease(&stats->committed, size);

  // page align
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;

  // decommit
  *needs_recommit = true;
  int err = _mi_prim_decommit(start,csize,needs_recommit);
  if (err != 0) {
    _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  mi_assert_internal(err == 0);
  return (err == 0);
}

bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
  bool needs_recommit;
  return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
}


// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
  // page align conservatively within the range
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;  // || _mi_os_is_huge_reserved(addr)
  _mi_stat_increase(&stats->reset, csize);
  _mi_stat_counter_increase(&stats->reset_calls, 1);

  #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
  memset(start, 0, csize); // pretend it is eagerly reset
  #endif

  int err = _mi_prim_reset(start, csize);
  if (err != 0) {
    _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  return (err == 0);
}


// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
  _mi_stat_counter_increase(&stats->purge_calls, 1);
  _mi_stat_increase(&stats->purged, size);

  if (mi_option_is_enabled(mi_option_purge_decommits) &&   // should decommit?
      !_mi_preloading())                                   // don't decommit during preloading (unsafe)
  {
    bool needs_recommit = true;
    mi_os_decommit_ex(p, size, &needs_recommit, stats);
    return needs_recommit;
  }
  else {
    if (allow_reset) {  // this may not be allowed if the range is not fully committed
      _mi_os_reset(p, size, stats);
    }
    return false;  // needs no recommit
  }
}

// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) {
  return _mi_os_purge_ex(p, size, true, stats);
}
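// Usage sketch (illustrative only): a caller that purges an idle range must honor the
// return value, since a decommitting purge requires a re-commit before the memory is
// touched again:
//
//   bool needs_recommit = _mi_os_purge(p, size, stats);
//   // ... later, when reusing the range:
//   if (needs_recommit) { _mi_os_commit(p, size, NULL, stats); }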

// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
  // page align conservatively within the range
  size_t csize = 0;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return false;
  /*
  if (_mi_os_is_huge_reserved(addr)) {
          _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
  }
  */
  int err = _mi_prim_protect(start,csize,protect);
  if (err != 0) {
    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
  }
  return (err == 0);
}

bool _mi_os_protect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, true);
}

bool _mi_os_unprotect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, false);
}



/* ----------------------------------------------------------------------------
Support for allocating huge OS pages (1GiB) that are reserved up-front
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE  (MI_GiB)


#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static mi_decl_cache_align _Atomic(uintptr_t)  mi_huge_start; // = 0

// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  if (total_size != NULL) *total_size = 0;
  const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;

  uintptr_t start = 0;
  uintptr_t end = 0;
  uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
  do {
    start = huge_start;
    if (start == 0) {
      // Initialize the start address after the 32TiB area
      start = ((uintptr_t)32 << 40);  // 32TiB virtual start address
    #if (MI_SECURE>0 || MI_DEBUG==0)      // security: randomize start of huge pages unless in debug mode
      mi_heap_t* heap = mi_prim_get_default_heap();
      // gh-123022: default heap may not be initialized in CPython in background threads
      if (mi_heap_is_initialized(heap)) {
        uintptr_t r = _mi_heap_random_next(heap);
        start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (randomly 12bits)*1GiB == between 0 to 4TiB
      }
    #endif
    }
    end = start + size;
    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));

  if (total_size != NULL) *total_size = size;
  return (uint8_t*)start;
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  MI_UNUSED(pages);
  if (total_size != NULL) *total_size = 0;
  return NULL;
}
#endif

// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
  *memid = _mi_memid_none();
  if (psize != NULL) *psize = 0;
  if (pages_reserved != NULL) *pages_reserved = 0;
  size_t size = 0;
  uint8_t* start = mi_os_claim_huge_pages(pages, &size);
  if (start == NULL) return NULL; // e.g. on 32-bit systems

  // Allocate one page at a time but try to place them contiguously.
  // We allocate one page at a time to be able to abort if it takes too long,
  // or to at least allocate as many as are available on the system.
  mi_msecs_t start_t = _mi_clock_start();
  size_t page = 0;
  bool all_zero = true;
  while (page < pages) {
    // allocate a page
    bool is_zero = false;
    void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
    void* p = NULL;
    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
    if (!is_zero) { all_zero = false;  }
    if (err != 0) {
      _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
      break;
    }

    // Did we succeed at a contiguous address?
    if (p != addr) {
      // no success, issue a warning and break
      if (p != NULL) {
        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
      }
      break;
    }

    // success, record it
    page++;  // increase before timeout check (see issue #711)
    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);

    // check for timeout
    if (max_msecs > 0) {
      mi_msecs_t elapsed = _mi_clock_end(start_t);
      if (page >= 1) {
        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
        if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
          elapsed = max_msecs + 1;
        }
      }
      if (elapsed > max_msecs) {
        _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page);
        break;
      }
    }
  }
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
  if (pages_reserved != NULL) { *pages_reserved = page; }
  if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
  if (page != 0) {
    mi_assert(start != NULL);
    *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
    memid->memkind = MI_MEM_OS_HUGE;
    mi_assert(memid->is_pinned);
    #ifdef MI_TRACK_ASAN
    if (all_zero) { mi_track_mem_defined(start,size); }
    #endif
  }
  return (page == 0 ? NULL : start);
}
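// Usage sketch (illustrative only; the values are made up for the example): reserving
// four 1GiB pages without binding to a particular NUMA node, giving up after roughly
// two seconds:
//
//   size_t pages_reserved = 0, psize = 0;
//   mi_memid_t memid;
//   void* start = _mi_os_alloc_huge_os_pages(4, -1 /* no specific numa node */, 2000 /* max_msecs */,
//                                            &pages_reserved, &psize, &memid);
//   // on success `start` covers `pages_reserved` contiguous huge pages (`psize` bytes)
//   // and is later released page-by-page via `_mi_os_free` with this `memid`.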

// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
  if (p==NULL || size==0) return;
  uint8_t* base = (uint8_t*)p;
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
    size -= MI_HUGE_OS_PAGE_SIZE;
    base += MI_HUGE_OS_PAGE_SIZE;
  }
}

/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/

_Atomic(size_t)  _mi_numa_node_count; // = 0   // cache the node count

size_t _mi_os_numa_node_count_get(void) {
  size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
  if (count <= 0) {
    long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
    if (ncount > 0) {
      count = (size_t)ncount;
    }
    else {
      count = _mi_prim_numa_node_count(); // or detect dynamically
      if (count == 0) count = 1;
    }
    mi_atomic_store_release(&_mi_numa_node_count, count); // save it
    _mi_verbose_message("using %zd numa regions\n", count);
  }
  return count;
}

int _mi_os_numa_node_get(mi_os_tld_t* tld) {
  MI_UNUSED(tld);
  size_t numa_count = _mi_os_numa_node_count();
  if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
  // never more than the node count and >= 0
  size_t numa_node = _mi_prim_numa_node();
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
  return (int)numa_node;
}

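// Usage sketch (illustrative only): combining the NUMA helpers with the huge page
// reservation above, e.g. to reserve huge pages on the node the current thread runs on:
//
//   int node = _mi_os_numa_node_get(NULL);   // `tld` is unused in this implementation
//   // ... pass `node` as the `numa_node` argument of `_mi_os_alloc_huge_os_pages`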