/*----------------------------------------------------------------------------
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* -----------------------------------------------------------
  The core of the allocator. Every segment contains
  pages of a certain block size. The main function
  exported is `_mi_malloc_generic`.
----------------------------------------------------------- */
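
/* A rough usage sketch (not a definitive call chain; the fast path lives in
   alloc.c and details vary by mimalloc version):

     void* p = mi_malloc(64);   // fast path: `_mi_page_malloc` pops a block
                                // from the current page's free list
     // if that free list is empty, the fast path falls back to
     // `_mi_malloc_generic` below, which collects freed blocks, extends the
     // page, or finds/allocates a fresh page via `mi_find_page`.
     mi_free(p);
*/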

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"

/* -----------------------------------------------------------
  Definition of page queues for each block size
----------------------------------------------------------- */

#define MI_IN_PAGE_C
#include "page-queue.c"
#undef MI_IN_PAGE_C


/* -----------------------------------------------------------
  Page helpers
----------------------------------------------------------- */

// Index a block in a page
static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) {
  MI_UNUSED(page);
  mi_assert_internal(page != NULL);
  mi_assert_internal(i <= page->reserved);
  return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
}

static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld);

#if (MI_DEBUG>=3)
static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
  size_t count = 0;
  while (head != NULL) {
    mi_assert_internal(page == _mi_ptr_page(head));
    count++;
    head = mi_block_next(page, head);
  }
  return count;
}

/*
// Start of the page available memory
static inline uint8_t* mi_page_area(const mi_page_t* page) {
  return _mi_page_start(_mi_page_segment(page), page, NULL);
}
*/

static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
  size_t psize;
  uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize);
  mi_block_t* start = (mi_block_t*)page_area;
  mi_block_t* end   = (mi_block_t*)(page_area + psize);
  while(p != NULL) {
    if (p < start || p >= end) return false;
    p = mi_block_next(page, p);
  }
#if MI_DEBUG>3 // generally too expensive to check this
  if (page->free_is_zero) {
    const size_t ubsize = mi_page_usable_block_size(page);
    for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
      mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t)));
    }
  }
#endif
  return true;
}

static bool mi_page_is_valid_init(mi_page_t* page) {
  mi_assert_internal(page->xblock_size > 0);
  mi_assert_internal(page->used <= page->capacity);
  mi_assert_internal(page->capacity <= page->reserved);

  mi_segment_t* segment = _mi_page_segment(page);
  uint8_t* start = _mi_page_start(segment,page,NULL);
  mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
  //const size_t bsize = mi_page_block_size(page);
  //mi_assert_internal(start + page->capacity*page->block_size == page->top);

  mi_assert_internal(mi_page_list_is_valid(page,page->free));
  mi_assert_internal(mi_page_list_is_valid(page,page->local_free));

  #if MI_DEBUG>3 // generally too expensive to check this
  if (page->free_is_zero) {
    const size_t ubsize = mi_page_usable_block_size(page);
    for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
      mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t)));
    }
  }
  #endif

  #if !MI_TRACK_ENABLED && !MI_TSAN
  mi_block_t* tfree = mi_page_thread_free(page);
  mi_assert_internal(mi_page_list_is_valid(page, tfree));
  //size_t tfree_count = mi_page_list_count(page, tfree);
  //mi_assert_internal(tfree_count <= page->thread_freed + 1);
  #endif

  size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free);
  mi_assert_internal(page->used + free_count == page->capacity);

  return true;
}

extern bool _mi_process_is_initialized;             // has mi_process_init been called?

bool _mi_page_is_valid(mi_page_t* page) {
  mi_assert_internal(mi_page_is_valid_init(page));
  #if MI_SECURE
  mi_assert_internal(page->keys[0] != 0);
  #endif
  if (mi_page_heap(page)!=NULL) {
    mi_segment_t* segment = _mi_page_segment(page);

    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id);
    #if MI_HUGE_PAGE_ABANDON
    if (segment->kind != MI_SEGMENT_HUGE)
    #endif
    {
      mi_page_queue_t* pq = mi_page_queue_of(page);
      mi_assert_internal(mi_page_queue_contains(pq, page));
      mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page));
      mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
    }
  }
  return true;
}
#endif

void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
  while (!_mi_page_try_use_delayed_free(page, delay, override_never)) {
    mi_atomic_yield();
  }
}

bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
  mi_thread_free_t tfreex;
  mi_delayed_t     old_delay;
  mi_thread_free_t tfree;
  size_t yield_count = 0;
  do {
    tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
    tfreex = mi_tf_set_delayed(tfree, delay);
    old_delay = mi_tf_delayed(tfree);
    if mi_unlikely(old_delay == MI_DELAYED_FREEING) {
      if (yield_count >= 4) return false;  // give up after 4 tries
      yield_count++;
      mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
      // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
    }
    else if (delay == old_delay) {
      break; // avoid atomic operation if already equal
    }
    else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
      break; // leave never-delayed flag set
    }
  } while ((old_delay == MI_DELAYED_FREEING) ||
           !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));

  return true; // success
}
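
/* An illustrative call (a sketch, not taken verbatim from the callers of this
   file): before a page is abandoned, its delayed-free mode is switched to
   MI_NEVER_DELAYED_FREE so other threads stop pushing onto the heap's
   `thread_delayed_free` list for this page; `_mi_page_abandon` below asserts
   that this has already happened.

     _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
     // ... ensure no thread_delayed_free references remain, then abandon
*/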

/* -----------------------------------------------------------
  Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */

// Collect the local `thread_free` list using an atomic exchange.
// Note: The exchange must be done atomically as this is used right after
// moving to the full list in `mi_page_collect_ex` and we need to
// ensure that there was no race where the page became unfull just before the move.
static void _mi_page_thread_free_collect(mi_page_t* page)
{
  mi_block_t* head;
  mi_thread_free_t tfreex;
  mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
  do {
    head = mi_tf_block(tfree);
    tfreex = mi_tf_set_block(tfree,NULL);
  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));

  // return if the list is empty
  if (head == NULL) return;

  // find the tail -- also to get a proper count (without data races)
  uint32_t max_count = page->capacity; // cannot collect more than capacity
  uint32_t count = 1;
  mi_block_t* tail = head;
  mi_block_t* next;
  while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) {
    count++;
    tail = next;
  }
  // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free)
  if (count > max_count) {
    _mi_error_message(EFAULT, "corrupted thread-free list\n");
    return; // the thread-free items cannot be freed
  }

  // and append the current local free list
  mi_block_set_next(page,tail, page->local_free);
  page->local_free = head;

  // update counts now
  page->used -= count;
}

void _mi_page_free_collect(mi_page_t* page, bool force) {
  mi_assert_internal(page!=NULL);

  // collect the thread free list
  if (force || mi_page_thread_free(page) != NULL) {  // quick test to avoid an atomic operation
    _mi_page_thread_free_collect(page);
  }

  // and the local free list
  if (page->local_free != NULL) {
    // any previous QSBR goals are no longer valid because we reused the page
    _PyMem_mi_page_clear_qsbr(page);

    if mi_likely(page->free == NULL) {
      // usual case
      page->free = page->local_free;
      page->local_free = NULL;
      page->free_is_zero = false;
    }
    else if (force) {
      // append -- only on shutdown (force) as this is a linear operation
      mi_block_t* tail = page->local_free;
      mi_block_t* next;
      while ((next = mi_block_next(page, tail)) != NULL) {
        tail = next;
      }
      mi_block_set_next(page, tail, page->free);
      page->free = page->local_free;
      page->local_free = NULL;
      page->free_is_zero = false;
    }
  }

  mi_assert_internal(!force || page->local_free == NULL);
}



/* -----------------------------------------------------------
  Page fresh and retire
----------------------------------------------------------- */

// called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
  mi_assert_expensive(mi_page_is_valid_init(page));

  mi_assert_internal(mi_page_heap(page) == heap);
  mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
  #if MI_HUGE_PAGE_ABANDON
  mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
  #endif

  // TODO: push on full queue immediately if it is full?
  mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
  mi_page_queue_push(heap, pq, page);
  _PyMem_mi_page_reclaimed(page);
  mi_assert_expensive(_mi_page_is_valid(page));
}

// allocate a fresh page from a segment
static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) {
  #if !MI_HUGE_PAGE_ABANDON
  mi_assert_internal(pq != NULL);
  mi_assert_internal(mi_heap_contains_queue(heap, pq));
  mi_assert_internal(page_alignment > 0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || block_size == pq->block_size);
  #endif
  mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os);
  if (page == NULL) {
    // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
    return NULL;
  }
  mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
  mi_assert_internal(pq!=NULL || page->xblock_size != 0);
  mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
  // a fresh page was found, initialize it
  const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
  mi_assert_internal(full_block_size >= block_size);
  mi_page_init(heap, page, full_block_size, heap->tld);
  mi_heap_stat_increase(heap, pages, 1);
  if (pq != NULL) { mi_page_queue_push(heap, pq, page); }
  mi_assert_expensive(_mi_page_is_valid(page));
  return page;
}

// Get a fresh page to use
static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
  mi_assert_internal(mi_heap_contains_queue(heap, pq));
  mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0);
  if (page==NULL) return NULL;
  mi_assert_internal(pq->block_size==mi_page_block_size(page));
  mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
  return page;
}

/* -----------------------------------------------------------
   Do any delayed frees
   (put there by other threads if they deallocated in a full page)
----------------------------------------------------------- */
void _mi_heap_delayed_free_all(mi_heap_t* heap) {
  while (!_mi_heap_delayed_free_partial(heap)) {
    mi_atomic_yield();
  }
}

// returns true if all delayed frees were processed
bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
  // take over the list (note: no atomic exchange since it is often NULL)
  mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
  while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
  bool all_freed = true;

  // and free them all
  while(block != NULL) {
    mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
    // use internal free instead of regular one to keep stats etc correct
    if (!_mi_free_delayed_block(block)) {
      // we might already start delayed freeing while another thread has not yet
      // reset the delayed_freeing flag; in that case delay it further by reinserting the current block
      // into the delayed free list
      all_freed = false;
      mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
      do {
        mi_block_set_nextx(heap, block, dfree, heap->keys);
      } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
    }
    block = next;
  }
  return all_freed;
}

/* -----------------------------------------------------------
  Unfull, abandon, free and retire
----------------------------------------------------------- */

// Move a page from the full list back to a regular list
void _mi_page_unfull(mi_page_t* page) {
  mi_assert_internal(page != NULL);
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(mi_page_is_in_full(page));
  if (!mi_page_is_in_full(page)) return;

  mi_heap_t* heap = mi_page_heap(page);
  mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL];
  mi_page_set_in_full(page, false); // to get the right queue
  mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
  mi_page_set_in_full(page, true);
  mi_page_queue_enqueue_from(pq, pqfull, page);
}

static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
  mi_assert_internal(pq == mi_page_queue_of(page));
  mi_assert_internal(!mi_page_immediate_available(page));
  mi_assert_internal(!mi_page_is_in_full(page));

  if (mi_page_is_in_full(page)) return;
  mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
  _mi_page_free_collect(page,false);  // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}


// Abandon a page with used blocks at the end of a thread.
// Note: only call if it is ensured that no references exist from
// the `page->heap->thread_delayed_free` into this page.
// Currently only called through `mi_heap_collect_ex` which ensures this.
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
  mi_assert_internal(page != NULL);
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(pq == mi_page_queue_of(page));
  mi_assert_internal(mi_page_heap(page) != NULL);

  mi_heap_t* pheap = mi_page_heap(page);

#ifdef Py_GIL_DISABLED
  if (page->qsbr_node.next != NULL) {
    // remove from QSBR queue, but keep the goal
    llist_remove(&page->qsbr_node);
  }
#endif

  // remove from our page list
  mi_segments_tld_t* segments_tld = &pheap->tld->segments;
  mi_page_queue_remove(pq, page);

  // page is no longer associated with our heap
  mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
  mi_page_set_heap(page, NULL);

#if (MI_DEBUG>1) && !MI_TRACK_ENABLED
  // check there are no references left..
  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) {
    mi_assert_internal(_mi_ptr_page(block) != page);
  }
#endif

  // and abandon it
  mi_assert_internal(mi_page_heap(page) == NULL);
  _mi_segment_page_abandon(page,segments_tld);
}


// Free a page with no more used blocks
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
  mi_assert_internal(page != NULL);
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(pq == mi_page_queue_of(page));
  mi_assert_internal(mi_page_all_free(page));
  mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);

  // no more aligned blocks in here
  mi_page_set_has_aligned(page, false);

  mi_heap_t* heap = mi_page_heap(page);

#ifdef Py_GIL_DISABLED
  mi_assert_internal(page->qsbr_goal == 0);
  mi_assert_internal(page->qsbr_node.next == NULL);
#endif

  // remove from the page list
  // (no need to do _mi_heap_delayed_free first as all blocks are already free)
  mi_segments_tld_t* segments_tld = &heap->tld->segments;
  mi_page_queue_remove(pq, page);

  // and free it
  mi_page_set_heap(page,NULL);
  _mi_segment_page_free(page, force, segments_tld);
}

// Retire parameters
#define MI_MAX_RETIRE_SIZE    (MI_MEDIUM_OBJ_SIZE_MAX)
#define MI_RETIRE_CYCLES      (16)
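// Worked example (illustrative): when `_mi_page_retire` below keeps the sole
// page of a size class around, it sets `retire_expire` to 1 + MI_RETIRE_CYCLES
// = 17 for small blocks (<= MI_SMALL_OBJ_SIZE_MAX) and 1 + MI_RETIRE_CYCLES/4
// = 5 otherwise; `_mi_heap_collect_retired` then counts this down on later
// calls and only frees the page once it reaches zero (or when forced).
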
// Retire a page with no more used blocks
// Important to not retire too quickly though as new
// allocations might be coming.
// Note: called from `mi_free` and benchmarks often
// trigger this due to freeing everything and then
// allocating again, so be careful when changing this.
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
  mi_assert_internal(page != NULL);
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(mi_page_all_free(page));

  mi_page_set_has_aligned(page, false);

  // any previous QSBR goals are no longer valid because we reused the page
  _PyMem_mi_page_clear_qsbr(page);

  // don't retire too often..
  // (or we end up retiring and re-allocating most of the time)
  // NOTE: refine this more: we should not retire if this
  // is the only page left with free blocks. It is not clear
  // how to check this efficiently though...
  // for now, we don't retire if it is the only page left of this size class.
  mi_page_queue_t* pq = mi_page_queue_of(page);
  if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) {  // not too large && not full or huge queue?
    if (pq->last==page && pq->first==page) { // the only page in the queue?
      mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
      page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
      mi_heap_t* heap = mi_page_heap(page);
      mi_assert_internal(pq >= heap->pages);
      const size_t index = pq - heap->pages;
      mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE);
      if (index < heap->page_retired_min) heap->page_retired_min = index;
      if (index > heap->page_retired_max) heap->page_retired_max = index;
      mi_assert_internal(mi_page_all_free(page));
      return; // don't free after all
    }
  }
  _PyMem_mi_page_maybe_free(page, pq, false);
}

// free retired pages: we don't need to look at the entire queues
// since we only retire pages that are at the head position in a queue.
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
  size_t min = MI_BIN_FULL;
  size_t max = 0;
  for(size_t bin = heap->page_retired_min; bin <= heap->page_retired_max; bin++) {
    mi_page_queue_t* pq   = &heap->pages[bin];
    mi_page_t*       page = pq->first;
    if (page != NULL && page->retire_expire != 0) {
      if (mi_page_all_free(page)) {
        page->retire_expire--;
        if (force || page->retire_expire == 0) {
#ifdef Py_GIL_DISABLED
          mi_assert_internal(page->qsbr_goal == 0);
#endif
          _PyMem_mi_page_maybe_free(page, pq, force);
        }
        else {
          // keep retired, update min/max
          if (bin < min) min = bin;
          if (bin > max) max = bin;
        }
      }
      else {
        page->retire_expire = 0;
      }
    }
  }
  heap->page_retired_min = min;
  heap->page_retired_max = max;
}

/* -----------------------------------------------------------
  Initialize the initial free list in a page.
  In secure mode we initialize a randomized list by
  alternating between slices.
----------------------------------------------------------- */

#define MI_MAX_SLICE_SHIFT  (6)   // at most 64 slices
#define MI_MAX_SLICES       (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES       (2)
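// Worked example (illustrative) for `mi_page_free_list_extend_secure` below:
// with `extend == 100`, the shift stays at MI_MAX_SLICE_SHIFT (100 >> 6 == 1),
// so there are 64 slices of 100/64 = 1 block each, and the remaining
// 100 % 64 = 36 blocks go to the last slice; with `extend == 10`, the shift
// drops to 3, giving 8 slices of one block plus 2 extra in the last slice.
// The free list is then threaded through the slices in random order.
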
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
  MI_UNUSED(stats);
  #if (MI_SECURE<=2)
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->local_free == NULL);
  #endif
  mi_assert_internal(page->capacity + extend <= page->reserved);
  mi_assert_internal(bsize == mi_page_block_size(page));
  void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);

  // initialize a randomized free list
  // set up `slice_count` slices to alternate between
  size_t shift = MI_MAX_SLICE_SHIFT;
  while ((extend >> shift) == 0) {
    shift--;
  }
  const size_t slice_count = (size_t)1U << shift;
  const size_t slice_extend = extend / slice_count;
  mi_assert_internal(slice_extend >= 1);
  mi_block_t* blocks[MI_MAX_SLICES];   // current start of the slice
  size_t      counts[MI_MAX_SLICES];   // available objects in the slice
  for (size_t i = 0; i < slice_count; i++) {
    blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend);
    counts[i] = slice_extend;
  }
  counts[slice_count-1] += (extend % slice_count);  // final slice holds the modulus too (todo: distribute evenly?)

  // and initialize the free list by randomly threading through them
  // set up first element
  const uintptr_t r = _mi_heap_random_next(heap);
  size_t current = r % slice_count;
  counts[current]--;
  mi_block_t* const free_start = blocks[current];
  // and iterate through the rest; use `random_shuffle` for performance
  uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0
  for (size_t i = 1; i < extend; i++) {
    // call random_shuffle only every INTPTR_SIZE rounds
    const size_t round = i%MI_INTPTR_SIZE;
    if (round == 0) rnd = _mi_random_shuffle(rnd);
    // select a random next slice index
    size_t next = ((rnd >> 8*round) & (slice_count-1));
    while (counts[next]==0) {                            // ensure it still has space
      next++;
      if (next==slice_count) next = 0;
    }
    // and link the current block to it
    counts[next]--;
    mi_block_t* const block = blocks[current];
    blocks[current] = (mi_block_t*)((uint8_t*)block + bsize);  // bump to the following block
    mi_block_set_next(page, block, blocks[next]);   // and set next; note: we may have `current == next`
    current = next;
  }
  // prepend to the free list (usually NULL)
  mi_block_set_next(page, blocks[current], page->free);  // end of the list
  page->free = free_start;
}

static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
{
  MI_UNUSED(stats);
  #if (MI_SECURE <= 2)
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->local_free == NULL);
  #endif
  mi_assert_internal(page->capacity + extend <= page->reserved);
  mi_assert_internal(bsize == mi_page_block_size(page));
  void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );

  mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity);

  // initialize a sequential free list
  mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1);
  mi_block_t* block = start;
  while(block <= last) {
    mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
    mi_block_set_next(page,block,next);
    block = next;
  }
  // prepend to free list (usually `NULL`)
  mi_block_set_next(page, last, page->free);
  page->free = start;
}

/* -----------------------------------------------------------
  Page initialize and extend the capacity
----------------------------------------------------------- */

#define MI_MAX_EXTEND_SIZE    (4*1024)      // heuristic, one OS page seems to work well.
#if (MI_SECURE>0)
#define MI_MIN_EXTEND         (8*MI_SECURE) // extend at least by this many
#else
#define MI_MIN_EXTEND         (4)
#endif
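// Worked example (illustrative) of the heuristic in `mi_page_extend_free`
// below: with 64-byte blocks at most MI_MAX_EXTEND_SIZE/64 = 64 blocks are
// added per extension, with 8-byte blocks up to 512, and once the block size
// reaches MI_MAX_EXTEND_SIZE the extension falls back to MI_MIN_EXTEND blocks
// (4, or 8*MI_SECURE in secure builds).
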
// Extend the capacity (up to reserved) by initializing a free list
// We do at most `MI_MAX_EXTEND_SIZE` bytes to avoid touching too much memory
// Note: we also experimented with "bump" allocation on the first
// allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
  MI_UNUSED(tld);
  mi_assert_expensive(mi_page_is_valid_init(page));
  #if (MI_SECURE<=2)
  mi_assert(page->free == NULL);
  mi_assert(page->local_free == NULL);
  if (page->free != NULL) return;
  #endif
  if (page->capacity >= page->reserved) return;

  size_t page_size;
  _mi_page_start(_mi_page_segment(page), page, &page_size);
  mi_stat_counter_increase(tld->stats.pages_extended, 1);

  // calculate the extend count
  const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
  size_t extend = page->reserved - page->capacity;
  mi_assert_internal(extend > 0);

  size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
  if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
  mi_assert_internal(max_extend > 0);

  if (extend > max_extend) {
    // ensure we don't touch memory beyond the page to reduce page commit.
    // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
    extend = max_extend;
  }

  mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
  mi_assert_internal(extend < (1UL<<16));

  // and append the extension to the free list
  if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) {
    mi_page_free_list_extend(page, bsize, extend, &tld->stats );
  }
  else {
    mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats);
  }
  // enable the new free list
  page->capacity += (uint16_t)extend;
  mi_stat_increase(tld->stats.page_committed, extend * bsize);
  mi_assert_expensive(mi_page_is_valid_init(page));
}

// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
  mi_assert(page != NULL);
  mi_segment_t* segment = _mi_page_segment(page);
  mi_assert(segment != NULL);
  mi_assert_internal(block_size > 0);
  // set fields
  mi_page_set_heap(page, heap);
  page->tag = heap->tag;
  page->use_qsbr = heap->page_use_qsbr;
  page->debug_offset = heap->debug_offset;
  page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
  size_t page_size;
  const void* page_start = _mi_segment_page_start(segment, page, &page_size);
  MI_UNUSED(page_start);
  mi_track_mem_noaccess(page_start,page_size);
  mi_assert_internal(mi_page_block_size(page) <= page_size);
  mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
  mi_assert_internal(page_size / block_size < (1L<<16));
  page->reserved = (uint16_t)(page_size / block_size);
  mi_assert_internal(page->reserved > 0);
  #if (MI_PADDING || MI_ENCODE_FREELIST)
  page->keys[0] = _mi_heap_random_next(heap);
  page->keys[1] = _mi_heap_random_next(heap);
  #endif
  page->free_is_zero = page->is_zero_init;
  #if MI_DEBUG>2
  if (page->is_zero_init) {
    mi_track_mem_defined(page_start, page_size);
    mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
  }
  #endif

  mi_assert_internal(page->is_committed);
  mi_assert_internal(page->capacity == 0);
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->used == 0);
  mi_assert_internal(page->xthread_free == 0);
  mi_assert_internal(page->next == NULL);
  mi_assert_internal(page->prev == NULL);
#ifdef Py_GIL_DISABLED
  mi_assert_internal(page->qsbr_goal == 0);
  mi_assert_internal(page->qsbr_node.next == NULL);
#endif
  mi_assert_internal(page->retire_expire == 0);
  mi_assert_internal(!mi_page_has_aligned(page));
  #if (MI_PADDING || MI_ENCODE_FREELIST)
  mi_assert_internal(page->keys[0] != 0);
  mi_assert_internal(page->keys[1] != 0);
  #endif
  mi_assert_expensive(mi_page_is_valid_init(page));

  // initialize an initial free list
  mi_page_extend_free(heap,page,tld);
  mi_assert(mi_page_immediate_available(page));
}


/* -----------------------------------------------------------
  Find pages with free blocks
-------------------------------------------------------------*/

// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
{
  // search through the pages in "next fit" order
  #if MI_STAT
  size_t count = 0;
  #endif
  mi_page_t* page = pq->first;
  while (page != NULL)
  {
    mi_page_t* next = page->next; // remember next
    #if MI_STAT
    count++;
    #endif

    // 0. collect freed blocks by us and other threads
    _mi_page_free_collect(page, false);

    // 1. if the page contains free blocks, we are done
    if (mi_page_immediate_available(page)) {
      break;  // pick this one
    }

    // 2. Try to extend
    if (page->capacity < page->reserved) {
      mi_page_extend_free(heap, page, heap->tld);
      mi_assert_internal(mi_page_immediate_available(page));
      break;
    }

    // 3. If the page is completely full, move it to the `mi_pages_full`
    // queue so we don't visit long-lived pages too often.
    mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
    mi_page_to_full(page, pq);

    page = next;
  } // for each page

  mi_heap_stat_counter_increase(heap, searches, count);

  if (page == NULL) {
    _PyMem_mi_heap_collect_qsbr(heap); // some pages might be safe to free now
    _mi_heap_collect_retired(heap, false); // perhaps make a page available?
    page = mi_page_fresh(heap, pq);
    if (page == NULL && first_try) {
      // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
      page = mi_page_queue_find_free_ex(heap, pq, false);
    }
  }
  else {
    mi_assert(pq->first == page);
    page->retire_expire = 0;
    _PyMem_mi_page_clear_qsbr(page);
  }
  mi_assert_internal(page == NULL || mi_page_immediate_available(page));
  return page;
}



// Find a page with free blocks of `size`.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
  mi_page_queue_t* pq = mi_page_queue(heap,size);
  mi_page_t* page = pq->first;
  if (page != NULL) {
   #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
    if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
      mi_page_extend_free(heap, page, heap->tld);
      mi_assert_internal(mi_page_immediate_available(page));
    }
    else
   #endif
    {
      _mi_page_free_collect(page,false);
    }

    if (mi_page_immediate_available(page)) {
      page->retire_expire = 0;
      _PyMem_mi_page_clear_qsbr(page);
      return page; // fast path
    }
  }
  return mi_page_queue_find_free_ex(heap, pq, true);
}

/* -----------------------------------------------------------
  Users can register a deferred free function called
  when the `free` list is empty. Since the `local_free`
  is separate this is deterministically called after
  a certain number of allocations.
----------------------------------------------------------- */

static mi_deferred_free_fun* volatile deferred_free = NULL;
static _Atomic(void*) deferred_arg; // = NULL

void _mi_deferred_free(mi_heap_t* heap, bool force) {
  heap->tld->heartbeat++;
  if (deferred_free != NULL && !heap->tld->recurse) {
    heap->tld->recurse = true;
    deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg));
    heap->tld->recurse = false;
  }
}

void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept {
  deferred_free = fn;
  mi_atomic_store_ptr_release(void,&deferred_arg, arg);
}
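
/* A minimal usage sketch for the registration API above (illustrative only;
   `my_deferred_free`, `app_cache_t`, and `app_cache_release_unused` are
   hypothetical, and the callback signature is assumed to match the
   `mi_deferred_free_fun` typedef in mimalloc.h):

     static void my_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
       app_cache_t* cache = (app_cache_t*)arg;
       if (force || (heartbeat % 1000) == 0) {
         app_cache_release_unused(cache);   // returns cached blocks via mi_free
       }
     }
     // at startup: mi_register_deferred_free(&my_deferred_free, &my_cache);

   Note that the callback runs on the allocation slow path (`_mi_deferred_free`
   above), so it should avoid unbounded recursive allocation.
*/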


/* -----------------------------------------------------------
  General allocation
----------------------------------------------------------- */

// Large and huge page allocation.
// Huge pages are allocated directly without being in a queue.
// Because huge pages contain just one block, and the segment contains
// just that page, we always treat them as abandoned and any thread
// that frees the block can free the whole page and segment directly.
// Huge pages are also used if the requested alignment is very large (> MI_ALIGNMENT_MAX).
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
  size_t block_size = _mi_os_good_alloc_size(size);
  mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
  bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX || page_alignment > 0);
  #if MI_HUGE_PAGE_ABANDON
  mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
  #else
  mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_HUGE_BLOCK_SIZE : block_size); // not block_size as that can be low if the page_alignment > 0
  mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq));
  #endif
  mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
  if (page != NULL) {
    mi_assert_internal(mi_page_immediate_available(page));

    if (is_huge) {
      mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
      mi_assert_internal(_mi_page_segment(page)->used==1);
      #if MI_HUGE_PAGE_ABANDON
      mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
      mi_page_set_heap(page, NULL);
      #endif
    }
    else {
      mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
    }

    const size_t bsize = mi_page_usable_block_size(page);  // note: not `mi_page_block_size` to account for padding
    if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
      mi_heap_stat_increase(heap, large, bsize);
      mi_heap_stat_counter_increase(heap, large_count, 1);
    }
    else {
      mi_heap_stat_increase(heap, huge, bsize);
      mi_heap_stat_counter_increase(heap, huge_count, 1);
    }
  }
  return page;
}
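
/* Illustrative of when the path above is taken (a sketch; exact thresholds
   depend on the build): `mi_find_page` routes a request here once the size
   exceeds MI_MEDIUM_OBJ_SIZE_MAX, or when a non-zero `huge_alignment` is
   passed down for alignments above MI_ALIGNMENT_MAX, e.g.:

     void* big  = mi_malloc(8u*1024*1024);                // large/huge block
     void* wide = mi_malloc_aligned(1024, 4u*1024*1024);  // huge alignment
     mi_free(big); mi_free(wide);
*/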


// Allocate a page
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
  // huge allocation?
  const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`
  if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
    if mi_unlikely(req_size > PTRDIFF_MAX) {  // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
      _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
      return NULL;
    }
    else {
      _PyMem_mi_heap_collect_qsbr(heap);
      return mi_large_huge_page_alloc(heap,size,huge_alignment);
    }
  }
  else {
    // otherwise find a page with free blocks in our size segregated queues
    #if MI_PADDING
    mi_assert_internal(size >= MI_PADDING_SIZE);
    #endif
    return mi_find_free_page(heap, size);
  }
}

// Generic allocation routine if the fast path (`alloc.c:_mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
// very large requested alignments in which case we use a huge segment.
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
{
  mi_assert_internal(heap != NULL);

  // initialize if necessary
  if mi_unlikely(!mi_heap_is_initialized(heap)) {
    heap = mi_heap_get_default(); // calls mi_thread_init
    if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; }
  }
  mi_assert_internal(mi_heap_is_initialized(heap));

  // call potential deferred free routines
  _mi_deferred_free(heap, false);

  // free delayed frees from other threads (but skip contended ones)
  _mi_heap_delayed_free_partial(heap);

  // find (or allocate) a page of the right size
  mi_page_t* page = mi_find_page(heap, size, huge_alignment);
  if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more
    mi_heap_collect(heap, true /* force */);
    page = mi_find_page(heap, size, huge_alignment);
  }

  if mi_unlikely(page == NULL) { // out of memory
    const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`
    _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size);
    return NULL;
  }

  mi_assert_internal(mi_page_immediate_available(page));
  mi_assert_internal(mi_page_block_size(page) >= size);

  // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
  if mi_unlikely(zero && page->xblock_size == 0) {
    // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
    void* p = _mi_page_malloc(heap, page, size, false);
    mi_assert_internal(p != NULL);
    _mi_memzero_aligned(p, mi_page_usable_block_size(page));
    return p;
  }
  else {
    return _mi_page_malloc(heap, page, size, zero);
  }
}
973