1 /* Python's malloc wrappers (see pymem.h) */
2 
3 #include "Python.h"
4 #include "pycore_code.h"          // stats
5 #include "pycore_object.h"        // _PyDebugAllocatorStats() definition
6 #include "pycore_obmalloc.h"
7 #include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
8 #include "pycore_pymem.h"
9 #include "pycore_pystate.h"       // _PyInterpreterState_GET
10 #include "pycore_obmalloc_init.h"
11 
12 #include <stdlib.h>               // malloc()
13 #include <stdbool.h>
14 #ifdef WITH_MIMALLOC
15 // Forward declarations of functions used in our mimalloc modifications
16 static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
17 static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
18 static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
19 static void _PyMem_mi_page_reclaimed(mi_page_t *page);
20 static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
21 #  include "pycore_mimalloc.h"
22 #  include "mimalloc/static.c"
23 #  include "mimalloc/internal.h"  // for stats
24 #endif
25 
26 #if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
27 #  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
28 #endif
29 
30 #undef  uint
31 #define uint pymem_uint
32 
33 
34 /* Defined in tracemalloc.c */
35 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
36 
37 static void _PyObject_DebugDumpAddress(const void *p);
38 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
39 
40 
41 static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
42 static void set_up_debug_hooks_unlocked(void);
43 static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
44 static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
45 
46 
47 /***************************************/
48 /* low-level allocator implementations */
49 /***************************************/
50 
51 /* the default raw allocator (wraps malloc) */
52 
53 void *
54 _PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
55 {
56     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
57        for malloc(0), which would be treated as an error. Some platforms would
58        return a pointer with no memory behind it, which would break pymalloc.
59        To solve these problems, allocate an extra byte. */
60     if (size == 0)
61         size = 1;
62     return malloc(size);
63 }
64 
65 void *
66 _PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
67 {
68     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
69        for calloc(0, 0), which would be treated as an error. Some platforms
70        would return a pointer with no memory behind it, which would break
71        pymalloc.  To solve these problems, allocate an extra byte. */
72     if (nelem == 0 || elsize == 0) {
73         nelem = 1;
74         elsize = 1;
75     }
76     return calloc(nelem, elsize);
77 }
78 
79 void *
80 _PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
81 {
82     if (size == 0)
83         size = 1;
84     return realloc(ptr, size);
85 }
86 
87 void
88 _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
89 {
90     free(ptr);
91 }
92 
93 #ifdef WITH_MIMALLOC
94 
95 static void
96 _PyMem_mi_page_clear_qsbr(mi_page_t *page)
97 {
98 #ifdef Py_GIL_DISABLED
99     // Clear the QSBR goal and remove the page from the QSBR linked list.
100     page->qsbr_goal = 0;
101     if (page->qsbr_node.next != NULL) {
102         llist_remove(&page->qsbr_node);
103     }
104 #endif
105 }
106 
107 // Check if an empty, newly reclaimed page is safe to free now.
108 static bool
109 _PyMem_mi_page_is_safe_to_free(mi_page_t *page)
110 {
111     assert(mi_page_all_free(page));
112 #ifdef Py_GIL_DISABLED
113     assert(page->qsbr_node.next == NULL);
114     if (page->use_qsbr && page->qsbr_goal != 0) {
115         _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
116         if (tstate == NULL) {
117             return false;
118         }
119         return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
120     }
121 #endif
122     return true;
123 
124 }
125 
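// Try to free an empty page.  In the free-threaded build a page managed with
// QSBR may still be reachable by concurrent readers: it is only freed once its
// QSBR goal has been reached; otherwise it is queued on the owning thread's
// page_list and revisited by _PyMem_mi_heap_collect_qsbr().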
126 static bool
127 _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
128 {
129 #ifdef Py_GIL_DISABLED
130     assert(mi_page_all_free(page));
131     if (page->use_qsbr) {
132         _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
133         if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
134             _PyMem_mi_page_clear_qsbr(page);
135             _mi_page_free(page, pq, force);
136             return true;
137         }
138 
139         _PyMem_mi_page_clear_qsbr(page);
140         page->retire_expire = 0;
141         page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
142         llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
143         return false;
144     }
145 #endif
146     _mi_page_free(page, pq, force);
147     return true;
148 }
149 
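// Hook run from the patched mimalloc when a page is reclaimed.  If the page
// still carries a pending QSBR goal, it is re-queued for deferred freeing when
// it is empty, or the stale goal is dropped when the page is back in use.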
150 static void
151 _PyMem_mi_page_reclaimed(mi_page_t *page)
152 {
153 #ifdef Py_GIL_DISABLED
154     assert(page->qsbr_node.next == NULL);
155     if (page->qsbr_goal != 0) {
156         if (mi_page_all_free(page)) {
157             assert(page->qsbr_node.next == NULL);
158             _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
159             page->retire_expire = 0;
160             llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
161         }
162         else {
163             page->qsbr_goal = 0;
164         }
165     }
166 #endif
167 }
168 
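// Process this thread's list of deferred pages: pages that have been reused
// are dropped from the list, and empty pages are freed once their QSBR goal is
// reached; the walk stops at the first page that is not yet safe to free.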
169 static void
170 _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
171 {
172 #ifdef Py_GIL_DISABLED
173     if (!heap->page_use_qsbr) {
174         return;
175     }
176 
177     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
178     struct llist_node *head = &tstate->mimalloc.page_list;
179     if (llist_empty(head)) {
180         return;
181     }
182 
183     struct llist_node *node;
184     llist_for_each_safe(node, head) {
185         mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
186         if (!mi_page_all_free(page)) {
187             // We allocated from this page some point after the delayed free
188             _PyMem_mi_page_clear_qsbr(page);
189             continue;
190         }
191 
192         if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
193             return;
194         }
195 
196         _PyMem_mi_page_clear_qsbr(page);
197         _mi_page_free(page, mi_page_queue_of(page), false);
198     }
199 #endif
200 }
201 
202 void *
203 _PyMem_MiMalloc(void *ctx, size_t size)
204 {
205 #ifdef Py_GIL_DISABLED
206     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
207     mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
208     return mi_heap_malloc(heap, size);
209 #else
210     return mi_malloc(size);
211 #endif
212 }
213 
214 void *
215 _PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
216 {
217 #ifdef Py_GIL_DISABLED
218     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
219     mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
220     return mi_heap_calloc(heap, nelem, elsize);
221 #else
222     return mi_calloc(nelem, elsize);
223 #endif
224 }
225 
226 void *
227 _PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
228 {
229 #ifdef Py_GIL_DISABLED
230     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
231     mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
232     return mi_heap_realloc(heap, ptr, size);
233 #else
234     return mi_realloc(ptr, size);
235 #endif
236 }
237 
238 void
239 _PyMem_MiFree(void *ctx, void *ptr)
240 {
241     mi_free(ptr);
242 }
243 
244 void *
245 _PyObject_MiMalloc(void *ctx, size_t nbytes)
246 {
247 #ifdef Py_GIL_DISABLED
248     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
249     mi_heap_t *heap = tstate->mimalloc.current_object_heap;
250     return mi_heap_malloc(heap, nbytes);
251 #else
252     return mi_malloc(nbytes);
253 #endif
254 }
255 
256 void *
257 _PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
258 {
259 #ifdef Py_GIL_DISABLED
260     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
261     mi_heap_t *heap = tstate->mimalloc.current_object_heap;
262     return mi_heap_calloc(heap, nelem, elsize);
263 #else
264     return mi_calloc(nelem, elsize);
265 #endif
266 }
267 
268 
269 void *
270 _PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
271 {
272 #ifdef Py_GIL_DISABLED
273     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
274     mi_heap_t *heap = tstate->mimalloc.current_object_heap;
275     return mi_heap_realloc(heap, ptr, nbytes);
276 #else
277     return mi_realloc(ptr, nbytes);
278 #endif
279 }
280 
281 void
282 _PyObject_MiFree(void *ctx, void *ptr)
283 {
284     mi_free(ptr);
285 }
286 
287 #endif // WITH_MIMALLOC
288 
289 
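// Each *_ALLOC initializer below fills a PyMemAllocatorEx in declaration
// order: {ctx, malloc, calloc, realloc, free} (see pymem.h).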
290 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
291 
292 
293 #ifdef WITH_MIMALLOC
294 #  define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
295 #  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
296 #endif
297 
298 /* the pymalloc allocator */
299 
300 // The actual implementation is further down.
301 
302 #if defined(WITH_PYMALLOC)
303 void* _PyObject_Malloc(void *ctx, size_t size);
304 void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
305 void _PyObject_Free(void *ctx, void *p);
306 void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
307 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
308 #endif  // WITH_PYMALLOC
309 
310 #if defined(Py_GIL_DISABLED)
311 // Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
312 #  define PYRAW_ALLOC MALLOC_ALLOC
313 #  define PYMEM_ALLOC MIMALLOC_ALLOC
314 #  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
315 #elif defined(WITH_PYMALLOC)
316 #  define PYRAW_ALLOC MALLOC_ALLOC
317 #  define PYMEM_ALLOC PYMALLOC_ALLOC
318 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
319 #else
320 #  define PYRAW_ALLOC MALLOC_ALLOC
321 #  define PYMEM_ALLOC MALLOC_ALLOC
322 #  define PYOBJ_ALLOC MALLOC_ALLOC
323 #endif
324 
325 
326 /* the default debug allocators */
327 
328 // The actual implementation is further down.
329 
330 void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
331 void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
332 void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
333 void _PyMem_DebugRawFree(void *ctx, void *ptr);
334 
335 void* _PyMem_DebugMalloc(void *ctx, size_t size);
336 void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
337 void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
338 void _PyMem_DebugFree(void *ctx, void *p);
339 
340 #define PYDBGRAW_ALLOC \
341     {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
342 #define PYDBGMEM_ALLOC \
343     {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
344 #define PYDBGOBJ_ALLOC \
345     {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
346 
347 /* the low-level virtual memory allocator */
348 
349 #ifdef WITH_PYMALLOC
350 #  ifdef MS_WINDOWS
351 #    include <windows.h>
352 #  elif defined(HAVE_MMAP)
353 #    include <sys/mman.h>
354 #    ifdef MAP_ANONYMOUS
355 #      define ARENAS_USE_MMAP
356 #    endif
357 #  endif
358 #endif
359 
360 void *
361 _PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
362 {
363 #ifdef MS_WINDOWS
364     return VirtualAlloc(NULL, size,
365                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
366 #elif defined(ARENAS_USE_MMAP)
367     void *ptr;
368     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
369                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
370     if (ptr == MAP_FAILED)
371         return NULL;
372     assert(ptr != NULL);
373     return ptr;
374 #else
375     return malloc(size);
376 #endif
377 }
378 
379 void
380 _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
381 #if defined(ARENAS_USE_MMAP)
382     size_t size
383 #else
384     size_t Py_UNUSED(size)
385 #endif
386 )
387 {
388 #ifdef MS_WINDOWS
389     /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
390     if (ptr == NULL) {
391         return;
392     }
393     VirtualFree(ptr, 0, MEM_RELEASE);
394 #elif defined(ARENAS_USE_MMAP)
395     /* Unlike free(), munmap() does not special-case NULL to noop. */
396     if (ptr == NULL) {
397         return;
398     }
399     munmap(ptr, size);
400 #else
401     free(ptr);
402 #endif
403 }
404 
405 /*******************************************/
406 /* end low-level allocator implementations */
407 /*******************************************/
408 
409 
410 #if defined(__has_feature)  /* Clang */
411 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
412 #    define _Py_NO_SANITIZE_ADDRESS \
413         __attribute__((no_sanitize("address")))
414 #  endif
415 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
416 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
417 #  endif
418 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
419 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
420 #  endif
421 #elif defined(__GNUC__)
422 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
423 #    define _Py_NO_SANITIZE_ADDRESS \
424         __attribute__((no_sanitize_address))
425 #  endif
426    // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
427    // is provided only since GCC 7.
428 #  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
429 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
430 #  endif
431 #endif
432 
433 #ifndef _Py_NO_SANITIZE_ADDRESS
434 #  define _Py_NO_SANITIZE_ADDRESS
435 #endif
436 #ifndef _Py_NO_SANITIZE_THREAD
437 #  define _Py_NO_SANITIZE_THREAD
438 #endif
439 #ifndef _Py_NO_SANITIZE_MEMORY
440 #  define _Py_NO_SANITIZE_MEMORY
441 #endif
442 
443 
444 #define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
445 #define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
446 #define _PyMem (_PyRuntime.allocators.standard.mem)
447 #define _PyObject (_PyRuntime.allocators.standard.obj)
448 #define _PyMem_Debug (_PyRuntime.allocators.debug)
449 #define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
450 
451 
452 /***************************/
453 /* managing the allocators */
454 /***************************/
455 
456 static int
457 set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
458                                PyMemAllocatorEx *old_alloc)
459 {
460     if (old_alloc != NULL) {
461         get_allocator_unlocked(domain, old_alloc);
462     }
463 
464 
465     PyMemAllocatorEx new_alloc;
466     switch(domain)
467     {
468     case PYMEM_DOMAIN_RAW:
469         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
470         break;
471     case PYMEM_DOMAIN_MEM:
472         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
473         break;
474     case PYMEM_DOMAIN_OBJ:
475         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
476         break;
477     default:
478         /* unknown domain */
479         return -1;
480     }
481     set_allocator_unlocked(domain, &new_alloc);
482     if (debug) {
483         set_up_debug_hooks_domain_unlocked(domain);
484     }
485     return 0;
486 }
487 
488 
489 #ifdef Py_DEBUG
490 static const int pydebug = 1;
491 #else
492 static const int pydebug = 0;
493 #endif
494 
495 int
496 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
497                            PyMemAllocatorEx *old_alloc)
498 {
499     PyMutex_Lock(&ALLOCATORS_MUTEX);
500     int res = set_default_allocator_unlocked(domain, pydebug, old_alloc);
501     PyMutex_Unlock(&ALLOCATORS_MUTEX);
502     return res;
503 }
504 
505 
506 int
507 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
508 {
509     if (name == NULL || *name == '\0') {
510         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
511        options): use default memory allocators */
512         *allocator = PYMEM_ALLOCATOR_DEFAULT;
513     }
514     else if (strcmp(name, "default") == 0) {
515         *allocator = PYMEM_ALLOCATOR_DEFAULT;
516     }
517     else if (strcmp(name, "debug") == 0) {
518         *allocator = PYMEM_ALLOCATOR_DEBUG;
519     }
520 #if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
521     else if (strcmp(name, "pymalloc") == 0) {
522         *allocator = PYMEM_ALLOCATOR_PYMALLOC;
523     }
524     else if (strcmp(name, "pymalloc_debug") == 0) {
525         *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
526     }
527 #endif
528 #ifdef WITH_MIMALLOC
529     else if (strcmp(name, "mimalloc") == 0) {
530         *allocator = PYMEM_ALLOCATOR_MIMALLOC;
531     }
532     else if (strcmp(name, "mimalloc_debug") == 0) {
533         *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
534     }
535 #endif
536 #ifndef Py_GIL_DISABLED
537     else if (strcmp(name, "malloc") == 0) {
538         *allocator = PYMEM_ALLOCATOR_MALLOC;
539     }
540     else if (strcmp(name, "malloc_debug") == 0) {
541         *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
542     }
543 #endif
544     else {
545         /* unknown allocator */
546         return -1;
547     }
548     return 0;
549 }
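// Illustrative example (not part of this file): on a default build with the
// GIL, running Python with PYTHONMALLOC=malloc_debug makes this function
// return PYMEM_ALLOCATOR_MALLOC_DEBUG, which set_up_allocators_unlocked()
// below maps to plain malloc for all three domains with the debug hooks
// installed on top.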
550 
551 
552 static int
553 set_up_allocators_unlocked(PyMemAllocatorName allocator)
554 {
555     switch (allocator) {
556     case PYMEM_ALLOCATOR_NOT_SET:
557         /* do nothing */
558         break;
559 
560     case PYMEM_ALLOCATOR_DEFAULT:
561         (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
562         (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
563         (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
564         _PyRuntime.allocators.is_debug_enabled = pydebug;
565         break;
566 
567     case PYMEM_ALLOCATOR_DEBUG:
568         (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
569         (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
570         (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
571         _PyRuntime.allocators.is_debug_enabled = 1;
572         break;
573 
574 #ifdef WITH_PYMALLOC
575     case PYMEM_ALLOCATOR_PYMALLOC:
576     case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
577     {
578         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
579         set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
580 
581         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
582         set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
583         set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
584 
585         int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
586         _PyRuntime.allocators.is_debug_enabled = is_debug;
587         if (is_debug) {
588             set_up_debug_hooks_unlocked();
589         }
590         break;
591     }
592 #endif
593 #ifdef WITH_MIMALLOC
594     case PYMEM_ALLOCATOR_MIMALLOC:
595     case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
596     {
597         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
598         set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
599 
600         PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
601         set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
602 
603         PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
604         set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
605 
606         int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
607         _PyRuntime.allocators.is_debug_enabled = is_debug;
608         if (is_debug) {
609             set_up_debug_hooks_unlocked();
610         }
611 
612         break;
613     }
614 #endif
615 
616     case PYMEM_ALLOCATOR_MALLOC:
617     case PYMEM_ALLOCATOR_MALLOC_DEBUG:
618     {
619         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
620         set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
621         set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
622         set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
623 
624         int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
625         _PyRuntime.allocators.is_debug_enabled = is_debug;
626         if (is_debug) {
627             set_up_debug_hooks_unlocked();
628         }
629         break;
630     }
631 
632     default:
633         /* unknown allocator */
634         return -1;
635     }
636 
637     return 0;
638 }
639 
640 int
641 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
642 {
643     PyMutex_Lock(&ALLOCATORS_MUTEX);
644     int res = set_up_allocators_unlocked(allocator);
645     PyMutex_Unlock(&ALLOCATORS_MUTEX);
646     return res;
647 }
648 
649 
650 static int
651 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
652 {
653     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
654 }
655 
656 
657 static const char*
658 get_current_allocator_name_unlocked(void)
659 {
660     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
661 #ifdef WITH_PYMALLOC
662     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
663 #endif
664 #ifdef WITH_MIMALLOC
665     PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
666     PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
667 #endif
668 
669     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
670         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
671         pymemallocator_eq(&_PyObject, &malloc_alloc))
672     {
673         return "malloc";
674     }
675 #ifdef WITH_PYMALLOC
676     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
677         pymemallocator_eq(&_PyMem, &pymalloc) &&
678         pymemallocator_eq(&_PyObject, &pymalloc))
679     {
680         return "pymalloc";
681     }
682 #endif
683 #ifdef WITH_MIMALLOC
684     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
685         pymemallocator_eq(&_PyMem, &mimalloc) &&
686         pymemallocator_eq(&_PyObject, &mimalloc_obj))
687     {
688         return "mimalloc";
689     }
690 #endif
691 
692     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
693     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
694     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
695 
696     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
697         pymemallocator_eq(&_PyMem, &dbg_mem) &&
698         pymemallocator_eq(&_PyObject, &dbg_obj))
699     {
700         /* Debug hooks installed */
701         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
702             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
703             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
704         {
705             return "malloc_debug";
706         }
707 #ifdef WITH_PYMALLOC
708         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
709             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
710             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
711         {
712             return "pymalloc_debug";
713         }
714 #endif
715 #ifdef WITH_MIMALLOC
716         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
717             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
718             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
719         {
720             return "mimalloc_debug";
721         }
722 #endif
723     }
724     return NULL;
725 }
726 
727 const char*
728 _PyMem_GetCurrentAllocatorName(void)
729 {
730     PyMutex_Lock(&ALLOCATORS_MUTEX);
731     const char *name = get_current_allocator_name_unlocked();
732     PyMutex_Unlock(&ALLOCATORS_MUTEX);
733     return name;
734 }
735 
736 
737 int
738 _PyMem_DebugEnabled(void)
739 {
740     return _PyRuntime.allocators.is_debug_enabled;
741 }
742 
743 #ifdef WITH_PYMALLOC
744 static int
745 _PyMem_PymallocEnabled(void)
746 {
747     if (_PyMem_DebugEnabled()) {
748         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
749     }
750     else {
751         return (_PyObject.malloc == _PyObject_Malloc);
752     }
753 }
754 
755 #ifdef WITH_MIMALLOC
756 static int
757 _PyMem_MimallocEnabled(void)
758 {
759 #ifdef Py_GIL_DISABLED
760     return 1;
761 #else
762     if (_PyMem_DebugEnabled()) {
763         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
764     }
765     else {
766         return (_PyObject.malloc == _PyObject_MiMalloc);
767     }
768 #endif
769 }
770 #endif  // WITH_MIMALLOC
771 
772 #endif  // WITH_PYMALLOC
773 
774 
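// Wrap the current allocator of `domain` with the debug hooks: the original
// allocator is saved in _PyMem_Debug.<domain>.alloc so the hooks can forward
// to it.  A domain that already has the hooks installed is left untouched.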
775 static void
776 set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
777 {
778     PyMemAllocatorEx alloc;
779 
780     if (domain == PYMEM_DOMAIN_RAW) {
781         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
782             return;
783         }
784 
785         get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
786         alloc.ctx = &_PyMem_Debug.raw;
787         alloc.malloc = _PyMem_DebugRawMalloc;
788         alloc.calloc = _PyMem_DebugRawCalloc;
789         alloc.realloc = _PyMem_DebugRawRealloc;
790         alloc.free = _PyMem_DebugRawFree;
791         set_allocator_unlocked(domain, &alloc);
792     }
793     else if (domain == PYMEM_DOMAIN_MEM) {
794         if (_PyMem.malloc == _PyMem_DebugMalloc) {
795             return;
796         }
797 
798         get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
799         alloc.ctx = &_PyMem_Debug.mem;
800         alloc.malloc = _PyMem_DebugMalloc;
801         alloc.calloc = _PyMem_DebugCalloc;
802         alloc.realloc = _PyMem_DebugRealloc;
803         alloc.free = _PyMem_DebugFree;
804         set_allocator_unlocked(domain, &alloc);
805     }
806     else if (domain == PYMEM_DOMAIN_OBJ)  {
807         if (_PyObject.malloc == _PyMem_DebugMalloc) {
808             return;
809         }
810 
811         get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
812         alloc.ctx = &_PyMem_Debug.obj;
813         alloc.malloc = _PyMem_DebugMalloc;
814         alloc.calloc = _PyMem_DebugCalloc;
815         alloc.realloc = _PyMem_DebugRealloc;
816         alloc.free = _PyMem_DebugFree;
817         set_allocator_unlocked(domain, &alloc);
818     }
819 }
820 
821 
822 static void
823 set_up_debug_hooks_unlocked(void)
824 {
825     set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
826     set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
827     set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
828     _PyRuntime.allocators.is_debug_enabled = 1;
829 }
830 
831 void
832 PyMem_SetupDebugHooks(void)
833 {
834     PyMutex_Lock(&ALLOCATORS_MUTEX);
835     set_up_debug_hooks_unlocked();
836     PyMutex_Unlock(&ALLOCATORS_MUTEX);
837 }
838 
839 static void
840 get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
841 {
842     switch(domain)
843     {
844     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
845     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
846     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
847     default:
848         /* unknown domain: set all attributes to NULL */
849         allocator->ctx = NULL;
850         allocator->malloc = NULL;
851         allocator->calloc = NULL;
852         allocator->realloc = NULL;
853         allocator->free = NULL;
854     }
855 }
856 
857 static void
858 set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
859 {
860     switch(domain)
861     {
862     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
863     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
864     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
865     /* ignore unknown domain */
866     }
867 }
868 
869 void
870 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
871 {
872     PyMutex_Lock(&ALLOCATORS_MUTEX);
873     get_allocator_unlocked(domain, allocator);
874     PyMutex_Unlock(&ALLOCATORS_MUTEX);
875 }
876 
877 void
878 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
879 {
880     PyMutex_Lock(&ALLOCATORS_MUTEX);
881     set_allocator_unlocked(domain, allocator);
882     PyMutex_Unlock(&ALLOCATORS_MUTEX);
883 }
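/* Minimal sketch of how an extension wraps an allocator through this API
   (the my_* names are illustrative, not defined in CPython):

       PyMemAllocatorEx old, hook = {my_ctx, my_malloc, my_calloc,
                                     my_realloc, my_free};
       PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &old);
       PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook);

   The hook can delegate to the previous allocator via the saved `old`. */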
884 
885 void
886 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
887 {
888     PyMutex_Lock(&ALLOCATORS_MUTEX);
889     *allocator = _PyObject_Arena;
890     PyMutex_Unlock(&ALLOCATORS_MUTEX);
891 }
892 
893 void
894 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
895 {
896     PyMutex_Lock(&ALLOCATORS_MUTEX);
897     _PyObject_Arena = *allocator;
898     PyMutex_Unlock(&ALLOCATORS_MUTEX);
899 }
900 
901 
902 /* Note that there is a possible, but very unlikely, race in any place
903  * below where we call one of the allocator functions.  We access two
904  * fields in each case:  "malloc", etc. and "ctx".
905  *
906  * It is unlikely that the allocator will be changed while one of those
907  * calls is happening, much less in that very narrow window.
908  * Furthermore, the likelihood of a race is drastically reduced by the
909  * fact that the allocator may not be changed after runtime init
910  * (except with a wrapper).
911  *
912  * With the above in mind, we currently don't worry about locking
913  * around these uses of the runtime-global allocators state. */
914 
915 
916 /*************************/
917 /* the "arena" allocator */
918 /*************************/
919 
920 void *
921 _PyObject_VirtualAlloc(size_t size)
922 {
923     return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
924 }
925 
926 void
927 _PyObject_VirtualFree(void *obj, size_t size)
928 {
929     _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
930 }
931 
932 
933 /***********************/
934 /* the "raw" allocator */
935 /***********************/
936 
937 void *
938 PyMem_RawMalloc(size_t size)
939 {
940     /*
941      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
942      * Most python internals blindly use a signed Py_ssize_t to track
943      * things without checking for overflows or negatives.
944      * As size_t is unsigned, checking for size < 0 is not required.
945      */
946     if (size > (size_t)PY_SSIZE_T_MAX)
947         return NULL;
948     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
949 }
950 
951 void *
952 PyMem_RawCalloc(size_t nelem, size_t elsize)
953 {
954     /* see PyMem_RawMalloc() */
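    /* Dividing avoids overflow in nelem * elsize: e.g. on a 64-bit build,
       nelem = 2**32 and elsize = 2**32 would wrap to 0 if multiplied. */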
955     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
956         return NULL;
957     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
958 }
959 
960 void*
961 PyMem_RawRealloc(void *ptr, size_t new_size)
962 {
963     /* see PyMem_RawMalloc() */
964     if (new_size > (size_t)PY_SSIZE_T_MAX)
965         return NULL;
966     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
967 }
968 
969 void PyMem_RawFree(void *ptr)
970 {
971     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
972 }
973 
974 
975 /***********************/
976 /* the "mem" allocator */
977 /***********************/
978 
979 void *
980 PyMem_Malloc(size_t size)
981 {
982     /* see PyMem_RawMalloc() */
983     if (size > (size_t)PY_SSIZE_T_MAX)
984         return NULL;
985     OBJECT_STAT_INC_COND(allocations512, size < 512);
986     OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
987     OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
988     OBJECT_STAT_INC(allocations);
989     return _PyMem.malloc(_PyMem.ctx, size);
990 }
991 
992 void *
PyMem_Calloc(size_t nelem,size_t elsize)993 PyMem_Calloc(size_t nelem, size_t elsize)
994 {
995     /* see PyMem_RawMalloc() */
996     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
997         return NULL;
998     OBJECT_STAT_INC_COND(allocations512, elsize < 512);
999     OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1000     OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1001     OBJECT_STAT_INC(allocations);
1002     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1003 }
1004 
1005 void *
1006 PyMem_Realloc(void *ptr, size_t new_size)
1007 {
1008     /* see PyMem_RawMalloc() */
1009     if (new_size > (size_t)PY_SSIZE_T_MAX)
1010         return NULL;
1011     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1012 }
1013 
1014 void
1015 PyMem_Free(void *ptr)
1016 {
1017     OBJECT_STAT_INC(frees);
1018     _PyMem.free(_PyMem.ctx, ptr);
1019 }
1020 
1021 
1022 /***************************/
1023 /* pymem utility functions */
1024 /***************************/
1025 
1026 wchar_t*
1027 _PyMem_RawWcsdup(const wchar_t *str)
1028 {
1029     assert(str != NULL);
1030 
1031     size_t len = wcslen(str);
1032     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1033         return NULL;
1034     }
1035 
1036     size_t size = (len + 1) * sizeof(wchar_t);
1037     wchar_t *str2 = PyMem_RawMalloc(size);
1038     if (str2 == NULL) {
1039         return NULL;
1040     }
1041 
1042     memcpy(str2, str, size);
1043     return str2;
1044 }
1045 
1046 char *
1047 _PyMem_RawStrdup(const char *str)
1048 {
1049     assert(str != NULL);
1050     size_t size = strlen(str) + 1;
1051     char *copy = PyMem_RawMalloc(size);
1052     if (copy == NULL) {
1053         return NULL;
1054     }
1055     memcpy(copy, str, size);
1056     return copy;
1057 }
1058 
1059 char *
1060 _PyMem_Strdup(const char *str)
1061 {
1062     assert(str != NULL);
1063     size_t size = strlen(str) + 1;
1064     char *copy = PyMem_Malloc(size);
1065     if (copy == NULL) {
1066         return NULL;
1067     }
1068     memcpy(copy, str, size);
1069     return copy;
1070 }
1071 
1072 /***********************************************/
1073 /* Delayed freeing support for Py_GIL_DISABLED */
1074 /***********************************************/
1075 
1076 // So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1077 #define WORK_ITEMS_PER_CHUNK 254
1078 
1079 // A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1080 struct _mem_work_item {
1081     uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1082     uint64_t qsbr_goal;
1083 };
1084 
1085 // A fixed-size buffer of pointers to be freed
1086 struct _mem_work_chunk {
1087     // Linked list node of chunks in queue
1088     struct llist_node node;
1089 
1090     Py_ssize_t rd_idx;  // index of next item to read
1091     Py_ssize_t wr_idx;  // index of next item to write
1092     struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1093 };
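// Size check, assuming 8-byte pointers and no padding: a 16-byte llist_node
// plus two 8-byte indices plus 254 * 16-byte work items = 32 + 4064 = 4096
// bytes, matching the comment above WORK_ITEMS_PER_CHUNK.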
1094 
1095 static void
1096 free_work_item(uintptr_t ptr)
1097 {
1098     if (ptr & 0x01) {
1099         PyObject_Free((char *)(ptr - 1));
1100     }
1101     else {
1102         PyMem_Free((void *)ptr);
1103     }
1104 }
1105 
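// In the free-threaded build, queue a (possibly tagged) pointer to be freed
// once every thread has passed a QSBR checkpoint; with the GIL it is freed
// immediately.  It is also freed immediately while the world is stopped, and
// if no chunk can be allocated for the queue the world is briefly stopped so
// the pointer can be freed right away.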
1106 static void
1107 free_delayed(uintptr_t ptr)
1108 {
1109 #ifndef Py_GIL_DISABLED
1110     free_work_item(ptr);
1111 #else
1112     if (_PyRuntime.stoptheworld.world_stopped) {
1113         // Free immediately if the world is stopped, including during
1114         // interpreter shutdown.
1115         free_work_item(ptr);
1116         return;
1117     }
1118 
1119     _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1120     struct llist_node *head = &tstate->mem_free_queue;
1121 
1122     struct _mem_work_chunk *buf = NULL;
1123     if (!llist_empty(head)) {
1124         // Try to re-use the last buffer
1125         buf = llist_data(head->prev, struct _mem_work_chunk, node);
1126         if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1127             // already full
1128             buf = NULL;
1129         }
1130     }
1131 
1132     if (buf == NULL) {
1133         buf = PyMem_Calloc(1, sizeof(*buf));
1134         if (buf != NULL) {
1135             llist_insert_tail(head, &buf->node);
1136         }
1137     }
1138 
1139     if (buf == NULL) {
1140         // failed to allocate a buffer, free immediately
1141         _PyEval_StopTheWorld(tstate->base.interp);
1142         free_work_item(ptr);
1143         _PyEval_StartTheWorld(tstate->base.interp);
1144         return;
1145     }
1146 
1147     assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1148     uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
1149     buf->array[buf->wr_idx].ptr = ptr;
1150     buf->array[buf->wr_idx].qsbr_goal = seq;
1151     buf->wr_idx++;
1152 
1153     if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1154         _PyMem_ProcessDelayed((PyThreadState *)tstate);
1155     }
1156 #endif
1157 }
1158 
1159 void
1160 _PyMem_FreeDelayed(void *ptr)
1161 {
1162     assert(!((uintptr_t)ptr & 0x01));
1163     free_delayed((uintptr_t)ptr);
1164 }
1165 
1166 void
1167 _PyObject_FreeDelayed(void *ptr)
1168 {
1169     assert(!((uintptr_t)ptr & 0x01));
1170     free_delayed(((uintptr_t)ptr)|0x01);
1171 }
1172 
1173 static struct _mem_work_chunk *
1174 work_queue_first(struct llist_node *head)
1175 {
1176     return llist_data(head->next, struct _mem_work_chunk, node);
1177 }
1178 
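// Drain a work queue in FIFO order, freeing items whose QSBR goal has been
// reached and stopping at the first item that is not yet safe to free.  With
// keep_empty, the last chunk is retained (reset to empty) to avoid
// reallocating it on the next deferred free.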
1179 static void
1180 process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
1181               bool keep_empty)
1182 {
1183     while (!llist_empty(head)) {
1184         struct _mem_work_chunk *buf = work_queue_first(head);
1185 
1186         while (buf->rd_idx < buf->wr_idx) {
1187             struct _mem_work_item *item = &buf->array[buf->rd_idx];
1188             if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) {
1189                 return;
1190             }
1191 
1192             free_work_item(item->ptr);
1193             buf->rd_idx++;
1194         }
1195 
1196         assert(buf->rd_idx == buf->wr_idx);
1197         if (keep_empty && buf->node.next == head) {
1198             // Keep the last buffer in the queue to reduce re-allocations
1199             buf->rd_idx = buf->wr_idx = 0;
1200             return;
1201         }
1202 
1203         llist_remove(&buf->node);
1204         PyMem_Free(buf);
1205     }
1206 }
1207 
1208 static void
1209 process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1210                      struct _qsbr_thread_state *qsbr)
1211 {
1212     if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1213         return;
1214     }
1215 
1216     // Try to acquire the lock, but don't block if it's already held.
1217     if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1218         process_queue(&queue->head, qsbr, false);
1219 
1220         int more_work = !llist_empty(&queue->head);
1221         _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1222 
1223         PyMutex_Unlock(&queue->mutex);
1224     }
1225 }
1226 
1227 void
1228 _PyMem_ProcessDelayed(PyThreadState *tstate)
1229 {
1230     PyInterpreterState *interp = tstate->interp;
1231     _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1232 
1233     // Process thread-local work
1234     process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
1235 
1236     // Process shared interpreter work
1237     process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
1238 }
1239 
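// Hand this thread's pending deferred frees over to the interpreter-wide
// queue so that another thread can process them later; a queue holding only
// one empty chunk is simply discarded.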
1240 void
1241 _PyMem_AbandonDelayed(PyThreadState *tstate)
1242 {
1243     PyInterpreterState *interp = tstate->interp;
1244     struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1245 
1246     if (llist_empty(queue)) {
1247         return;
1248     }
1249 
1250     // Check if the queue contains one empty buffer
1251     struct _mem_work_chunk *buf = work_queue_first(queue);
1252     if (buf->rd_idx == buf->wr_idx) {
1253         llist_remove(&buf->node);
1254         PyMem_Free(buf);
1255         assert(llist_empty(queue));
1256         return;
1257     }
1258 
1259     // Merge the thread's work queue into the interpreter's work queue.
1260     PyMutex_Lock(&interp->mem_free_queue.mutex);
1261     llist_concat(&interp->mem_free_queue.head, queue);
1262     _Py_atomic_store_int_relaxed(&interp->mem_free_queue.has_work, 1);
1263     PyMutex_Unlock(&interp->mem_free_queue.mutex);
1264 
1265     assert(llist_empty(queue));  // the thread's queue is now empty
1266 }
1267 
1268 void
1269 _PyMem_FiniDelayed(PyInterpreterState *interp)
1270 {
1271     struct llist_node *head = &interp->mem_free_queue.head;
1272     while (!llist_empty(head)) {
1273         struct _mem_work_chunk *buf = work_queue_first(head);
1274 
1275         while (buf->rd_idx < buf->wr_idx) {
1276             // Free the remaining items immediately. There should be no other
1277             // threads accessing the memory at this point during shutdown.
1278             struct _mem_work_item *item = &buf->array[buf->rd_idx];
1279             free_work_item(item->ptr);
1280             buf->rd_idx++;
1281         }
1282 
1283         llist_remove(&buf->node);
1284         PyMem_Free(buf);
1285     }
1286 }
1287 
1288 /**************************/
1289 /* the "object" allocator */
1290 /**************************/
1291 
1292 void *
1293 PyObject_Malloc(size_t size)
1294 {
1295     /* see PyMem_RawMalloc() */
1296     if (size > (size_t)PY_SSIZE_T_MAX)
1297         return NULL;
1298     OBJECT_STAT_INC_COND(allocations512, size < 512);
1299     OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1300     OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1301     OBJECT_STAT_INC(allocations);
1302     return _PyObject.malloc(_PyObject.ctx, size);
1303 }
1304 
1305 void *
1306 PyObject_Calloc(size_t nelem, size_t elsize)
1307 {
1308     /* see PyMem_RawMalloc() */
1309     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1310         return NULL;
1311     OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1312     OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1313     OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1314     OBJECT_STAT_INC(allocations);
1315     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1316 }
1317 
1318 void *
1319 PyObject_Realloc(void *ptr, size_t new_size)
1320 {
1321     /* see PyMem_RawMalloc() */
1322     if (new_size > (size_t)PY_SSIZE_T_MAX)
1323         return NULL;
1324     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1325 }
1326 
1327 void
1328 PyObject_Free(void *ptr)
1329 {
1330     OBJECT_STAT_INC(frees);
1331     _PyObject.free(_PyObject.ctx, ptr);
1332 }
1333 
1334 
1335 /* If we're using GCC, use __builtin_expect() to reduce overhead of
1336    the valgrind checks */
1337 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
1338 #  define UNLIKELY(value) __builtin_expect((value), 0)
1339 #  define LIKELY(value) __builtin_expect((value), 1)
1340 #else
1341 #  define UNLIKELY(value) (value)
1342 #  define LIKELY(value) (value)
1343 #endif
1344 
1345 #ifdef WITH_PYMALLOC
1346 
1347 #ifdef WITH_VALGRIND
1348 #include <valgrind/valgrind.h>
1349 
1350 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
1351 static int running_on_valgrind = -1;
1352 #endif
1353 
1354 typedef struct _obmalloc_state OMState;
1355 
1356 /* obmalloc state for main interpreter and shared by all interpreters without
1357  * their own obmalloc state.  By not explicitly initializing this structure, it
1358  * will be allocated in the BSS, which is a small performance win.  The radix
1359  * tree arrays are fairly large but are sparsely used.  */
1360 static struct _obmalloc_state obmalloc_state_main;
1361 static bool obmalloc_state_initialized;
1362 
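// An interpreter has its own obmalloc state unless it was created with
// Py_RTFLAGS_USE_MAIN_OBMALLOC, in which case it shares the main
// interpreter's state (except while the main interpreter is finalizing).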
1363 static inline int
1364 has_own_state(PyInterpreterState *interp)
1365 {
1366     return (_Py_IsMainInterpreter(interp) ||
1367             !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1368             _Py_IsMainInterpreterFinalizing(interp));
1369 }
1370 
1371 static inline OMState *
1372 get_state(void)
1373 {
1374     PyInterpreterState *interp = _PyInterpreterState_GET();
1375     assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1376     return interp->obmalloc;
1377 }
1378 
1379 // These macros all rely on a local "state" variable.
1380 #define usedpools (state->pools.used)
1381 #define allarenas (state->mgmt.arenas)
1382 #define maxarenas (state->mgmt.maxarenas)
1383 #define unused_arena_objects (state->mgmt.unused_arena_objects)
1384 #define usable_arenas (state->mgmt.usable_arenas)
1385 #define nfp2lasta (state->mgmt.nfp2lasta)
1386 #define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1387 #define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1388 #define narenas_highwater (state->mgmt.narenas_highwater)
1389 #define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1390 
1391 #ifdef WITH_MIMALLOC
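// Callback for mi_heap_visit_blocks(): with per-block visiting disabled it is
// invoked once per heap area, so it just accumulates area->used into
// *allocated_blocks.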
1392 static bool count_blocks(
1393     const mi_heap_t* heap, const mi_heap_area_t* area,
1394     void* block, size_t block_size, void* allocated_blocks)
1395 {
1396     *(size_t *)allocated_blocks += area->used;
1397     return 1;
1398 }
1399 
1400 static Py_ssize_t
1401 get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1402 {
1403     size_t allocated_blocks = 0;
1404 #ifdef Py_GIL_DISABLED
1405     for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) {
1406         _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1407         for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1408             mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1409             mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1410         }
1411     }
1412 
1413     mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1414     for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1415         _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1416                                         &allocated_blocks);
1417     }
1418 #else
1419     // TODO(sgross): this only counts the current thread's blocks.
1420     mi_heap_t *heap = mi_heap_get_default();
1421     mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1422 #endif
1423     return allocated_blocks;
1424 }
1425 #endif
1426 
1427 Py_ssize_t
1428 _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1429 {
1430 #ifdef WITH_MIMALLOC
1431     if (_PyMem_MimallocEnabled()) {
1432         return get_mimalloc_allocated_blocks(interp);
1433     }
1434 #endif
1435 
1436 #ifdef Py_DEBUG
1437     assert(has_own_state(interp));
1438 #else
1439     if (!has_own_state(interp)) {
1440         _Py_FatalErrorFunc(__func__,
1441                            "the interpreter doesn't have its own allocator");
1442     }
1443 #endif
1444     OMState *state = interp->obmalloc;
1445 
1446     if (state == NULL) {
1447         return 0;
1448     }
1449 
1450     Py_ssize_t n = raw_allocated_blocks;
1451     /* add up allocated blocks for used pools */
1452     for (uint i = 0; i < maxarenas; ++i) {
1453         /* Skip arenas which are not allocated. */
1454         if (allarenas[i].address == 0) {
1455             continue;
1456         }
1457 
1458         uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1459 
1460         /* visit every pool in the arena */
1461         assert(base <= (uintptr_t) allarenas[i].pool_address);
1462         for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1463             poolp p = (poolp)base;
1464             n += p->ref.count;
1465         }
1466     }
1467     return n;
1468 }
1469 
1470 static void free_obmalloc_arenas(PyInterpreterState *interp);
1471 
1472 void
1473 _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1474 {
1475 #ifdef WITH_MIMALLOC
1476     if (_PyMem_MimallocEnabled()) {
1477         return;
1478     }
1479 #endif
1480     if (has_own_state(interp) && interp->obmalloc != NULL) {
1481         Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1482         assert(has_own_state(interp) || leaked == 0);
1483         interp->runtime->obmalloc.interpreter_leaks += leaked;
1484         if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1485             // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1486             // then some of the memory allocated by obmalloc has not been
1487             // freed.  It might be safe to free the arenas in that case but
1488             // it's possible that extension modules are still using that
1489             // memory.  So, it is safer to not free and to leak.  Perhaps there
1490             // should be a warning when this happens.  It should be possible to
1491             // use a tool like "-fsanitize=address" to track down these leaks.
1492             free_obmalloc_arenas(interp);
1493         }
1494     }
1495 }
1496 
1497 static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1498 
1499 /* We preserve the number of blocks leaked during runtime finalization,
1500    so they can be reported if the runtime is initialized again. */
1501 // XXX We don't lose any information by dropping this,
1502 // so we should consider doing so.
1503 static Py_ssize_t last_final_leaks = 0;
1504 
1505 void
1506 _Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1507 {
1508     last_final_leaks = get_num_global_allocated_blocks(runtime);
1509     runtime->obmalloc.interpreter_leaks = 0;
1510 }
1511 
1512 static Py_ssize_t
1513 get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1514 {
1515     Py_ssize_t total = 0;
1516     if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1517         PyInterpreterState *interp = _PyInterpreterState_Main();
1518         if (interp == NULL) {
1519             /* We are at the very end of runtime finalization.
1520                We can't rely on finalizing->interp since that thread
1521                state is probably already freed, so we don't worry
1522                about it. */
1523             assert(PyInterpreterState_Head() == NULL);
1524         }
1525         else {
1526             assert(interp != NULL);
1527             /* It is probably the last interpreter but not necessarily. */
1528             assert(PyInterpreterState_Next(interp) == NULL);
1529             total += _PyInterpreterState_GetAllocatedBlocks(interp);
1530         }
1531     }
1532     else {
1533         _PyEval_StopTheWorldAll(&_PyRuntime);
1534         HEAD_LOCK(runtime);
1535         PyInterpreterState *interp = PyInterpreterState_Head();
1536         assert(interp != NULL);
1537 #ifdef Py_DEBUG
1538         int got_main = 0;
1539 #endif
1540         for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1541 #ifdef Py_DEBUG
1542             if (_Py_IsMainInterpreter(interp)) {
1543                 assert(!got_main);
1544                 got_main = 1;
1545                 assert(has_own_state(interp));
1546             }
1547 #endif
1548             if (has_own_state(interp)) {
1549                 total += _PyInterpreterState_GetAllocatedBlocks(interp);
1550             }
1551         }
1552         HEAD_UNLOCK(runtime);
1553         _PyEval_StartTheWorldAll(&_PyRuntime);
1554 #ifdef Py_DEBUG
1555         assert(got_main);
1556 #endif
1557     }
1558     total += runtime->obmalloc.interpreter_leaks;
1559     total += last_final_leaks;
1560     return total;
1561 }
1562 
1563 Py_ssize_t
1564 _Py_GetGlobalAllocatedBlocks(void)
1565 {
1566     return get_num_global_allocated_blocks(&_PyRuntime);
1567 }
1568 
1569 #if WITH_PYMALLOC_RADIX_TREE
1570 /*==========================================================================*/
1571 /* radix tree for tracking arena usage. */
1572 
1573 #define arena_map_root (state->usage.arena_map_root)
1574 #ifdef USE_INTERIOR_NODES
1575 #define arena_map_mid_count (state->usage.arena_map_mid_count)
1576 #define arena_map_bot_count (state->usage.arena_map_bot_count)
1577 #endif
1578 
1579 /* Return a pointer to a bottom tree node; return NULL if it doesn't exist or
1580  * cannot be created. */
1581 static inline Py_ALWAYS_INLINE arena_map_bot_t *
1582 arena_map_get(OMState *state, pymem_block *p, int create)
1583 {
1584 #ifdef USE_INTERIOR_NODES
1585     /* sanity check that IGNORE_BITS is correct */
1586     assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1587     int i1 = MAP_TOP_INDEX(p);
1588     if (arena_map_root.ptrs[i1] == NULL) {
1589         if (!create) {
1590             return NULL;
1591         }
1592         arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1593         if (n == NULL) {
1594             return NULL;
1595         }
1596         arena_map_root.ptrs[i1] = n;
1597         arena_map_mid_count++;
1598     }
1599     int i2 = MAP_MID_INDEX(p);
1600     if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1601         if (!create) {
1602             return NULL;
1603         }
1604         arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1605         if (n == NULL) {
1606             return NULL;
1607         }
1608         arena_map_root.ptrs[i1]->ptrs[i2] = n;
1609         arena_map_bot_count++;
1610     }
1611     return arena_map_root.ptrs[i1]->ptrs[i2];
1612 #else
1613     return &arena_map_root;
1614 #endif
1615 }
1616 
1617 
1618 /* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1619  * away 24 bits of the address.  That reduces the space requirement of
1620  * the tree compared to similar radix tree page-map schemes.  In
1621  * exchange for slashing the space requirement, it needs more
1622  * computation to check an address.
1623  *
1624  * Tracking coverage is done by "ideal" arena address.  It is easier to
1625  * explain in decimal so let's say that the arena size is 100 bytes.
1626  * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1627  * pointer address is inside an actual arena, we have to check two ideal
1628  * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1629  * 300.  In the rare case that an arena is aligned in the ideal way
1630  * (e.g. base address of arena is 200) then we only have to check one
1631  * ideal address.
1632  *
1633  * The tree nodes for 200 and 300 both store the address of arena.
1634  * There are two cases: the arena starts at a lower ideal arena and
1635  * extends to this one, or the arena starts in this arena and extends to
1636  * the next ideal arena.  The tail_lo and tail_hi members correspond to
1637  * these two cases.
1638  */
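/* A minimal illustrative sketch of the coverage check described above (not
 * part of obmalloc; the names are made up).  It uses the decimal analogy with
 * a hypothetical arena size of 100: an address is covered either by an arena
 * spilling in from the previous ideal slot (tail_lo) or by an arena starting
 * inside this slot (non-zero tail_hi). */
#if 0
static int
toy_arena_covers(int tail_lo, int tail_hi, int addr)
{
    int tail = addr % 100;   /* offset of addr within its ideal 100-byte slot */
    return (tail < tail_lo) || (tail >= tail_hi && tail_hi != 0);
}
#endif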
1639 
1640 
1641 /* mark or unmark addresses covered by arena */
1642 static int
1643 arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1644 {
1645     /* sanity check that IGNORE_BITS is correct */
1646     assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1647     arena_map_bot_t *n_hi = arena_map_get(
1648             state, (pymem_block *)arena_base, is_used);
1649     if (n_hi == NULL) {
1650         assert(is_used); /* otherwise node should already exist */
1651         return 0; /* failed to allocate space for node */
1652     }
1653     int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
1654     int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1655     if (tail == 0) {
1656         /* is ideal arena address */
1657         n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1658     }
1659     else {
1660          * arena_base address is not ideal (i.e., not aligned to the arena size) and
1661          * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1662          * for the next arena.  Note that it might be in different MAP_TOP
1663          * and MAP_MID nodes as well so we need to call arena_map_get()
1664          * again (do the full tree traversal).
1665          */
1666         n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1667         uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1668         /* If arena_base is a legit arena address, so is arena_base_next - 1
1669          * (last address in arena).  If arena_base_next overflows then it
1670          * must overflow to 0.  However, that would mean arena_base was
1671          * "ideal" and we should not be in this case. */
1672         assert(arena_base < arena_base_next);
1673         arena_map_bot_t *n_lo = arena_map_get(
1674                 state, (pymem_block *)arena_base_next, is_used);
1675         if (n_lo == NULL) {
1676             assert(is_used); /* otherwise should already exist */
1677             n_hi->arenas[i3].tail_hi = 0;
1678             return 0; /* failed to allocate space for node */
1679         }
1680         int i3_next = MAP_BOT_INDEX(arena_base_next);
1681         n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1682     }
1683     return 1;
1684 }
1685 
1686 /* Return true if 'p' is a pointer inside an obmalloc arena.
1687  * _PyObject_Free() calls this so it needs to be very fast. */
1688 static int
1689 arena_map_is_used(OMState *state, pymem_block *p)
1690 {
1691     arena_map_bot_t *n = arena_map_get(state, p, 0);
1692     if (n == NULL) {
1693         return 0;
1694     }
1695     int i3 = MAP_BOT_INDEX(p);
1696     /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1697     int32_t hi = n->arenas[i3].tail_hi;
1698     int32_t lo = n->arenas[i3].tail_lo;
1699     int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1700     return (tail < lo) || (tail >= hi && hi != 0);
1701 }
1702 
1703 /* end of radix tree logic */
1704 /*==========================================================================*/
1705 #endif /* WITH_PYMALLOC_RADIX_TREE */
1706 
1707 
1708 /* Allocate a new arena.  Return NULL if we run out of memory; otherwise
1709  * return the address of an arena_object describing the new arena.
1710  * It's expected that the caller will set `usable_arenas` to the
1711  * return value.
1712  */
1713 static struct arena_object*
1714 new_arena(OMState *state)
1715 {
1716     struct arena_object* arenaobj;
1717     uint excess;        /* number of bytes above pool alignment */
1718     void *address;
1719 
1720     int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
1721     if (debug_stats == -1) {
1722         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1723         debug_stats = (opt != NULL && *opt != '\0');
1724         _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
1725     }
1726     if (debug_stats) {
1727         _PyObject_DebugMallocStats(stderr);
1728     }
1729 
1730     if (unused_arena_objects == NULL) {
1731         uint i;
1732         uint numarenas;
1733         size_t nbytes;
1734 
1735         /* Double the number of arena objects on each allocation.
1736          * Note that it's possible for `numarenas` to overflow.
1737          */
1738         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1739         if (numarenas <= maxarenas)
1740             return NULL;                /* overflow */
1741 #if SIZEOF_SIZE_T <= SIZEOF_INT
1742         if (numarenas > SIZE_MAX / sizeof(*allarenas))
1743             return NULL;                /* overflow */
1744 #endif
1745         nbytes = numarenas * sizeof(*allarenas);
1746         arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
1747         if (arenaobj == NULL)
1748             return NULL;
1749         allarenas = arenaobj;
1750 
1751         /* We might need to fix pointers that were copied.  However,
1752          * new_arena only gets called when all the pages in the
1753          * previous arenas are full.  Thus, there are *no* pointers
1754          * into the old array. Thus, we don't have to worry about
1755          * invalid pointers.  Just to be sure, some asserts:
1756          */
1757         assert(usable_arenas == NULL);
1758         assert(unused_arena_objects == NULL);
1759 
1760         /* Put the new arenas on the unused_arena_objects list. */
1761         for (i = maxarenas; i < numarenas; ++i) {
1762             allarenas[i].address = 0;              /* mark as unassociated */
1763             allarenas[i].nextarena = i < numarenas - 1 ?
1764                                         &allarenas[i+1] : NULL;
1765         }
1766 
1767         /* Update globals. */
1768         unused_arena_objects = &allarenas[maxarenas];
1769         maxarenas = numarenas;
1770     }
1771 
1772     /* Take the next available arena object off the head of the list. */
1773     assert(unused_arena_objects != NULL);
1774     arenaobj = unused_arena_objects;
1775     unused_arena_objects = arenaobj->nextarena;
1776     assert(arenaobj->address == 0);
1777     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1778 #if WITH_PYMALLOC_RADIX_TREE
1779     if (address != NULL) {
1780         if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
1781             /* marking arena in radix tree failed, abort */
1782             _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1783             address = NULL;
1784         }
1785     }
1786 #endif
1787     if (address == NULL) {
1788         /* The allocation failed: return NULL after putting the
1789          * arenaobj back.
1790          */
1791         arenaobj->nextarena = unused_arena_objects;
1792         unused_arena_objects = arenaobj;
1793         return NULL;
1794     }
1795     arenaobj->address = (uintptr_t)address;
1796 
1797     ++narenas_currently_allocated;
1798     ++ntimes_arena_allocated;
1799     if (narenas_currently_allocated > narenas_highwater)
1800         narenas_highwater = narenas_currently_allocated;
1801     arenaobj->freepools = NULL;
1802     /* pool_address <- first pool-aligned address in the arena
1803        nfreepools <- number of whole pools that fit after alignment */
1804     arenaobj->pool_address = (pymem_block*)arenaobj->address;
1805     arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1806     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1807     if (excess != 0) {
1808         --arenaobj->nfreepools;
1809         arenaobj->pool_address += POOL_SIZE - excess;
1810     }
1811     arenaobj->ntotalpools = arenaobj->nfreepools;
1812 
1813     return arenaobj;
1814 }
1815 
1816 
1817 
1818 #if WITH_PYMALLOC_RADIX_TREE
1819 /* Return true if and only if P is an address that was allocated by
1820    pymalloc.  When the radix tree is used, 'poolp' is unused.
1821  */
1822 static bool
1823 address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
1824 {
1825     return arena_map_is_used(state, p);
1826 }
1827 #else
1828 /*
1829 address_in_range(P, POOL)
1830 
1831 Return true if and only if P is an address that was allocated by pymalloc.
1832 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1833 (the caller is asked to compute this because the macro expands POOL more than
1834 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1835 variable and pass the latter to the macro; because address_in_range is
1836 called on every alloc/realloc/free, micro-efficiency is important here).
1837 
1838 Tricky:  Let B be the arena base address associated with the pool, B =
1839 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1840 
1841     B <= P < B + ARENA_SIZE
1842 
1843 Subtracting B throughout, this is true iff
1844 
1845     0 <= P-B < ARENA_SIZE
1846 
1847 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1848 
1849 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1850 before the first arena has been allocated.  `arenas` is still NULL in that
1851 case.  We're relying on that maxarenas is also 0 in that case, so that
1852 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1853 into a NULL arenas.
1854 
1855 Details:  given P and POOL, the arena_object corresponding to P is AO =
1856 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1857 stores, etc), POOL is the correct address of P's pool, AO.address is the
1858 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1859 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1860 (NULL)).  Therefore address_in_range correctly reports that obmalloc
1861 controls P.
1862 
1863 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1864 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1865 in this case -- it may even be uninitialized trash.  If the trash arenaindex
1866 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1867 control P.
1868 
1869 Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
1870 allocated arena, obmalloc controls all the memory in slice AO.address :
1871 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1872 so P doesn't lie in that slice, so the macro correctly reports that P is not
1873 controlled by obmalloc.
1874 
1875 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1876 arena_object (one not currently associated with an allocated arena),
1877 AO.address is 0, and the second test in the macro reduces to:
1878 
1879     P < ARENA_SIZE
1880 
1881 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1882 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1883 of the test still passes, and the third clause (AO.address != 0) is necessary
1884 to get the correct result:  AO.address is 0 in this case, so the macro
1885 correctly reports that P is not controlled by obmalloc (despite that P lies in
1886 slice AO.address : AO.address + ARENA_SIZE).
1887 
1888 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1889 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1890 corresponded to a currently-allocated arena, so the "P is not controlled by
1891 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1892 was impossible.
1893 
1894 Note that the logic is excruciating, and reading up possibly uninitialized
1895 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1896 creates problems for some memory debuggers.  The overwhelming advantage is
1897 that this test determines whether an arbitrary address is controlled by
1898 obmalloc in a small constant time, independent of the number of arenas
1899 obmalloc controls.  Since this test is needed at every entry point, it's
1900 extremely desirable that it be this fast.
1901 */
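/* A minimal sketch of the unsigned-arithmetic trick used below (illustrative
 * only, not part of obmalloc): if p < base, the unsigned subtraction wraps to
 * a huge value, so a single comparison covers both ends of the range. */
#if 0
static int
toy_in_range(uintptr_t p, uintptr_t base, size_t size)
{
    return (size_t)(p - base) < size;   /* i.e. base <= p < base + size */
}
#endif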
1902 
1903 static bool _Py_NO_SANITIZE_ADDRESS
1904             _Py_NO_SANITIZE_THREAD
1905             _Py_NO_SANITIZE_MEMORY
1906 address_in_range(OMState *state, void *p, poolp pool)
1907 {
1908     // Since address_in_range may be reading from memory which was not allocated
1909     // by Python, it is important that pool->arenaindex is read only once, as
1910     // another thread may be concurrently modifying the value without holding
1911     // the GIL. The following dance forces the compiler to read pool->arenaindex
1912     // only once.
1913     uint arenaindex = *((volatile uint *)&pool->arenaindex);
1914     return arenaindex < maxarenas &&
1915         (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
1916         allarenas[arenaindex].address != 0;
1917 }
1918 
1919 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1920 
1921 /*==========================================================================*/
1922 
1923 // Called when freelist is exhausted.  Extend the freelist if there is
1924 // space for a block.  Otherwise, remove this pool from usedpools.
1925 static void
1926 pymalloc_pool_extend(poolp pool, uint size)
1927 {
1928     if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1929         /* There is room for another block. */
1930         pool->freeblock = (pymem_block*)pool + pool->nextoffset;
1931         pool->nextoffset += INDEX2SIZE(size);
1932         *(pymem_block **)(pool->freeblock) = NULL;
1933         return;
1934     }
1935 
1936     /* Pool is full, unlink from used pools. */
1937     poolp next;
1938     next = pool->nextpool;
1939     pool = pool->prevpool;
1940     next->prevpool = pool;
1941     pool->nextpool = next;
1942 }
1943 
1944 /* Called when pymalloc_alloc() cannot allocate a block from the usedpools
1945  * list.  This function takes a new pool and allocates a block from it.
1946  */
1947 static void*
1948 allocate_from_new_pool(OMState *state, uint size)
1949 {
1950     /* There isn't a pool of the right size class immediately
1951      * available:  use a free pool.
1952      */
1953     if (UNLIKELY(usable_arenas == NULL)) {
1954         /* No arena has a free pool:  allocate a new arena. */
1955 #ifdef WITH_MEMORY_LIMITS
1956         if (narenas_currently_allocated >= MAX_ARENAS) {
1957             return NULL;
1958         }
1959 #endif
1960         usable_arenas = new_arena(state);
1961         if (usable_arenas == NULL) {
1962             return NULL;
1963         }
1964         usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1965         assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1966         nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1967     }
1968     assert(usable_arenas->address != 0);
1969 
1970     /* This arena already had the smallest nfreepools value, so decreasing
1971      * nfreepools doesn't change that, and we don't need to rearrange the
1972      * usable_arenas list.  However, if the arena becomes wholly allocated,
1973      * we need to remove its arena_object from usable_arenas.
1974      */
1975     assert(usable_arenas->nfreepools > 0);
1976     if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1977         /* It's the last arena with this count of free pools, so there won't be any left. */
1978         nfp2lasta[usable_arenas->nfreepools] = NULL;
1979     }
1980     /* If any free pools will remain, it will be the new smallest. */
1981     if (usable_arenas->nfreepools > 1) {
1982         assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1983         nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1984     }
1985 
1986     /* Try to get a cached free pool. */
1987     poolp pool = usable_arenas->freepools;
1988     if (LIKELY(pool != NULL)) {
1989         /* Unlink from cached pools. */
1990         usable_arenas->freepools = pool->nextpool;
1991         usable_arenas->nfreepools--;
1992         if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1993             /* Wholly allocated:  remove. */
1994             assert(usable_arenas->freepools == NULL);
1995             assert(usable_arenas->nextarena == NULL ||
1996                    usable_arenas->nextarena->prevarena ==
1997                    usable_arenas);
1998             usable_arenas = usable_arenas->nextarena;
1999             if (usable_arenas != NULL) {
2000                 usable_arenas->prevarena = NULL;
2001                 assert(usable_arenas->address != 0);
2002             }
2003         }
2004         else {
2005             /* nfreepools > 0:  it must be that freepools
2006              * isn't NULL, or that we haven't yet carved
2007              * off all the arena's pools for the first
2008              * time.
2009              */
2010             assert(usable_arenas->freepools != NULL ||
2011                    usable_arenas->pool_address <=
2012                    (pymem_block*)usable_arenas->address +
2013                        ARENA_SIZE - POOL_SIZE);
2014         }
2015     }
2016     else {
2017         /* Carve off a new pool. */
2018         assert(usable_arenas->nfreepools > 0);
2019         assert(usable_arenas->freepools == NULL);
2020         pool = (poolp)usable_arenas->pool_address;
2021         assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2022                                  ARENA_SIZE - POOL_SIZE);
2023         pool->arenaindex = (uint)(usable_arenas - allarenas);
2024         assert(&allarenas[pool->arenaindex] == usable_arenas);
2025         pool->szidx = DUMMY_SIZE_IDX;
2026         usable_arenas->pool_address += POOL_SIZE;
2027         --usable_arenas->nfreepools;
2028 
2029         if (usable_arenas->nfreepools == 0) {
2030             assert(usable_arenas->nextarena == NULL ||
2031                    usable_arenas->nextarena->prevarena ==
2032                    usable_arenas);
2033             /* Unlink the arena:  it is completely allocated. */
2034             usable_arenas = usable_arenas->nextarena;
2035             if (usable_arenas != NULL) {
2036                 usable_arenas->prevarena = NULL;
2037                 assert(usable_arenas->address != 0);
2038             }
2039         }
2040     }
2041 
2042     /* Frontlink to used pools. */
2043     pymem_block *bp;
2044     poolp next = usedpools[size + size]; /* == prev */
2045     pool->nextpool = next;
2046     pool->prevpool = next;
2047     next->nextpool = pool;
2048     next->prevpool = pool;
2049     pool->ref.count = 1;
2050     if (pool->szidx == size) {
2051         /* Luckily, this pool last contained blocks
2052          * of the same size class, so its header
2053          * and free list are already initialized.
2054          */
2055         bp = pool->freeblock;
2056         assert(bp != NULL);
2057         pool->freeblock = *(pymem_block **)bp;
2058         return bp;
2059     }
2060     /*
2061      * Initialize the pool header, set up the free list to
2062      * contain just the second block, and return the first
2063      * block.
2064      */
2065     pool->szidx = size;
2066     size = INDEX2SIZE(size);
2067     bp = (pymem_block *)pool + POOL_OVERHEAD;
2068     pool->nextoffset = POOL_OVERHEAD + (size << 1);
2069     pool->maxnextoffset = POOL_SIZE - size;
2070     pool->freeblock = bp + size;
2071     *(pymem_block **)(pool->freeblock) = NULL;
2072     return bp;
2073 }
2074 
2075 /* pymalloc allocator
2076 
2077    Return a pointer to newly allocated memory if pymalloc allocated memory.
2078 
2079    Return NULL if pymalloc failed to allocate the memory block: the request
2080    was too big, an error occurred in the code below (the caller then gets a
2081    last chance to serve the request via the raw allocator), or the max memory limit has been reached.
2082 */
2083 static inline void*
2084 pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2085 {
2086 #ifdef WITH_VALGRIND
2087     if (UNLIKELY(running_on_valgrind == -1)) {
2088         running_on_valgrind = RUNNING_ON_VALGRIND;
2089     }
2090     if (UNLIKELY(running_on_valgrind)) {
2091         return NULL;
2092     }
2093 #endif
2094 
2095     if (UNLIKELY(nbytes == 0)) {
2096         return NULL;
2097     }
2098     if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2099         return NULL;
2100     }
2101 
2102     uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2103     poolp pool = usedpools[size + size];
2104     pymem_block *bp;
2105 
2106     if (LIKELY(pool != pool->nextpool)) {
2107         /*
2108          * There is a used pool for this size class.
2109          * Pick up the head block of its free list.
2110          */
2111         ++pool->ref.count;
2112         bp = pool->freeblock;
2113         assert(bp != NULL);
2114 
2115         if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2116             // Reached the end of the free list, try to extend it.
2117             pymalloc_pool_extend(pool, size);
2118         }
2119     }
2120     else {
2121         /* There isn't a pool of the right size class immediately
2122          * available:  use a free pool.
2123          */
2124         bp = allocate_from_new_pool(state, size);
2125     }
2126 
2127     return (void *)bp;
2128 }
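/* Illustrative sketch of the size-class computation in pymalloc_alloc() above
 * (not part of obmalloc; assumes ALIGNMENT == 16, i.e. ALIGNMENT_SHIFT == 4).
 * Requests of 1..16 bytes share index 0, 17..32 bytes share index 1, and so
 * on; INDEX2SIZE() maps an index back to its rounded-up block size. */
#if 0
static unsigned int
toy_size_class(size_t nbytes)
{
    assert(nbytes > 0 && nbytes <= SMALL_REQUEST_THRESHOLD);
    return (unsigned int)(nbytes - 1) >> ALIGNMENT_SHIFT;
}
#endif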
2129 
2130 
2131 void *
2132 _PyObject_Malloc(void *ctx, size_t nbytes)
2133 {
2134     OMState *state = get_state();
2135     void* ptr = pymalloc_alloc(state, ctx, nbytes);
2136     if (LIKELY(ptr != NULL)) {
2137         return ptr;
2138     }
2139 
2140     ptr = PyMem_RawMalloc(nbytes);
2141     if (ptr != NULL) {
2142         raw_allocated_blocks++;
2143     }
2144     return ptr;
2145 }
2146 
2147 
2148 void *
2149 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2150 {
2151     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2152     size_t nbytes = nelem * elsize;
2153 
2154     OMState *state = get_state();
2155     void* ptr = pymalloc_alloc(state, ctx, nbytes);
2156     if (LIKELY(ptr != NULL)) {
2157         memset(ptr, 0, nbytes);
2158         return ptr;
2159     }
2160 
2161     ptr = PyMem_RawCalloc(nelem, elsize);
2162     if (ptr != NULL) {
2163         raw_allocated_blocks++;
2164     }
2165     return ptr;
2166 }
2167 
2168 
2169 static void
2170 insert_to_usedpool(OMState *state, poolp pool)
2171 {
2172     assert(pool->ref.count > 0);            /* else the pool is empty */
2173 
2174     uint size = pool->szidx;
2175     poolp next = usedpools[size + size];
2176     poolp prev = next->prevpool;
2177 
2178     /* insert pool before next:   prev <-> pool <-> next */
2179     pool->nextpool = next;
2180     pool->prevpool = prev;
2181     next->prevpool = pool;
2182     prev->nextpool = pool;
2183 }
2184 
2185 static void
2186 insert_to_freepool(OMState *state, poolp pool)
2187 {
2188     poolp next = pool->nextpool;
2189     poolp prev = pool->prevpool;
2190     next->prevpool = prev;
2191     prev->nextpool = next;
2192 
2193     /* Link the pool to freepools.  This is a singly-linked
2194      * list, and pool->prevpool isn't used there.
2195      */
2196     struct arena_object *ao = &allarenas[pool->arenaindex];
2197     pool->nextpool = ao->freepools;
2198     ao->freepools = pool;
2199     uint nf = ao->nfreepools;
2200     /* If this is the rightmost arena with this number of free pools,
2201      * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2202      * are no arenas in usable_arenas with that value.
2203      */
2204     struct arena_object* lastnf = nfp2lasta[nf];
2205     assert((nf == 0 && lastnf == NULL) ||
2206            (nf > 0 &&
2207             lastnf != NULL &&
2208             lastnf->nfreepools == nf &&
2209             (lastnf->nextarena == NULL ||
2210              nf < lastnf->nextarena->nfreepools)));
2211     if (lastnf == ao) {  /* it is the rightmost */
2212         struct arena_object* p = ao->prevarena;
2213         nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2214     }
2215     ao->nfreepools = ++nf;
2216 
2217     /* All the rest is arena management.  We just freed
2218      * a pool, and there are 4 cases for arena mgmt:
2219      * 1. If all the pools are free, return the arena to
2220      *    the system free().  Except if this is the last
2221      *    arena in the list, keep it to avoid thrashing:
2222      *    keeping one wholly free arena in the list avoids
2223      *    pathological cases where a simple loop would
2224      *    otherwise provoke needing to allocate and free an
2225      *    arena on every iteration.  See bpo-37257.
2226      * 2. If this is the only free pool in the arena,
2227      *    add the arena back to the `usable_arenas` list.
2228      * 3. If the "next" arena has a smaller count of free
2229      *    pools, we have to "slide this arena right" to
2230      *    restore the invariant that usable_arenas is sorted in order of
2231      *    nfreepools.
2232      * 4. Else there's nothing more to do.
2233      */
2234     if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2235         /* Case 1.  First unlink ao from usable_arenas.
2236          */
2237         assert(ao->prevarena == NULL ||
2238                ao->prevarena->address != 0);
2239         assert(ao ->nextarena == NULL ||
2240                ao->nextarena->address != 0);
2241 
2242         /* Fix the pointer in the prevarena, or the
2243          * usable_arenas pointer.
2244          */
2245         if (ao->prevarena == NULL) {
2246             usable_arenas = ao->nextarena;
2247             assert(usable_arenas == NULL ||
2248                    usable_arenas->address != 0);
2249         }
2250         else {
2251             assert(ao->prevarena->nextarena == ao);
2252             ao->prevarena->nextarena =
2253                 ao->nextarena;
2254         }
2255         /* Fix the pointer in the nextarena. */
2256         if (ao->nextarena != NULL) {
2257             assert(ao->nextarena->prevarena == ao);
2258             ao->nextarena->prevarena =
2259                 ao->prevarena;
2260         }
2261         /* Record that this arena_object slot is
2262          * available to be reused.
2263          */
2264         ao->nextarena = unused_arena_objects;
2265         unused_arena_objects = ao;
2266 
2267 #if WITH_PYMALLOC_RADIX_TREE
2268         /* mark arena region as not under control of obmalloc */
2269         arena_map_mark_used(state, ao->address, 0);
2270 #endif
2271 
2272         /* Free the entire arena. */
2273         _PyObject_Arena.free(_PyObject_Arena.ctx,
2274                              (void *)ao->address, ARENA_SIZE);
2275         ao->address = 0;                        /* mark unassociated */
2276         --narenas_currently_allocated;
2277 
2278         return;
2279     }
2280 
2281     if (nf == 1) {
2282         /* Case 2.  Put ao at the head of
2283          * usable_arenas.  Note that because
2284          * ao->nfreepools was 0 before, ao isn't
2285          * currently on the usable_arenas list.
2286          */
2287         ao->nextarena = usable_arenas;
2288         ao->prevarena = NULL;
2289         if (usable_arenas)
2290             usable_arenas->prevarena = ao;
2291         usable_arenas = ao;
2292         assert(usable_arenas->address != 0);
2293         if (nfp2lasta[1] == NULL) {
2294             nfp2lasta[1] = ao;
2295         }
2296 
2297         return;
2298     }
2299 
2300     /* If this arena is now out of order, we need to keep
2301      * the list sorted.  The list is kept sorted so that
2302      * the "most full" arenas are used first, which allows
2303      * the nearly empty arenas to be completely freed.  In
2304      * a few un-scientific tests, it seems like this
2305      * approach allowed a lot more memory to be freed.
2306      */
2307     /* If this is the only arena with nf, record that. */
2308     if (nfp2lasta[nf] == NULL) {
2309         nfp2lasta[nf] = ao;
2310     } /* else the rightmost with nf doesn't change */
2311     /* If this was the rightmost of the old size, it remains in place. */
2312     if (ao == lastnf) {
2313         /* Case 4.  Nothing to do. */
2314         return;
2315     }
2316     /* If ao were the only arena in the list, the last block would have
2317      * gotten us out.
2318      */
2319     assert(ao->nextarena != NULL);
2320 
2321     /* Case 3:  We have to move the arena towards the end of the list,
2322      * because it has more free pools than the arena to its right.  It needs
2323      * to move to follow lastnf.
2324      * First unlink ao from usable_arenas.
2325      */
2326     if (ao->prevarena != NULL) {
2327         /* ao isn't at the head of the list */
2328         assert(ao->prevarena->nextarena == ao);
2329         ao->prevarena->nextarena = ao->nextarena;
2330     }
2331     else {
2332         /* ao is at the head of the list */
2333         assert(usable_arenas == ao);
2334         usable_arenas = ao->nextarena;
2335     }
2336     ao->nextarena->prevarena = ao->prevarena;
2337     /* And insert after lastnf. */
2338     ao->prevarena = lastnf;
2339     ao->nextarena = lastnf->nextarena;
2340     if (ao->nextarena != NULL) {
2341         ao->nextarena->prevarena = ao;
2342     }
2343     lastnf->nextarena = ao;
2344     /* Verify that the swaps worked. */
2345     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2346     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2347     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2348     assert((usable_arenas == ao && ao->prevarena == NULL)
2349            || ao->prevarena->nextarena == ao);
2350 }
2351 
2352 /* Free a memory block allocated by pymalloc_alloc().
2353    Return 1 if it was freed.
2354    Return 0 if the block was not allocated by pymalloc_alloc(). */
2355 static inline int
2356 pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2357 {
2358     assert(p != NULL);
2359 
2360 #ifdef WITH_VALGRIND
2361     if (UNLIKELY(running_on_valgrind > 0)) {
2362         return 0;
2363     }
2364 #endif
2365 
2366     poolp pool = POOL_ADDR(p);
2367     if (UNLIKELY(!address_in_range(state, p, pool))) {
2368         return 0;
2369     }
2370     /* We allocated this address. */
2371 
2372     /* Link p to the start of the pool's freeblock list.  Since
2373      * the pool had at least the p block outstanding, the pool
2374      * wasn't empty (so it's already in a usedpools[] list, or
2375      * was full and is in no list -- it's not in the freeblocks
2376      * list in any case).
2377      */
2378     assert(pool->ref.count > 0);            /* else it was empty */
2379     pymem_block *lastfree = pool->freeblock;
2380     *(pymem_block **)p = lastfree;
2381     pool->freeblock = (pymem_block *)p;
2382     pool->ref.count--;
2383 
2384     if (UNLIKELY(lastfree == NULL)) {
2385         /* Pool was full, so doesn't currently live in any list:
2386          * link it to the front of the appropriate usedpools[] list.
2387          * This mimics LRU pool usage for new allocations and
2388          * targets optimal filling when several pools contain
2389          * blocks of the same size class.
2390          */
2391         insert_to_usedpool(state, pool);
2392         return 1;
2393     }
2394 
2395     /* freeblock wasn't NULL, so the pool wasn't full,
2396      * and the pool is in a usedpools[] list.
2397      */
2398     if (LIKELY(pool->ref.count != 0)) {
2399         /* pool isn't empty:  leave it in usedpools */
2400         return 1;
2401     }
2402 
2403     /* Pool is now empty:  unlink from usedpools, and
2404      * link to the front of freepools.  This ensures that
2405      * previously freed pools will be allocated later
2406      * (being not referenced, they are perhaps paged out).
2407      */
2408     insert_to_freepool(state, pool);
2409     return 1;
2410 }
2411 
2412 
2413 void
2414 _PyObject_Free(void *ctx, void *p)
2415 {
2416     /* PyObject_Free(NULL) has no effect */
2417     if (p == NULL) {
2418         return;
2419     }
2420 
2421     OMState *state = get_state();
2422     if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2423         /* pymalloc didn't allocate this address */
2424         PyMem_RawFree(p);
2425         raw_allocated_blocks--;
2426     }
2427 }
2428 
2429 
2430 /* pymalloc realloc.
2431 
2432    If nbytes==0, then as the Python docs promise, we do not treat this like
2433    free(p), and return a non-NULL result.
2434 
2435    Return 1 if pymalloc reallocated memory and wrote the new pointer into
2436    newptr_p.
2437 
2438    Return 0 if pymalloc didn't allocate p. */
2439 static int
2440 pymalloc_realloc(OMState *state, void *ctx,
2441                  void **newptr_p, void *p, size_t nbytes)
2442 {
2443     void *bp;
2444     poolp pool;
2445     size_t size;
2446 
2447     assert(p != NULL);
2448 
2449 #ifdef WITH_VALGRIND
2450     /* Treat running_on_valgrind == -1 the same as 0 */
2451     if (UNLIKELY(running_on_valgrind > 0)) {
2452         return 0;
2453     }
2454 #endif
2455 
2456     pool = POOL_ADDR(p);
2457     if (!address_in_range(state, p, pool)) {
2458         /* pymalloc is not managing this block.
2459 
2460            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2461            over this block.  However, if we do, we need to copy the valid data
2462            from the C-managed block to one of our blocks, and there's no
2463            portable way to know how much of the memory space starting at p is
2464            valid.
2465 
2466            As bug 1185883 pointed out the hard way, it's possible that the
2467            C-managed block is "at the end" of allocated VM space, so that a
2468            memory fault can occur if we try to copy nbytes bytes starting at p.
2469            Instead we punt: let C continue to manage this block. */
2470         return 0;
2471     }
2472 
2473     /* pymalloc is in charge of this block */
2474     size = INDEX2SIZE(pool->szidx);
2475     if (nbytes <= size) {
2476         /* The block is staying the same or shrinking.
2477 
2478            If it's shrinking, there's a tradeoff: it costs cycles to copy the
2479            block to a smaller size class, but it wastes memory not to copy it.
2480 
2481            The compromise here is to copy on shrink only if at least 25% of
2482            size can be shaved off. */
2483         if (4 * nbytes > 3 * size) {
2484             /* It's the same, or shrinking and new/old > 3/4. */
2485             *newptr_p = p;
2486             return 1;
2487         }
2488         size = nbytes;
2489     }
2490 
2491     bp = _PyObject_Malloc(ctx, nbytes);
2492     if (bp != NULL) {
2493         memcpy(bp, p, size);
2494         _PyObject_Free(ctx, p);
2495     }
2496     *newptr_p = bp;
2497     return 1;
2498 }
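/* Illustrative arithmetic for the shrink heuristic in pymalloc_realloc()
 * above (not part of obmalloc): with a 512-byte block, a realloc down to 384
 * bytes or fewer (i.e. 4*nbytes <= 3*size) is copied into a smaller size
 * class; any larger request is left in place. */
#if 0
static int
toy_copy_on_shrink(size_t nbytes, size_t size)
{
    return nbytes <= size && !(4 * nbytes > 3 * size);
}
#endif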
2499 
2500 
2501 void *
2502 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2503 {
2504     void *ptr2;
2505 
2506     if (ptr == NULL) {
2507         return _PyObject_Malloc(ctx, nbytes);
2508     }
2509 
2510     OMState *state = get_state();
2511     if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2512         return ptr2;
2513     }
2514 
2515     return PyMem_RawRealloc(ptr, nbytes);
2516 }
2517 
2518 #else   /* ! WITH_PYMALLOC */
2519 
2520 /*==========================================================================*/
2521 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2522  * only be used by extensions that are compiled with pymalloc enabled. */
2523 
2524 Py_ssize_t
2525 _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2526 {
2527     return 0;
2528 }
2529 
2530 Py_ssize_t
2531 _Py_GetGlobalAllocatedBlocks(void)
2532 {
2533     return 0;
2534 }
2535 
2536 void
2537 _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2538 {
2539     return;
2540 }
2541 
2542 void
2543 _Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2544 {
2545     return;
2546 }
2547 
2548 #endif /* WITH_PYMALLOC */
2549 
2550 
2551 /*==========================================================================*/
2552 /* A cross-platform debugging allocator.  This doesn't manage memory directly;
2553  * it wraps a real allocator, adding extra debugging info to the memory blocks.
2554  */
2555 
2556 /* Uncomment this define to add the "serialno" field */
2557 /* #define PYMEM_DEBUG_SERIALNO */
2558 
2559 #ifdef PYMEM_DEBUG_SERIALNO
2560 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2561 
2562 /* serialno is always incremented via calling this routine.  The point is
2563  * to supply a single place to set a breakpoint.
2564  */
2565 static void
2566 bumpserialno(void)
2567 {
2568     ++serialno;
2569 }
2570 #endif
2571 
2572 #define SST SIZEOF_SIZE_T
2573 
2574 #ifdef PYMEM_DEBUG_SERIALNO
2575 #  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2576 #else
2577 #  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2578 #endif
2579 
2580 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2581 static size_t
2582 read_size_t(const void *p)
2583 {
2584     const uint8_t *q = (const uint8_t *)p;
2585     size_t result = *q++;
2586     int i;
2587 
2588     for (i = SST; --i > 0; ++q)
2589         result = (result << 8) | *q;
2590     return result;
2591 }
2592 
2593 /* Write n as a big-endian size_t, MSB at address p, LSB at
2594  * p + sizeof(size_t) - 1.
2595  */
2596 static void
2597 write_size_t(void *p, size_t n)
2598 {
2599     uint8_t *q = (uint8_t *)p + SST - 1;
2600     int i;
2601 
2602     for (i = SST; --i >= 0; --q) {
2603         *q = (uint8_t)(n & 0xff);
2604         n >>= 8;
2605     }
2606 }
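/* Illustrative sketch (not part of the allocator): write_size_t() and
 * read_size_t() round-trip, since both use the same big-endian layout. */
#if 0
static void
toy_size_t_roundtrip(void)
{
    uint8_t buf[SST];
    write_size_t(buf, (size_t)123456789);
    assert(read_size_t(buf) == (size_t)123456789);
}
#endif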
2607 
2608 static void
2609 fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
2610                bool is_alloc)
2611 {
2612 #ifdef Py_GIL_DISABLED
2613     if (api->api_id == 'o') {
2614         // Don't overwrite the first few bytes of a PyObject allocation in the
2615         // free-threaded build
2616         _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
2617         size_t debug_offset;
2618         if (is_alloc) {
2619             debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
2620         }
2621         else {
2622             char *alloc = (char *)data - 2*SST;  // start of the allocation
2623             debug_offset = _mi_ptr_page(alloc)->debug_offset;
2624         }
2625         debug_offset -= 2*SST;  // account for pymalloc extra bytes
2626         if (debug_offset < nbytes) {
2627             memset((char *)data + debug_offset, c, nbytes - debug_offset);
2628         }
2629         return;
2630     }
2631 #endif
2632     memset(data, c, nbytes);
2633 }
2634 
2635 /* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2636    fills them with useful stuff, here calling the underlying malloc's result p:
2637 
2638 p[0: S]
2639     Number of bytes originally asked for.  This is a size_t, big-endian (easier
2640     to read in a memory dump).
2641 p[S]
2642     API ID.  See PEP 445.  This is a character, but seems undocumented.
2643 p[S+1: 2*S]
2644     Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and reads.
2645 p[2*S: 2*S+n]
2646     The requested memory, filled with copies of PYMEM_CLEANBYTE.
2647     Used to catch reference to uninitialized memory.
2648     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2649     handled the request itself.
2650 p[2*S+n: 2*S+n+S]
2651     Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and reads.
2652 p[2*S+n+S: 2*S+n+2*S]
2653     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2654     and _PyMem_DebugRealloc.
2655     This is a big-endian size_t.
2656     If "bad memory" is detected later, the serial number gives an
2657     excellent way to set a breakpoint on the next run, to capture the
2658     instant at which this block was passed out.
2659 
2660 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2661 for 3 * S extra bytes, and omits the last serialno field.
2662 */
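/* Illustrative sketch (not part of the allocator; the name is made up): given
 * the data pointer handed back to the caller, recover the pieces of the
 * layout described above (default build, without the serialno field). */
#if 0
static void
toy_inspect_debug_block(const uint8_t *data)
{
    const uint8_t *base = data - 2*SST;      /* start of the padded block */
    size_t nbytes = read_size_t(base);       /* originally requested size */
    char api_id = (char)base[SST];           /* PEP 445 domain: 'r', 'm' or 'o' */
    const uint8_t *tail = data + nbytes;     /* SST forbidden bytes start here */
    (void)api_id;
    (void)tail;
}
#endif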
2663 
2664 static void *
2665 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2666 {
2667     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2668     uint8_t *p;           /* base address of malloc'ed pad block */
2669     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2670     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2671     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2672 
2673     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2674         /* integer overflow: can't represent total as a Py_ssize_t */
2675         return NULL;
2676     }
2677     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2678 
2679     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2680                 ^--- p    ^--- data   ^--- tail
2681        S: nbytes stored as size_t
2682        I: API identifier (1 byte)
2683        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2684        C: Clean bytes used later to store actual data
2685        N: Serial number stored as size_t
2686 
2687        If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2688        is omitted. */
2689 
2690     if (use_calloc) {
2691         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2692     }
2693     else {
2694         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2695     }
2696     if (p == NULL) {
2697         return NULL;
2698     }
2699     data = p + 2*SST;
2700 
2701 #ifdef PYMEM_DEBUG_SERIALNO
2702     bumpserialno();
2703 #endif
2704 
2705     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2706     write_size_t(p, nbytes);
2707     p[SST] = (uint8_t)api->api_id;
2708     memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2709 
2710     if (nbytes > 0 && !use_calloc) {
2711         fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
2712     }
2713 
2714     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2715     tail = data + nbytes;
2716     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2717 #ifdef PYMEM_DEBUG_SERIALNO
2718     write_size_t(tail + SST, serialno);
2719 #endif
2720 
2721     return data;
2722 }
2723 
2724 void *
2725 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2726 {
2727     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2728 }
2729 
2730 void *
2731 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2732 {
2733     size_t nbytes;
2734     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2735     nbytes = nelem * elsize;
2736     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2737 }
2738 
2739 
2740 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2741    particular, that the FORBIDDENBYTEs with the api ID are still intact).
2742    Then fills the original bytes with PYMEM_DEADBYTE.
2743    Then calls the underlying free.
2744 */
2745 void
2746 _PyMem_DebugRawFree(void *ctx, void *p)
2747 {
2748     /* PyMem_Free(NULL) has no effect */
2749     if (p == NULL) {
2750         return;
2751     }
2752 
2753     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2754     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2755     size_t nbytes;
2756 
2757     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2758     nbytes = read_size_t(q);
2759     nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
2760     memset(q, PYMEM_DEADBYTE, 2*SST);
2761     fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
2762     api->alloc.free(api->alloc.ctx, q);
2763 }
2764 
2765 
2766 void *
2767 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2768 {
2769     if (p == NULL) {
2770         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2771     }
2772 
2773     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2774     uint8_t *head;        /* base address of malloc'ed pad block */
2775     uint8_t *data;        /* pointer to data bytes */
2776     uint8_t *r;
2777     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2778     size_t total;         /* 2 * SST + nbytes + 2 * SST */
2779     size_t original_nbytes;
2780 #define ERASED_SIZE 64
2781 
2782     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2783 
2784     data = (uint8_t *)p;
2785     head = data - 2*SST;
2786     original_nbytes = read_size_t(head);
2787     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2788         /* integer overflow: can't represent total as a Py_ssize_t */
2789         return NULL;
2790     }
2791     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2792 
2793     tail = data + original_nbytes;
2794 #ifdef PYMEM_DEBUG_SERIALNO
2795     size_t block_serialno = read_size_t(tail + SST);
2796 #endif
2797 #ifndef Py_GIL_DISABLED
2798     /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
2799        ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
2800      */
2801     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2802     if (original_nbytes <= sizeof(save)) {
2803         memcpy(save, data, original_nbytes);
2804         memset(data - 2 * SST, PYMEM_DEADBYTE,
2805                original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2806     }
2807     else {
2808         memcpy(save, data, ERASED_SIZE);
2809         memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2810         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2811         memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2812                ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2813     }
2814 #endif
2815 
2816     /* Resize and add decorations. */
2817     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2818     if (r == NULL) {
2819         /* if realloc() failed: rewrite header and footer which have
2820            just been erased */
2821         nbytes = original_nbytes;
2822     }
2823     else {
2824         head = r;
2825 #ifdef PYMEM_DEBUG_SERIALNO
2826         bumpserialno();
2827         block_serialno = serialno;
2828 #endif
2829     }
2830     data = head + 2*SST;
2831 
2832     write_size_t(head, nbytes);
2833     head[SST] = (uint8_t)api->api_id;
2834     memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2835 
2836     tail = data + nbytes;
2837     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2838 #ifdef PYMEM_DEBUG_SERIALNO
2839     write_size_t(tail + SST, block_serialno);
2840 #endif
2841 
2842 #ifndef Py_GIL_DISABLED
2843     /* Restore saved bytes. */
2844     if (original_nbytes <= sizeof(save)) {
2845         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2846     }
2847     else {
2848         size_t i = original_nbytes - ERASED_SIZE;
2849         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2850         if (nbytes > i) {
2851             memcpy(data + i, &save[ERASED_SIZE],
2852                    Py_MIN(nbytes - i, ERASED_SIZE));
2853         }
2854     }
2855 #endif
2856 
2857     if (r == NULL) {
2858         return NULL;
2859     }
2860 
2861     if (nbytes > original_nbytes) {
2862         /* growing: mark new extra memory clean */
2863         memset(data + original_nbytes, PYMEM_CLEANBYTE,
2864                nbytes - original_nbytes);
2865     }
2866 
2867     return data;
2868 }
2869 
2870 static inline void
2871 _PyMem_DebugCheckGIL(const char *func)
2872 {
2873     if (!PyGILState_Check()) {
2874         _Py_FatalErrorFunc(func,
2875                            "Python memory allocator called "
2876                            "without holding the GIL");
2877     }
2878 }
2879 
2880 void *
2881 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2882 {
2883     _PyMem_DebugCheckGIL(__func__);
2884     return _PyMem_DebugRawMalloc(ctx, nbytes);
2885 }
2886 
2887 void *
2888 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2889 {
2890     _PyMem_DebugCheckGIL(__func__);
2891     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2892 }
2893 
2894 
2895 void
2896 _PyMem_DebugFree(void *ctx, void *ptr)
2897 {
2898     _PyMem_DebugCheckGIL(__func__);
2899     _PyMem_DebugRawFree(ctx, ptr);
2900 }
2901 
2902 
2903 void *
2904 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2905 {
2906     _PyMem_DebugCheckGIL(__func__);
2907     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2908 }
2909 
2910 /* Check the forbidden bytes on both ends of the memory allocated for p.
2911  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2912  * and call Py_FatalError to kill the program.
2913  * The API id is also checked.
2914  */
2915 static void
2916 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2917 {
2918     assert(p != NULL);
2919 
2920     const uint8_t *q = (const uint8_t *)p;
2921     size_t nbytes;
2922     const uint8_t *tail;
2923     int i;
2924     char id;
2925 
2926     /* Check the API id */
2927     id = (char)q[-SST];
2928     if (id != api) {
2929         _PyObject_DebugDumpAddress(p);
2930         _Py_FatalErrorFormat(func,
2931                              "bad ID: Allocated using API '%c', "
2932                              "verified using API '%c'",
2933                              id, api);
2934     }
2935 
2936     /* Check the stuff at the start of p first:  if there's underwrite
2937      * corruption, the number-of-bytes field may be nuts, and checking
2938      * the tail could lead to a segfault then.
2939      */
2940     for (i = SST-1; i >= 1; --i) {
2941         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2942             _PyObject_DebugDumpAddress(p);
2943             _Py_FatalErrorFunc(func, "bad leading pad byte");
2944         }
2945     }
2946 
2947     nbytes = read_size_t(q - 2*SST);
2948     tail = q + nbytes;
2949     for (i = 0; i < SST; ++i) {
2950         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2951             _PyObject_DebugDumpAddress(p);
2952             _Py_FatalErrorFunc(func, "bad trailing pad byte");
2953         }
2954     }
2955 }
2956 
2957 /* Display info to stderr about the memory block at p. */
2958 static void
2959 _PyObject_DebugDumpAddress(const void *p)
2960 {
2961     const uint8_t *q = (const uint8_t *)p;
2962     const uint8_t *tail;
2963     size_t nbytes;
2964     int i;
2965     int ok;
2966     char id;
2967 
2968     fprintf(stderr, "Debug memory block at address p=%p:", p);
2969     if (p == NULL) {
2970         fprintf(stderr, "\n");
2971         return;
2972     }
2973     id = (char)q[-SST];
2974     fprintf(stderr, " API '%c'\n", id);
2975 
2976     nbytes = read_size_t(q - 2*SST);
2977     fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
2978 
2979     /* In case this is nuts, check the leading pad bytes first. */
2980     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2981     ok = 1;
2982     for (i = 1; i <= SST-1; ++i) {
2983         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2984             ok = 0;
2985             break;
2986         }
2987     }
2988     if (ok)
2989         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2990     else {
2991         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2992             PYMEM_FORBIDDENBYTE);
2993         for (i = SST-1; i >= 1; --i) {
2994             const uint8_t byte = *(q-i);
2995             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2996             if (byte != PYMEM_FORBIDDENBYTE)
2997                 fputs(" *** OUCH", stderr);
2998             fputc('\n', stderr);
2999         }
3000 
3001         fputs("    Because memory is corrupted at the start, the "
3002               "count of bytes requested\n"
3003               "       may be bogus, and checking the trailing pad "
3004               "bytes may segfault.\n", stderr);
3005     }
3006 
3007     tail = q + nbytes;
3008     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3009     ok = 1;
3010     for (i = 0; i < SST; ++i) {
3011         if (tail[i] != PYMEM_FORBIDDENBYTE) {
3012             ok = 0;
3013             break;
3014         }
3015     }
3016     if (ok)
3017         fputs("FORBIDDENBYTE, as expected.\n", stderr);
3018     else {
3019         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3020                 PYMEM_FORBIDDENBYTE);
3021         for (i = 0; i < SST; ++i) {
3022             const uint8_t byte = tail[i];
3023             fprintf(stderr, "        at tail+%d: 0x%02x",
3024                     i, byte);
3025             if (byte != PYMEM_FORBIDDENBYTE)
3026                 fputs(" *** OUCH", stderr);
3027             fputc('\n', stderr);
3028         }
3029     }
3030 
3031 #ifdef PYMEM_DEBUG_SERIALNO
3032     size_t serial = read_size_t(tail + SST);
3033     fprintf(stderr,
3034             "    The block was made by call #%zu to debug malloc/realloc.\n",
3035             serial);
3036 #endif
3037 
3038     if (nbytes > 0) {
3039         i = 0;
3040         fputs("    Data at p:", stderr);
3041         /* print up to 8 bytes at the start */
3042         while (q < tail && i < 8) {
3043             fprintf(stderr, " %02x", *q);
3044             ++i;
3045             ++q;
3046         }
3047         /* and up to 8 at the end */
3048         if (q < tail) {
3049             if (tail - q > 8) {
3050                 fputs(" ...", stderr);
3051                 q = tail - 8;
3052             }
3053             while (q < tail) {
3054                 fprintf(stderr, " %02x", *q);
3055                 ++q;
3056             }
3057         }
3058         fputc('\n', stderr);
3059     }
3060     fputc('\n', stderr);
3061 
3062     fflush(stderr);
3063     _PyMem_DumpTraceback(fileno(stderr), p);
3064 }
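
#if 0
/* Illustrative sketch, excluded from the build: given a pointer p obtained
 * from a debug-hooked allocator, these are the fields the dump above reads.
 * The helper name is ours; read_size_t(), SST and PYMEM_FORBIDDENBYTE are the
 * file's own helpers and constants, and the 'r'/'m'/'o' tags are the debug
 * hooks' domain ids.
 */
static void
debug_block_layout_sketch(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    size_t nbytes = read_size_t(q - 2*SST);   /* originally requested size  */
    char api = (char)q[-SST];                 /* domain tag ('r', 'm', 'o') */
    const uint8_t *tail = q + nbytes;         /* first trailing pad byte    */

    /* Leading pads live at q[-SST+1 .. -1], trailing pads at tail[0 .. SST-1];
       every pad byte must equal PYMEM_FORBIDDENBYTE for a healthy block.
       With PYMEM_DEBUG_SERIALNO, the allocation serial sits at tail + SST. */
    fprintf(stderr, "%zu bytes requested, API '%c', tail at %p\n",
            nbytes, api, (void *)tail);
}
#endif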


static size_t
printone(FILE *out, const char* msg, size_t value)
{
    int i, k;
    char buf[100];
    size_t origvalue = value;

    fputs(msg, out);
    for (i = (int)strlen(msg); i < 35; ++i)
        fputc(' ', out);
    fputc('=', out);

    /* Write the value with commas. */
    i = 22;
    buf[i--] = '\0';
    buf[i--] = '\n';
    k = 3;
    do {
        size_t nextvalue = value / 10;
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
        value = nextvalue;
        buf[i--] = (char)(digit + '0');
        --k;
        if (k == 0 && value && i >= 0) {
            k = 3;
            buf[i--] = ',';
        }
    } while (value && i >= 0);

    while (i >= 0)
        buf[i--] = ' ';
    fputs(buf, out);

    return origvalue;
}
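
#if 0
/* Minimal sketch, excluded from the build: the same right-to-left comma
 * grouping that printone() uses, written as a self-contained helper with a
 * name of our own.  E.g. format_commas(1234567, buf) yields "1,234,567".
 */
static void
format_commas(size_t value, char *out /* >= 28 bytes */)
{
    char tmp[28];                   /* 20 digits + 6 commas + NUL fits */
    int i = (int)sizeof(tmp) - 1;
    int k = 3;                      /* digits until the next comma */
    tmp[i--] = '\0';
    do {
        tmp[i--] = (char)('0' + value % 10);
        value /= 10;
        if (--k == 0 && value) {    /* more digits follow: insert a comma */
            k = 3;
            tmp[i--] = ',';
        }
    } while (value);
    memcpy(out, &tmp[i + 1], sizeof(tmp) - (size_t)(i + 1));
}
#endif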

void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks, size_t sizeof_block)
{
    char buf1[128];
    char buf2[128];
    PyOS_snprintf(buf1, sizeof(buf1),
                  "%d %ss * %zd bytes each",
                  num_blocks, block_name, sizeof_block);
    PyOS_snprintf(buf2, sizeof(buf2),
                  "%48s ", buf1);
    (void)printone(out, buf2, num_blocks * sizeof_block);
}
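
#if 0
/* Hedged usage sketch, excluded from the build: roughly how an object
 * implementation reports its free list through _PyDebugAllocatorStats().
 * The "PyFoo" name, the label and the count are made up for illustration.
 */
static void
_PyFoo_DebugMallocStats(FILE *out)
{
    int numfree = 42;                      /* length of the type's free list */
    _PyDebugAllocatorStats(out,
                           "free PyFooObject",
                           numfree, sizeof(PyObject));
}
#endif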

// Return true if the obmalloc state structure is heap allocated,
// by PyMem_RawCalloc().  For the main interpreter, this structure is
// allocated in the BSS.  Allocating that way gives some memory savings
// and a small performance win (at least on a demand paged OS).  On
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
// memory is for the arena_map_top array.  Since normally only one entry
// of that array is used, only one page of resident memory is actually
// used, rather than the full 256 kB.
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{
#if WITH_PYMALLOC
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
#else
    return false;
#endif
}
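
#if 0
/* Hedged sketch, excluded from the build: at interpreter finalization,
 * something along these lines decides whether the obmalloc state has to be
 * handed back to PyMem_RawFree().  The function name is hypothetical; only
 * the use of _PyMem_obmalloc_state_on_heap() mirrors the code above.
 */
static void
finalize_obmalloc_state_sketch(PyInterpreterState *interp)
{
    if (_PyMem_obmalloc_state_on_heap(interp)) {
        /* per-interpreter state that came from PyMem_RawCalloc() in
           _PyMem_init_obmalloc() below: release it */
        PyMem_RawFree(interp->obmalloc);
    }
    /* otherwise the state lives in the BSS (obmalloc_state_main) and must
       not be freed */
    interp->obmalloc = NULL;
}
#endif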

#ifdef WITH_PYMALLOC
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
    // initialize the obmalloc->pools structure.  This must be done
    // before the obmalloc alloc/free functions can be called.
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
        _obmalloc_pools_INIT(interp->obmalloc->pools);
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
}
#endif /* WITH_PYMALLOC */

int _PyMem_init_obmalloc(PyInterpreterState *interp)
{
#ifdef WITH_PYMALLOC
    /* Initialize obmalloc, but only for subinterpreters,
       since the main interpreter is initialized statically. */
    if (_Py_IsMainInterpreter(interp)
            || _PyInterpreterState_HasFeature(interp,
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
        interp->obmalloc = &obmalloc_state_main;
        if (!obmalloc_state_initialized) {
            init_obmalloc_pools(interp);
            obmalloc_state_initialized = true;
        }
    } else {
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
        if (interp->obmalloc == NULL) {
            return -1;
        }
        init_obmalloc_pools(interp);
    }
#endif /* WITH_PYMALLOC */
    return 0; // success
}
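
#if 0
/* Hedged example, excluded from the build: how an embedder ends up on the
 * per-interpreter branch above.  Py_NewInterpreterFromConfig() with
 * use_main_obmalloc == 0 leaves Py_RTFLAGS_USE_MAIN_OBMALLOC unset, so
 * _PyMem_init_obmalloc() allocates a fresh _obmalloc_state with
 * PyMem_RawCalloc().  Error handling is trimmed to a sketch.
 */
static int
spawn_isolated_interpreter_sketch(void)
{
    PyInterpreterConfig config = {
        .use_main_obmalloc = 0,              /* own obmalloc state */
        .allow_fork = 0,
        .allow_exec = 0,
        .allow_threads = 1,
        .allow_daemon_threads = 0,
        .check_multi_interp_extensions = 1,  /* required when not sharing obmalloc */
        .gil = PyInterpreterConfig_OWN_GIL,
    };
    PyThreadState *tstate = NULL;
    PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config);
    if (PyStatus_Exception(status)) {
        return -1;
    }
    Py_EndInterpreter(tstate);
    return 0;
}
#endif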


#ifdef WITH_PYMALLOC

static void
free_obmalloc_arenas(PyInterpreterState *interp)
{
    OMState *state = interp->obmalloc;
    for (uint i = 0; i < maxarenas; ++i) {
        // free each obmalloc memory arena
        struct arena_object *ao = &allarenas[i];
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
    }
    // free the array containing pointers to all arenas
    PyMem_RawFree(allarenas);
#if WITH_PYMALLOC_RADIX_TREE
#ifdef USE_INTERIOR_NODES
    // Free the middle and bottom nodes of the radix tree.  These are allocated
    // by arena_map_mark_used() but not freed when arenas are freed.
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
         if (mid == NULL) {
             continue;
         }
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
            if (bot == NULL) {
                continue;
            }
            PyMem_RawFree(bot);
         }
         PyMem_RawFree(mid);
    }
#endif
#endif
}

#ifdef Py_DEBUG
/* Is target in the list?  The list is traversed via the nextpool pointers.
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
 * list, else 0.
 */
static int
pool_is_in_list(const poolp target, poolp list)
{
    poolp origlist = list;
    assert(target != NULL);
    if (list == NULL)
        return 0;
    do {
        if (target == list)
            return 1;
        list = list->nextpool;
    } while (list != NULL && list != origlist);
    return 0;
}
#endif
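
#if 0
/* Illustrative check, excluded from the build and assuming a Py_DEBUG build
 * (pool_is_in_list() only exists there).  It exercises both list shapes the
 * comment above documents; the stack-allocated pool_header structs and this
 * helper are ours, purely for illustration.
 */
static void
pool_is_in_list_sketch(void)
{
    struct pool_header a, b, c;
    /* NULL-terminated chain: a -> b -> NULL */
    a.nextpool = &b;
    b.nextpool = NULL;
    assert(pool_is_in_list(&b, &a) == 1);
    assert(pool_is_in_list(&c, &a) == 0);

    /* circular chain: a -> b -> a -> ...; the origlist test stops the loop */
    b.nextpool = &a;
    assert(pool_is_in_list(&c, &a) == 0);
}
#endif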

#ifdef WITH_MIMALLOC
struct _alloc_stats {
    size_t allocated_blocks;
    size_t allocated_bytes;
    size_t allocated_with_overhead;
    size_t bytes_reserved;
    size_t bytes_committed;
};

static bool _collect_alloc_stats(
    const mi_heap_t* heap, const mi_heap_area_t* area,
    void* block, size_t block_size, void* arg)
{
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
    stats->allocated_blocks += area->used;
    stats->allocated_bytes += area->used * area->block_size;
    stats->allocated_with_overhead += area->used * area->full_block_size;
    stats->bytes_reserved += area->reserved;
    stats->bytes_committed += area->committed;
    return 1;
}

static void
py_mimalloc_print_stats(FILE *out)
{
    fprintf(out, "Small block threshold = %zd, in %u size classes.\n",
        MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
    fprintf(out, "Medium block threshold = %zd\n",
            MI_MEDIUM_OBJ_SIZE_MAX);
    fprintf(out, "Large object max size = %zd\n",
            MI_LARGE_OBJ_SIZE_MAX);

    mi_heap_t *heap = mi_heap_get_default();
    struct _alloc_stats stats;
    memset(&stats, 0, sizeof(stats));
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);

    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
}
#endif


static void
pymalloc_print_stats(FILE *out)
{
    OMState *state = get_state();

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uintptr_t base = allarenas[i].address;

        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += allarenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, allarenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Account for what all of those arena bytes are being used for. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    assert(narenas * ARENA_SIZE == total);

#if WITH_PYMALLOC_RADIX_TREE
    fputs("\narena map counts\n", out);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
    (void)printone(out, "Total", total);
#endif
#endif

}
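
/* Worked example of the accounting identity asserted above, with assumed
 * (not necessarily current) constants: ARENA_SIZE = 1,048,576, POOL_SIZE =
 * 16,384, POOL_OVERHEAD = 48.  Take one pool-aligned arena with 10 pools
 * carved out for the 32-byte size class and 54 pools still unused:
 *
 *   blocks per pool = (16,384 - 48) / 32 = 510   (16 bytes of quantization)
 *
 *   # bytes in allocated blocks     4,000 * 32 =    128,000
 *   # bytes in available blocks     1,100 * 32 =     35,200
 *   54 unused pools * 16,384 bytes             =    884,736
 *   # bytes lost to pool headers       10 * 48 =        480
 *   # bytes lost to quantization       10 * 16 =        160
 *   # bytes lost to arena alignment            =          0
 *   Total                                      =  1,048,576  == 1 * ARENA_SIZE
 */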

/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 *
 * Return 0 if the memory debug hooks are not installed or no statistics were
 * written into out, return 1 otherwise.
 */
int
_PyObject_DebugMallocStats(FILE *out)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        py_mimalloc_print_stats(out);
        return 1;
    }
    else
#endif
    if (_PyMem_PymallocEnabled()) {
        pymalloc_print_stats(out);
        return 1;
    }
    else {
        return 0;
    }
}
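
#if 0
/* Hedged usage sketch, excluded from the build: sys._debugmallocstats()
 * ultimately drives this entry point.  A C caller can also invoke it directly
 * once the runtime is initialized; it prints nothing and returns 0 when
 * neither pymalloc nor mimalloc is the active object allocator (for example
 * under PYTHONMALLOC=malloc).
 */
static void
dump_allocator_stats_sketch(void)
{
    if (!_PyObject_DebugMallocStats(stderr)) {
        fputs("object allocator stats unavailable for this build/config\n",
              stderr);
    }
}
#endif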

#endif /* #ifdef WITH_PYMALLOC */