1 /* Python's malloc wrappers (see pymem.h) */
2
3 #include "Python.h"
4 #include "pycore_code.h" // stats
5 #include "pycore_object.h" // _PyDebugAllocatorStats() definition
6 #include "pycore_obmalloc.h"
7 #include "pycore_pyerrors.h" // _Py_FatalErrorFormat()
8 #include "pycore_pymem.h"
9 #include "pycore_pystate.h" // _PyInterpreterState_GET
10 #include "pycore_obmalloc_init.h"
11
12 #include <stdlib.h> // malloc()
13 #include <stdbool.h>
14 #ifdef WITH_MIMALLOC
15 // Forward declarations of functions used in our mimalloc modifications
16 static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
17 static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
18 static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
19 static void _PyMem_mi_page_reclaimed(mi_page_t *page);
20 static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
21 # include "pycore_mimalloc.h"
22 # include "mimalloc/static.c"
23 # include "mimalloc/internal.h" // for stats
24 #endif
25
26 #if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
27 # error "Py_GIL_DISABLED requires WITH_MIMALLOC"
28 #endif
29
30 #undef uint
31 #define uint pymem_uint
32
33
34 /* Defined in tracemalloc.c */
35 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
36
37 static void _PyObject_DebugDumpAddress(const void *p);
38 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
39
40
41 static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
42 static void set_up_debug_hooks_unlocked(void);
43 static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
44 static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
45
46
47 /***************************************/
48 /* low-level allocator implementations */
49 /***************************************/
50
51 /* the default raw allocator (wraps malloc) */
52
53 void *
54 _PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
55 {
56 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
57 for malloc(0), which would be treated as an error. Some platforms would
58 return a pointer with no memory behind it, which would break pymalloc.
59 To solve these problems, allocate an extra byte. */
60 if (size == 0)
61 size = 1;
62 return malloc(size);
63 }
64
65 void *
66 _PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
67 {
68 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
69 for calloc(0, 0), which would be treated as an error. Some platforms
70 would return a pointer with no memory behind it, which would break
71 pymalloc. To solve these problems, allocate an extra byte. */
72 if (nelem == 0 || elsize == 0) {
73 nelem = 1;
74 elsize = 1;
75 }
76 return calloc(nelem, elsize);
77 }
78
79 void *
80 _PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
81 {
82 if (size == 0)
83 size = 1;
84 return realloc(ptr, size);
85 }
86
87 void
88 _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
89 {
90 free(ptr);
91 }
92
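/* Editorial note (not in the original source): the raw hooks above are
 * consumed through the public PyMem_Raw* API defined further down in this
 * file. A minimal usage sketch relying only on documented behaviour:
 *
 *     void *p = PyMem_RawMalloc(0);   // behaves like malloc(1): a valid,
 *                                     // unique pointer, not NULL
 *     p = PyMem_RawRealloc(p, 100);   // grow the block to 100 bytes
 *     PyMem_RawFree(p);               // NULL is also accepted and ignored
 */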
93 #ifdef WITH_MIMALLOC
94
95 static void
96 _PyMem_mi_page_clear_qsbr(mi_page_t *page)
97 {
98 #ifdef Py_GIL_DISABLED
99 // Clear the QSBR goal and remove the page from the QSBR linked list.
100 page->qsbr_goal = 0;
101 if (page->qsbr_node.next != NULL) {
102 llist_remove(&page->qsbr_node);
103 }
104 #endif
105 }
106
107 // Check if an empty, newly reclaimed page is safe to free now.
108 static bool
109 _PyMem_mi_page_is_safe_to_free(mi_page_t *page)
110 {
111 assert(mi_page_all_free(page));
112 #ifdef Py_GIL_DISABLED
113 assert(page->qsbr_node.next == NULL);
114 if (page->use_qsbr && page->qsbr_goal != 0) {
115 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
116 if (tstate == NULL) {
117 return false;
118 }
119 return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
120 }
121 #endif
122 return true;
123
124 }
125
126 static bool
127 _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
128 {
129 #ifdef Py_GIL_DISABLED
130 assert(mi_page_all_free(page));
131 if (page->use_qsbr) {
132 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
133 if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
134 _PyMem_mi_page_clear_qsbr(page);
135 _mi_page_free(page, pq, force);
136 return true;
137 }
138
139 _PyMem_mi_page_clear_qsbr(page);
140 page->retire_expire = 0;
141 page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
142 llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
143 return false;
144 }
145 #endif
146 _mi_page_free(page, pq, force);
147 return true;
148 }
149
150 static void
151 _PyMem_mi_page_reclaimed(mi_page_t *page)
152 {
153 #ifdef Py_GIL_DISABLED
154 assert(page->qsbr_node.next == NULL);
155 if (page->qsbr_goal != 0) {
156 if (mi_page_all_free(page)) {
157 assert(page->qsbr_node.next == NULL);
158 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
159 page->retire_expire = 0;
160 llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
161 }
162 else {
163 page->qsbr_goal = 0;
164 }
165 }
166 #endif
167 }
168
169 static void
170 _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
171 {
172 #ifdef Py_GIL_DISABLED
173 if (!heap->page_use_qsbr) {
174 return;
175 }
176
177 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
178 struct llist_node *head = &tstate->mimalloc.page_list;
179 if (llist_empty(head)) {
180 return;
181 }
182
183 struct llist_node *node;
184 llist_for_each_safe(node, head) {
185 mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
186 if (!mi_page_all_free(page)) {
187 // We allocated from this page at some point after the delayed free
188 _PyMem_mi_page_clear_qsbr(page);
189 continue;
190 }
191
192 if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
193 return;
194 }
195
196 _PyMem_mi_page_clear_qsbr(page);
197 _mi_page_free(page, mi_page_queue_of(page), false);
198 }
199 #endif
200 }
201
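/* Editorial sketch (not in the original source): how the QSBR helpers above
 * fit together in the free-threaded build.
 *
 *   1. A page becomes empty; _PyMem_mi_page_maybe_free() tags it with a goal
 *      from _Py_qsbr_deferred_advance() and appends it to the owning
 *      thread's tstate->mimalloc.page_list instead of freeing it.
 *   2. Later, _PyMem_mi_heap_collect_qsbr() walks that list; once
 *      _Py_qsbr_poll() reports that every thread has passed the goal, the
 *      page is detached via _PyMem_mi_page_clear_qsbr() and handed to
 *      _mi_page_free().
 *   3. If the page is used again before the goal is reached, the collection
 *      pass just removes it from the list and keeps it alive;
 *      _PyMem_mi_page_reclaimed() does the analogous bookkeeping for pages
 *      taken back from the abandoned pool.
 */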
202 void *
203 _PyMem_MiMalloc(void *ctx, size_t size)
204 {
205 #ifdef Py_GIL_DISABLED
206 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
207 mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
208 return mi_heap_malloc(heap, size);
209 #else
210 return mi_malloc(size);
211 #endif
212 }
213
214 void *
215 _PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
216 {
217 #ifdef Py_GIL_DISABLED
218 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
219 mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
220 return mi_heap_calloc(heap, nelem, elsize);
221 #else
222 return mi_calloc(nelem, elsize);
223 #endif
224 }
225
226 void *
227 _PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
228 {
229 #ifdef Py_GIL_DISABLED
230 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
231 mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
232 return mi_heap_realloc(heap, ptr, size);
233 #else
234 return mi_realloc(ptr, size);
235 #endif
236 }
237
238 void
239 _PyMem_MiFree(void *ctx, void *ptr)
240 {
241 mi_free(ptr);
242 }
243
244 void *
245 _PyObject_MiMalloc(void *ctx, size_t nbytes)
246 {
247 #ifdef Py_GIL_DISABLED
248 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
249 mi_heap_t *heap = tstate->mimalloc.current_object_heap;
250 return mi_heap_malloc(heap, nbytes);
251 #else
252 return mi_malloc(nbytes);
253 #endif
254 }
255
256 void *
257 _PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
258 {
259 #ifdef Py_GIL_DISABLED
260 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
261 mi_heap_t *heap = tstate->mimalloc.current_object_heap;
262 return mi_heap_calloc(heap, nelem, elsize);
263 #else
264 return mi_calloc(nelem, elsize);
265 #endif
266 }
267
268
269 void *
270 _PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
271 {
272 #ifdef Py_GIL_DISABLED
273 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
274 mi_heap_t *heap = tstate->mimalloc.current_object_heap;
275 return mi_heap_realloc(heap, ptr, nbytes);
276 #else
277 return mi_realloc(ptr, nbytes);
278 #endif
279 }
280
281 void
282 _PyObject_MiFree(void *ctx, void *ptr)
283 {
284 mi_free(ptr);
285 }
286
287 #endif // WITH_MIMALLOC
288
289
290 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
291
292
293 #ifdef WITH_MIMALLOC
294 # define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
295 # define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
296 #endif
297
298 /* the pymalloc allocator */
299
300 // The actual implementation is further down.
301
302 #if defined(WITH_PYMALLOC)
303 void* _PyObject_Malloc(void *ctx, size_t size);
304 void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
305 void _PyObject_Free(void *ctx, void *p);
306 void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
307 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
308 #endif // WITH_PYMALLOC
309
310 #if defined(Py_GIL_DISABLED)
311 // Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
312 # define PYRAW_ALLOC MALLOC_ALLOC
313 # define PYMEM_ALLOC MIMALLOC_ALLOC
314 # define PYOBJ_ALLOC MIMALLOC_OBJALLOC
315 #elif defined(WITH_PYMALLOC)
316 # define PYRAW_ALLOC MALLOC_ALLOC
317 # define PYMEM_ALLOC PYMALLOC_ALLOC
318 # define PYOBJ_ALLOC PYMALLOC_ALLOC
319 #else
320 # define PYRAW_ALLOC MALLOC_ALLOC
321 # define PYMEM_ALLOC MALLOC_ALLOC
322 # define PYOBJ_ALLOC MALLOC_ALLOC
323 #endif
324
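/* Editorial example (not in the original source): each *_ALLOC macro is a
 * PyMemAllocatorEx initializer, so in the default build (WITH_PYMALLOC,
 * without Py_GIL_DISABLED) the selection above amounts to:
 *
 *     PyMemAllocatorEx raw = {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc,
 *                             _PyMem_RawRealloc, _PyMem_RawFree};
 *     PyMemAllocatorEx mem = {NULL, _PyObject_Malloc, _PyObject_Calloc,
 *                             _PyObject_Realloc, _PyObject_Free};  // pymalloc
 *     // the "obj" domain uses the same pymalloc table as "mem"
 */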
325
326 /* the default debug allocators */
327
328 // The actual implementation is further down.
329
330 void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
331 void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
332 void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
333 void _PyMem_DebugRawFree(void *ctx, void *ptr);
334
335 void* _PyMem_DebugMalloc(void *ctx, size_t size);
336 void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
337 void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
338 void _PyMem_DebugFree(void *ctx, void *p);
339
340 #define PYDBGRAW_ALLOC \
341 {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
342 #define PYDBGMEM_ALLOC \
343 {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
344 #define PYDBGOBJ_ALLOC \
345 {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
346
347 /* the low-level virtual memory allocator */
348
349 #ifdef WITH_PYMALLOC
350 # ifdef MS_WINDOWS
351 # include <windows.h>
352 # elif defined(HAVE_MMAP)
353 # include <sys/mman.h>
354 # ifdef MAP_ANONYMOUS
355 # define ARENAS_USE_MMAP
356 # endif
357 # endif
358 #endif
359
360 void *
361 _PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
362 {
363 #ifdef MS_WINDOWS
364 return VirtualAlloc(NULL, size,
365 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
366 #elif defined(ARENAS_USE_MMAP)
367 void *ptr;
368 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
369 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
370 if (ptr == MAP_FAILED)
371 return NULL;
372 assert(ptr != NULL);
373 return ptr;
374 #else
375 return malloc(size);
376 #endif
377 }
378
379 void
380 _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
381 #if defined(ARENAS_USE_MMAP)
382 size_t size
383 #else
384 size_t Py_UNUSED(size)
385 #endif
386 )
387 {
388 #ifdef MS_WINDOWS
389 /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
390 if (ptr == NULL) {
391 return;
392 }
393 VirtualFree(ptr, 0, MEM_RELEASE);
394 #elif defined(ARENAS_USE_MMAP)
395 /* Unlike free(), munmap() does not special-case NULL to noop. */
396 if (ptr == NULL) {
397 return;
398 }
399 munmap(ptr, size);
400 #else
401 free(ptr);
402 #endif
403 }
404
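/* Editorial note (not in the original source): these arena hooks are what
 * PyObject_GetArenaAllocator()/PyObject_SetArenaAllocator() (further down)
 * expose for customization. A hedged sketch of a tracing wrapper; the names
 * log_alloc/log_free and the fprintf logging are purely illustrative:
 *
 *     static PyObjectArenaAllocator prev;
 *     static void *log_alloc(void *ctx, size_t size) {
 *         fprintf(stderr, "arena alloc %zu\n", size);
 *         return prev.alloc(prev.ctx, size);
 *     }
 *     static void log_free(void *ctx, void *ptr, size_t size) {
 *         fprintf(stderr, "arena free %zu\n", size);
 *         prev.free(prev.ctx, ptr, size);
 *     }
 *     // PyObject_GetArenaAllocator(&prev);
 *     // PyObjectArenaAllocator hook = {NULL, log_alloc, log_free};
 *     // PyObject_SetArenaAllocator(&hook);
 */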
405 /*******************************************/
406 /* end low-level allocator implementations */
407 /*******************************************/
408
409
410 #if defined(__has_feature) /* Clang */
411 # if __has_feature(address_sanitizer) /* is ASAN enabled? */
412 # define _Py_NO_SANITIZE_ADDRESS \
413 __attribute__((no_sanitize("address")))
414 # endif
415 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */
416 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
417 # endif
418 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */
419 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
420 # endif
421 #elif defined(__GNUC__)
422 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */
423 # define _Py_NO_SANITIZE_ADDRESS \
424 __attribute__((no_sanitize_address))
425 # endif
426 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
427 // is provided only since GCC 7.
428 # if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
429 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
430 # endif
431 #endif
432
433 #ifndef _Py_NO_SANITIZE_ADDRESS
434 # define _Py_NO_SANITIZE_ADDRESS
435 #endif
436 #ifndef _Py_NO_SANITIZE_THREAD
437 # define _Py_NO_SANITIZE_THREAD
438 #endif
439 #ifndef _Py_NO_SANITIZE_MEMORY
440 # define _Py_NO_SANITIZE_MEMORY
441 #endif
442
443
444 #define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
445 #define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
446 #define _PyMem (_PyRuntime.allocators.standard.mem)
447 #define _PyObject (_PyRuntime.allocators.standard.obj)
448 #define _PyMem_Debug (_PyRuntime.allocators.debug)
449 #define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
450
451
452 /***************************/
453 /* managing the allocators */
454 /***************************/
455
456 static int
457 set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
458 PyMemAllocatorEx *old_alloc)
459 {
460 if (old_alloc != NULL) {
461 get_allocator_unlocked(domain, old_alloc);
462 }
463
464
465 PyMemAllocatorEx new_alloc;
466 switch(domain)
467 {
468 case PYMEM_DOMAIN_RAW:
469 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
470 break;
471 case PYMEM_DOMAIN_MEM:
472 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
473 break;
474 case PYMEM_DOMAIN_OBJ:
475 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
476 break;
477 default:
478 /* unknown domain */
479 return -1;
480 }
481 set_allocator_unlocked(domain, &new_alloc);
482 if (debug) {
483 set_up_debug_hooks_domain_unlocked(domain);
484 }
485 return 0;
486 }
487
488
489 #ifdef Py_DEBUG
490 static const int pydebug = 1;
491 #else
492 static const int pydebug = 0;
493 #endif
494
495 int
496 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
497 PyMemAllocatorEx *old_alloc)
498 {
499 PyMutex_Lock(&ALLOCATORS_MUTEX);
500 int res = set_default_allocator_unlocked(domain, pydebug, old_alloc);
501 PyMutex_Unlock(&ALLOCATORS_MUTEX);
502 return res;
503 }
504
505
506 int
507 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
508 {
509 if (name == NULL || *name == '\0') {
510 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
511 options): use default memory allocators */
512 *allocator = PYMEM_ALLOCATOR_DEFAULT;
513 }
514 else if (strcmp(name, "default") == 0) {
515 *allocator = PYMEM_ALLOCATOR_DEFAULT;
516 }
517 else if (strcmp(name, "debug") == 0) {
518 *allocator = PYMEM_ALLOCATOR_DEBUG;
519 }
520 #if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
521 else if (strcmp(name, "pymalloc") == 0) {
522 *allocator = PYMEM_ALLOCATOR_PYMALLOC;
523 }
524 else if (strcmp(name, "pymalloc_debug") == 0) {
525 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
526 }
527 #endif
528 #ifdef WITH_MIMALLOC
529 else if (strcmp(name, "mimalloc") == 0) {
530 *allocator = PYMEM_ALLOCATOR_MIMALLOC;
531 }
532 else if (strcmp(name, "mimalloc_debug") == 0) {
533 *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
534 }
535 #endif
536 #ifndef Py_GIL_DISABLED
537 else if (strcmp(name, "malloc") == 0) {
538 *allocator = PYMEM_ALLOCATOR_MALLOC;
539 }
540 else if (strcmp(name, "malloc_debug") == 0) {
541 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
542 }
543 #endif
544 else {
545 /* unknown allocator */
546 return -1;
547 }
548 return 0;
549 }
550
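/* Editorial example (not in the original source): this parser handles the
 * PYTHONMALLOC value at startup. A hedged sketch of the two-step use, with
 * the literal "pymalloc_debug" chosen only for illustration:
 *
 *     PyMemAllocatorName name;
 *     if (_PyMem_GetAllocatorName("pymalloc_debug", &name) < 0) {
 *         // unknown allocator name
 *     }
 *     else if (_PyMem_SetupAllocators(name) < 0) {
 *         // failed to install the requested allocators
 *     }
 */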
551
552 static int
553 set_up_allocators_unlocked(PyMemAllocatorName allocator)
554 {
555 switch (allocator) {
556 case PYMEM_ALLOCATOR_NOT_SET:
557 /* do nothing */
558 break;
559
560 case PYMEM_ALLOCATOR_DEFAULT:
561 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
562 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
563 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
564 _PyRuntime.allocators.is_debug_enabled = pydebug;
565 break;
566
567 case PYMEM_ALLOCATOR_DEBUG:
568 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
569 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
570 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
571 _PyRuntime.allocators.is_debug_enabled = 1;
572 break;
573
574 #ifdef WITH_PYMALLOC
575 case PYMEM_ALLOCATOR_PYMALLOC:
576 case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
577 {
578 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
579 set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
580
581 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
582 set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
583 set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
584
585 int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
586 _PyRuntime.allocators.is_debug_enabled = is_debug;
587 if (is_debug) {
588 set_up_debug_hooks_unlocked();
589 }
590 break;
591 }
592 #endif
593 #ifdef WITH_MIMALLOC
594 case PYMEM_ALLOCATOR_MIMALLOC:
595 case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
596 {
597 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
598 set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
599
600 PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
601 set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
602
603 PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
604 set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
605
606 int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
607 _PyRuntime.allocators.is_debug_enabled = is_debug;
608 if (is_debug) {
609 set_up_debug_hooks_unlocked();
610 }
611
612 break;
613 }
614 #endif
615
616 case PYMEM_ALLOCATOR_MALLOC:
617 case PYMEM_ALLOCATOR_MALLOC_DEBUG:
618 {
619 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
620 set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
621 set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
622 set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
623
624 int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
625 _PyRuntime.allocators.is_debug_enabled = is_debug;
626 if (is_debug) {
627 set_up_debug_hooks_unlocked();
628 }
629 break;
630 }
631
632 default:
633 /* unknown allocator */
634 return -1;
635 }
636
637 return 0;
638 }
639
640 int
641 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
642 {
643 PyMutex_Lock(&ALLOCATORS_MUTEX);
644 int res = set_up_allocators_unlocked(allocator);
645 PyMutex_Unlock(&ALLOCATORS_MUTEX);
646 return res;
647 }
648
649
650 static int
651 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
652 {
653 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
654 }
655
656
657 static const char*
658 get_current_allocator_name_unlocked(void)
659 {
660 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
661 #ifdef WITH_PYMALLOC
662 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
663 #endif
664 #ifdef WITH_MIMALLOC
665 PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
666 PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
667 #endif
668
669 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
670 pymemallocator_eq(&_PyMem, &malloc_alloc) &&
671 pymemallocator_eq(&_PyObject, &malloc_alloc))
672 {
673 return "malloc";
674 }
675 #ifdef WITH_PYMALLOC
676 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
677 pymemallocator_eq(&_PyMem, &pymalloc) &&
678 pymemallocator_eq(&_PyObject, &pymalloc))
679 {
680 return "pymalloc";
681 }
682 #endif
683 #ifdef WITH_MIMALLOC
684 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
685 pymemallocator_eq(&_PyMem, &mimalloc) &&
686 pymemallocator_eq(&_PyObject, &mimalloc_obj))
687 {
688 return "mimalloc";
689 }
690 #endif
691
692 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
693 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
694 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
695
696 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
697 pymemallocator_eq(&_PyMem, &dbg_mem) &&
698 pymemallocator_eq(&_PyObject, &dbg_obj))
699 {
700 /* Debug hooks installed */
701 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
702 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
703 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
704 {
705 return "malloc_debug";
706 }
707 #ifdef WITH_PYMALLOC
708 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
709 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
710 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
711 {
712 return "pymalloc_debug";
713 }
714 #endif
715 #ifdef WITH_MIMALLOC
716 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
717 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
718 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
719 {
720 return "mimalloc_debug";
721 }
722 #endif
723 }
724 return NULL;
725 }
726
727 const char*
728 _PyMem_GetCurrentAllocatorName(void)
729 {
730 PyMutex_Lock(&ALLOCATORS_MUTEX);
731 const char *name = get_current_allocator_name_unlocked();
732 PyMutex_Unlock(&ALLOCATORS_MUTEX);
733 return name;
734 }
735
736
737 int
738 _PyMem_DebugEnabled(void)
739 {
740 return _PyRuntime.allocators.is_debug_enabled;
741 }
742
743 #ifdef WITH_PYMALLOC
744 static int
745 _PyMem_PymallocEnabled(void)
746 {
747 if (_PyMem_DebugEnabled()) {
748 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
749 }
750 else {
751 return (_PyObject.malloc == _PyObject_Malloc);
752 }
753 }
754
755 #ifdef WITH_MIMALLOC
756 static int
757 _PyMem_MimallocEnabled(void)
758 {
759 #ifdef Py_GIL_DISABLED
760 return 1;
761 #else
762 if (_PyMem_DebugEnabled()) {
763 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
764 }
765 else {
766 return (_PyObject.malloc == _PyObject_MiMalloc);
767 }
768 #endif
769 }
770 #endif // WITH_MIMALLOC
771
772 #endif // WITH_PYMALLOC
773
774
775 static void
776 set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
777 {
778 PyMemAllocatorEx alloc;
779
780 if (domain == PYMEM_DOMAIN_RAW) {
781 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
782 return;
783 }
784
785 get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
786 alloc.ctx = &_PyMem_Debug.raw;
787 alloc.malloc = _PyMem_DebugRawMalloc;
788 alloc.calloc = _PyMem_DebugRawCalloc;
789 alloc.realloc = _PyMem_DebugRawRealloc;
790 alloc.free = _PyMem_DebugRawFree;
791 set_allocator_unlocked(domain, &alloc);
792 }
793 else if (domain == PYMEM_DOMAIN_MEM) {
794 if (_PyMem.malloc == _PyMem_DebugMalloc) {
795 return;
796 }
797
798 get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
799 alloc.ctx = &_PyMem_Debug.mem;
800 alloc.malloc = _PyMem_DebugMalloc;
801 alloc.calloc = _PyMem_DebugCalloc;
802 alloc.realloc = _PyMem_DebugRealloc;
803 alloc.free = _PyMem_DebugFree;
804 set_allocator_unlocked(domain, &alloc);
805 }
806 else if (domain == PYMEM_DOMAIN_OBJ) {
807 if (_PyObject.malloc == _PyMem_DebugMalloc) {
808 return;
809 }
810
811 get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
812 alloc.ctx = &_PyMem_Debug.obj;
813 alloc.malloc = _PyMem_DebugMalloc;
814 alloc.calloc = _PyMem_DebugCalloc;
815 alloc.realloc = _PyMem_DebugRealloc;
816 alloc.free = _PyMem_DebugFree;
817 set_allocator_unlocked(domain, &alloc);
818 }
819 }
820
821
822 static void
823 set_up_debug_hooks_unlocked(void)
824 {
825 set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
826 set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
827 set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
828 _PyRuntime.allocators.is_debug_enabled = 1;
829 }
830
831 void
832 PyMem_SetupDebugHooks(void)
833 {
834 PyMutex_Lock(&ALLOCATORS_MUTEX);
835 set_up_debug_hooks_unlocked();
836 PyMutex_Unlock(&ALLOCATORS_MUTEX);
837 }
838
839 static void
840 get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
841 {
842 switch(domain)
843 {
844 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
845 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
846 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
847 default:
848 /* unknown domain: set all attributes to NULL */
849 allocator->ctx = NULL;
850 allocator->malloc = NULL;
851 allocator->calloc = NULL;
852 allocator->realloc = NULL;
853 allocator->free = NULL;
854 }
855 }
856
857 static void
858 set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
859 {
860 switch(domain)
861 {
862 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
863 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
864 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
865 /* ignore unknown domain */
866 }
867 }
868
869 void
870 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
871 {
872 PyMutex_Lock(&ALLOCATORS_MUTEX);
873 get_allocator_unlocked(domain, allocator);
874 PyMutex_Unlock(&ALLOCATORS_MUTEX);
875 }
876
877 void
878 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
879 {
880 PyMutex_Lock(&ALLOCATORS_MUTEX);
881 set_allocator_unlocked(domain, allocator);
882 PyMutex_Unlock(&ALLOCATORS_MUTEX);
883 }
884
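/* Editorial example (not in the original source): the documented pattern for
 * wrapping an existing domain with PyMem_GetAllocator()/PyMem_SetAllocator().
 * The counting hook below is hypothetical; the calloc/realloc/free hooks
 * would follow the same shape:
 *
 *     static PyMemAllocatorEx orig;
 *     static size_t nallocs;
 *     static void *counting_malloc(void *ctx, size_t size) {
 *         nallocs++;
 *         return orig.malloc(orig.ctx, size);
 *     }
 *     // PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &orig);
 *     // PyMemAllocatorEx hook = orig;
 *     // hook.malloc = counting_malloc;
 *     // PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &hook);
 */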
885 void
886 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
887 {
888 PyMutex_Lock(&ALLOCATORS_MUTEX);
889 *allocator = _PyObject_Arena;
890 PyMutex_Unlock(&ALLOCATORS_MUTEX);
891 }
892
893 void
894 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
895 {
896 PyMutex_Lock(&ALLOCATORS_MUTEX);
897 _PyObject_Arena = *allocator;
898 PyMutex_Unlock(&ALLOCATORS_MUTEX);
899 }
900
901
902 /* Note that there is a possible, but very unlikely, race in any place
903 * below where we call one of the allocator functions. We access two
904 * fields in each case: "malloc", etc. and "ctx".
905 *
906 * It is unlikely that the allocator will be changed while one of those
907 * calls is happening, much less in that very narrow window.
908 * Furthermore, the likelihood of a race is drastically reduced by the
909 * fact that the allocator may not be changed after runtime init
910 * (except with a wrapper).
911 *
912 * With the above in mind, we currently don't worry about locking
913 * around these uses of the runtime-global allocators state. */
914
915
916 /*************************/
917 /* the "arena" allocator */
918 /*************************/
919
920 void *
921 _PyObject_VirtualAlloc(size_t size)
922 {
923 return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
924 }
925
926 void
927 _PyObject_VirtualFree(void *obj, size_t size)
928 {
929 _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
930 }
931
932
933 /***********************/
934 /* the "raw" allocator */
935 /***********************/
936
937 void *
938 PyMem_RawMalloc(size_t size)
939 {
940 /*
941 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
942 * Most python internals blindly use a signed Py_ssize_t to track
943 * things without checking for overflows or negatives.
944 * As size_t is unsigned, checking for size < 0 is not required.
945 */
946 if (size > (size_t)PY_SSIZE_T_MAX)
947 return NULL;
948 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
949 }
950
951 void *
952 PyMem_RawCalloc(size_t nelem, size_t elsize)
953 {
954 /* see PyMem_RawMalloc() */
955 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
956 return NULL;
957 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
958 }
959
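/* Editorial worked example (not in the original source): the division above
 * avoids computing nelem*elsize, which could wrap around in size_t. On a
 * 64-bit build, nelem = 1 << 62 and elsize = 4 multiply to 1 << 64, which
 * wraps to 0; the check nelem > PY_SSIZE_T_MAX / elsize (2**62 > ~2**61)
 * rejects the request before that overflow can happen. */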
960 void*
961 PyMem_RawRealloc(void *ptr, size_t new_size)
962 {
963 /* see PyMem_RawMalloc() */
964 if (new_size > (size_t)PY_SSIZE_T_MAX)
965 return NULL;
966 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
967 }
968
969 void PyMem_RawFree(void *ptr)
970 {
971 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
972 }
973
974
975 /***********************/
976 /* the "mem" allocator */
977 /***********************/
978
979 void *
980 PyMem_Malloc(size_t size)
981 {
982 /* see PyMem_RawMalloc() */
983 if (size > (size_t)PY_SSIZE_T_MAX)
984 return NULL;
985 OBJECT_STAT_INC_COND(allocations512, size < 512);
986 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
987 OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
988 OBJECT_STAT_INC(allocations);
989 return _PyMem.malloc(_PyMem.ctx, size);
990 }
991
992 void *
993 PyMem_Calloc(size_t nelem, size_t elsize)
994 {
995 /* see PyMem_RawMalloc() */
996 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
997 return NULL;
998 OBJECT_STAT_INC_COND(allocations512, elsize < 512);
999 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1000 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1001 OBJECT_STAT_INC(allocations);
1002 return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1003 }
1004
1005 void *
1006 PyMem_Realloc(void *ptr, size_t new_size)
1007 {
1008 /* see PyMem_RawMalloc() */
1009 if (new_size > (size_t)PY_SSIZE_T_MAX)
1010 return NULL;
1011 return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1012 }
1013
1014 void
1015 PyMem_Free(void *ptr)
1016 {
1017 OBJECT_STAT_INC(frees);
1018 _PyMem.free(_PyMem.ctx, ptr);
1019 }
1020
1021
1022 /***************************/
1023 /* pymem utility functions */
1024 /***************************/
1025
1026 wchar_t*
1027 _PyMem_RawWcsdup(const wchar_t *str)
1028 {
1029 assert(str != NULL);
1030
1031 size_t len = wcslen(str);
1032 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1033 return NULL;
1034 }
1035
1036 size_t size = (len + 1) * sizeof(wchar_t);
1037 wchar_t *str2 = PyMem_RawMalloc(size);
1038 if (str2 == NULL) {
1039 return NULL;
1040 }
1041
1042 memcpy(str2, str, size);
1043 return str2;
1044 }
1045
1046 char *
1047 _PyMem_RawStrdup(const char *str)
1048 {
1049 assert(str != NULL);
1050 size_t size = strlen(str) + 1;
1051 char *copy = PyMem_RawMalloc(size);
1052 if (copy == NULL) {
1053 return NULL;
1054 }
1055 memcpy(copy, str, size);
1056 return copy;
1057 }
1058
1059 char *
1060 _PyMem_Strdup(const char *str)
1061 {
1062 assert(str != NULL);
1063 size_t size = strlen(str) + 1;
1064 char *copy = PyMem_Malloc(size);
1065 if (copy == NULL) {
1066 return NULL;
1067 }
1068 memcpy(copy, str, size);
1069 return copy;
1070 }
1071
1072 /***********************************************/
1073 /* Delayed freeing support for Py_GIL_DISABLED */
1074 /***********************************************/
1075
1076 // So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1077 #define WORK_ITEMS_PER_CHUNK 254
1078
1079 // A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1080 struct _mem_work_item {
1081 uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1082 uint64_t qsbr_goal;
1083 };
1084
1085 // A fixed-size buffer of pointers to be freed
1086 struct _mem_work_chunk {
1087 // Linked list node of chunks in queue
1088 struct llist_node node;
1089
1090 Py_ssize_t rd_idx; // index of next item to read
1091 Py_ssize_t wr_idx; // index of next item to write
1092 struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1093 };
1094
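/* Editorial arithmetic check (not in the original source), assuming a 64-bit
 * platform: each _mem_work_item is 8 + 8 = 16 bytes, so the array is
 * 254 * 16 = 4064 bytes; adding the llist_node (two pointers, 16 bytes) and
 * the two Py_ssize_t indices (16 bytes) gives 4064 + 16 + 16 = 4096 bytes,
 * matching the WORK_ITEMS_PER_CHUNK comment above. */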
1095 static void
1096 free_work_item(uintptr_t ptr)
1097 {
1098 if (ptr & 0x01) {
1099 PyObject_Free((char *)(ptr - 1));
1100 }
1101 else {
1102 PyMem_Free((void *)ptr);
1103 }
1104 }
1105
1106 static void
1107 free_delayed(uintptr_t ptr)
1108 {
1109 #ifndef Py_GIL_DISABLED
1110 free_work_item(ptr);
1111 #else
1112 if (_PyRuntime.stoptheworld.world_stopped) {
1113 // Free immediately if the world is stopped, including during
1114 // interpreter shutdown.
1115 free_work_item(ptr);
1116 return;
1117 }
1118
1119 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1120 struct llist_node *head = &tstate->mem_free_queue;
1121
1122 struct _mem_work_chunk *buf = NULL;
1123 if (!llist_empty(head)) {
1124 // Try to re-use the last buffer
1125 buf = llist_data(head->prev, struct _mem_work_chunk, node);
1126 if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1127 // already full
1128 buf = NULL;
1129 }
1130 }
1131
1132 if (buf == NULL) {
1133 buf = PyMem_Calloc(1, sizeof(*buf));
1134 if (buf != NULL) {
1135 llist_insert_tail(head, &buf->node);
1136 }
1137 }
1138
1139 if (buf == NULL) {
1140 // failed to allocate a buffer, free immediately
1141 _PyEval_StopTheWorld(tstate->base.interp);
1142 free_work_item(ptr);
1143 _PyEval_StartTheWorld(tstate->base.interp);
1144 return;
1145 }
1146
1147 assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1148 uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
1149 buf->array[buf->wr_idx].ptr = ptr;
1150 buf->array[buf->wr_idx].qsbr_goal = seq;
1151 buf->wr_idx++;
1152
1153 if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1154 _PyMem_ProcessDelayed((PyThreadState *)tstate);
1155 }
1156 #endif
1157 }
1158
1159 void
1160 _PyMem_FreeDelayed(void *ptr)
1161 {
1162 assert(!((uintptr_t)ptr & 0x01));
1163 free_delayed((uintptr_t)ptr);
1164 }
1165
1166 void
1167 _PyObject_FreeDelayed(void *ptr)
1168 {
1169 assert(!((uintptr_t)ptr & 0x01));
1170 free_delayed(((uintptr_t)ptr)|0x01);
1171 }
1172
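/* Editorial sketch (not in the original source): the low bit of the stored
 * uintptr_t records which free function to call once the QSBR goal is
 * reached; the asserts above guarantee the bit is clear in the incoming
 * pointer.
 *
 *     _PyMem_FreeDelayed(p);      // stores (uintptr_t)p,      bit 0 == 0
 *     _PyObject_FreeDelayed(o);   // stores (uintptr_t)o | 1,  bit 0 == 1
 *     // later, free_work_item() dispatches on that bit:
 *     //   bit set   -> PyObject_Free((char *)(ptr - 1))
 *     //   bit clear -> PyMem_Free((void *)ptr)
 */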
1173 static struct _mem_work_chunk *
1174 work_queue_first(struct llist_node *head)
1175 {
1176 return llist_data(head->next, struct _mem_work_chunk, node);
1177 }
1178
1179 static void
1180 process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
1181 bool keep_empty)
1182 {
1183 while (!llist_empty(head)) {
1184 struct _mem_work_chunk *buf = work_queue_first(head);
1185
1186 while (buf->rd_idx < buf->wr_idx) {
1187 struct _mem_work_item *item = &buf->array[buf->rd_idx];
1188 if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) {
1189 return;
1190 }
1191
1192 free_work_item(item->ptr);
1193 buf->rd_idx++;
1194 }
1195
1196 assert(buf->rd_idx == buf->wr_idx);
1197 if (keep_empty && buf->node.next == head) {
1198 // Keep the last buffer in the queue to reduce re-allocations
1199 buf->rd_idx = buf->wr_idx = 0;
1200 return;
1201 }
1202
1203 llist_remove(&buf->node);
1204 PyMem_Free(buf);
1205 }
1206 }
1207
1208 static void
1209 process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1210 struct _qsbr_thread_state *qsbr)
1211 {
1212 if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1213 return;
1214 }
1215
1216 // Try to acquire the lock, but don't block if it's already held.
1217 if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1218 process_queue(&queue->head, qsbr, false);
1219
1220 int more_work = !llist_empty(&queue->head);
1221 _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1222
1223 PyMutex_Unlock(&queue->mutex);
1224 }
1225 }
1226
1227 void
1228 _PyMem_ProcessDelayed(PyThreadState *tstate)
1229 {
1230 PyInterpreterState *interp = tstate->interp;
1231 _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1232
1233 // Process thread-local work
1234 process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
1235
1236 // Process shared interpreter work
1237 process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
1238 }
1239
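/* Editorial overview (not in the original source) of how the pieces above
 * fit together in the free-threaded build:
 *
 *   - free_delayed() appends (ptr, qsbr_goal) to the calling thread's
 *     mem_free_queue, allocating a fresh chunk when the last one is full.
 *   - _PyMem_ProcessDelayed() drains that queue and the interpreter-wide
 *     queue, freeing only items whose goal has been reached according to
 *     _Py_qsbr_poll().
 *   - _PyMem_AbandonDelayed() (below) hands a detaching thread's pending
 *     items to the interpreter queue so another thread can finish them.
 */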
1240 void
1241 _PyMem_AbandonDelayed(PyThreadState *tstate)
1242 {
1243 PyInterpreterState *interp = tstate->interp;
1244 struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1245
1246 if (llist_empty(queue)) {
1247 return;
1248 }
1249
1250 // Check if the queue contains one empty buffer
1251 struct _mem_work_chunk *buf = work_queue_first(queue);
1252 if (buf->rd_idx == buf->wr_idx) {
1253 llist_remove(&buf->node);
1254 PyMem_Free(buf);
1255 assert(llist_empty(queue));
1256 return;
1257 }
1258
1259 // Merge the thread's work queue into the interpreter's work queue.
1260 PyMutex_Lock(&interp->mem_free_queue.mutex);
1261 llist_concat(&interp->mem_free_queue.head, queue);
1262 _Py_atomic_store_int_relaxed(&interp->mem_free_queue.has_work, 1);
1263 PyMutex_Unlock(&interp->mem_free_queue.mutex);
1264
1265 assert(llist_empty(queue)); // the thread's queue is now empty
1266 }
1267
1268 void
1269 _PyMem_FiniDelayed(PyInterpreterState *interp)
1270 {
1271 struct llist_node *head = &interp->mem_free_queue.head;
1272 while (!llist_empty(head)) {
1273 struct _mem_work_chunk *buf = work_queue_first(head);
1274
1275 while (buf->rd_idx < buf->wr_idx) {
1276 // Free the remaining items immediately. There should be no other
1277 // threads accessing the memory at this point during shutdown.
1278 struct _mem_work_item *item = &buf->array[buf->rd_idx];
1279 free_work_item(item->ptr);
1280 buf->rd_idx++;
1281 }
1282
1283 llist_remove(&buf->node);
1284 PyMem_Free(buf);
1285 }
1286 }
1287
1288 /**************************/
1289 /* the "object" allocator */
1290 /**************************/
1291
1292 void *
1293 PyObject_Malloc(size_t size)
1294 {
1295 /* see PyMem_RawMalloc() */
1296 if (size > (size_t)PY_SSIZE_T_MAX)
1297 return NULL;
1298 OBJECT_STAT_INC_COND(allocations512, size < 512);
1299 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1300 OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1301 OBJECT_STAT_INC(allocations);
1302 return _PyObject.malloc(_PyObject.ctx, size);
1303 }
1304
1305 void *
1306 PyObject_Calloc(size_t nelem, size_t elsize)
1307 {
1308 /* see PyMem_RawMalloc() */
1309 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1310 return NULL;
1311 OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1312 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1313 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1314 OBJECT_STAT_INC(allocations);
1315 return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1316 }
1317
1318 void *
1319 PyObject_Realloc(void *ptr, size_t new_size)
1320 {
1321 /* see PyMem_RawMalloc() */
1322 if (new_size > (size_t)PY_SSIZE_T_MAX)
1323 return NULL;
1324 return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1325 }
1326
1327 void
1328 PyObject_Free(void *ptr)
1329 {
1330 OBJECT_STAT_INC(frees);
1331 _PyObject.free(_PyObject.ctx, ptr);
1332 }
1333
1334
1335 /* If we're using GCC, use __builtin_expect() to reduce overhead of
1336 the valgrind checks */
1337 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
1338 # define UNLIKELY(value) __builtin_expect((value), 0)
1339 # define LIKELY(value) __builtin_expect((value), 1)
1340 #else
1341 # define UNLIKELY(value) (value)
1342 # define LIKELY(value) (value)
1343 #endif
1344
1345 #ifdef WITH_PYMALLOC
1346
1347 #ifdef WITH_VALGRIND
1348 #include <valgrind/valgrind.h>
1349
1350 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
1351 static int running_on_valgrind = -1;
1352 #endif
1353
1354 typedef struct _obmalloc_state OMState;
1355
1356 /* obmalloc state for main interpreter and shared by all interpreters without
1357 * their own obmalloc state. By not explicitly initializing this structure, it
1358 * will be allocated in the BSS which is a small performance win. The radix
1359 * tree arrays are fairly large but are sparsely used. */
1360 static struct _obmalloc_state obmalloc_state_main;
1361 static bool obmalloc_state_initialized;
1362
1363 static inline int
1364 has_own_state(PyInterpreterState *interp)
1365 {
1366 return (_Py_IsMainInterpreter(interp) ||
1367 !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1368 _Py_IsMainInterpreterFinalizing(interp));
1369 }
1370
1371 static inline OMState *
1372 get_state(void)
1373 {
1374 PyInterpreterState *interp = _PyInterpreterState_GET();
1375 assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1376 return interp->obmalloc;
1377 }
1378
1379 // These macros all rely on a local "state" variable.
1380 #define usedpools (state->pools.used)
1381 #define allarenas (state->mgmt.arenas)
1382 #define maxarenas (state->mgmt.maxarenas)
1383 #define unused_arena_objects (state->mgmt.unused_arena_objects)
1384 #define usable_arenas (state->mgmt.usable_arenas)
1385 #define nfp2lasta (state->mgmt.nfp2lasta)
1386 #define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1387 #define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1388 #define narenas_highwater (state->mgmt.narenas_highwater)
1389 #define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1390
1391 #ifdef WITH_MIMALLOC
1392 static bool count_blocks(
1393 const mi_heap_t* heap, const mi_heap_area_t* area,
1394 void* block, size_t block_size, void* allocated_blocks)
1395 {
1396 *(size_t *)allocated_blocks += area->used;
1397 return 1;
1398 }
1399
1400 static Py_ssize_t
1401 get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1402 {
1403 size_t allocated_blocks = 0;
1404 #ifdef Py_GIL_DISABLED
1405 for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) {
1406 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1407 for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1408 mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1409 mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1410 }
1411 }
1412
1413 mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1414 for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1415 _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1416 &allocated_blocks);
1417 }
1418 #else
1419 // TODO(sgross): this only counts the current thread's blocks.
1420 mi_heap_t *heap = mi_heap_get_default();
1421 mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1422 #endif
1423 return allocated_blocks;
1424 }
1425 #endif
1426
1427 Py_ssize_t
1428 _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1429 {
1430 #ifdef WITH_MIMALLOC
1431 if (_PyMem_MimallocEnabled()) {
1432 return get_mimalloc_allocated_blocks(interp);
1433 }
1434 #endif
1435
1436 #ifdef Py_DEBUG
1437 assert(has_own_state(interp));
1438 #else
1439 if (!has_own_state(interp)) {
1440 _Py_FatalErrorFunc(__func__,
1441 "the interpreter doesn't have its own allocator");
1442 }
1443 #endif
1444 OMState *state = interp->obmalloc;
1445
1446 if (state == NULL) {
1447 return 0;
1448 }
1449
1450 Py_ssize_t n = raw_allocated_blocks;
1451 /* add up allocated blocks for used pools */
1452 for (uint i = 0; i < maxarenas; ++i) {
1453 /* Skip arenas which are not allocated. */
1454 if (allarenas[i].address == 0) {
1455 continue;
1456 }
1457
1458 uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1459
1460 /* visit every pool in the arena */
1461 assert(base <= (uintptr_t) allarenas[i].pool_address);
1462 for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1463 poolp p = (poolp)base;
1464 n += p->ref.count;
1465 }
1466 }
1467 return n;
1468 }
1469
1470 static void free_obmalloc_arenas(PyInterpreterState *interp);
1471
1472 void
1473 _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1474 {
1475 #ifdef WITH_MIMALLOC
1476 if (_PyMem_MimallocEnabled()) {
1477 return;
1478 }
1479 #endif
1480 if (has_own_state(interp) && interp->obmalloc != NULL) {
1481 Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1482 assert(has_own_state(interp) || leaked == 0);
1483 interp->runtime->obmalloc.interpreter_leaks += leaked;
1484 if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1485 // free the obmalloc arenas and radix tree nodes. If leaked > 0
1486 // then some of the memory allocated by obmalloc has not been
1487 // freed. It might be safe to free the arenas in that case but
1488 // it's possible that extension modules are still using that
1489 // memory. So, it is safer to not free and to leak. Perhaps there
1490 // should be a warning when this happens. It should be possible to
1491 // use a tool like "-fsanitize=address" to track down these leaks.
1492 free_obmalloc_arenas(interp);
1493 }
1494 }
1495 }
1496
1497 static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1498
1499 /* We preserve the number of blocks leaked during runtime finalization,
1500 so they can be reported if the runtime is initialized again. */
1501 // XXX We don't lose any information by dropping this,
1502 // so we should consider doing so.
1503 static Py_ssize_t last_final_leaks = 0;
1504
1505 void
1506 _Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1507 {
1508 last_final_leaks = get_num_global_allocated_blocks(runtime);
1509 runtime->obmalloc.interpreter_leaks = 0;
1510 }
1511
1512 static Py_ssize_t
1513 get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1514 {
1515 Py_ssize_t total = 0;
1516 if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1517 PyInterpreterState *interp = _PyInterpreterState_Main();
1518 if (interp == NULL) {
1519 /* We are at the very end of runtime finalization.
1520 We can't rely on finalizing->interp since that thread
1521 state is probably already freed, so we don't worry
1522 about it. */
1523 assert(PyInterpreterState_Head() == NULL);
1524 }
1525 else {
1526 assert(interp != NULL);
1527 /* It is probably the last interpreter but not necessarily. */
1528 assert(PyInterpreterState_Next(interp) == NULL);
1529 total += _PyInterpreterState_GetAllocatedBlocks(interp);
1530 }
1531 }
1532 else {
1533 _PyEval_StopTheWorldAll(&_PyRuntime);
1534 HEAD_LOCK(runtime);
1535 PyInterpreterState *interp = PyInterpreterState_Head();
1536 assert(interp != NULL);
1537 #ifdef Py_DEBUG
1538 int got_main = 0;
1539 #endif
1540 for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1541 #ifdef Py_DEBUG
1542 if (_Py_IsMainInterpreter(interp)) {
1543 assert(!got_main);
1544 got_main = 1;
1545 assert(has_own_state(interp));
1546 }
1547 #endif
1548 if (has_own_state(interp)) {
1549 total += _PyInterpreterState_GetAllocatedBlocks(interp);
1550 }
1551 }
1552 HEAD_UNLOCK(runtime);
1553 _PyEval_StartTheWorldAll(&_PyRuntime);
1554 #ifdef Py_DEBUG
1555 assert(got_main);
1556 #endif
1557 }
1558 total += runtime->obmalloc.interpreter_leaks;
1559 total += last_final_leaks;
1560 return total;
1561 }
1562
1563 Py_ssize_t
1564 _Py_GetGlobalAllocatedBlocks(void)
1565 {
1566 return get_num_global_allocated_blocks(&_PyRuntime);
1567 }
1568
1569 #if WITH_PYMALLOC_RADIX_TREE
1570 /*==========================================================================*/
1571 /* radix tree for tracking arena usage. */
1572
1573 #define arena_map_root (state->usage.arena_map_root)
1574 #ifdef USE_INTERIOR_NODES
1575 #define arena_map_mid_count (state->usage.arena_map_mid_count)
1576 #define arena_map_bot_count (state->usage.arena_map_bot_count)
1577 #endif
1578
1579 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1580 * it cannot be created */
1581 static inline Py_ALWAYS_INLINE arena_map_bot_t *
1582 arena_map_get(OMState *state, pymem_block *p, int create)
1583 {
1584 #ifdef USE_INTERIOR_NODES
1585 /* sanity check that IGNORE_BITS is correct */
1586 assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1587 int i1 = MAP_TOP_INDEX(p);
1588 if (arena_map_root.ptrs[i1] == NULL) {
1589 if (!create) {
1590 return NULL;
1591 }
1592 arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1593 if (n == NULL) {
1594 return NULL;
1595 }
1596 arena_map_root.ptrs[i1] = n;
1597 arena_map_mid_count++;
1598 }
1599 int i2 = MAP_MID_INDEX(p);
1600 if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1601 if (!create) {
1602 return NULL;
1603 }
1604 arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1605 if (n == NULL) {
1606 return NULL;
1607 }
1608 arena_map_root.ptrs[i1]->ptrs[i2] = n;
1609 arena_map_bot_count++;
1610 }
1611 return arena_map_root.ptrs[i1]->ptrs[i2];
1612 #else
1613 return &arena_map_root;
1614 #endif
1615 }
1616
1617
1618 /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw
1619 * away 24 bits of the address. That reduces the space requirement of
1620 * the tree compared to similar radix tree page-map schemes. In
1621 * exchange for slashing the space requirement, it needs more
1622 * computation to check an address.
1623 *
1624 * Tracking coverage is done by "ideal" arena address. It is easier to
1625 * explain in decimal so let's say that the arena size is 100 bytes.
1626 * Then, ideal addresses are 100, 200, 300, etc. For checking if a
1627 * pointer address is inside an actual arena, we have to check two ideal
1628 * arena addresses. E.g. if pointer is 357, we need to check 200 and
1629 * 300. In the rare case that an arena is aligned in the ideal way
1630 * (e.g. base address of arena is 200) then we only have to check one
1631 * ideal address.
1632 *
1633 * The tree nodes for 200 and 300 both store the address of arena.
1634 * There are two cases: the arena starts at a lower ideal arena and
1635 * extends to this one, or the arena starts in this arena and extends to
1636 * the next ideal arena. The tail_lo and tail_hi members correspond to
1637 * these two cases.
1638 */
1639
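/* Editorial worked example (not in the original source), continuing the
 * decimal analogy above with an arena of size 100 based at 170, i.e.
 * covering [170, 270):
 *
 *   - arena_map_mark_used() stores tail_hi = 70 in the node for ideal
 *     address 100 and tail_lo = 70 in the node for ideal address 200.
 *   - arena_map_is_used(190): chunk 100, tail 90 >= tail_hi 70  -> in arena.
 *   - arena_map_is_used(235): chunk 200, tail 35 <  tail_lo 70  -> in arena.
 *   - arena_map_is_used(150): chunk 100, tail 50 <  tail_hi 70 and not
 *     below that node's tail_lo (0)                             -> outside.
 */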
1640
1641 /* mark or unmark addresses covered by arena */
1642 static int
1643 arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1644 {
1645 /* sanity check that IGNORE_BITS is correct */
1646 assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1647 arena_map_bot_t *n_hi = arena_map_get(
1648 state, (pymem_block *)arena_base, is_used);
1649 if (n_hi == NULL) {
1650 assert(is_used); /* otherwise node should already exist */
1651 return 0; /* failed to allocate space for node */
1652 }
1653 int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
1654 int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1655 if (tail == 0) {
1656 /* is ideal arena address */
1657 n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1658 }
1659 else {
1660 /* arena_base address is not ideal (aligned to arena size) and
1661 * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node
1662 * for the next arena. Note that it might be in different MAP_TOP
1663 * and MAP_MID nodes as well so we need to call arena_map_get()
1664 * again (do the full tree traversal).
1665 */
1666 n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1667 uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1668 /* If arena_base is a legit arena address, so is arena_base_next - 1
1669 * (last address in arena). If arena_base_next overflows then it
1670 * must overflow to 0. However, that would mean arena_base was
1671 * "ideal" and we should not be in this case. */
1672 assert(arena_base < arena_base_next);
1673 arena_map_bot_t *n_lo = arena_map_get(
1674 state, (pymem_block *)arena_base_next, is_used);
1675 if (n_lo == NULL) {
1676 assert(is_used); /* otherwise should already exist */
1677 n_hi->arenas[i3].tail_hi = 0;
1678 return 0; /* failed to allocate space for node */
1679 }
1680 int i3_next = MAP_BOT_INDEX(arena_base_next);
1681 n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1682 }
1683 return 1;
1684 }
1685
1686 /* Return true if 'p' is a pointer inside an obmalloc arena.
1687 * _PyObject_Free() calls this so it needs to be very fast. */
1688 static int
1689 arena_map_is_used(OMState *state, pymem_block *p)
1690 {
1691 arena_map_bot_t *n = arena_map_get(state, p, 0);
1692 if (n == NULL) {
1693 return 0;
1694 }
1695 int i3 = MAP_BOT_INDEX(p);
1696 /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1697 int32_t hi = n->arenas[i3].tail_hi;
1698 int32_t lo = n->arenas[i3].tail_lo;
1699 int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1700 return (tail < lo) || (tail >= hi && hi != 0);
1701 }
1702
1703 /* end of radix tree logic */
1704 /*==========================================================================*/
1705 #endif /* WITH_PYMALLOC_RADIX_TREE */
1706
1707
1708 /* Allocate a new arena. If we run out of memory, return NULL. Else
1709 * allocate a new arena, and return the address of an arena_object
1710 * describing the new arena. It's expected that the caller will set
1711 * `usable_arenas` to the return value.
1712 */
1713 static struct arena_object*
1714 new_arena(OMState *state)
1715 {
1716 struct arena_object* arenaobj;
1717 uint excess; /* number of bytes above pool alignment */
1718 void *address;
1719
1720 int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
1721 if (debug_stats == -1) {
1722 const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1723 debug_stats = (opt != NULL && *opt != '\0');
1724 _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
1725 }
1726 if (debug_stats) {
1727 _PyObject_DebugMallocStats(stderr);
1728 }
1729
1730 if (unused_arena_objects == NULL) {
1731 uint i;
1732 uint numarenas;
1733 size_t nbytes;
1734
1735 /* Double the number of arena objects on each allocation.
1736 * Note that it's possible for `numarenas` to overflow.
1737 */
1738 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1739 if (numarenas <= maxarenas)
1740 return NULL; /* overflow */
1741 #if SIZEOF_SIZE_T <= SIZEOF_INT
1742 if (numarenas > SIZE_MAX / sizeof(*allarenas))
1743 return NULL; /* overflow */
1744 #endif
1745 nbytes = numarenas * sizeof(*allarenas);
1746 arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
1747 if (arenaobj == NULL)
1748 return NULL;
1749 allarenas = arenaobj;
1750
1751 /* We might need to fix pointers that were copied. However,
1752 * new_arena only gets called when all the pools in the
1753 * previous arenas are full. Thus, there are *no* pointers
1754 * into the old array. Thus, we don't have to worry about
1755 * invalid pointers. Just to be sure, some asserts:
1756 */
1757 assert(usable_arenas == NULL);
1758 assert(unused_arena_objects == NULL);
1759
1760 /* Put the new arenas on the unused_arena_objects list. */
1761 for (i = maxarenas; i < numarenas; ++i) {
1762 allarenas[i].address = 0; /* mark as unassociated */
1763 allarenas[i].nextarena = i < numarenas - 1 ?
1764 &allarenas[i+1] : NULL;
1765 }
1766
1767 /* Update globals. */
1768 unused_arena_objects = &allarenas[maxarenas];
1769 maxarenas = numarenas;
1770 }
1771
1772 /* Take the next available arena object off the head of the list. */
1773 assert(unused_arena_objects != NULL);
1774 arenaobj = unused_arena_objects;
1775 unused_arena_objects = arenaobj->nextarena;
1776 assert(arenaobj->address == 0);
1777 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1778 #if WITH_PYMALLOC_RADIX_TREE
1779 if (address != NULL) {
1780 if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
1781 /* marking arena in radix tree failed, abort */
1782 _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1783 address = NULL;
1784 }
1785 }
1786 #endif
1787 if (address == NULL) {
1788 /* The allocation failed: return NULL after putting the
1789 * arenaobj back.
1790 */
1791 arenaobj->nextarena = unused_arena_objects;
1792 unused_arena_objects = arenaobj;
1793 return NULL;
1794 }
1795 arenaobj->address = (uintptr_t)address;
1796
1797 ++narenas_currently_allocated;
1798 ++ntimes_arena_allocated;
1799 if (narenas_currently_allocated > narenas_highwater)
1800 narenas_highwater = narenas_currently_allocated;
1801 arenaobj->freepools = NULL;
1802 /* pool_address <- first pool-aligned address in the arena
1803 nfreepools <- number of whole pools that fit after alignment */
1804 arenaobj->pool_address = (pymem_block*)arenaobj->address;
1805 arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1806 excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1807 if (excess != 0) {
1808 --arenaobj->nfreepools;
1809 arenaobj->pool_address += POOL_SIZE - excess;
1810 }
1811 arenaobj->ntotalpools = arenaobj->nfreepools;
1812
1813 return arenaobj;
1814 }
1815
1816
1817
1818 #if WITH_PYMALLOC_RADIX_TREE
1819 /* Return true if and only if P is an address that was allocated by
1820 pymalloc. When the radix tree is used, the 'pool' argument is unused.
1821 */
1822 static bool
1823 address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
1824 {
1825 return arena_map_is_used(state, p);
1826 }
1827 #else
1828 /*
1829 address_in_range(P, POOL)
1830
1831 Return true if and only if P is an address that was allocated by pymalloc.
1832 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1833 (the caller is asked to compute this because the macro expands POOL more than
1834 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1835 variable and pass the latter to the macro; because address_in_range is
1836 called on every alloc/realloc/free, micro-efficiency is important here).
1837
1838 Tricky: Let B be the arena base address associated with the pool, B =
1839 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1840
1841 B <= P < B + ARENA_SIZE
1842
1843 Subtracting B throughout, this is true iff
1844
1845 0 <= P-B < ARENA_SIZE
1846
1847 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1848
1849 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1850 before the first arena has been allocated. `arenas` is still NULL in that
1851 case. We're relying on the fact that maxarenas is also 0 in that case, so that
1852 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1853 into a NULL arenas.
1854
1855 Details: given P and POOL, the arena_object corresponding to P is AO =
1856 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1857 stores, etc), POOL is the correct address of P's pool, AO.address is the
1858 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1859 AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1860 (NULL)). Therefore address_in_range correctly reports that obmalloc
1861 controls P.
1862
1863 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1864 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1865 in this case -- it may even be uninitialized trash. If the trash arenaindex
1866 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1867 control P.
1868
1869 Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
1870 allocated arena, obmalloc controls all the memory in slice AO.address :
1871 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1872 so P doesn't lie in that slice, so the macro correctly reports that P is not
1873 controlled by obmalloc.
1874
1875 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1876 arena_object (one not currently associated with an allocated arena),
1877 AO.address is 0, and the second test in the macro reduces to:
1878
1879 P < ARENA_SIZE
1880
1881 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1882 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1883 of the test still passes, and the third clause (AO.address != 0) is necessary
1884 to get the correct result: AO.address is 0 in this case, so the macro
1885 correctly reports that P is not controlled by obmalloc (despite that P lies in
1886 slice AO.address : AO.address + ARENA_SIZE).
1887
1888 Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1889 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1890 corresponded to a currently-allocated arena, so the "P is not controlled by
1891 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1892 was impossible.
1893
1894 Note that the logic is excruciating, and reading up possibly uninitialized
1895 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1896 creates problems for some memory debuggers. The overwhelming advantage is
1897 that this test determines whether an arbitrary address is controlled by
1898 obmalloc in a small constant time, independent of the number of arenas
1899 obmalloc controls. Since this test is needed at every entry point, it's
1900 extremely desirable that it be this fast.
1901 */
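/* Worked example of the unsigned trick above (numbers are made up for
 * illustration): with B = 0x700000 and ARENA_SIZE = 0x100000, P = 0x712345
 * gives (uintptr_t)(P - B) = 0x12345 < ARENA_SIZE, so P is inside. For
 * P = 0x6fffff the subtraction wraps around to a huge unsigned value, so the
 * single comparison also rejects addresses below B without a separate
 * "0 <=" test. */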
1902
1903 static bool _Py_NO_SANITIZE_ADDRESS
1904 _Py_NO_SANITIZE_THREAD
1905 _Py_NO_SANITIZE_MEMORY
1906 address_in_range(OMState *state, void *p, poolp pool)
1907 {
1908 // Since address_in_range may be reading from memory which was not allocated
1909 // by Python, it is important that pool->arenaindex is read only once, as
1910 // another thread may be concurrently modifying the value without holding
1911 // the GIL. The following dance forces the compiler to read pool->arenaindex
1912 // only once.
1913 uint arenaindex = *((volatile uint *)&pool->arenaindex);
1914 return arenaindex < maxarenas &&
1915 (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
1916 allarenas[arenaindex].address != 0;
1917 }
1918
1919 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1920
1921 /*==========================================================================*/
1922
1923 // Called when freelist is exhausted. Extend the freelist if there is
1924 // space for a block. Otherwise, remove this pool from usedpools.
1925 static void
1926 pymalloc_pool_extend(poolp pool, uint size)
1927 {
1928 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1929 /* There is room for another block. */
1930 pool->freeblock = (pymem_block*)pool + pool->nextoffset;
1931 pool->nextoffset += INDEX2SIZE(size);
1932 *(pymem_block **)(pool->freeblock) = NULL;
1933 return;
1934 }
1935
1936 /* Pool is full, unlink from used pools. */
1937 poolp next;
1938 next = pool->nextpool;
1939 pool = pool->prevpool;
1940 next->prevpool = pool;
1941 pool->nextpool = next;
1942 }
1943
1944 /* Called when pymalloc_alloc cannot allocate a block from the used pools.
1945 * This function takes a new pool and allocates a block from it.
1946 */
1947 static void*
1948 allocate_from_new_pool(OMState *state, uint size)
1949 {
1950 /* There isn't a pool of the right size class immediately
1951 * available: use a free pool.
1952 */
1953 if (UNLIKELY(usable_arenas == NULL)) {
1954 /* No arena has a free pool: allocate a new arena. */
1955 #ifdef WITH_MEMORY_LIMITS
1956 if (narenas_currently_allocated >= MAX_ARENAS) {
1957 return NULL;
1958 }
1959 #endif
1960 usable_arenas = new_arena(state);
1961 if (usable_arenas == NULL) {
1962 return NULL;
1963 }
1964 usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1965 assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1966 nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1967 }
1968 assert(usable_arenas->address != 0);
1969
1970 /* This arena already had the smallest nfreepools value, so decreasing
1971 * nfreepools doesn't change that, and we don't need to rearrange the
1972 * usable_arenas list. However, if the arena becomes wholly allocated,
1973 * we need to remove its arena_object from usable_arenas.
1974 */
1975 assert(usable_arenas->nfreepools > 0);
1976 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1977 /* It's the last arena with this count of free pools, so there won't be any left at this count. */
1978 nfp2lasta[usable_arenas->nfreepools] = NULL;
1979 }
1980 /* If any free pools will remain, it will be the new smallest. */
1981 if (usable_arenas->nfreepools > 1) {
1982 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1983 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1984 }
1985
1986 /* Try to get a cached free pool. */
1987 poolp pool = usable_arenas->freepools;
1988 if (LIKELY(pool != NULL)) {
1989 /* Unlink from cached pools. */
1990 usable_arenas->freepools = pool->nextpool;
1991 usable_arenas->nfreepools--;
1992 if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1993 /* Wholly allocated: remove. */
1994 assert(usable_arenas->freepools == NULL);
1995 assert(usable_arenas->nextarena == NULL ||
1996 usable_arenas->nextarena->prevarena ==
1997 usable_arenas);
1998 usable_arenas = usable_arenas->nextarena;
1999 if (usable_arenas != NULL) {
2000 usable_arenas->prevarena = NULL;
2001 assert(usable_arenas->address != 0);
2002 }
2003 }
2004 else {
2005 /* nfreepools > 0: it must be that freepools
2006 * isn't NULL, or that we haven't yet carved
2007 * off all the arena's pools for the first
2008 * time.
2009 */
2010 assert(usable_arenas->freepools != NULL ||
2011 usable_arenas->pool_address <=
2012 (pymem_block*)usable_arenas->address +
2013 ARENA_SIZE - POOL_SIZE);
2014 }
2015 }
2016 else {
2017 /* Carve off a new pool. */
2018 assert(usable_arenas->nfreepools > 0);
2019 assert(usable_arenas->freepools == NULL);
2020 pool = (poolp)usable_arenas->pool_address;
2021 assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2022 ARENA_SIZE - POOL_SIZE);
2023 pool->arenaindex = (uint)(usable_arenas - allarenas);
2024 assert(&allarenas[pool->arenaindex] == usable_arenas);
2025 pool->szidx = DUMMY_SIZE_IDX;
2026 usable_arenas->pool_address += POOL_SIZE;
2027 --usable_arenas->nfreepools;
2028
2029 if (usable_arenas->nfreepools == 0) {
2030 assert(usable_arenas->nextarena == NULL ||
2031 usable_arenas->nextarena->prevarena ==
2032 usable_arenas);
2033 /* Unlink the arena: it is completely allocated. */
2034 usable_arenas = usable_arenas->nextarena;
2035 if (usable_arenas != NULL) {
2036 usable_arenas->prevarena = NULL;
2037 assert(usable_arenas->address != 0);
2038 }
2039 }
2040 }
2041
2042 /* Frontlink to used pools. */
2043 pymem_block *bp;
2044 poolp next = usedpools[size + size]; /* == prev */
2045 pool->nextpool = next;
2046 pool->prevpool = next;
2047 next->nextpool = pool;
2048 next->prevpool = pool;
2049 pool->ref.count = 1;
2050 if (pool->szidx == size) {
2051 /* Luckily, this pool last contained blocks
2052 * of the same size class, so its header
2053 * and free list are already initialized.
2054 */
2055 bp = pool->freeblock;
2056 assert(bp != NULL);
2057 pool->freeblock = *(pymem_block **)bp;
2058 return bp;
2059 }
2060 /*
2061 * Initialize the pool header, set up the free list to
2062 * contain just the second block, and return the first
2063 * block.
2064 */
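/* For instance (assuming a 16-byte ALIGNMENT, so szidx 1 would be the
 * 32-byte class): bp is the first block at pool + POOL_OVERHEAD and is
 * returned to the caller, freeblock is the second block at bp + 32, and
 * nextoffset points at the third block, which pymalloc_pool_extend() links
 * in once this one-entry free list is exhausted. */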
2065 pool->szidx = size;
2066 size = INDEX2SIZE(size);
2067 bp = (pymem_block *)pool + POOL_OVERHEAD;
2068 pool->nextoffset = POOL_OVERHEAD + (size << 1);
2069 pool->maxnextoffset = POOL_SIZE - size;
2070 pool->freeblock = bp + size;
2071 *(pymem_block **)(pool->freeblock) = NULL;
2072 return bp;
2073 }
2074
2075 /* pymalloc allocator
2076
2077 Return a pointer to newly allocated memory if pymalloc allocated memory.
2078
2079 Return NULL if pymalloc failed to allocate the memory block: the request is
2080 bigger than SMALL_REQUEST_THRESHOLD, an error occurred in the code below (so
2081 the caller can fall back to the raw allocator as a last chance to serve the request), or the max memory limit has been reached.
2082 */
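/* Size class sketch (assuming a 16-byte ALIGNMENT, i.e. ALIGNMENT_SHIFT == 4):
 * the index computed below as (nbytes - 1) >> ALIGNMENT_SHIFT maps requests of
 * 1..16 bytes to class 0, 17..32 bytes to class 1, and so on, up to
 * SMALL_REQUEST_THRESHOLD; anything larger gets NULL back so the caller can
 * use the raw allocator instead. */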
2083 static inline void*
2084 pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2085 {
2086 #ifdef WITH_VALGRIND
2087 if (UNLIKELY(running_on_valgrind == -1)) {
2088 running_on_valgrind = RUNNING_ON_VALGRIND;
2089 }
2090 if (UNLIKELY(running_on_valgrind)) {
2091 return NULL;
2092 }
2093 #endif
2094
2095 if (UNLIKELY(nbytes == 0)) {
2096 return NULL;
2097 }
2098 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2099 return NULL;
2100 }
2101
2102 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2103 poolp pool = usedpools[size + size];
2104 pymem_block *bp;
2105
2106 if (LIKELY(pool != pool->nextpool)) {
2107 /*
2108 * There is a used pool for this size class.
2109 * Pick up the head block of its free list.
2110 */
2111 ++pool->ref.count;
2112 bp = pool->freeblock;
2113 assert(bp != NULL);
2114
2115 if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2116 // Reached the end of the free list, try to extend it.
2117 pymalloc_pool_extend(pool, size);
2118 }
2119 }
2120 else {
2121 /* There isn't a pool of the right size class immediately
2122 * available: use a free pool.
2123 */
2124 bp = allocate_from_new_pool(state, size);
2125 }
2126
2127 return (void *)bp;
2128 }
2129
2130
2131 void *
2132 _PyObject_Malloc(void *ctx, size_t nbytes)
2133 {
2134 OMState *state = get_state();
2135 void* ptr = pymalloc_alloc(state, ctx, nbytes);
2136 if (LIKELY(ptr != NULL)) {
2137 return ptr;
2138 }
2139
2140 ptr = PyMem_RawMalloc(nbytes);
2141 if (ptr != NULL) {
2142 raw_allocated_blocks++;
2143 }
2144 return ptr;
2145 }
2146
2147
2148 void *
2149 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2150 {
2151 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2152 size_t nbytes = nelem * elsize;
2153
2154 OMState *state = get_state();
2155 void* ptr = pymalloc_alloc(state, ctx, nbytes);
2156 if (LIKELY(ptr != NULL)) {
2157 memset(ptr, 0, nbytes);
2158 return ptr;
2159 }
2160
2161 ptr = PyMem_RawCalloc(nelem, elsize);
2162 if (ptr != NULL) {
2163 raw_allocated_blocks++;
2164 }
2165 return ptr;
2166 }
2167
2168
2169 static void
2170 insert_to_usedpool(OMState *state, poolp pool)
2171 {
2172 assert(pool->ref.count > 0); /* else the pool is empty */
2173
2174 uint size = pool->szidx;
2175 poolp next = usedpools[size + size];
2176 poolp prev = next->prevpool;
2177
2178 /* insert pool before next: prev <-> pool <-> next */
2179 pool->nextpool = next;
2180 pool->prevpool = prev;
2181 next->prevpool = pool;
2182 prev->nextpool = pool;
2183 }
2184
2185 static void
2186 insert_to_freepool(OMState *state, poolp pool)
2187 {
2188 poolp next = pool->nextpool;
2189 poolp prev = pool->prevpool;
2190 next->prevpool = prev;
2191 prev->nextpool = next;
2192
2193 /* Link the pool to freepools. This is a singly-linked
2194 * list, and pool->prevpool isn't used there.
2195 */
2196 struct arena_object *ao = &allarenas[pool->arenaindex];
2197 pool->nextpool = ao->freepools;
2198 ao->freepools = pool;
2199 uint nf = ao->nfreepools;
2200 /* If this is the rightmost arena with this number of free pools,
2201 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there
2202 * are no arenas in usable_arenas with that value.
2203 */
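/* For example: if usable_arenas is A(2 free) -> B(2 free) -> C(3 free) and
 * the freed pool lives in B, then B is nfp2lasta[2]; once B gains a free
 * pool, the rightmost arena with 2 free pools becomes A (B's prevarena),
 * so nfp2lasta[2] is repointed below. */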
2204 struct arena_object* lastnf = nfp2lasta[nf];
2205 assert((nf == 0 && lastnf == NULL) ||
2206 (nf > 0 &&
2207 lastnf != NULL &&
2208 lastnf->nfreepools == nf &&
2209 (lastnf->nextarena == NULL ||
2210 nf < lastnf->nextarena->nfreepools)));
2211 if (lastnf == ao) { /* it is the rightmost */
2212 struct arena_object* p = ao->prevarena;
2213 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2214 }
2215 ao->nfreepools = ++nf;
2216
2217 /* All the rest is arena management. We just freed
2218 * a pool, and there are 4 cases for arena mgmt:
2219 * 1. If all the pools are free, return the arena to
2220 * the system free(). Except if this is the last
2221 * arena in the list, keep it to avoid thrashing:
2222 * keeping one wholly free arena in the list avoids
2223 * pathological cases where a simple loop would
2224 * otherwise provoke needing to allocate and free an
2225 * arena on every iteration. See bpo-37257.
2226 * 2. If this is the only free pool in the arena,
2227 * add the arena back to the `usable_arenas` list.
2228 * 3. If the "next" arena has a smaller count of free
2229 * pools, we have to "slide this arena right" to
2230 * restore the invariant that usable_arenas is sorted
2231 * in order of nfreepools.
2232 * 4. Else there's nothing more to do.
2233 */
2234 if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2235 /* Case 1. First unlink ao from usable_arenas.
2236 */
2237 assert(ao->prevarena == NULL ||
2238 ao->prevarena->address != 0);
2239 assert(ao ->nextarena == NULL ||
2240 ao->nextarena->address != 0);
2241
2242 /* Fix the pointer in the prevarena, or the
2243 * usable_arenas pointer.
2244 */
2245 if (ao->prevarena == NULL) {
2246 usable_arenas = ao->nextarena;
2247 assert(usable_arenas == NULL ||
2248 usable_arenas->address != 0);
2249 }
2250 else {
2251 assert(ao->prevarena->nextarena == ao);
2252 ao->prevarena->nextarena =
2253 ao->nextarena;
2254 }
2255 /* Fix the pointer in the nextarena. */
2256 if (ao->nextarena != NULL) {
2257 assert(ao->nextarena->prevarena == ao);
2258 ao->nextarena->prevarena =
2259 ao->prevarena;
2260 }
2261 /* Record that this arena_object slot is
2262 * available to be reused.
2263 */
2264 ao->nextarena = unused_arena_objects;
2265 unused_arena_objects = ao;
2266
2267 #if WITH_PYMALLOC_RADIX_TREE
2268 /* mark arena region as not under control of obmalloc */
2269 arena_map_mark_used(state, ao->address, 0);
2270 #endif
2271
2272 /* Free the entire arena. */
2273 _PyObject_Arena.free(_PyObject_Arena.ctx,
2274 (void *)ao->address, ARENA_SIZE);
2275 ao->address = 0; /* mark unassociated */
2276 --narenas_currently_allocated;
2277
2278 return;
2279 }
2280
2281 if (nf == 1) {
2282 /* Case 2. Put ao at the head of
2283 * usable_arenas. Note that because
2284 * ao->nfreepools was 0 before, ao isn't
2285 * currently on the usable_arenas list.
2286 */
2287 ao->nextarena = usable_arenas;
2288 ao->prevarena = NULL;
2289 if (usable_arenas)
2290 usable_arenas->prevarena = ao;
2291 usable_arenas = ao;
2292 assert(usable_arenas->address != 0);
2293 if (nfp2lasta[1] == NULL) {
2294 nfp2lasta[1] = ao;
2295 }
2296
2297 return;
2298 }
2299
2300 /* If this arena is now out of order, we need to keep
2301 * the list sorted. The list is kept sorted so that
2302 * the "most full" arenas are used first, which allows
2303 * the nearly empty arenas to be completely freed. In
2304 * a few un-scientific tests, it seems like this
2305 * approach allowed a lot more memory to be freed.
2306 */
2307 /* If this is the only arena with nf, record that. */
2308 if (nfp2lasta[nf] == NULL) {
2309 nfp2lasta[nf] = ao;
2310 } /* else the rightmost with nf doesn't change */
2311 /* If this was the rightmost of the old size, it remains in place. */
2312 if (ao == lastnf) {
2313 /* Case 4. Nothing to do. */
2314 return;
2315 }
2316 /* If ao were the only arena in the list, the last block would have
2317 * gotten us out.
2318 */
2319 assert(ao->nextarena != NULL);
2320
2321 /* Case 3: We have to move the arena towards the end of the list,
2322 * because it has more free pools than the arena to its right. It needs
2323 * to move to follow lastnf.
2324 * First unlink ao from usable_arenas.
2325 */
2326 if (ao->prevarena != NULL) {
2327 /* ao isn't at the head of the list */
2328 assert(ao->prevarena->nextarena == ao);
2329 ao->prevarena->nextarena = ao->nextarena;
2330 }
2331 else {
2332 /* ao is at the head of the list */
2333 assert(usable_arenas == ao);
2334 usable_arenas = ao->nextarena;
2335 }
2336 ao->nextarena->prevarena = ao->prevarena;
2337 /* And insert after lastnf. */
2338 ao->prevarena = lastnf;
2339 ao->nextarena = lastnf->nextarena;
2340 if (ao->nextarena != NULL) {
2341 ao->nextarena->prevarena = ao;
2342 }
2343 lastnf->nextarena = ao;
2344 /* Verify that the swaps worked. */
2345 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2346 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2347 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2348 assert((usable_arenas == ao && ao->prevarena == NULL)
2349 || ao->prevarena->nextarena == ao);
2350 }
2351
2352 /* Free a memory block allocated by pymalloc_alloc().
2353 Return 1 if it was freed.
2354 Return 0 if the block was not allocated by pymalloc_alloc(). */
2355 static inline int
2356 pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2357 {
2358 assert(p != NULL);
2359
2360 #ifdef WITH_VALGRIND
2361 if (UNLIKELY(running_on_valgrind > 0)) {
2362 return 0;
2363 }
2364 #endif
2365
2366 poolp pool = POOL_ADDR(p);
2367 if (UNLIKELY(!address_in_range(state, p, pool))) {
2368 return 0;
2369 }
2370 /* We allocated this address. */
2371
2372 /* Link p to the start of the pool's freeblock list. Since
2373 * the pool had at least the p block outstanding, the pool
2374 * wasn't empty (so it's already in a usedpools[] list, or
2375 * was full and is in no list -- it's not in the freeblocks
2376 * list in any case).
2377 */
2378 assert(pool->ref.count > 0); /* else it was empty */
2379 pymem_block *lastfree = pool->freeblock;
2380 *(pymem_block **)p = lastfree;
2381 pool->freeblock = (pymem_block *)p;
2382 pool->ref.count--;
2383
2384 if (UNLIKELY(lastfree == NULL)) {
2385 /* Pool was full, so doesn't currently live in any list:
2386 * link it to the front of the appropriate usedpools[] list.
2387 * This mimics LRU pool usage for new allocations and
2388 * targets optimal filling when several pools contain
2389 * blocks of the same size class.
2390 */
2391 insert_to_usedpool(state, pool);
2392 return 1;
2393 }
2394
2395 /* freeblock wasn't NULL, so the pool wasn't full,
2396 * and the pool is in a usedpools[] list.
2397 */
2398 if (LIKELY(pool->ref.count != 0)) {
2399 /* pool isn't empty: leave it in usedpools */
2400 return 1;
2401 }
2402
2403 /* Pool is now empty: unlink from usedpools, and
2404 * link to the front of freepools. This ensures that
2405 * previously freed pools will be allocated later
2406 * (not being referenced, they may have been paged out).
2407 */
2408 insert_to_freepool(state, pool);
2409 return 1;
2410 }
2411
2412
2413 void
2414 _PyObject_Free(void *ctx, void *p)
2415 {
2416 /* PyObject_Free(NULL) has no effect */
2417 if (p == NULL) {
2418 return;
2419 }
2420
2421 OMState *state = get_state();
2422 if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2423 /* pymalloc didn't allocate this address */
2424 PyMem_RawFree(p);
2425 raw_allocated_blocks--;
2426 }
2427 }
2428
2429
2430 /* pymalloc realloc.
2431
2432 If nbytes==0, then as the Python docs promise, we do not treat this like
2433 free(p), and return a non-NULL result.
2434
2435 Return 1 if pymalloc reallocated memory and wrote the new pointer into
2436 newptr_p.
2437
2438 Return 0 if pymalloc didn't allocate p. */
2439 static int
2440 pymalloc_realloc(OMState *state, void *ctx,
2441 void **newptr_p, void *p, size_t nbytes)
2442 {
2443 void *bp;
2444 poolp pool;
2445 size_t size;
2446
2447 assert(p != NULL);
2448
2449 #ifdef WITH_VALGRIND
2450 /* Treat running_on_valgrind == -1 the same as 0 */
2451 if (UNLIKELY(running_on_valgrind > 0)) {
2452 return 0;
2453 }
2454 #endif
2455
2456 pool = POOL_ADDR(p);
2457 if (!address_in_range(state, p, pool)) {
2458 /* pymalloc is not managing this block.
2459
2460 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2461 over this block. However, if we do, we need to copy the valid data
2462 from the C-managed block to one of our blocks, and there's no
2463 portable way to know how much of the memory space starting at p is
2464 valid.
2465
2466 As bug 1185883 pointed out the hard way, it's possible that the
2467 C-managed block is "at the end" of allocated VM space, so that a
2468 memory fault can occur if we try to copy nbytes bytes starting at p.
2469 Instead we punt: let C continue to manage this block. */
2470 return 0;
2471 }
2472
2473 /* pymalloc is in charge of this block */
2474 size = INDEX2SIZE(pool->szidx);
2475 if (nbytes <= size) {
2476 /* The block is staying the same or shrinking.
2477
2478 If it's shrinking, there's a tradeoff: it costs cycles to copy the
2479 block to a smaller size class, but it wastes memory not to copy it.
2480
2481 The compromise here is to copy on shrink only if at least 25% of
2482 size can be shaved off. */
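/* For example, with size == 64: requests of 49..64 bytes keep the block in
 * place (4 * nbytes > 192), while requests of 48 bytes or fewer are copied
 * into a smaller size class. */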
2483 if (4 * nbytes > 3 * size) {
2484 /* It's the same, or shrinking and new/old > 3/4. */
2485 *newptr_p = p;
2486 return 1;
2487 }
2488 size = nbytes;
2489 }
2490
2491 bp = _PyObject_Malloc(ctx, nbytes);
2492 if (bp != NULL) {
2493 memcpy(bp, p, size);
2494 _PyObject_Free(ctx, p);
2495 }
2496 *newptr_p = bp;
2497 return 1;
2498 }
2499
2500
2501 void *
2502 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2503 {
2504 void *ptr2;
2505
2506 if (ptr == NULL) {
2507 return _PyObject_Malloc(ctx, nbytes);
2508 }
2509
2510 OMState *state = get_state();
2511 if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2512 return ptr2;
2513 }
2514
2515 return PyMem_RawRealloc(ptr, nbytes);
2516 }
2517
2518 #else /* ! WITH_PYMALLOC */
2519
2520 /*==========================================================================*/
2521 /* pymalloc not enabled: Redirect the entry points to malloc. These will
2522 * only be used by extensions that are compiled with pymalloc enabled. */
2523
2524 Py_ssize_t
2525 _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2526 {
2527 return 0;
2528 }
2529
2530 Py_ssize_t
2531 _Py_GetGlobalAllocatedBlocks(void)
2532 {
2533 return 0;
2534 }
2535
2536 void
2537 _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2538 {
2539 return;
2540 }
2541
2542 void
2543 _Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2544 {
2545 return;
2546 }
2547
2548 #endif /* WITH_PYMALLOC */
2549
2550
2551 /*==========================================================================*/
2552 /* A cross-platform debugging allocator. This doesn't manage memory directly,
2553 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2554 */
2555
2556 /* Uncomment this define to add the "serialno" field */
2557 /* #define PYMEM_DEBUG_SERIALNO */
2558
2559 #ifdef PYMEM_DEBUG_SERIALNO
2560 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
2561
2562 /* serialno is always incremented via calling this routine. The point is
2563 * to supply a single place to set a breakpoint.
2564 */
2565 static void
2566 bumpserialno(void)
2567 {
2568 ++serialno;
2569 }
2570 #endif
2571
2572 #define SST SIZEOF_SIZE_T
2573
2574 #ifdef PYMEM_DEBUG_SERIALNO
2575 # define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2576 #else
2577 # define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2578 #endif
2579
2580 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2581 static size_t
2582 read_size_t(const void *p)
2583 {
2584 const uint8_t *q = (const uint8_t *)p;
2585 size_t result = *q++;
2586 int i;
2587
2588 for (i = SST; --i > 0; ++q)
2589 result = (result << 8) | *q;
2590 return result;
2591 }
2592
2593 /* Write n as a big-endian size_t, MSB at address p, LSB at
2594 * p + sizeof(size_t) - 1.
2595 */
2596 static void
2597 write_size_t(void *p, size_t n)
2598 {
2599 uint8_t *q = (uint8_t *)p + SST - 1;
2600 int i;
2601
2602 for (i = SST; --i >= 0; --q) {
2603 *q = (uint8_t)(n & 0xff);
2604 n >>= 8;
2605 }
2606 }
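
/* Illustrative sketch (kept out of the build): the two helpers above
 * round-trip a value through its big-endian encoding; with SST == 8 the
 * value 0x0102 is stored as the bytes 00 00 00 00 00 00 01 02. */
#if 0
static void
example_size_t_roundtrip(void)
{
    uint8_t buf[SST];
    write_size_t(buf, 0x0102);           /* MSB first, LSB at buf[SST-1] */
    assert(read_size_t(buf) == 0x0102);  /* decoding reverses the encoding */
}
#endif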
2607
2608 static void
2609 fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
2610 bool is_alloc)
2611 {
2612 #ifdef Py_GIL_DISABLED
2613 if (api->api_id == 'o') {
2614 // Don't overwrite the first few bytes of a PyObject allocation in the
2615 // free-threaded build
2616 _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
2617 size_t debug_offset;
2618 if (is_alloc) {
2619 debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
2620 }
2621 else {
2622 char *alloc = (char *)data - 2*SST; // start of the allocation
2623 debug_offset = _mi_ptr_page(alloc)->debug_offset;
2624 }
2625 debug_offset -= 2*SST; // account for pymalloc extra bytes
2626 if (debug_offset < nbytes) {
2627 memset((char *)data + debug_offset, c, nbytes - debug_offset);
2628 }
2629 return;
2630 }
2631 #endif
2632 memset(data, c, nbytes);
2633 }
2634
2635 /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and
2636 fills them with useful stuff, here calling the underlying malloc's result p:
2637
2638 p[0: S]
2639 Number of bytes originally asked for. This is a size_t, big-endian (easier
2640 to read in a memory dump).
2641 p[S]
2642 API ID. See PEP 445. This is a character, but seems undocumented.
2643 p[S+1: 2*S]
2644 Copies of PYMEM_FORBIDDENBYTE. Used to catch under-writes and reads.
2645 p[2*S: 2*S+n]
2646 The requested memory, filled with copies of PYMEM_CLEANBYTE.
2647 Used to catch reference to uninitialized memory.
2648 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
2649 handled the request itself.
2650 p[2*S+n: 2*S+n+S]
2651 Copies of PYMEM_FORBIDDENBYTE. Used to catch over-writes and reads.
2652 p[2*S+n+S: 2*S+n+2*S]
2653 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2654 and _PyMem_DebugRealloc.
2655 This is a big-endian size_t.
2656 If "bad memory" is detected later, the serial number gives an
2657 excellent way to set a breakpoint on the next run, to capture the
2658 instant at which this block was passed out.
2659
2660 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2661 for 3 * S extra bytes, and omits the last serialno field.
2662 */
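/* Concrete example of the layout above (assuming a 64-bit build, so S == 8,
   and PYMEM_DEBUG_SERIALNO defined), for a request of n == 4 bytes:

       p[0:8]    big-endian 4, the requested size
       p[8]      the API id, e.g. 'o' for object-domain allocations
       p[9:16]   7 copies of PYMEM_FORBIDDENBYTE
       p[16:20]  the 4 data bytes, initially PYMEM_CLEANBYTE; &p[16] is returned
       p[20:28]  8 copies of PYMEM_FORBIDDENBYTE
       p[28:36]  big-endian serial number
*/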
2663
2664 static void *
2665 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2666 {
2667 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2668 uint8_t *p; /* base address of malloc'ed pad block */
2669 uint8_t *data; /* p + 2*SST == pointer to data bytes */
2670 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2671 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2672
2673 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2674 /* integer overflow: can't represent total as a Py_ssize_t */
2675 return NULL;
2676 }
2677 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2678
2679 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2680 ^--- p ^--- data ^--- tail
2681 S: nbytes stored as size_t
2682 I: API identifier (1 byte)
2683 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2684 C: Clean bytes used later to store actual data
2685 N: Serial number stored as size_t
2686
2687 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2688 is omitted. */
2689
2690 if (use_calloc) {
2691 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2692 }
2693 else {
2694 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2695 }
2696 if (p == NULL) {
2697 return NULL;
2698 }
2699 data = p + 2*SST;
2700
2701 #ifdef PYMEM_DEBUG_SERIALNO
2702 bumpserialno();
2703 #endif
2704
2705 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2706 write_size_t(p, nbytes);
2707 p[SST] = (uint8_t)api->api_id;
2708 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2709
2710 if (nbytes > 0 && !use_calloc) {
2711 fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
2712 }
2713
2714 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2715 tail = data + nbytes;
2716 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2717 #ifdef PYMEM_DEBUG_SERIALNO
2718 write_size_t(tail + SST, serialno);
2719 #endif
2720
2721 return data;
2722 }
2723
2724 void *
2725 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2726 {
2727 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2728 }
2729
2730 void *
2731 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2732 {
2733 size_t nbytes;
2734 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2735 nbytes = nelem * elsize;
2736 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2737 }
2738
2739
2740 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2741 particular, that the FORBIDDENBYTEs with the api ID are still intact).
2742 Then fills the original bytes with PYMEM_DEADBYTE.
2743 Then calls the underlying free.
2744 */
2745 void
2746 _PyMem_DebugRawFree(void *ctx, void *p)
2747 {
2748 /* PyMem_Free(NULL) has no effect */
2749 if (p == NULL) {
2750 return;
2751 }
2752
2753 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2754 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
2755 size_t nbytes;
2756
2757 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2758 nbytes = read_size_t(q);
2759 nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
2760 memset(q, PYMEM_DEADBYTE, 2*SST);
2761 fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
2762 api->alloc.free(api->alloc.ctx, q);
2763 }
2764
2765
2766 void *
2767 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2768 {
2769 if (p == NULL) {
2770 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2771 }
2772
2773 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2774 uint8_t *head; /* base address of malloc'ed pad block */
2775 uint8_t *data; /* pointer to data bytes */
2776 uint8_t *r;
2777 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2778 size_t total; /* 2 * SST + nbytes + 2 * SST */
2779 size_t original_nbytes;
2780 #define ERASED_SIZE 64
2781
2782 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2783
2784 data = (uint8_t *)p;
2785 head = data - 2*SST;
2786 original_nbytes = read_size_t(head);
2787 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2788 /* integer overflow: can't represent total as a Py_ssize_t */
2789 return NULL;
2790 }
2791 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2792
2793 tail = data + original_nbytes;
2794 #ifdef PYMEM_DEBUG_SERIALNO
2795 size_t block_serialno = read_size_t(tail + SST);
2796 #endif
2797 #ifndef Py_GIL_DISABLED
2798 /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2799 ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2800 */
2801 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
2802 if (original_nbytes <= sizeof(save)) {
2803 memcpy(save, data, original_nbytes);
2804 memset(data - 2 * SST, PYMEM_DEADBYTE,
2805 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2806 }
2807 else {
2808 memcpy(save, data, ERASED_SIZE);
2809 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2810 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2811 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2812 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2813 }
2814 #endif
2815
2816 /* Resize and add decorations. */
2817 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2818 if (r == NULL) {
2819 /* if realloc() failed: rewrite header and footer which have
2820 just been erased */
2821 nbytes = original_nbytes;
2822 }
2823 else {
2824 head = r;
2825 #ifdef PYMEM_DEBUG_SERIALNO
2826 bumpserialno();
2827 block_serialno = serialno;
2828 #endif
2829 }
2830 data = head + 2*SST;
2831
2832 write_size_t(head, nbytes);
2833 head[SST] = (uint8_t)api->api_id;
2834 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2835
2836 tail = data + nbytes;
2837 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2838 #ifdef PYMEM_DEBUG_SERIALNO
2839 write_size_t(tail + SST, block_serialno);
2840 #endif
2841
2842 #ifndef Py_GIL_DISABLED
2843 /* Restore saved bytes. */
2844 if (original_nbytes <= sizeof(save)) {
2845 memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2846 }
2847 else {
2848 size_t i = original_nbytes - ERASED_SIZE;
2849 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2850 if (nbytes > i) {
2851 memcpy(data + i, &save[ERASED_SIZE],
2852 Py_MIN(nbytes - i, ERASED_SIZE));
2853 }
2854 }
2855 #endif
2856
2857 if (r == NULL) {
2858 return NULL;
2859 }
2860
2861 if (nbytes > original_nbytes) {
2862 /* growing: mark new extra memory clean */
2863 memset(data + original_nbytes, PYMEM_CLEANBYTE,
2864 nbytes - original_nbytes);
2865 }
2866
2867 return data;
2868 }
2869
2870 static inline void
2871 _PyMem_DebugCheckGIL(const char *func)
2872 {
2873 if (!PyGILState_Check()) {
2874 _Py_FatalErrorFunc(func,
2875 "Python memory allocator called "
2876 "without holding the GIL");
2877 }
2878 }
2879
2880 void *
2881 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2882 {
2883 _PyMem_DebugCheckGIL(__func__);
2884 return _PyMem_DebugRawMalloc(ctx, nbytes);
2885 }
2886
2887 void *
2888 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2889 {
2890 _PyMem_DebugCheckGIL(__func__);
2891 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2892 }
2893
2894
2895 void
2896 _PyMem_DebugFree(void *ctx, void *ptr)
2897 {
2898 _PyMem_DebugCheckGIL(__func__);
2899 _PyMem_DebugRawFree(ctx, ptr);
2900 }
2901
2902
2903 void *
2904 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2905 {
2906 _PyMem_DebugCheckGIL(__func__);
2907 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2908 }
2909
2910 /* Check the forbidden bytes on both ends of the memory allocated for p.
2911 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2912 * and call Py_FatalError to kill the program.
2913 * The API id is also checked.
2914 */
2915 static void
2916 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2917 {
2918 assert(p != NULL);
2919
2920 const uint8_t *q = (const uint8_t *)p;
2921 size_t nbytes;
2922 const uint8_t *tail;
2923 int i;
2924 char id;
2925
2926 /* Check the API id */
2927 id = (char)q[-SST];
2928 if (id != api) {
2929 _PyObject_DebugDumpAddress(p);
2930 _Py_FatalErrorFormat(func,
2931 "bad ID: Allocated using API '%c', "
2932 "verified using API '%c'",
2933 id, api);
2934 }
2935
2936 /* Check the stuff at the start of p first: if there's underwrite
2937 * corruption, the number-of-bytes field may be nuts, and checking
2938 * the tail could lead to a segfault then.
2939 */
2940 for (i = SST-1; i >= 1; --i) {
2941 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2942 _PyObject_DebugDumpAddress(p);
2943 _Py_FatalErrorFunc(func, "bad leading pad byte");
2944 }
2945 }
2946
2947 nbytes = read_size_t(q - 2*SST);
2948 tail = q + nbytes;
2949 for (i = 0; i < SST; ++i) {
2950 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2951 _PyObject_DebugDumpAddress(p);
2952 _Py_FatalErrorFunc(func, "bad trailing pad byte");
2953 }
2954 }
2955 }
2956
2957 /* Display info to stderr about the memory block at p. */
2958 static void
2959 _PyObject_DebugDumpAddress(const void *p)
2960 {
2961 const uint8_t *q = (const uint8_t *)p;
2962 const uint8_t *tail;
2963 size_t nbytes;
2964 int i;
2965 int ok;
2966 char id;
2967
2968 fprintf(stderr, "Debug memory block at address p=%p:", p);
2969 if (p == NULL) {
2970 fprintf(stderr, "\n");
2971 return;
2972 }
2973 id = (char)q[-SST];
2974 fprintf(stderr, " API '%c'\n", id);
2975
2976 nbytes = read_size_t(q - 2*SST);
2977 fprintf(stderr, " %zu bytes originally requested\n", nbytes);
2978
2979 /* In case this is nuts, check the leading pad bytes first. */
2980 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2981 ok = 1;
2982 for (i = 1; i <= SST-1; ++i) {
2983 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2984 ok = 0;
2985 break;
2986 }
2987 }
2988 if (ok)
2989 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2990 else {
2991 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2992 PYMEM_FORBIDDENBYTE);
2993 for (i = SST-1; i >= 1; --i) {
2994 const uint8_t byte = *(q-i);
2995 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2996 if (byte != PYMEM_FORBIDDENBYTE)
2997 fputs(" *** OUCH", stderr);
2998 fputc('\n', stderr);
2999 }
3000
3001 fputs(" Because memory is corrupted at the start, the "
3002 "count of bytes requested\n"
3003 " may be bogus, and checking the trailing pad "
3004 "bytes may segfault.\n", stderr);
3005 }
3006
3007 tail = q + nbytes;
3008 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail);
3009 ok = 1;
3010 for (i = 0; i < SST; ++i) {
3011 if (tail[i] != PYMEM_FORBIDDENBYTE) {
3012 ok = 0;
3013 break;
3014 }
3015 }
3016 if (ok)
3017 fputs("FORBIDDENBYTE, as expected.\n", stderr);
3018 else {
3019 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3020 PYMEM_FORBIDDENBYTE);
3021 for (i = 0; i < SST; ++i) {
3022 const uint8_t byte = tail[i];
3023 fprintf(stderr, " at tail+%d: 0x%02x",
3024 i, byte);
3025 if (byte != PYMEM_FORBIDDENBYTE)
3026 fputs(" *** OUCH", stderr);
3027 fputc('\n', stderr);
3028 }
3029 }
3030
3031 #ifdef PYMEM_DEBUG_SERIALNO
3032 size_t serial = read_size_t(tail + SST);
3033 fprintf(stderr,
3034 " The block was made by call #%zu to debug malloc/realloc.\n",
3035 serial);
3036 #endif
3037
3038 if (nbytes > 0) {
3039 i = 0;
3040 fputs(" Data at p:", stderr);
3041 /* print up to 8 bytes at the start */
3042 while (q < tail && i < 8) {
3043 fprintf(stderr, " %02x", *q);
3044 ++i;
3045 ++q;
3046 }
3047 /* and up to 8 at the end */
3048 if (q < tail) {
3049 if (tail - q > 8) {
3050 fputs(" ...", stderr);
3051 q = tail - 8;
3052 }
3053 while (q < tail) {
3054 fprintf(stderr, " %02x", *q);
3055 ++q;
3056 }
3057 }
3058 fputc('\n', stderr);
3059 }
3060 fputc('\n', stderr);
3061
3062 fflush(stderr);
3063 _PyMem_DumpTraceback(fileno(stderr), p);
3064 }
3065
3066
3067 static size_t
3068 printone(FILE *out, const char* msg, size_t value)
3069 {
3070 int i, k;
3071 char buf[100];
3072 size_t origvalue = value;
3073
3074 fputs(msg, out);
3075 for (i = (int)strlen(msg); i < 35; ++i)
3076 fputc(' ', out);
3077 fputc('=', out);
3078
3079 /* Write the value with commas. */
3080 i = 22;
3081 buf[i--] = '\0';
3082 buf[i--] = '\n';
3083 k = 3;
3084 do {
3085 size_t nextvalue = value / 10;
3086 unsigned int digit = (unsigned int)(value - nextvalue * 10);
3087 value = nextvalue;
3088 buf[i--] = (char)(digit + '0');
3089 --k;
3090 if (k == 0 && value && i >= 0) {
3091 k = 3;
3092 buf[i--] = ',';
3093 }
3094 } while (value && i >= 0);
3095
3096 while (i >= 0)
3097 buf[i--] = ' ';
3098 fputs(buf, out);
3099
3100 return origvalue;
3101 }
3102
3103 void
3104 _PyDebugAllocatorStats(FILE *out,
3105 const char *block_name, int num_blocks, size_t sizeof_block)
3106 {
3107 char buf1[128];
3108 char buf2[128];
3109 PyOS_snprintf(buf1, sizeof(buf1),
3110 "%d %ss * %zd bytes each",
3111 num_blocks, block_name, sizeof_block);
3112 PyOS_snprintf(buf2, sizeof(buf2),
3113 "%48s ", buf1);
3114 (void)printone(out, buf2, num_blocks * sizeof_block);
3115 }
3116
3117 // Return true if the obmalloc state structure is heap allocated
3118 // by PyMem_RawCalloc(). For the main interpreter, this structure
3119 // is allocated in the BSS. Allocating that way gives some memory savings
3120 // and a small performance win (at least on a demand paged OS). On
3121 // 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3122 // memory is for the arena_map_top array. Since normally only one entry
3123 // of that array is used, only one page of resident memory is actually
3124 // used, rather than the full 256 kB.
3125 bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3126 {
3127 #if WITH_PYMALLOC
3128 return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3129 #else
3130 return false;
3131 #endif
3132 }
3133
3134 #ifdef WITH_PYMALLOC
3135 static void
3136 init_obmalloc_pools(PyInterpreterState *interp)
3137 {
3138 // initialize the obmalloc->pools structure. This must be done
3139 // before the obmalloc alloc/free functions can be called.
3140 poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3141 _obmalloc_pools_INIT(interp->obmalloc->pools);
3142 memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3143 }
3144 #endif /* WITH_PYMALLOC */
3145
3146 int _PyMem_init_obmalloc(PyInterpreterState *interp)
3147 {
3148 #ifdef WITH_PYMALLOC
3149 /* Initialize obmalloc, but only for subinterpreters,
3150 since the main interpreter is initialized statically. */
3151 if (_Py_IsMainInterpreter(interp)
3152 || _PyInterpreterState_HasFeature(interp,
3153 Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3154 interp->obmalloc = &obmalloc_state_main;
3155 if (!obmalloc_state_initialized) {
3156 init_obmalloc_pools(interp);
3157 obmalloc_state_initialized = true;
3158 }
3159 } else {
3160 interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3161 if (interp->obmalloc == NULL) {
3162 return -1;
3163 }
3164 init_obmalloc_pools(interp);
3165 }
3166 #endif /* WITH_PYMALLOC */
3167 return 0; // success
3168 }
3169
3170
3171 #ifdef WITH_PYMALLOC
3172
3173 static void
3174 free_obmalloc_arenas(PyInterpreterState *interp)
3175 {
3176 OMState *state = interp->obmalloc;
3177 for (uint i = 0; i < maxarenas; ++i) {
3178 // free each obmalloc memory arena
3179 struct arena_object *ao = &allarenas[i];
3180 _PyObject_Arena.free(_PyObject_Arena.ctx,
3181 (void *)ao->address, ARENA_SIZE);
3182 }
3183 // free the array containing pointers to all arenas
3184 PyMem_RawFree(allarenas);
3185 #if WITH_PYMALLOC_RADIX_TREE
3186 #ifdef USE_INTERIOR_NODES
3187 // Free the middle and bottom nodes of the radix tree. These are allocated
3188 // by arena_map_mark_used() but not freed when arenas are freed.
3189 for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3190 arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3191 if (mid == NULL) {
3192 continue;
3193 }
3194 for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3195 arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3196 if (bot == NULL) {
3197 continue;
3198 }
3199 PyMem_RawFree(bot);
3200 }
3201 PyMem_RawFree(mid);
3202 }
3203 #endif
3204 #endif
3205 }
3206
3207 #ifdef Py_DEBUG
3208 /* Is target in the list? The list is traversed via the nextpool pointers.
3209 * The list may be NULL-terminated, or circular. Return 1 if target is in
3210 * list, else 0.
3211 */
3212 static int
3213 pool_is_in_list(const poolp target, poolp list)
3214 {
3215 poolp origlist = list;
3216 assert(target != NULL);
3217 if (list == NULL)
3218 return 0;
3219 do {
3220 if (target == list)
3221 return 1;
3222 list = list->nextpool;
3223 } while (list != NULL && list != origlist);
3224 return 0;
3225 }
3226 #endif
3227
3228 #ifdef WITH_MIMALLOC
3229 struct _alloc_stats {
3230 size_t allocated_blocks;
3231 size_t allocated_bytes;
3232 size_t allocated_with_overhead;
3233 size_t bytes_reserved;
3234 size_t bytes_committed;
3235 };
3236
3237 static bool _collect_alloc_stats(
3238 const mi_heap_t* heap, const mi_heap_area_t* area,
3239 void* block, size_t block_size, void* arg)
3240 {
3241 struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3242 stats->allocated_blocks += area->used;
3243 stats->allocated_bytes += area->used * area->block_size;
3244 stats->allocated_with_overhead += area->used * area->full_block_size;
3245 stats->bytes_reserved += area->reserved;
3246 stats->bytes_committed += area->committed;
3247 return 1;
3248 }
3249
3250 static void
3251 py_mimalloc_print_stats(FILE *out)
3252 {
3253 fprintf(out, "Small block threshold = %zd, in %u size classes.\n",
3254 MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3255 fprintf(out, "Medium block threshold = %zd\n",
3256 MI_MEDIUM_OBJ_SIZE_MAX);
3257 fprintf(out, "Large object max size = %zd\n",
3258 MI_LARGE_OBJ_SIZE_MAX);
3259
3260 mi_heap_t *heap = mi_heap_get_default();
3261 struct _alloc_stats stats;
3262 memset(&stats, 0, sizeof(stats));
3263 mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3264
3265 fprintf(out, " Allocated Blocks: %zd\n", stats.allocated_blocks);
3266 fprintf(out, " Allocated Bytes: %zd\n", stats.allocated_bytes);
3267 fprintf(out, " Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3268 fprintf(out, " Bytes Reserved: %zd\n", stats.bytes_reserved);
3269 fprintf(out, " Bytes Committed: %zd\n", stats.bytes_committed);
3270 }
3271 #endif
3272
3273
3274 static void
3275 pymalloc_print_stats(FILE *out)
3276 {
3277 OMState *state = get_state();
3278
3279 uint i;
3280 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3281 /* # of pools, allocated blocks, and free blocks per class index */
3282 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3283 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3284 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3285 /* total # of allocated bytes in used and full pools */
3286 size_t allocated_bytes = 0;
3287 /* total # of available bytes in used pools */
3288 size_t available_bytes = 0;
3289 /* # of free pools + pools not yet carved out of current arena */
3290 uint numfreepools = 0;
3291 /* # of bytes for arena alignment padding */
3292 size_t arena_alignment = 0;
3293 /* # of bytes in used and full pools used for pool_headers */
3294 size_t pool_header_bytes = 0;
3295 /* # of bytes in used and full pools wasted due to quantization,
3296 * i.e. the necessarily leftover space at the ends of used and
3297 * full pools.
3298 */
3299 size_t quantization = 0;
3300 /* # of arenas actually allocated. */
3301 size_t narenas = 0;
3302 /* running total -- should equal narenas * ARENA_SIZE */
3303 size_t total;
3304 char buf[128];
3305
3306 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3307 SMALL_REQUEST_THRESHOLD, numclasses);
3308
3309 for (i = 0; i < numclasses; ++i)
3310 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3311
3312 /* Because full pools aren't linked to from anything, it's easiest
3313 * to march over all the arenas. If we're lucky, most of the memory
3314 * will be living in full pools -- would be a shame to miss them.
3315 */
3316 for (i = 0; i < maxarenas; ++i) {
3317 uintptr_t base = allarenas[i].address;
3318
3319 /* Skip arenas which are not allocated. */
3320 if (allarenas[i].address == (uintptr_t)NULL)
3321 continue;
3322 narenas += 1;
3323
3324 numfreepools += allarenas[i].nfreepools;
3325
3326 /* round up to pool alignment */
3327 if (base & (uintptr_t)POOL_SIZE_MASK) {
3328 arena_alignment += POOL_SIZE;
3329 base &= ~(uintptr_t)POOL_SIZE_MASK;
3330 base += POOL_SIZE;
3331 }
3332
3333 /* visit every pool in the arena */
3334 assert(base <= (uintptr_t) allarenas[i].pool_address);
3335 for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3336 poolp p = (poolp)base;
3337 const uint sz = p->szidx;
3338 uint freeblocks;
3339
3340 if (p->ref.count == 0) {
3341 /* currently unused */
3342 #ifdef Py_DEBUG
3343 assert(pool_is_in_list(p, allarenas[i].freepools));
3344 #endif
3345 continue;
3346 }
3347 ++numpools[sz];
3348 numblocks[sz] += p->ref.count;
3349 freeblocks = NUMBLOCKS(sz) - p->ref.count;
3350 numfreeblocks[sz] += freeblocks;
3351 #ifdef Py_DEBUG
3352 if (freeblocks > 0)
3353 assert(pool_is_in_list(p, usedpools[sz + sz]));
3354 #endif
3355 }
3356 }
3357 assert(narenas == narenas_currently_allocated);
3358
3359 fputc('\n', out);
3360 fputs("class size num pools blocks in use avail blocks\n"
3361 "----- ---- --------- ------------- ------------\n",
3362 out);
3363
3364 for (i = 0; i < numclasses; ++i) {
3365 size_t p = numpools[i];
3366 size_t b = numblocks[i];
3367 size_t f = numfreeblocks[i];
3368 uint size = INDEX2SIZE(i);
3369 if (p == 0) {
3370 assert(b == 0 && f == 0);
3371 continue;
3372 }
3373 fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3374 i, size, p, b, f);
3375 allocated_bytes += b * size;
3376 available_bytes += f * size;
3377 pool_header_bytes += p * POOL_OVERHEAD;
3378 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3379 }
3380 fputc('\n', out);
3381 #ifdef PYMEM_DEBUG_SERIALNO
3382 if (_PyMem_DebugEnabled()) {
3383 (void)printone(out, "# times object malloc called", serialno);
3384 }
3385 #endif
3386 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3387 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3388 (void)printone(out, "# arenas highwater mark", narenas_highwater);
3389 (void)printone(out, "# arenas allocated current", narenas);
3390
3391 PyOS_snprintf(buf, sizeof(buf),
3392 "%zu arenas * %d bytes/arena",
3393 narenas, ARENA_SIZE);
3394 (void)printone(out, buf, narenas * ARENA_SIZE);
3395
3396 fputc('\n', out);
3397
3398 /* Account for what all of those arena bytes are being used for. */
3399 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3400 total += printone(out, "# bytes in available blocks", available_bytes);
3401
3402 PyOS_snprintf(buf, sizeof(buf),
3403 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3404 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3405
3406 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3407 total += printone(out, "# bytes lost to quantization", quantization);
3408 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3409 (void)printone(out, "Total", total);
3410 assert(narenas * ARENA_SIZE == total);
3411
3412 #if WITH_PYMALLOC_RADIX_TREE
3413 fputs("\narena map counts\n", out);
3414 #ifdef USE_INTERIOR_NODES
3415 (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3416 (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3417 fputc('\n', out);
3418 #endif
3419 total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3420 #ifdef USE_INTERIOR_NODES
3421 total += printone(out, "# bytes lost to arena map mid",
3422 sizeof(arena_map_mid_t) * arena_map_mid_count);
3423 total += printone(out, "# bytes lost to arena map bot",
3424 sizeof(arena_map_bot_t) * arena_map_bot_count);
3425 (void)printone(out, "Total", total);
3426 #endif
3427 #endif
3428
3429 }
3430
3431 /* Print summary info to "out" about the state of pymalloc's structures.
3432 * In Py_DEBUG mode, also perform some expensive internal consistency
3433 * checks.
3434 *
3435 * Return 0 if the memory debug hooks are not installed or no statistics were
3436 * written into out; return 1 otherwise.
3437 */
3438 int
3439 _PyObject_DebugMallocStats(FILE *out)
3440 {
3441 #ifdef WITH_MIMALLOC
3442 if (_PyMem_MimallocEnabled()) {
3443 py_mimalloc_print_stats(out);
3444 return 1;
3445 }
3446 else
3447 #endif
3448 if (_PyMem_PymallocEnabled()) {
3449 pymalloc_print_stats(out);
3450 return 1;
3451 }
3452 else {
3453 return 0;
3454 }
3455 }
3456
3457 #endif /* #ifdef WITH_PYMALLOC */
3458