1 #include "Python.h"
2 #include "pycore_pymem.h"         // _PyTraceMalloc_Config
3 
4 #include <stdbool.h>
5 
6 
7 /* Defined in tracemalloc.c */
8 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
9 
10 
11 /* Python's malloc wrappers (see pymem.h) */
12 
13 #undef  uint
14 #define uint    unsigned int    /* assuming >= 16 bits */
15 
16 /* Forward declaration */
17 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
18 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
19 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
20 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
21 
22 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
23 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
24 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
25 static void _PyMem_DebugFree(void *ctx, void *p);
26 
27 static void _PyObject_DebugDumpAddress(const void *p);
28 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
29 
30 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
31 
32 #if defined(__has_feature)  /* Clang */
33 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
34 #    define _Py_NO_SANITIZE_ADDRESS \
35         __attribute__((no_sanitize("address")))
36 #  endif
37 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
38 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
39 #  endif
40 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
41 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
42 #  endif
43 #elif defined(__GNUC__)
44 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
45 #    define _Py_NO_SANITIZE_ADDRESS \
46         __attribute__((no_sanitize_address))
47 #  endif
48    // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
49    // is provided only since GCC 7.
50 #  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
51 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
52 #  endif
53 #endif
54 
55 #ifndef _Py_NO_SANITIZE_ADDRESS
56 #  define _Py_NO_SANITIZE_ADDRESS
57 #endif
58 #ifndef _Py_NO_SANITIZE_THREAD
59 #  define _Py_NO_SANITIZE_THREAD
60 #endif
61 #ifndef _Py_NO_SANITIZE_MEMORY
62 #  define _Py_NO_SANITIZE_MEMORY
63 #endif
64 
65 #ifdef WITH_PYMALLOC
66 
67 #ifdef MS_WINDOWS
68 #  include <windows.h>
69 #elif defined(HAVE_MMAP)
70 #  include <sys/mman.h>
71 #  ifdef MAP_ANONYMOUS
72 #    define ARENAS_USE_MMAP
73 #  endif
74 #endif
75 
76 /* Forward declaration */
77 static void* _PyObject_Malloc(void *ctx, size_t size);
78 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
79 static void _PyObject_Free(void *ctx, void *p);
80 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
81 #endif
82 
83 
84 /* bpo-35053: Declare tracemalloc configuration here rather than
85    Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
86    library, whereas _Py_NewReference() requires it. */
87 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
88 
89 
90 static void *
91 _PyMem_RawMalloc(void *ctx, size_t size)
92 {
93     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
94        for malloc(0), which would be treated as an error. Some platforms would
95        return a pointer with no memory behind it, which would break pymalloc.
96        To solve these problems, allocate an extra byte. */
97     if (size == 0)
98         size = 1;
99     return malloc(size);
100 }
101 
102 static void *
103 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
104 {
105     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
106        for calloc(0, 0), which would be treated as an error. Some platforms
107        would return a pointer with no memory behind it, which would break
108        pymalloc.  To solve these problems, allocate an extra byte. */
109     if (nelem == 0 || elsize == 0) {
110         nelem = 1;
111         elsize = 1;
112     }
113     return calloc(nelem, elsize);
114 }
115 
116 static void *
117 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
118 {
119     if (size == 0)
120         size = 1;
121     return realloc(ptr, size);
122 }
123 
124 static void
125 _PyMem_RawFree(void *ctx, void *ptr)
126 {
127     free(ptr);
128 }
129 
130 
131 #ifdef MS_WINDOWS
132 static void *
133 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
134 {
135     return VirtualAlloc(NULL, size,
136                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
137 }
138 
139 static void
140 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
141 {
142     VirtualFree(ptr, 0, MEM_RELEASE);
143 }
144 
145 #elif defined(ARENAS_USE_MMAP)
146 static void *
147 _PyObject_ArenaMmap(void *ctx, size_t size)
148 {
149     void *ptr;
150     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
151                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
152     if (ptr == MAP_FAILED)
153         return NULL;
154     assert(ptr != NULL);
155     return ptr;
156 }
157 
158 static void
159 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
160 {
161     munmap(ptr, size);
162 }
163 
164 #else
165 static void *
166 _PyObject_ArenaMalloc(void *ctx, size_t size)
167 {
168     return malloc(size);
169 }
170 
171 static void
172 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
173 {
174     free(ptr);
175 }
176 #endif
177 
178 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
179 #ifdef WITH_PYMALLOC
180 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
181 #endif
182 
183 #define PYRAW_ALLOC MALLOC_ALLOC
184 #ifdef WITH_PYMALLOC
185 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
186 #else
187 #  define PYOBJ_ALLOC MALLOC_ALLOC
188 #endif
189 #define PYMEM_ALLOC PYOBJ_ALLOC
190 
191 typedef struct {
192     /* We tag each block with an API ID in order to tag API violations */
193     char api_id;
194     PyMemAllocatorEx alloc;
195 } debug_alloc_api_t;
196 static struct {
197     debug_alloc_api_t raw;
198     debug_alloc_api_t mem;
199     debug_alloc_api_t obj;
200 } _PyMem_Debug = {
201     {'r', PYRAW_ALLOC},
202     {'m', PYMEM_ALLOC},
203     {'o', PYOBJ_ALLOC}
204     };
205 
206 #define PYDBGRAW_ALLOC \
207     {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
208 #define PYDBGMEM_ALLOC \
209     {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
210 #define PYDBGOBJ_ALLOC \
211     {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212 
213 #ifdef Py_DEBUG
214 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
215 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
216 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
217 #else
218 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
219 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
220 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
221 #endif
222 
223 
224 static int
225 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
226                             PyMemAllocatorEx *old_alloc)
227 {
228     if (old_alloc != NULL) {
229         PyMem_GetAllocator(domain, old_alloc);
230     }
231 
232 
233     PyMemAllocatorEx new_alloc;
234     switch(domain)
235     {
236     case PYMEM_DOMAIN_RAW:
237         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
238         break;
239     case PYMEM_DOMAIN_MEM:
240         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
241         break;
242     case PYMEM_DOMAIN_OBJ:
243         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
244         break;
245     default:
246         /* unknown domain */
247         return -1;
248     }
249     PyMem_SetAllocator(domain, &new_alloc);
250     if (debug) {
251         _PyMem_SetupDebugHooksDomain(domain);
252     }
253     return 0;
254 }
255 
256 
257 int
258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259                            PyMemAllocatorEx *old_alloc)
260 {
261 #ifdef Py_DEBUG
262     const int debug = 1;
263 #else
264     const int debug = 0;
265 #endif
266     return pymem_set_default_allocator(domain, debug, old_alloc);
267 }
268 
269 
270 int
271 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
272 {
273     if (name == NULL || *name == '\0') {
274         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
275            options): use default memory allocators */
276         *allocator = PYMEM_ALLOCATOR_DEFAULT;
277     }
278     else if (strcmp(name, "default") == 0) {
279         *allocator = PYMEM_ALLOCATOR_DEFAULT;
280     }
281     else if (strcmp(name, "debug") == 0) {
282         *allocator = PYMEM_ALLOCATOR_DEBUG;
283     }
284 #ifdef WITH_PYMALLOC
285     else if (strcmp(name, "pymalloc") == 0) {
286         *allocator = PYMEM_ALLOCATOR_PYMALLOC;
287     }
288     else if (strcmp(name, "pymalloc_debug") == 0) {
289         *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
290     }
291 #endif
292     else if (strcmp(name, "malloc") == 0) {
293         *allocator = PYMEM_ALLOCATOR_MALLOC;
294     }
295     else if (strcmp(name, "malloc_debug") == 0) {
296         *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
297     }
298     else {
299         /* unknown allocator */
300         return -1;
301     }
302     return 0;
303 }
304 
305 
306 int
307 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
308 {
309     switch (allocator) {
310     case PYMEM_ALLOCATOR_NOT_SET:
311         /* do nothing */
312         break;
313 
314     case PYMEM_ALLOCATOR_DEFAULT:
315         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
316         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
317         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
318         break;
319 
320     case PYMEM_ALLOCATOR_DEBUG:
321         (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
322         (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
323         (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
324         break;
325 
326 #ifdef WITH_PYMALLOC
327     case PYMEM_ALLOCATOR_PYMALLOC:
328     case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
329     {
330         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
331         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
332 
333         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
334         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
335         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
336 
337         if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
338             PyMem_SetupDebugHooks();
339         }
340         break;
341     }
342 #endif
343 
344     case PYMEM_ALLOCATOR_MALLOC:
345     case PYMEM_ALLOCATOR_MALLOC_DEBUG:
346     {
347         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
348         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
349         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
350         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
351 
352         if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
353             PyMem_SetupDebugHooks();
354         }
355         break;
356     }
357 
358     default:
359         /* unknown allocator */
360         return -1;
361     }
362     return 0;
363 }
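/* Roughly how the two helpers above chain together when PYTHONMALLOC is
 * processed during interpreter preconfiguration (simplified sketch; the
 * real call sites live in the initialization code, not in this file):
 *
 *     PyMemAllocatorName name;
 *     if (_PyMem_GetAllocatorName(getenv("PYTHONMALLOC"), &name) < 0) {
 *         // unknown allocator name: report an error to the user
 *     }
 *     else {
 *         (void)_PyMem_SetupAllocators(name);
 *     }
 */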
364 
365 
366 static int
367 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
368 {
369     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
370 }
371 
372 
373 const char*
374 _PyMem_GetCurrentAllocatorName(void)
375 {
376     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
377 #ifdef WITH_PYMALLOC
378     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
379 #endif
380 
381     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
382         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
383         pymemallocator_eq(&_PyObject, &malloc_alloc))
384     {
385         return "malloc";
386     }
387 #ifdef WITH_PYMALLOC
388     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
389         pymemallocator_eq(&_PyMem, &pymalloc) &&
390         pymemallocator_eq(&_PyObject, &pymalloc))
391     {
392         return "pymalloc";
393     }
394 #endif
395 
396     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
397     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
398     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
399 
400     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
401         pymemallocator_eq(&_PyMem, &dbg_mem) &&
402         pymemallocator_eq(&_PyObject, &dbg_obj))
403     {
404         /* Debug hooks installed */
405         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
406             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
407             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
408         {
409             return "malloc_debug";
410         }
411 #ifdef WITH_PYMALLOC
412         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
413             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
414             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
415         {
416             return "pymalloc_debug";
417         }
418 #endif
419     }
420     return NULL;
421 }
422 
423 
424 #undef MALLOC_ALLOC
425 #undef PYMALLOC_ALLOC
426 #undef PYRAW_ALLOC
427 #undef PYMEM_ALLOC
428 #undef PYOBJ_ALLOC
429 #undef PYDBGRAW_ALLOC
430 #undef PYDBGMEM_ALLOC
431 #undef PYDBGOBJ_ALLOC
432 
433 
434 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
435 #ifdef MS_WINDOWS
436     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
437 #elif defined(ARENAS_USE_MMAP)
438     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
439 #else
440     _PyObject_ArenaMalloc, _PyObject_ArenaFree
441 #endif
442     };
443 
444 #ifdef WITH_PYMALLOC
445 static int
446 _PyMem_DebugEnabled(void)
447 {
448     return (_PyObject.malloc == _PyMem_DebugMalloc);
449 }
450 
451 static int
452 _PyMem_PymallocEnabled(void)
453 {
454     if (_PyMem_DebugEnabled()) {
455         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
456     }
457     else {
458         return (_PyObject.malloc == _PyObject_Malloc);
459     }
460 }
461 #endif
462 
463 
464 static void
465 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
466 {
467     PyMemAllocatorEx alloc;
468 
469     if (domain == PYMEM_DOMAIN_RAW) {
470         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
471             return;
472         }
473 
474         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
475         alloc.ctx = &_PyMem_Debug.raw;
476         alloc.malloc = _PyMem_DebugRawMalloc;
477         alloc.calloc = _PyMem_DebugRawCalloc;
478         alloc.realloc = _PyMem_DebugRawRealloc;
479         alloc.free = _PyMem_DebugRawFree;
480         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
481     }
482     else if (domain == PYMEM_DOMAIN_MEM) {
483         if (_PyMem.malloc == _PyMem_DebugMalloc) {
484             return;
485         }
486 
487         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
488         alloc.ctx = &_PyMem_Debug.mem;
489         alloc.malloc = _PyMem_DebugMalloc;
490         alloc.calloc = _PyMem_DebugCalloc;
491         alloc.realloc = _PyMem_DebugRealloc;
492         alloc.free = _PyMem_DebugFree;
493         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
494     }
495     else if (domain == PYMEM_DOMAIN_OBJ)  {
496         if (_PyObject.malloc == _PyMem_DebugMalloc) {
497             return;
498         }
499 
500         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
501         alloc.ctx = &_PyMem_Debug.obj;
502         alloc.malloc = _PyMem_DebugMalloc;
503         alloc.calloc = _PyMem_DebugCalloc;
504         alloc.realloc = _PyMem_DebugRealloc;
505         alloc.free = _PyMem_DebugFree;
506         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
507     }
508 }
509 
510 
511 void
512 PyMem_SetupDebugHooks(void)
513 {
514     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
515     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
516     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
517 }
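/* Once the hooks are installed, the previous allocator of each domain is
 * wrapped rather than replaced.  For the "obj" domain with pymalloc in
 * use, a request flows approximately like this:
 *
 *     PyObject_Malloc(n)
 *       -> _PyMem_DebugMalloc(&_PyMem_Debug.obj, n)    adds debug padding,
 *                                                      checks the API id
 *         -> _PyObject_Malloc(ctx, n + padding)        saved in .obj.alloc
 *
 * because _PyMem_SetupDebugHooksDomain() first saves the current allocator
 * into _PyMem_Debug.<domain>.alloc and only then redirects the domain to
 * the _PyMem_Debug* wrappers.
 */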
518 
519 void
520 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
521 {
522     switch(domain)
523     {
524     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
525     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
526     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
527     default:
528         /* unknown domain: set all attributes to NULL */
529         allocator->ctx = NULL;
530         allocator->malloc = NULL;
531         allocator->calloc = NULL;
532         allocator->realloc = NULL;
533         allocator->free = NULL;
534     }
535 }
536 
537 void
538 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
539 {
540     switch(domain)
541     {
542     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
543     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
544     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
545     /* ignore unknown domain */
546     }
547 }
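/* Example of the intended Get/Set pattern for an embedder that wants to
 * observe allocations (a sketch only; the counting_* names are
 * hypothetical, and such hooks are normally installed early, before the
 * interpreter starts allocating through the domain being wrapped):
 *
 *     static PyMemAllocatorEx prev_mem;
 *
 *     static void *
 *     counting_malloc(void *ctx, size_t size)
 *     {
 *         // update statistics here, then forward to the previous allocator
 *         return prev_mem.malloc(prev_mem.ctx, size);
 *     }
 *
 *     // counting_calloc/realloc/free are written the same way
 *     PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &prev_mem);
 *     PyMemAllocatorEx hook = {NULL, counting_malloc, counting_calloc,
 *                              counting_realloc, counting_free};
 *     PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &hook);
 */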
548 
549 void
550 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
551 {
552     *allocator = _PyObject_Arena;
553 }
554 
555 void
556 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
557 {
558     _PyObject_Arena = *allocator;
559 }
560 
561 void *
562 PyMem_RawMalloc(size_t size)
563 {
564     /*
565      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
566      * Most python internals blindly use a signed Py_ssize_t to track
567      * things without checking for overflows or negatives.
568      * As size_t is unsigned, checking for size < 0 is not required.
569      */
570     if (size > (size_t)PY_SSIZE_T_MAX)
571         return NULL;
572     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
573 }
574 
575 void *
576 PyMem_RawCalloc(size_t nelem, size_t elsize)
577 {
578     /* see PyMem_RawMalloc() */
579     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
580         return NULL;
581     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
582 }
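/* The division form of the overflow guard above is exact: for elsize > 0,
 * nelem > PY_SSIZE_T_MAX / elsize (integer division) holds exactly when
 * nelem * elsize would exceed PY_SSIZE_T_MAX.  For example, on a 64-bit
 * build (PY_SSIZE_T_MAX == 2**63 - 1), nelem == 2**40 with elsize == 2**22
 * passes, while elsize == 2**23 is rejected before the multiplication in
 * calloc() could wrap around.
 */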
583 
584 void*
585 PyMem_RawRealloc(void *ptr, size_t new_size)
586 {
587     /* see PyMem_RawMalloc() */
588     if (new_size > (size_t)PY_SSIZE_T_MAX)
589         return NULL;
590     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
591 }
592 
593 void PyMem_RawFree(void *ptr)
594 {
595     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
596 }
597 
598 
599 void *
600 PyMem_Malloc(size_t size)
601 {
602     /* see PyMem_RawMalloc() */
603     if (size > (size_t)PY_SSIZE_T_MAX)
604         return NULL;
605     return _PyMem.malloc(_PyMem.ctx, size);
606 }
607 
608 void *
609 PyMem_Calloc(size_t nelem, size_t elsize)
610 {
611     /* see PyMem_RawMalloc() */
612     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
613         return NULL;
614     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
615 }
616 
617 void *
618 PyMem_Realloc(void *ptr, size_t new_size)
619 {
620     /* see PyMem_RawMalloc() */
621     if (new_size > (size_t)PY_SSIZE_T_MAX)
622         return NULL;
623     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
624 }
625 
626 void
627 PyMem_Free(void *ptr)
628 {
629     _PyMem.free(_PyMem.ctx, ptr);
630 }
631 
632 
633 wchar_t*
634 _PyMem_RawWcsdup(const wchar_t *str)
635 {
636     assert(str != NULL);
637 
638     size_t len = wcslen(str);
639     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
640         return NULL;
641     }
642 
643     size_t size = (len + 1) * sizeof(wchar_t);
644     wchar_t *str2 = PyMem_RawMalloc(size);
645     if (str2 == NULL) {
646         return NULL;
647     }
648 
649     memcpy(str2, str, size);
650     return str2;
651 }
652 
653 char *
654 _PyMem_RawStrdup(const char *str)
655 {
656     assert(str != NULL);
657     size_t size = strlen(str) + 1;
658     char *copy = PyMem_RawMalloc(size);
659     if (copy == NULL) {
660         return NULL;
661     }
662     memcpy(copy, str, size);
663     return copy;
664 }
665 
666 char *
667 _PyMem_Strdup(const char *str)
668 {
669     assert(str != NULL);
670     size_t size = strlen(str) + 1;
671     char *copy = PyMem_Malloc(size);
672     if (copy == NULL) {
673         return NULL;
674     }
675     memcpy(copy, str, size);
676     return copy;
677 }
678 
679 void *
680 PyObject_Malloc(size_t size)
681 {
682     /* see PyMem_RawMalloc() */
683     if (size > (size_t)PY_SSIZE_T_MAX)
684         return NULL;
685     return _PyObject.malloc(_PyObject.ctx, size);
686 }
687 
688 void *
689 PyObject_Calloc(size_t nelem, size_t elsize)
690 {
691     /* see PyMem_RawMalloc() */
692     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
693         return NULL;
694     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
695 }
696 
697 void *
698 PyObject_Realloc(void *ptr, size_t new_size)
699 {
700     /* see PyMem_RawMalloc() */
701     if (new_size > (size_t)PY_SSIZE_T_MAX)
702         return NULL;
703     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
704 }
705 
706 void
707 PyObject_Free(void *ptr)
708 {
709     _PyObject.free(_PyObject.ctx, ptr);
710 }
711 
712 
713 /* If we're using GCC, use __builtin_expect() to reduce overhead of
714    the valgrind checks */
715 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
716 #  define UNLIKELY(value) __builtin_expect((value), 0)
717 #  define LIKELY(value) __builtin_expect((value), 1)
718 #else
719 #  define UNLIKELY(value) (value)
720 #  define LIKELY(value) (value)
721 #endif
722 
723 #ifdef WITH_PYMALLOC
724 
725 #ifdef WITH_VALGRIND
726 #include <valgrind/valgrind.h>
727 
728 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
729 static int running_on_valgrind = -1;
730 #endif
731 
732 
733 /* An object allocator for Python.
734 
735    Here is an introduction to the layers of the Python memory architecture,
736    showing where the object allocator is actually used (layer +2). It is
737    called for every object allocation and deallocation (PyObject_New/Del),
738    unless the object-specific allocators implement a proprietary allocation
739    scheme (ex.: ints use a simple free list). This is also the place where
740    the cyclic garbage collector operates selectively on container objects.
741 
742 
743     Object-specific allocators
744     _____   ______   ______       ________
745    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
746 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
747     _______________________________       |                           |
748    [   Python's object allocator   ]      |                           |
749 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
750     ______________________________________________________________    |
751    [          Python's raw memory allocator (PyMem_ API)          ]   |
752 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
753     __________________________________________________________________
754    [    Underlying general-purpose allocator (ex: C library malloc)   ]
755  0 | <------ Virtual memory allocated for the python process -------> |
756 
757    =========================================================================
758     _______________________________________________________________________
759    [                OS-specific Virtual Memory Manager (VMM)               ]
760 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
761     __________________________________   __________________________________
762    [                                  ] [                                  ]
763 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
764 
765 */
766 /*==========================================================================*/
767 
768 /* A fast, special-purpose memory allocator for small blocks, to be used
769    on top of a general-purpose malloc -- heavily based on previous art. */
770 
771 /* Vladimir Marangozov -- August 2000 */
772 
773 /*
774  * "Memory management is where the rubber meets the road -- if we do the wrong
775  * thing at any level, the results will not be good. And if we don't make the
776  * levels work well together, we are in serious trouble." (1)
777  *
778  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
779  *    "Dynamic Storage Allocation: A Survey and Critical Review",
780  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
781  */
782 
783 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
784 
785 /*==========================================================================*/
786 
787 /*
788  * Allocation strategy abstract:
789  *
790  * For small requests, the allocator sub-allocates <Big> blocks of memory.
791  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
792  * system's allocator.
793  *
794  * Small requests are grouped in size classes spaced ALIGNMENT (8 or 16)
795  * bytes apart, to satisfy the required alignment of the returned address.
796  * Requests of a given size are serviced from pools of POOL_SIZE bytes.
797  * Pools are fragmented on demand and contain free lists of blocks of one
798  * particular size class. In other words, there is a fixed-size allocator
799  * for each size class. Free pools are shared by the different allocators
800  * thus minimizing the space reserved for a particular size class.
801  *
802  * This allocation strategy is a variant of what is known as "simple
803  * segregated storage based on array of free lists". The main drawback of
804  * simple segregated storage is that we might end up with a lot of reserved
805  * memory for the different free lists, which degenerates over time. To avoid
806  * this, we partition each free list in pools and we share dynamically the
807  * reserved space between all free lists. This technique is quite efficient
808  * for memory intensive programs which allocate mainly small-sized blocks.
809  *
810  * For small requests we have the following table:
811  *
812  * Request in bytes     Size of allocated block      Size class idx
813  * ----------------------------------------------------------------
814  *        1-8                     8                       0
815  *        9-16                   16                       1
816  *       17-24                   24                       2
817  *       25-32                   32                       3
818  *       33-40                   40                       4
819  *       41-48                   48                       5
820  *       49-56                   56                       6
821  *       57-64                   64                       7
822  *       65-72                   72                       8
823  *        ...                   ...                     ...
824  *      497-504                 504                      62
825  *      505-512                 512                      63
826  *
827  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
828  *      allocator.
829  */
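/* Worked example of the mapping above: the allocator computes the size
 * class index as (nbytes - 1) >> ALIGNMENT_SHIFT, and INDEX2SIZE() (both
 * defined below) maps it back to the block size actually handed out:
 *
 *     request of 20 bytes, ALIGNMENT 8  (32-bit build):
 *         idx = (20 - 1) >> 3 = 2   ->  24-byte block
 *     request of 20 bytes, ALIGNMENT 16 (64-bit build):
 *         idx = (20 - 1) >> 4 = 1   ->  32-byte block
 *
 * The table shows the 8-byte-aligned case; with 16-byte alignment the
 * classes are spaced 16 bytes apart and there are half as many of them.
 */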
830 
831 /*==========================================================================*/
832 
833 /*
834  * -- Main tunable settings section --
835  */
836 
837 /*
838  * Alignment of addresses returned to the user. 8-bytes alignment works
839  * on most current architectures (with 32-bit or 64-bit address buses).
840  * The alignment value is also used for grouping small requests in size
841  * classes spaced ALIGNMENT bytes apart.
842  *
843  * You shouldn't change this unless you know what you are doing.
844  */
845 
846 #if SIZEOF_VOID_P > 4
847 #define ALIGNMENT              16               /* must be 2^N */
848 #define ALIGNMENT_SHIFT         4
849 #else
850 #define ALIGNMENT               8               /* must be 2^N */
851 #define ALIGNMENT_SHIFT         3
852 #endif
853 
854 /* Return the number of bytes in size class I, as a uint. */
855 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
856 
857 /*
858  * Max size threshold below which malloc requests are considered to be
859  * small enough in order to use preallocated memory pools. You can tune
860  * this value according to your application behaviour and memory needs.
861  *
862  * Note: a size threshold of 512 guarantees that newly created dictionaries
863  * will be allocated from preallocated memory pools on 64-bit.
864  *
865  * The following invariants must hold:
866  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
867  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
868  *
869  * Although not required, for better performance and space efficiency,
870  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
871  */
872 #define SMALL_REQUEST_THRESHOLD 512
873 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
874 
875 /*
876  * The system's VMM page size can be obtained on most unices with a
877  * getpagesize() call or deduced from various header files. To make
878  * things simpler, we assume that it is 4K, which is OK for most systems.
879  * It is probably better if this is the native page size, but it doesn't
880  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
881  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
882  * violation fault.  4K is apparently OK for all the platforms that python
883  * currently targets.
884  */
885 #define SYSTEM_PAGE_SIZE        (4 * 1024)
886 #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
887 
888 /*
889  * Maximum amount of memory managed by the allocator for small requests.
890  */
891 #ifdef WITH_MEMORY_LIMITS
892 #ifndef SMALL_MEMORY_LIMIT
893 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
894 #endif
895 #endif
896 
897 #if !defined(WITH_PYMALLOC_RADIX_TREE)
898 /* Use radix-tree to track arena memory regions, for address_in_range().
899  * Enable by default since it allows larger pool sizes.  Can be disabled
900  * using -DWITH_PYMALLOC_RADIX_TREE=0 */
901 #define WITH_PYMALLOC_RADIX_TREE 1
902 #endif
903 
904 #if SIZEOF_VOID_P > 4
905 /* on 64-bit platforms use larger pools and arenas if we can */
906 #define USE_LARGE_ARENAS
907 #if WITH_PYMALLOC_RADIX_TREE
908 /* large pools only supported if radix-tree is enabled */
909 #define USE_LARGE_POOLS
910 #endif
911 #endif
912 
913 /*
914  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
915  * on a page boundary. This is a reserved virtual address space for the
916  * current process (obtained through a malloc()/mmap() call). In no way does
917  * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
918  * usually an address range reservation for <Big> bytes, unless all pages within
919  * this space are referenced subsequently. So malloc'ing big blocks and not
920  * using them does not mean "wasting memory". It's an addressable range
921  * wastage...
922  *
923  * Arenas are allocated with mmap() on systems supporting anonymous memory
924  * mappings to reduce heap fragmentation.
925  */
926 #ifdef USE_LARGE_ARENAS
927 #define ARENA_BITS              20                    /* 1 MiB */
928 #else
929 #define ARENA_BITS              18                    /* 256 KiB */
930 #endif
931 #define ARENA_SIZE              (1 << ARENA_BITS)
932 #define ARENA_SIZE_MASK         (ARENA_SIZE - 1)
933 
934 #ifdef WITH_MEMORY_LIMITS
935 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
936 #endif
937 
938 /*
939  * Size of the pools used for small blocks.  Must be a power of 2.
940  */
941 #ifdef USE_LARGE_POOLS
942 #define POOL_BITS               14                  /* 16 KiB */
943 #else
944 #define POOL_BITS               12                  /* 4 KiB */
945 #endif
946 #define POOL_SIZE               (1 << POOL_BITS)
947 #define POOL_SIZE_MASK          (POOL_SIZE - 1)
948 
949 #if !WITH_PYMALLOC_RADIX_TREE
950 #if POOL_SIZE != SYSTEM_PAGE_SIZE
951 #   error "pool size must be equal to system page size"
952 #endif
953 #endif
954 
955 #define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
956 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
957 #   error "arena size not an exact multiple of pool size"
958 #endif
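/* For the configurations selected above this works out to:
 *
 *     64-bit, radix tree enabled:  ARENA_SIZE = 1 MiB, POOL_SIZE = 16 KiB
 *                                  -> MAX_POOLS_IN_ARENA = 64
 *     64-bit, radix tree disabled: ARENA_SIZE = 1 MiB, POOL_SIZE = 4 KiB
 *                                  -> MAX_POOLS_IN_ARENA = 256
 *     32-bit:                      ARENA_SIZE = 256 KiB, POOL_SIZE = 4 KiB
 *                                  -> MAX_POOLS_IN_ARENA = 64
 */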
959 
960 /*
961  * -- End of tunable settings section --
962  */
963 
964 /*==========================================================================*/
965 
966 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
967 typedef uint8_t block;
968 
969 /* Pool for small blocks. */
970 struct pool_header {
971     union { block *_padding;
972             uint count; } ref;          /* number of allocated blocks    */
973     block *freeblock;                   /* pool's free list head         */
974     struct pool_header *nextpool;       /* next pool of this size class  */
975     struct pool_header *prevpool;       /* previous pool       ""        */
976     uint arenaindex;                    /* index into arenas of base adr */
977     uint szidx;                         /* block size class index        */
978     uint nextoffset;                    /* bytes to virgin block         */
979     uint maxnextoffset;                 /* largest valid nextoffset      */
980 };
981 
982 typedef struct pool_header *poolp;
983 
984 /* Record keeping for arenas. */
985 struct arena_object {
986     /* The address of the arena, as returned by malloc.  Note that 0
987      * will never be returned by a successful malloc, and is used
988      * here to mark an arena_object that doesn't correspond to an
989      * allocated arena.
990      */
991     uintptr_t address;
992 
993     /* Pool-aligned pointer to the next pool to be carved off. */
994     block* pool_address;
995 
996     /* The number of available pools in the arena:  free pools + never-
997      * allocated pools.
998      */
999     uint nfreepools;
1000 
1001     /* The total number of pools in the arena, whether or not available. */
1002     uint ntotalpools;
1003 
1004     /* Singly-linked list of available pools. */
1005     struct pool_header* freepools;
1006 
1007     /* Whenever this arena_object is not associated with an allocated
1008      * arena, the nextarena member is used to link all unassociated
1009      * arena_objects in the singly-linked `unused_arena_objects` list.
1010      * The prevarena member is unused in this case.
1011      *
1012      * When this arena_object is associated with an allocated arena
1013      * with at least one available pool, both members are used in the
1014      * doubly-linked `usable_arenas` list, which is maintained in
1015      * increasing order of `nfreepools` values.
1016      *
1017      * Else this arena_object is associated with an allocated arena
1018      * all of whose pools are in use.  `nextarena` and `prevarena`
1019      * are both meaningless in this case.
1020      */
1021     struct arena_object* nextarena;
1022     struct arena_object* prevarena;
1023 };
1024 
1025 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1026 
1027 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
1028 
1029 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1030 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1031 
1032 /* Return total number of blocks in pool of size index I, as a uint. */
1033 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
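/* Rough example, assuming sizeof(struct pool_header) rounds up to 48 bytes
 * as on a typical 64-bit build: with 4 KiB pools the smallest size class
 * (16-byte blocks) gives NUMBLOCKS(0) = (4096 - 48) / 16 = 253 blocks per
 * pool, and the largest class (512-byte blocks) gives 7 blocks.  With the
 * 16 KiB pools used when the radix tree is enabled, those counts become
 * 1021 and 31 respectively.
 */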
1034 
1035 /*==========================================================================*/
1036 
1037 /*
1038  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1039 
1040 This is involved.  For an index i, usedpools[i+i] is the header for a list of
1041 all partially used pools holding small blocks with "size class idx" i. So
1042 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1043 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1044 
1045 Pools are carved off an arena's highwater mark (an arena_object's pool_address
1046 member) as needed.  Once carved off, a pool is in one of three states forever
1047 after:
1048 
1049 used == partially used, neither empty nor full
1050     At least one block in the pool is currently allocated, and at least one
1051     block in the pool is not currently allocated (note this implies a pool
1052     has room for at least two blocks).
1053     This is a pool's initial state, as a pool is created only when malloc
1054     needs space.
1055     The pool holds blocks of a fixed size, and is in the circular list headed
1056     at usedpools[i] (see above).  It's linked to the other used pools of the
1057     same size class via the pool_header's nextpool and prevpool members.
1058     If all but one block is currently allocated, a malloc can cause a
1059     transition to the full state.  If all but one block is not currently
1060     allocated, a free can cause a transition to the empty state.
1061 
1062 full == all the pool's blocks are currently allocated
1063     On transition to full, a pool is unlinked from its usedpools[] list.
1064     It's not linked to from anything then anymore, and its nextpool and
1065     prevpool members are meaningless until it transitions back to used.
1066     A free of a block in a full pool puts the pool back in the used state.
1067     Then it's linked in at the front of the appropriate usedpools[] list, so
1068     that the next allocation for its size class will reuse the freed block.
1069 
1070 empty == all the pool's blocks are currently available for allocation
1071     On transition to empty, a pool is unlinked from its usedpools[] list,
1072     and linked to the front of its arena_object's singly-linked freepools list,
1073     via its nextpool member.  The prevpool member has no meaning in this case.
1074     Empty pools have no inherent size class:  the next time a malloc finds
1075     an empty list in usedpools[], it takes the first pool off of freepools.
1076     If the size class needed happens to be the same as the size class the pool
1077     last had, some pool initialization can be skipped.
1078 
1079 
1080 Block Management
1081 
1082 Blocks within pools are again carved out as needed.  pool->freeblock points to
1083 the start of a singly-linked list of free blocks within the pool.  When a
1084 block is freed, it's inserted at the front of its pool's freeblock list.  Note
1085 that the available blocks in a pool are *not* linked all together when a pool
1086 is initialized.  Instead only "the first two" (lowest addresses) blocks are
1087 set up, returning the first such block, and setting pool->freeblock to a
1088 one-block list holding the second such block.  This reflects the fact that
1089 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1090 of memory until it's actually needed.
1091 
1092 So long as a pool is in the used state, we're certain there *is* a block
1093 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1094 points to the end of the free list before we've carved the entire pool into
1095 blocks, that means we simply haven't yet gotten to one of the higher-address
1096 blocks.  The offset from the pool_header to the start of "the next" virgin
1097 block is stored in the pool_header nextoffset member, and the largest value
1098 of nextoffset that makes sense is stored in the maxnextoffset member when a
1099 pool is initialized.  All the blocks in a pool have been passed out at least
1100 once when and only when nextoffset > maxnextoffset.
1101 
1102 
1103 Major obscurity:  While the usedpools vector is declared to have poolp
1104 entries, it doesn't really.  It really contains two pointers per (conceptual)
1105 poolp entry, the nextpool and prevpool members of a pool_header.  The
1106 excruciating initialization code below fools C so that
1107 
1108     usedpools[i+i]
1109 
1110 "acts like" a genuine poolp, but only so long as you only reference its
1111 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1112 compensating for the fact that a pool_header's nextpool and prevpool members
1113 immediately follow a pool_header's first two members:
1114 
1115     union { block *_padding;
1116             uint count; } ref;
1117     block *freeblock;
1118 
1119 each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1120 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1121 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1122 circular list is empty).
1123 
1124 It's unclear why the usedpools setup is so convoluted.  It could be to
1125 minimize the amount of cache required to hold this heavily-referenced table
1126 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1127 referencing code has to remember to "double the index" and doing so isn't
1128 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1129 on C not inserting any padding anywhere in a pool_header at or before
1130 the prevpool member.
1131 **************************************************************************** */
1132 
1133 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1134 #define PT(x)   PTA(x), PTA(x)
1135 
1136 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1137     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1138 #if NB_SMALL_SIZE_CLASSES > 8
1139     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1140 #if NB_SMALL_SIZE_CLASSES > 16
1141     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1142 #if NB_SMALL_SIZE_CLASSES > 24
1143     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1144 #if NB_SMALL_SIZE_CLASSES > 32
1145     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1146 #if NB_SMALL_SIZE_CLASSES > 40
1147     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1148 #if NB_SMALL_SIZE_CLASSES > 48
1149     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1150 #if NB_SMALL_SIZE_CLASSES > 56
1151     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1152 #if NB_SMALL_SIZE_CLASSES > 64
1153 #error "NB_SMALL_SIZE_CLASSES should not be more than 64"
1154 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1155 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1156 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1157 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1158 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1159 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1160 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1161 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
1162 };
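/* Simplified sketch of how the table above is consulted on allocation (the
 * real logic lives in the allocator functions further down in this file):
 *
 *     uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
 *     poolp pool = usedpools[size + size];
 *     if (pool != pool->nextpool) {
 *         // a partially used pool of this size class exists: hand out a
 *         // block from its freeblock list
 *     }
 *     else {
 *         // the headed list is empty (the header points at itself):
 *         // carve a new pool off an arena
 *     }
 */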
1163 
1164 /*==========================================================================
1165 Arena management.
1166 
1167 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1168 which may not be currently used (== they're arena_objects that aren't
1169 currently associated with an allocated arena).  Note that arenas proper are
1170 separately malloc'ed.
1171 
1172 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1173 we do try to free() arenas, and use some mild heuristic strategies to increase
1174 the likelihood that arenas eventually can be freed.
1175 
1176 unused_arena_objects
1177 
1178     This is a singly-linked list of the arena_objects that are currently not
1179     being used (no arena is associated with them).  Objects are taken off the
1180     head of the list in new_arena(), and are pushed on the head of the list in
1181     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1182     is on this list if and only if its .address member is 0.
1183 
1184 usable_arenas
1185 
1186     This is a doubly-linked list of the arena_objects associated with arenas
1187     that have pools available.  These pools are either waiting to be reused,
1188     or have not been used before.  The list is sorted to have the most-
1189     allocated arenas first (ascending order based on the nfreepools member).
1190     This means that the next allocation will come from a heavily used arena,
1191     which gives the nearly empty arenas a chance to be returned to the system.
1192     In my unscientific tests this dramatically improved the number of arenas
1193     that could be freed.
1194 
1195 Note that an arena_object associated with an arena all of whose pools are
1196 currently in use isn't on either list.
1197 
1198 Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1199 used to be done by one-at-a-time linear search when an arena's number of
1200 free pools changed.  That could, overall, consume time quadratic in the
1201 number of arenas.  That didn't really matter when there were only a few
1202 hundred arenas (typical!), but could be a timing disaster when there were
1203 hundreds of thousands.  See bpo-37029.
1204 
1205 Now we have a vector of "search fingers" to eliminate the need to search:
1206 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1207 with nfp free pools.  This is NULL if and only if there is no arena with
1208 nfp free pools in usable_arenas.
1209 */
1210 
1211 /* Array of objects used to track chunks of memory (arenas). */
1212 static struct arena_object* arenas = NULL;
1213 /* Number of slots currently allocated in the `arenas` vector. */
1214 static uint maxarenas = 0;
1215 
1216 /* The head of the singly-linked, NULL-terminated list of available
1217  * arena_objects.
1218  */
1219 static struct arena_object* unused_arena_objects = NULL;
1220 
1221 /* The head of the doubly-linked, NULL-terminated at each end, list of
1222  * arena_objects associated with arenas that have pools available.
1223  */
1224 static struct arena_object* usable_arenas = NULL;
1225 
1226 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1227 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
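/* Illustration of the invariant: if usable_arenas currently holds arenas
 * A, B and C with 1, 1 and 3 free pools respectively (in that order), then
 *
 *     nfp2lasta[1] == B     (last arena with exactly 1 free pool)
 *     nfp2lasta[2] == NULL  (no arena has exactly 2 free pools)
 *     nfp2lasta[3] == C
 *
 * so an arena whose nfreepools changes can be moved to its new position
 * without scanning the list.
 */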
1228 
1229 /* How many arena_objects do we initially allocate?
1230  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1231  * `arenas` vector.
1232  */
1233 #define INITIAL_ARENA_OBJECTS 16
1234 
1235 /* Number of arenas allocated that haven't been free()'d. */
1236 static size_t narenas_currently_allocated = 0;
1237 
1238 /* Total number of times malloc() called to allocate an arena. */
1239 static size_t ntimes_arena_allocated = 0;
1240 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1241 static size_t narenas_highwater = 0;
1242 
1243 static Py_ssize_t raw_allocated_blocks;
1244 
1245 Py_ssize_t
1246 _Py_GetAllocatedBlocks(void)
1247 {
1248     Py_ssize_t n = raw_allocated_blocks;
1249     /* add up allocated blocks for used pools */
1250     for (uint i = 0; i < maxarenas; ++i) {
1251         /* Skip arenas which are not allocated. */
1252         if (arenas[i].address == 0) {
1253             continue;
1254         }
1255 
1256         uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1257 
1258         /* visit every pool in the arena */
1259         assert(base <= (uintptr_t) arenas[i].pool_address);
1260         for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1261             poolp p = (poolp)base;
1262             n += p->ref.count;
1263         }
1264     }
1265     return n;
1266 }
1267 
1268 #if WITH_PYMALLOC_RADIX_TREE
1269 /*==========================================================================*/
1270 /* radix tree for tracking arena usage
1271 
1272    bit allocation for keys
1273 
1274    64-bit pointers and 2^20 arena size:
1275      16 -> ignored (POINTER_BITS - ADDRESS_BITS)
1276      10 -> MAP_TOP
1277      10 -> MAP_MID
1278       8 -> MAP_BOT
1279      20 -> ideal aligned arena
1280    ----
1281      64
1282 
1283    32-bit pointers and 2^18 arena size:
1284      14 -> MAP_BOT
1285      18 -> ideal aligned arena
1286    ----
1287      32
1288 
1289 */
1290 
1291 #if SIZEOF_VOID_P == 8
1292 
1293 /* number of bits in a pointer */
1294 #define POINTER_BITS 64
1295 
1296 /* Current 64-bit processors are limited to 48-bit virtual addresses.  For
1297  * now, the top 17 bits of an address are all equal to bit 47.  If that
1298  * changes in the future, this must be adjusted upwards.
1299  */
1300 #define ADDRESS_BITS 48
1301 
1302 /* use the top and mid layers of the radix tree */
1303 #define USE_INTERIOR_NODES
1304 
1305 #elif SIZEOF_VOID_P == 4
1306 
1307 #define POINTER_BITS 32
1308 #define ADDRESS_BITS 32
1309 
1310 #else
1311 
1312  /* Currently this code works for 64-bit or 32-bit pointers only.  */
1313 #error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1314 
1315 #endif /* SIZEOF_VOID_P */
1316 
1317 /* arena_coverage_t members require this to be true  */
1318 #if ARENA_BITS >= 32
1319 #   error "arena size must be < 2^32"
1320 #endif
1321 
1322 #ifdef USE_INTERIOR_NODES
1323 /* number of bits used for MAP_TOP and MAP_MID nodes */
1324 #define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1325 #else
1326 #define INTERIOR_BITS 0
1327 #endif
1328 
1329 #define MAP_TOP_BITS INTERIOR_BITS
1330 #define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1331 #define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1332 
1333 #define MAP_MID_BITS INTERIOR_BITS
1334 #define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1335 #define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1336 
1337 #define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1338 #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1339 #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1340 
1341 #define MAP_BOT_SHIFT ARENA_BITS
1342 #define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1343 #define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1344 
1345 #define AS_UINT(p) ((uintptr_t)(p))
1346 #define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1347 #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1348 #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
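/* Worked example for the 64-bit layout (ADDRESS_BITS 48, ARENA_BITS 20):
 * INTERIOR_BITS = (48 - 20 + 2) / 3 = 10, so MAP_TOP and MAP_MID each use
 * 10 bits and MAP_BOT uses 48 - 20 - 2*10 = 8 bits.  A pointer is split as
 *
 *     bits 63..48   ignored (assumed identical for all valid pointers)
 *     bits 47..38   MAP_TOP_INDEX
 *     bits 37..28   MAP_MID_INDEX
 *     bits 27..20   MAP_BOT_INDEX
 *     bits 19..0    offset within the (ideal) 1 MiB arena
 */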
1349 
1350 #if ADDRESS_BITS > POINTER_BITS
1351 /* Return non-physical address bits of a pointer.  Those bits should be the
1352  * same for all valid pointers if ADDRESS_BITS is set correctly.  Linux has support for
1353  * 57-bit address space (Intel 5-level paging) but will not currently give
1354  * those addresses to user space.
1355  */
1356 #define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1357 #else
1358 #define HIGH_BITS(p) 0
1359 #endif
1360 
1361 
1362 /* This is the leaf of the radix tree.  See arena_map_mark_used() for the
1363  * meaning of these members. */
1364 typedef struct {
1365     int32_t tail_hi;
1366     int32_t tail_lo;
1367 } arena_coverage_t;
1368 
1369 typedef struct arena_map_bot {
1370     /* The members tail_hi and tail_lo are accessed together.  So, it is
1371      * better to have them as an array of structs, rather than two
1372      * arrays.
1373      */
1374     arena_coverage_t arenas[MAP_BOT_LENGTH];
1375 } arena_map_bot_t;
1376 
1377 #ifdef USE_INTERIOR_NODES
1378 typedef struct arena_map_mid {
1379     struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1380 } arena_map_mid_t;
1381 
1382 typedef struct arena_map_top {
1383     struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1384 } arena_map_top_t;
1385 #endif
1386 
1387 /* The root of radix tree.  Note that by initializing like this, the memory
1388  * should be in the BSS.  The OS will only memory map pages as the MAP_MID
1389  * nodes get used (OS pages are demand loaded as needed).
1390  */
1391 #ifdef USE_INTERIOR_NODES
1392 static arena_map_top_t arena_map_root;
1393 /* accounting for number of used interior nodes */
1394 static int arena_map_mid_count;
1395 static int arena_map_bot_count;
1396 #else
1397 static arena_map_bot_t arena_map_root;
1398 #endif
1399 
1400 /* Return a pointer to a bottom tree node, or NULL if it doesn't exist or
1401  * it cannot be created */
1402 static arena_map_bot_t *
1403 arena_map_get(block *p, int create)
1404 {
1405 #ifdef USE_INTERIOR_NODES
1406     /* sanity check that ADDRESS_BITS is correct */
1407     assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1408     int i1 = MAP_TOP_INDEX(p);
1409     if (arena_map_root.ptrs[i1] == NULL) {
1410         if (!create) {
1411             return NULL;
1412         }
1413         arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1414         if (n == NULL) {
1415             return NULL;
1416         }
1417         arena_map_root.ptrs[i1] = n;
1418         arena_map_mid_count++;
1419     }
1420     int i2 = MAP_MID_INDEX(p);
1421     if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1422         if (!create) {
1423             return NULL;
1424         }
1425         arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1426         if (n == NULL) {
1427             return NULL;
1428         }
1429         arena_map_root.ptrs[i1]->ptrs[i2] = n;
1430         arena_map_bot_count++;
1431     }
1432     return arena_map_root.ptrs[i1]->ptrs[i2];
1433 #else
1434     return &arena_map_root;
1435 #endif
1436 }
1437 
1438 
1439 /* The radix tree only tracks arenas.  So, with 1 MiB arenas, we throw
1440  * away the low 20 bits of each address.  That reduces the space requirement of
1441  * the tree compared to similar radix tree page-map schemes.  In
1442  * exchange for slashing the space requirement, it needs more
1443  * computation to check an address.
1444  *
1445  * Tracking coverage is done by "ideal" arena address.  It is easier to
1446  * explain in decimal so let's say that the arena size is 100 bytes.
1447  * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1448  * pointer address is inside an actual arena, we have to check two ideal
1449  * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1450  * 300.  In the rare case that an arena is aligned in the ideal way
1451  * (e.g. base address of arena is 200) then we only have to check one
1452  * ideal address.
1453  *
1454  * The tree nodes for 200 and 300 both store the address of arena.
1455  * There are two cases: the arena starts at a lower ideal arena and
1456  * extends to this one, or the arena starts in this arena and extends to
1457  * the next ideal arena.  The tail_lo and tail_hi members correspond to
1458  * these two cases.
1459  */
1460 
1461 
1462 /* mark or unmark addresses covered by arena */
1463 static int
1464 arena_map_mark_used(uintptr_t arena_base, int is_used)
1465 {
1466     /* sanity check that ADDRESS_BITS is correct */
1467     assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1468     arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1469     if (n_hi == NULL) {
1470         assert(is_used); /* otherwise node should already exist */
1471         return 0; /* failed to allocate space for node */
1472     }
1473     int i3 = MAP_BOT_INDEX((block *)arena_base);
1474     int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1475     if (tail == 0) {
1476         /* is ideal arena address */
1477         n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1478     }
1479     else {
1480          * arena_base address is not ideal (i.e. not aligned to the arena size) and
1481          * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1482          * for the next arena.  Note that it might be in different MAP_TOP
1483          * and MAP_MID nodes as well so we need to call arena_map_get()
1484          * again (do the full tree traversal).
1485          */
1486         n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1487         uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1488         /* If arena_base is a legit arena address, so is arena_base_next - 1
1489          * (last address in arena).  If arena_base_next overflows then it
1490          * must overflow to 0.  However, that would mean arena_base was
1491          * "ideal" and we should not be in this case. */
1492         assert(arena_base < arena_base_next);
1493         arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1494         if (n_lo == NULL) {
1495             assert(is_used); /* otherwise should already exist */
1496             n_hi->arenas[i3].tail_hi = 0;
1497             return 0; /* failed to allocate space for node */
1498         }
1499         int i3_next = MAP_BOT_INDEX(arena_base_next);
1500         n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1501     }
1502     return 1;
1503 }
1504 
1505 /* Return true if 'p' is a pointer inside an obmalloc arena.
1506  * _PyObject_Free() calls this so it needs to be very fast. */
1507 static int
1508 arena_map_is_used(block *p)
1509 {
1510     arena_map_bot_t *n = arena_map_get(p, 0);
1511     if (n == NULL) {
1512         return 0;
1513     }
1514     int i3 = MAP_BOT_INDEX(p);
1515     /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1516     int32_t hi = n->arenas[i3].tail_hi;
1517     int32_t lo = n->arenas[i3].tail_lo;
1518     int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1519     return (tail < lo) || (tail >= hi && hi != 0);
1520 }
1521 
1522 /* end of radix tree logic */
1523 /*==========================================================================*/
1524 #endif /* WITH_PYMALLOC_RADIX_TREE */
1525 
1526 
1527 /* Allocate a new arena.  Return NULL if we run out of memory; otherwise
1528  * return the address of an arena_object describing the newly allocated
1529  * arena.  It's expected that the caller will set
1530  * `usable_arenas` to the return value.
1531  */
1532 static struct arena_object*
1533 new_arena(void)
1534 {
1535     struct arena_object* arenaobj;
1536     uint excess;        /* number of bytes above pool alignment */
1537     void *address;
1538     static int debug_stats = -1;
1539 
1540     if (debug_stats == -1) {
1541         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1542         debug_stats = (opt != NULL && *opt != '\0');
1543     }
1544     if (debug_stats)
1545         _PyObject_DebugMallocStats(stderr);
1546 
1547     if (unused_arena_objects == NULL) {
1548         uint i;
1549         uint numarenas;
1550         size_t nbytes;
1551 
1552         /* Double the number of arena objects on each allocation.
1553          * Note that it's possible for `numarenas` to overflow.
1554          */
1555         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1556         if (numarenas <= maxarenas)
1557             return NULL;                /* overflow */
1558 #if SIZEOF_SIZE_T <= SIZEOF_INT
1559         if (numarenas > SIZE_MAX / sizeof(*arenas))
1560             return NULL;                /* overflow */
1561 #endif
1562         nbytes = numarenas * sizeof(*arenas);
1563         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1564         if (arenaobj == NULL)
1565             return NULL;
1566         arenas = arenaobj;
1567 
1568         /* We might need to fix pointers that were copied.  However,
1569          * new_arena only gets called when all the pages in the
1570          * previous arenas are full.  Thus, there are *no* pointers
1571          * into the old array. Thus, we don't have to worry about
1572          * invalid pointers.  Just to be sure, some asserts:
1573          */
1574         assert(usable_arenas == NULL);
1575         assert(unused_arena_objects == NULL);
1576 
1577         /* Put the new arenas on the unused_arena_objects list. */
1578         for (i = maxarenas; i < numarenas; ++i) {
1579             arenas[i].address = 0;              /* mark as unassociated */
1580             arenas[i].nextarena = i < numarenas - 1 ?
1581                                    &arenas[i+1] : NULL;
1582         }
1583 
1584         /* Update globals. */
1585         unused_arena_objects = &arenas[maxarenas];
1586         maxarenas = numarenas;
1587     }
1588 
1589     /* Take the next available arena object off the head of the list. */
1590     assert(unused_arena_objects != NULL);
1591     arenaobj = unused_arena_objects;
1592     unused_arena_objects = arenaobj->nextarena;
1593     assert(arenaobj->address == 0);
1594     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1595 #if WITH_PYMALLOC_RADIX_TREE
1596     if (address != NULL) {
1597         if (!arena_map_mark_used((uintptr_t)address, 1)) {
1598             /* marking arena in radix tree failed, abort */
1599             _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1600             address = NULL;
1601         }
1602     }
1603 #endif
1604     if (address == NULL) {
1605         /* The allocation failed: return NULL after putting the
1606          * arenaobj back.
1607          */
1608         arenaobj->nextarena = unused_arena_objects;
1609         unused_arena_objects = arenaobj;
1610         return NULL;
1611     }
1612     arenaobj->address = (uintptr_t)address;
1613 
1614     ++narenas_currently_allocated;
1615     ++ntimes_arena_allocated;
1616     if (narenas_currently_allocated > narenas_highwater)
1617         narenas_highwater = narenas_currently_allocated;
1618     arenaobj->freepools = NULL;
1619     /* pool_address <- first pool-aligned address in the arena
1620        nfreepools <- number of whole pools that fit after alignment */
1621     arenaobj->pool_address = (block*)arenaobj->address;
1622     arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1623     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1624     if (excess != 0) {
1625         --arenaobj->nfreepools;
1626         arenaobj->pool_address += POOL_SIZE - excess;
1627     }
1628     arenaobj->ntotalpools = arenaobj->nfreepools;
1629 
1630     return arenaobj;
1631 }
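/* A worked example of the pool-alignment fixup above; POOL_SIZE == 4096 is a
 * hypothetical value chosen only for illustration, not this build's constant.
 * Suppose the arena allocator returned address 0x10001800.  Then
 * excess = 0x10001800 & 0xFFF = 0x800, so pool_address is bumped by
 * 0x1000 - 0x800 = 0x800 to 0x10002000, the first pool-aligned address, and
 * nfreepools drops by one because the leading partial pool cannot be used.
 * If the arena happens to be pool-aligned already (excess == 0), all
 * MAX_POOLS_IN_ARENA pools remain usable.
 */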
1632 
1633 
1634 
1635 #if WITH_PYMALLOC_RADIX_TREE
1636 /* Return true if and only if P is an address that was allocated by
1637    pymalloc.  When the radix tree is used, 'poolp' is unused.
1638  */
1639 static bool
1640 address_in_range(void *p, poolp pool)
1641 {
1642     return arena_map_is_used(p);
1643 }
1644 #else
1645 /*
1646 address_in_range(P, POOL)
1647 
1648 Return true if and only if P is an address that was allocated by pymalloc.
1649 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1650 (the caller is asked to compute this because the macro expands POOL more than
1651 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1652 variable and pass the latter to the macro; because address_in_range is
1653 called on every alloc/realloc/free, micro-efficiency is important here).
1654 
1655 Tricky:  Let B be the arena base address associated with the pool, B =
1656 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1657 
1658     B <= P < B + ARENA_SIZE
1659 
1660 Subtracting B throughout, this is true iff
1661 
1662     0 <= P-B < ARENA_SIZE
1663 
1664 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1665 
1666 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1667 before the first arena has been allocated.  `arenas` is still NULL in that
1668 case.  We're relying on that maxarenas is also 0 in that case, so that
1669 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1670 into a NULL arenas.
1671 
1672 Details:  given P and POOL, the arena_object corresponding to P is AO =
1673 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1674 stores, etc), POOL is the correct address of P's pool, AO.address is the
1675 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1676 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1677 (NULL)).  Therefore address_in_range correctly reports that obmalloc
1678 controls P.
1679 
1680 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1681 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1682 in this case -- it may even be uninitialized trash.  If the trash arenaindex
1683 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1684 control P.
1685 
1686 Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
1687 allocated arena, obmalloc controls all the memory in slice AO.address :
1688 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1689 so P doesn't lie in that slice, so the macro correctly reports that P is not
1690 controlled by obmalloc.
1691 
1692 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1693 arena_object (one not currently associated with an allocated arena),
1694 AO.address is 0, and the second test in the macro reduces to:
1695 
1696     P < ARENA_SIZE
1697 
1698 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1699 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1700 of the test still passes, and the third clause (AO.address != 0) is necessary
1701 to get the correct result:  AO.address is 0 in this case, so the macro
1702 correctly reports that P is not controlled by obmalloc (despite that P lies in
1703 slice AO.address : AO.address + ARENA_SIZE).
1704 
1705 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1706 2.5, arenas were never free()'ed, and an arenaindex < maxarena always
1707 corresponded to a currently-allocated arena, so the "P is not controlled by
1708 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1709 was impossible.
1710 
1711 Note that the logic is excruciating, and reading up possibly uninitialized
1712 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1713 creates problems for some memory debuggers.  The overwhelming advantage is
1714 that this test determines whether an arbitrary address is controlled by
1715 obmalloc in a small constant time, independent of the number of arenas
1716 obmalloc controls.  Since this test is needed at every entry point, it's
1717 extremely desirable that it be this fast.
1718 */
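/* A small illustration of the unsigned-arithmetic trick described above,
 * with hypothetical numbers: suppose ARENA_SIZE were 0x100000 and the arena
 * base B were 0x7f0000100000.
 *
 *     P == B + 0x42:   (uintptr_t)P - B == 0x42, which is <  ARENA_SIZE
 *     P == B - 0x10:   (uintptr_t)P - B wraps around to a value close to
 *                      2**64, which is >= ARENA_SIZE
 *
 * so a single unsigned comparison rejects addresses both below and above
 * the arena, which is exactly what the function below relies on.
 */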
1719 
1720 static bool _Py_NO_SANITIZE_ADDRESS
1721             _Py_NO_SANITIZE_THREAD
1722             _Py_NO_SANITIZE_MEMORY
1723 address_in_range(void *p, poolp pool)
1724 {
1725     // Since address_in_range may be reading from memory which was not allocated
1726     // by Python, it is important that pool->arenaindex is read only once, as
1727     // another thread may be concurrently modifying the value without holding
1728     // the GIL. The following dance forces the compiler to read pool->arenaindex
1729     // only once.
1730     uint arenaindex = *((volatile uint *)&pool->arenaindex);
1731     return arenaindex < maxarenas &&
1732         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1733         arenas[arenaindex].address != 0;
1734 }
1735 
1736 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1737 
1738 /*==========================================================================*/
1739 
1740 // Called when freelist is exhausted.  Extend the freelist if there is
1741 // space for a block.  Otherwise, remove this pool from usedpools.
1742 static void
1743 pymalloc_pool_extend(poolp pool, uint size)
1744 {
1745     if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1746         /* There is room for another block. */
1747         pool->freeblock = (block*)pool + pool->nextoffset;
1748         pool->nextoffset += INDEX2SIZE(size);
1749         *(block **)(pool->freeblock) = NULL;
1750         return;
1751     }
1752 
1753     /* Pool is full, unlink from used pools. */
1754     poolp next;
1755     next = pool->nextpool;
1756     pool = pool->prevpool;
1757     next->prevpool = pool;
1758     pool->nextpool = next;
1759 }
1760 
1761 /* Called when pymalloc_alloc cannot allocate a block from the usedpools.
1762  * This function takes a new pool and allocates a block from it.
1763  */
1764 static void*
1765 allocate_from_new_pool(uint size)
1766 {
1767     /* There isn't a pool of the right size class immediately
1768      * available:  use a free pool.
1769      */
1770     if (UNLIKELY(usable_arenas == NULL)) {
1771         /* No arena has a free pool:  allocate a new arena. */
1772 #ifdef WITH_MEMORY_LIMITS
1773         if (narenas_currently_allocated >= MAX_ARENAS) {
1774             return NULL;
1775         }
1776 #endif
1777         usable_arenas = new_arena();
1778         if (usable_arenas == NULL) {
1779             return NULL;
1780         }
1781         usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1782         assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1783         nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1784     }
1785     assert(usable_arenas->address != 0);
1786 
1787     /* This arena already had the smallest nfreepools value, so decreasing
1788      * nfreepools doesn't change that, and we don't need to rearrange the
1789      * usable_arenas list.  However, if the arena becomes wholly allocated,
1790      * we need to remove its arena_object from usable_arenas.
1791      */
1792     assert(usable_arenas->nfreepools > 0);
1793     if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1794         /* It's the last of this size, so there won't be any. */
1795         nfp2lasta[usable_arenas->nfreepools] = NULL;
1796     }
1797     /* If any free pools will remain, it will be the new smallest. */
1798     if (usable_arenas->nfreepools > 1) {
1799         assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1800         nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1801     }
1802 
1803     /* Try to get a cached free pool. */
1804     poolp pool = usable_arenas->freepools;
1805     if (LIKELY(pool != NULL)) {
1806         /* Unlink from cached pools. */
1807         usable_arenas->freepools = pool->nextpool;
1808         usable_arenas->nfreepools--;
1809         if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1810             /* Wholly allocated:  remove. */
1811             assert(usable_arenas->freepools == NULL);
1812             assert(usable_arenas->nextarena == NULL ||
1813                    usable_arenas->nextarena->prevarena ==
1814                    usable_arenas);
1815             usable_arenas = usable_arenas->nextarena;
1816             if (usable_arenas != NULL) {
1817                 usable_arenas->prevarena = NULL;
1818                 assert(usable_arenas->address != 0);
1819             }
1820         }
1821         else {
1822             /* nfreepools > 0:  it must be that freepools
1823              * isn't NULL, or that we haven't yet carved
1824              * off all the arena's pools for the first
1825              * time.
1826              */
1827             assert(usable_arenas->freepools != NULL ||
1828                    usable_arenas->pool_address <=
1829                    (block*)usable_arenas->address +
1830                        ARENA_SIZE - POOL_SIZE);
1831         }
1832     }
1833     else {
1834         /* Carve off a new pool. */
1835         assert(usable_arenas->nfreepools > 0);
1836         assert(usable_arenas->freepools == NULL);
1837         pool = (poolp)usable_arenas->pool_address;
1838         assert((block*)pool <= (block*)usable_arenas->address +
1839                                  ARENA_SIZE - POOL_SIZE);
1840         pool->arenaindex = (uint)(usable_arenas - arenas);
1841         assert(&arenas[pool->arenaindex] == usable_arenas);
1842         pool->szidx = DUMMY_SIZE_IDX;
1843         usable_arenas->pool_address += POOL_SIZE;
1844         --usable_arenas->nfreepools;
1845 
1846         if (usable_arenas->nfreepools == 0) {
1847             assert(usable_arenas->nextarena == NULL ||
1848                    usable_arenas->nextarena->prevarena ==
1849                    usable_arenas);
1850             /* Unlink the arena:  it is completely allocated. */
1851             usable_arenas = usable_arenas->nextarena;
1852             if (usable_arenas != NULL) {
1853                 usable_arenas->prevarena = NULL;
1854                 assert(usable_arenas->address != 0);
1855             }
1856         }
1857     }
1858 
1859     /* Frontlink to used pools. */
1860     block *bp;
1861     poolp next = usedpools[size + size]; /* == prev */
1862     pool->nextpool = next;
1863     pool->prevpool = next;
1864     next->nextpool = pool;
1865     next->prevpool = pool;
1866     pool->ref.count = 1;
1867     if (pool->szidx == size) {
1868         /* Luckily, this pool last contained blocks
1869          * of the same size class, so its header
1870          * and free list are already initialized.
1871          */
1872         bp = pool->freeblock;
1873         assert(bp != NULL);
1874         pool->freeblock = *(block **)bp;
1875         return bp;
1876     }
1877     /*
1878      * Initialize the pool header, set up the free list to
1879      * contain just the second block, and return the first
1880      * block.
1881      */
1882     pool->szidx = size;
1883     size = INDEX2SIZE(size);
1884     bp = (block *)pool + POOL_OVERHEAD;
1885     pool->nextoffset = POOL_OVERHEAD + (size << 1);
1886     pool->maxnextoffset = POOL_SIZE - size;
1887     pool->freeblock = bp + size;
1888     *(block **)(pool->freeblock) = NULL;
1889     return bp;
1890 }
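/* A worked example of the pool-header initialization above; POOL_SIZE == 4096
 * and POOL_OVERHEAD == 48 are hypothetical values used only for illustration.
 * For the 32-byte size class:
 *
 *     first block returned:  offset 48            (bp)
 *     pool->freeblock:       offset 48 + 32 = 80  (the second block)
 *     pool->nextoffset:      48 + 2*32 = 112      (next never-used block)
 *     pool->maxnextoffset:   4096 - 32 = 4064     (last offset that still
 *                                                  fits a whole block)
 *
 * Blocks beyond freeblock are carved out lazily, one at a time, by
 * pymalloc_pool_extend() as the free list runs dry.
 */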
1891 
1892 /* pymalloc allocator
1893 
1894    Return a pointer to newly allocated memory if pymalloc allocated memory.
1895 
1896    Return NULL if pymalloc failed to allocate the memory block: for requests
1897    above the small-request threshold, on error in the code below (the caller
1898    then gets a last chance to serve the request), or when the max memory limit has been reached.
1899 */
1900 static inline void*
1901 pymalloc_alloc(void *ctx, size_t nbytes)
1902 {
1903 #ifdef WITH_VALGRIND
1904     if (UNLIKELY(running_on_valgrind == -1)) {
1905         running_on_valgrind = RUNNING_ON_VALGRIND;
1906     }
1907     if (UNLIKELY(running_on_valgrind)) {
1908         return NULL;
1909     }
1910 #endif
1911 
1912     if (UNLIKELY(nbytes == 0)) {
1913         return NULL;
1914     }
1915     if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1916         return NULL;
1917     }
1918 
1919     uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1920     poolp pool = usedpools[size + size];
1921     block *bp;
1922 
1923     if (LIKELY(pool != pool->nextpool)) {
1924         /*
1925          * There is a used pool for this size class.
1926          * Pick up the head block of its free list.
1927          */
1928         ++pool->ref.count;
1929         bp = pool->freeblock;
1930         assert(bp != NULL);
1931 
1932         if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1933             // Reached the end of the free list, try to extend it.
1934             pymalloc_pool_extend(pool, size);
1935         }
1936     }
1937     else {
1938         /* There isn't a pool of the right size class immediately
1939          * available:  use a free pool.
1940          */
1941         bp = allocate_from_new_pool(size);
1942     }
1943 
1944     return (void *)bp;
1945 }
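/* A size-class lookup sketch, assuming ALIGNMENT == 16 (ALIGNMENT_SHIFT == 4),
 * which is typical of 64-bit builds but is not guaranteed by this excerpt.
 * For a request of nbytes == 20:
 *
 *     size = (20 - 1) >> 4 == 1    ->  the 32-byte size class
 *
 * usedpools[size + size] then picks the list head for that class; the
 * entries are doubled because each list head is, in effect, a pair of
 * pointers standing in for the nextpool/prevpool fields of a dummy
 * pool_header (usedpools is defined earlier in this file).
 */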
1946 
1947 
1948 static void *
1949 _PyObject_Malloc(void *ctx, size_t nbytes)
1950 {
1951     void* ptr = pymalloc_alloc(ctx, nbytes);
1952     if (LIKELY(ptr != NULL)) {
1953         return ptr;
1954     }
1955 
1956     ptr = PyMem_RawMalloc(nbytes);
1957     if (ptr != NULL) {
1958         raw_allocated_blocks++;
1959     }
1960     return ptr;
1961 }
1962 
1963 
1964 static void *
1965 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1966 {
1967     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1968     size_t nbytes = nelem * elsize;
1969 
1970     void* ptr = pymalloc_alloc(ctx, nbytes);
1971     if (LIKELY(ptr != NULL)) {
1972         memset(ptr, 0, nbytes);
1973         return ptr;
1974     }
1975 
1976     ptr = PyMem_RawCalloc(nelem, elsize);
1977     if (ptr != NULL) {
1978         raw_allocated_blocks++;
1979     }
1980     return ptr;
1981 }
1982 
1983 
1984 static void
1985 insert_to_usedpool(poolp pool)
1986 {
1987     assert(pool->ref.count > 0);            /* else the pool is empty */
1988 
1989     uint size = pool->szidx;
1990     poolp next = usedpools[size + size];
1991     poolp prev = next->prevpool;
1992 
1993     /* insert pool before next:   prev <-> pool <-> next */
1994     pool->nextpool = next;
1995     pool->prevpool = prev;
1996     next->prevpool = pool;
1997     prev->nextpool = pool;
1998 }
1999 
2000 static void
2001 insert_to_freepool(poolp pool)
2002 {
2003     poolp next = pool->nextpool;
2004     poolp prev = pool->prevpool;
2005     next->prevpool = prev;
2006     prev->nextpool = next;
2007 
2008     /* Link the pool to freepools.  This is a singly-linked
2009      * list, and pool->prevpool isn't used there.
2010      */
2011     struct arena_object *ao = &arenas[pool->arenaindex];
2012     pool->nextpool = ao->freepools;
2013     ao->freepools = pool;
2014     uint nf = ao->nfreepools;
2015     /* If this is the rightmost arena with this number of free pools,
2016      * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2017      * are no arenas in usable_arenas with that value.
2018      */
2019     struct arena_object* lastnf = nfp2lasta[nf];
2020     assert((nf == 0 && lastnf == NULL) ||
2021            (nf > 0 &&
2022             lastnf != NULL &&
2023             lastnf->nfreepools == nf &&
2024             (lastnf->nextarena == NULL ||
2025              nf < lastnf->nextarena->nfreepools)));
2026     if (lastnf == ao) {  /* it is the rightmost */
2027         struct arena_object* p = ao->prevarena;
2028         nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2029     }
2030     ao->nfreepools = ++nf;
2031 
2032     /* All the rest is arena management.  We just freed
2033      * a pool, and there are 4 cases for arena mgmt:
2034      * 1. If all the pools are free, return the arena to
2035      *    the system free().  Except if this is the last
2036      *    arena in the list, keep it to avoid thrashing:
2037      *    keeping one wholly free arena in the list avoids
2038      *    pathological cases where a simple loop would
2039      *    otherwise provoke needing to allocate and free an
2040      *    arena on every iteration.  See bpo-37257.
2041      * 2. If this is the only free pool in the arena,
2042      *    add the arena back to the `usable_arenas` list.
2043      * 3. If the "next" arena has a smaller count of free
2044      *    pools, we have to "slide this arena right" to
2045      *    restore the invariant that usable_arenas is sorted
2046      *    in increasing order of nfreepools.
2047      * 4. Else there's nothing more to do.
2048      */
2049     if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2050         /* Case 1.  First unlink ao from usable_arenas.
2051          */
2052         assert(ao->prevarena == NULL ||
2053                ao->prevarena->address != 0);
2054         assert(ao ->nextarena == NULL ||
2055                ao->nextarena->address != 0);
2056 
2057         /* Fix the pointer in the prevarena, or the
2058          * usable_arenas pointer.
2059          */
2060         if (ao->prevarena == NULL) {
2061             usable_arenas = ao->nextarena;
2062             assert(usable_arenas == NULL ||
2063                    usable_arenas->address != 0);
2064         }
2065         else {
2066             assert(ao->prevarena->nextarena == ao);
2067             ao->prevarena->nextarena =
2068                 ao->nextarena;
2069         }
2070         /* Fix the pointer in the nextarena. */
2071         if (ao->nextarena != NULL) {
2072             assert(ao->nextarena->prevarena == ao);
2073             ao->nextarena->prevarena =
2074                 ao->prevarena;
2075         }
2076         /* Record that this arena_object slot is
2077          * available to be reused.
2078          */
2079         ao->nextarena = unused_arena_objects;
2080         unused_arena_objects = ao;
2081 
2082 #if WITH_PYMALLOC_RADIX_TREE
2083         /* mark arena region as not under control of obmalloc */
2084         arena_map_mark_used(ao->address, 0);
2085 #endif
2086 
2087         /* Free the entire arena. */
2088         _PyObject_Arena.free(_PyObject_Arena.ctx,
2089                              (void *)ao->address, ARENA_SIZE);
2090         ao->address = 0;                        /* mark unassociated */
2091         --narenas_currently_allocated;
2092 
2093         return;
2094     }
2095 
2096     if (nf == 1) {
2097         /* Case 2.  Put ao at the head of
2098          * usable_arenas.  Note that because
2099          * ao->nfreepools was 0 before, ao isn't
2100          * currently on the usable_arenas list.
2101          */
2102         ao->nextarena = usable_arenas;
2103         ao->prevarena = NULL;
2104         if (usable_arenas)
2105             usable_arenas->prevarena = ao;
2106         usable_arenas = ao;
2107         assert(usable_arenas->address != 0);
2108         if (nfp2lasta[1] == NULL) {
2109             nfp2lasta[1] = ao;
2110         }
2111 
2112         return;
2113     }
2114 
2115     /* If this arena is now out of order, we need to keep
2116      * the list sorted.  The list is kept sorted so that
2117      * the "most full" arenas are used first, which allows
2118      * the nearly empty arenas to be completely freed.  In
2119      * a few un-scientific tests, it seems like this
2120      * approach allowed a lot more memory to be freed.
2121      */
2122     /* If this is the only arena with nf, record that. */
2123     if (nfp2lasta[nf] == NULL) {
2124         nfp2lasta[nf] = ao;
2125     } /* else the rightmost with nf doesn't change */
2126     /* If this was the rightmost of the old size, it remains in place. */
2127     if (ao == lastnf) {
2128         /* Case 4.  Nothing to do. */
2129         return;
2130     }
2131     /* If ao were the only arena in the list, the last block would have
2132      * gotten us out.
2133      */
2134     assert(ao->nextarena != NULL);
2135 
2136     /* Case 3:  We have to move the arena towards the end of the list,
2137      * because it has more free pools than the arena to its right.  It needs
2138      * to move to follow lastnf.
2139      * First unlink ao from usable_arenas.
2140      */
2141     if (ao->prevarena != NULL) {
2142         /* ao isn't at the head of the list */
2143         assert(ao->prevarena->nextarena == ao);
2144         ao->prevarena->nextarena = ao->nextarena;
2145     }
2146     else {
2147         /* ao is at the head of the list */
2148         assert(usable_arenas == ao);
2149         usable_arenas = ao->nextarena;
2150     }
2151     ao->nextarena->prevarena = ao->prevarena;
2152     /* And insert after lastnf. */
2153     ao->prevarena = lastnf;
2154     ao->nextarena = lastnf->nextarena;
2155     if (ao->nextarena != NULL) {
2156         ao->nextarena->prevarena = ao;
2157     }
2158     lastnf->nextarena = ao;
2159     /* Verify that the swaps worked. */
2160     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2161     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2162     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2163     assert((usable_arenas == ao && ao->prevarena == NULL)
2164            || ao->prevarena->nextarena == ao);
2165 }
2166 
2167 /* Free a memory block allocated by pymalloc_alloc().
2168    Return 1 if it was freed.
2169    Return 0 if the block was not allocated by pymalloc_alloc(). */
2170 static inline int
2171 pymalloc_free(void *ctx, void *p)
2172 {
2173     assert(p != NULL);
2174 
2175 #ifdef WITH_VALGRIND
2176     if (UNLIKELY(running_on_valgrind > 0)) {
2177         return 0;
2178     }
2179 #endif
2180 
2181     poolp pool = POOL_ADDR(p);
2182     if (UNLIKELY(!address_in_range(p, pool))) {
2183         return 0;
2184     }
2185     /* We allocated this address. */
2186 
2187     /* Link p to the start of the pool's freeblock list.  Since
2188      * the pool had at least the p block outstanding, the pool
2189      * wasn't empty (so it's already in a usedpools[] list, or
2190      * was full and is in no list -- it's not in the freeblocks
2191      * list in any case).
2192      */
2193     assert(pool->ref.count > 0);            /* else it was empty */
2194     block *lastfree = pool->freeblock;
2195     *(block **)p = lastfree;
2196     pool->freeblock = (block *)p;
2197     pool->ref.count--;
2198 
2199     if (UNLIKELY(lastfree == NULL)) {
2200         /* Pool was full, so doesn't currently live in any list:
2201          * link it to the front of the appropriate usedpools[] list.
2202          * This mimics LRU pool usage for new allocations and
2203          * targets optimal filling when several pools contain
2204          * blocks of the same size class.
2205          */
2206         insert_to_usedpool(pool);
2207         return 1;
2208     }
2209 
2210     /* freeblock wasn't NULL, so the pool wasn't full,
2211      * and the pool is in a usedpools[] list.
2212      */
2213     if (LIKELY(pool->ref.count != 0)) {
2214         /* pool isn't empty:  leave it in usedpools */
2215         return 1;
2216     }
2217 
2218     /* Pool is now empty:  unlink from usedpools, and
2219      * link to the front of freepools.  This ensures that
2220      * previously freed pools will be allocated later
2221      * (being not referenced, they are perhaps paged out).
2222      */
2223     insert_to_freepool(pool);
2224     return 1;
2225 }
2226 
2227 
2228 static void
2229 _PyObject_Free(void *ctx, void *p)
2230 {
2231     /* PyObject_Free(NULL) has no effect */
2232     if (p == NULL) {
2233         return;
2234     }
2235 
2236     if (UNLIKELY(!pymalloc_free(ctx, p))) {
2237         /* pymalloc didn't allocate this address */
2238         PyMem_RawFree(p);
2239         raw_allocated_blocks--;
2240     }
2241 }
2242 
2243 
2244 /* pymalloc realloc.
2245 
2246    If nbytes==0, then as the Python docs promise, we do not treat this like
2247    free(p), and return a non-NULL result.
2248 
2249    Return 1 if pymalloc reallocated memory and wrote the new pointer into
2250    newptr_p.
2251 
2252    Return 0 if pymalloc didn't allocate p. */
2253 static int
2254 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2255 {
2256     void *bp;
2257     poolp pool;
2258     size_t size;
2259 
2260     assert(p != NULL);
2261 
2262 #ifdef WITH_VALGRIND
2263     /* Treat running_on_valgrind == -1 the same as 0 */
2264     if (UNLIKELY(running_on_valgrind > 0)) {
2265         return 0;
2266     }
2267 #endif
2268 
2269     pool = POOL_ADDR(p);
2270     if (!address_in_range(p, pool)) {
2271         /* pymalloc is not managing this block.
2272 
2273            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2274            over this block.  However, if we do, we need to copy the valid data
2275            from the C-managed block to one of our blocks, and there's no
2276            portable way to know how much of the memory space starting at p is
2277            valid.
2278 
2279            As bug 1185883 pointed out the hard way, it's possible that the
2280            C-managed block is "at the end" of allocated VM space, so that a
2281            memory fault can occur if we try to copy nbytes bytes starting at p.
2282            Instead we punt: let C continue to manage this block. */
2283         return 0;
2284     }
2285 
2286     /* pymalloc is in charge of this block */
2287     size = INDEX2SIZE(pool->szidx);
2288     if (nbytes <= size) {
2289         /* The block is staying the same or shrinking.
2290 
2291            If it's shrinking, there's a tradeoff: it costs cycles to copy the
2292            block to a smaller size class, but it wastes memory not to copy it.
2293 
2294            The compromise here is to copy on shrink only if at least 25% of
2295            size can be shaved off. */
2296         if (4 * nbytes > 3 * size) {
2297             /* It's the same, or shrinking and new/old > 3/4. */
2298             *newptr_p = p;
2299             return 1;
2300         }
2301         size = nbytes;
2302     }
2303 
2304     bp = _PyObject_Malloc(ctx, nbytes);
2305     if (bp != NULL) {
2306         memcpy(bp, p, size);
2307         _PyObject_Free(ctx, p);
2308     }
2309     *newptr_p = bp;
2310     return 1;
2311 }
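/* Worked arithmetic for the 25% shrink rule above (illustrative size class).
 * Suppose p lives in the 512-byte size class, so size == 512:
 *
 *     nbytes == 400:  4*400 == 1600 >  3*512 == 1536  -> keep p in place
 *     nbytes == 300:  4*300 == 1200 <= 1536           -> allocate from a
 *                     smaller class and memcpy() 300 bytes across
 *
 * i.e. the block is only moved when at least a quarter of it would be
 * given back.
 */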
2312 
2313 
2314 static void *
2315 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2316 {
2317     void *ptr2;
2318 
2319     if (ptr == NULL) {
2320         return _PyObject_Malloc(ctx, nbytes);
2321     }
2322 
2323     if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2324         return ptr2;
2325     }
2326 
2327     return PyMem_RawRealloc(ptr, nbytes);
2328 }
2329 
2330 #else   /* ! WITH_PYMALLOC */
2331 
2332 /*==========================================================================*/
2333 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2334  * only be used by extensions that are compiled with pymalloc enabled. */
2335 
2336 Py_ssize_t
2337 _Py_GetAllocatedBlocks(void)
2338 {
2339     return 0;
2340 }
2341 
2342 #endif /* WITH_PYMALLOC */
2343 
2344 
2345 /*==========================================================================*/
2346 /* A cross-platform debugging allocator.  This doesn't manage memory directly;
2347  * it wraps a real allocator, adding extra debugging info to the memory blocks.
2348  */
2349 
2350 /* Uncomment this define to add the "serialno" field */
2351 /* #define PYMEM_DEBUG_SERIALNO */
2352 
2353 #ifdef PYMEM_DEBUG_SERIALNO
2354 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2355 
2356 /* serialno is always incremented via calling this routine.  The point is
2357  * to supply a single place to set a breakpoint.
2358  */
2359 static void
2360 bumpserialno(void)
2361 {
2362     ++serialno;
2363 }
2364 #endif
2365 
2366 #define SST SIZEOF_SIZE_T
2367 
2368 #ifdef PYMEM_DEBUG_SERIALNO
2369 #  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2370 #else
2371 #  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2372 #endif
2373 
2374 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2375 static size_t
2376 read_size_t(const void *p)
2377 {
2378     const uint8_t *q = (const uint8_t *)p;
2379     size_t result = *q++;
2380     int i;
2381 
2382     for (i = SST; --i > 0; ++q)
2383         result = (result << 8) | *q;
2384     return result;
2385 }
2386 
2387 /* Write n as a big-endian size_t, MSB at address p, LSB at
2388  * p + sizeof(size_t) - 1.
2389  */
2390 static void
2391 write_size_t(void *p, size_t n)
2392 {
2393     uint8_t *q = (uint8_t *)p + SST - 1;
2394     int i;
2395 
2396     for (i = SST; --i >= 0; --q) {
2397         *q = (uint8_t)(n & 0xff);
2398         n >>= 8;
2399     }
2400 }
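/* An example of the big-endian convention used by write_size_t() and
 * read_size_t(), assuming SIZEOF_SIZE_T == 8.  write_size_t(p, 300) stores
 *
 *     p[0..7] == 00 00 00 00 00 00 01 2C      (300 == 0x12C, MSB first)
 *
 * and read_size_t(p) folds the bytes back together to recover 300.
 * Big-endian is used so the requested size is easy to read off in a raw
 * memory dump.
 */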
2401 
2402 /* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2403    fills them with useful stuff, here calling the underlying malloc's result p:
2404 
2405 p[0: S]
2406     Number of bytes originally asked for.  This is a size_t, big-endian (easier
2407     to read in a memory dump).
2408 p[S]
2409     API ID.  See PEP 445.  This is a character, but seems undocumented.
2410 p[S+1: 2*S]
2411     Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
2412 p[2*S: 2*S+n]
2413     The requested memory, filled with copies of PYMEM_CLEANBYTE.
2414     Used to catch reference to uninitialized memory.
2415     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2416     handled the request itself.
2417 p[2*S+n: 2*S+n+S]
2418     Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
2419 p[2*S+n+S: 2*S+n+2*S]
2420     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2421     and _PyMem_DebugRealloc.
2422     This is a big-endian size_t.
2423     If "bad memory" is detected later, the serial number gives an
2424     excellent way to set a breakpoint on the next run, to capture the
2425     instant at which this block was passed out.
2426 
2427 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2428 for 3 * S extra bytes, and omits the last serialno field.
2429 */
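/* A concrete layout sketch, assuming S == 8, PYMEM_DEBUG_SERIALNO defined,
 * and a request of n == 4 bytes:
 *
 *     p[0:8]     4, stored as a big-endian size_t
 *     p[8]       the API id byte
 *     p[9:16]    7 copies of PYMEM_FORBIDDENBYTE
 *     p[16:20]   the 4 data bytes, pre-filled with PYMEM_CLEANBYTE
 *     p[20:28]   8 copies of PYMEM_FORBIDDENBYTE
 *     p[28:36]   the serial number, stored as a big-endian size_t
 *
 * The caller receives &p[16]; 36 bytes (n + 4*S) were really allocated.
 */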
2430 
2431 static void *
2432 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2433 {
2434     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2435     uint8_t *p;           /* base address of malloc'ed pad block */
2436     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2437     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2438     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2439 
2440     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2441         /* integer overflow: can't represent total as a Py_ssize_t */
2442         return NULL;
2443     }
2444     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2445 
2446     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2447                 ^--- p    ^--- data   ^--- tail
2448        S: nbytes stored as size_t
2449        I: API identifier (1 byte)
2450        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2451        C: Clean bytes used later to store actual data
2452        N: Serial number stored as size_t
2453 
2454        If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2455        is omitted. */
2456 
2457     if (use_calloc) {
2458         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2459     }
2460     else {
2461         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2462     }
2463     if (p == NULL) {
2464         return NULL;
2465     }
2466     data = p + 2*SST;
2467 
2468 #ifdef PYMEM_DEBUG_SERIALNO
2469     bumpserialno();
2470 #endif
2471 
2472     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2473     write_size_t(p, nbytes);
2474     p[SST] = (uint8_t)api->api_id;
2475     memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2476 
2477     if (nbytes > 0 && !use_calloc) {
2478         memset(data, PYMEM_CLEANBYTE, nbytes);
2479     }
2480 
2481     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2482     tail = data + nbytes;
2483     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2484 #ifdef PYMEM_DEBUG_SERIALNO
2485     write_size_t(tail + SST, serialno);
2486 #endif
2487 
2488     return data;
2489 }
2490 
2491 static void *
2492 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2493 {
2494     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2495 }
2496 
2497 static void *
2498 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2499 {
2500     size_t nbytes;
2501     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2502     nbytes = nelem * elsize;
2503     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2504 }
2505 
2506 
2507 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2508    particular, that the FORBIDDENBYTEs with the api ID are still intact).
2509    Then fills the original bytes with PYMEM_DEADBYTE.
2510    Then calls the underlying free.
2511 */
2512 static void
2513 _PyMem_DebugRawFree(void *ctx, void *p)
2514 {
2515     /* PyMem_Free(NULL) has no effect */
2516     if (p == NULL) {
2517         return;
2518     }
2519 
2520     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2521     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2522     size_t nbytes;
2523 
2524     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2525     nbytes = read_size_t(q);
2526     nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2527     memset(q, PYMEM_DEADBYTE, nbytes);
2528     api->alloc.free(api->alloc.ctx, q);
2529 }
2530 
2531 
2532 static void *
2533 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2534 {
2535     if (p == NULL) {
2536         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2537     }
2538 
2539     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2540     uint8_t *head;        /* base address of malloc'ed pad block */
2541     uint8_t *data;        /* pointer to data bytes */
2542     uint8_t *r;
2543     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2544     size_t total;         /* 2 * SST + nbytes + 2 * SST */
2545     size_t original_nbytes;
2546 #define ERASED_SIZE 64
2547     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2548 
2549     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2550 
2551     data = (uint8_t *)p;
2552     head = data - 2*SST;
2553     original_nbytes = read_size_t(head);
2554     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2555         /* integer overflow: can't represent total as a Py_ssize_t */
2556         return NULL;
2557     }
2558     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2559 
2560     tail = data + original_nbytes;
2561 #ifdef PYMEM_DEBUG_SERIALNO
2562     size_t block_serialno = read_size_t(tail + SST);
2563 #endif
2564     /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
2565        ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
2566      */
2567     if (original_nbytes <= sizeof(save)) {
2568         memcpy(save, data, original_nbytes);
2569         memset(data - 2 * SST, PYMEM_DEADBYTE,
2570                original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2571     }
2572     else {
2573         memcpy(save, data, ERASED_SIZE);
2574         memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2575         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2576         memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2577                ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2578     }
2579 
2580     /* Resize and add decorations. */
2581     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2582     if (r == NULL) {
2583         /* if realloc() failed: rewrite header and footer which have
2584            just been erased */
2585         nbytes = original_nbytes;
2586     }
2587     else {
2588         head = r;
2589 #ifdef PYMEM_DEBUG_SERIALNO
2590         bumpserialno();
2591         block_serialno = serialno;
2592 #endif
2593     }
2594     data = head + 2*SST;
2595 
2596     write_size_t(head, nbytes);
2597     head[SST] = (uint8_t)api->api_id;
2598     memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2599 
2600     tail = data + nbytes;
2601     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2602 #ifdef PYMEM_DEBUG_SERIALNO
2603     write_size_t(tail + SST, block_serialno);
2604 #endif
2605 
2606     /* Restore saved bytes. */
2607     if (original_nbytes <= sizeof(save)) {
2608         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2609     }
2610     else {
2611         size_t i = original_nbytes - ERASED_SIZE;
2612         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2613         if (nbytes > i) {
2614             memcpy(data + i, &save[ERASED_SIZE],
2615                    Py_MIN(nbytes - i, ERASED_SIZE));
2616         }
2617     }
2618 
2619     if (r == NULL) {
2620         return NULL;
2621     }
2622 
2623     if (nbytes > original_nbytes) {
2624         /* growing: mark new extra memory clean */
2625         memset(data + original_nbytes, PYMEM_CLEANBYTE,
2626                nbytes - original_nbytes);
2627     }
2628 
2629     return data;
2630 }
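/* A sketch of the erase/restore dance above for a large block, with
 * illustrative numbers.  With ERASED_SIZE == 64 and original_nbytes == 1000
 * (> 2*ERASED_SIZE), only the first 64 and last 64 data bytes are copied
 * into `save`; those regions, plus the header and trailer, are then
 * overwritten with PYMEM_DEADBYTE before calling the underlying realloc.
 * Afterwards save[0:64] is copied back to the start of the (possibly moved)
 * block and save[64:128] to offset original_nbytes - 64 == 936, each copy
 * clipped to the new size, so the deliberately erased bytes reappear in
 * whatever part of the block survives the resize.
 */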
2631 
2632 static inline void
2633 _PyMem_DebugCheckGIL(const char *func)
2634 {
2635     if (!PyGILState_Check()) {
2636         _Py_FatalErrorFunc(func,
2637                            "Python memory allocator called "
2638                            "without holding the GIL");
2639     }
2640 }
2641 
2642 static void *
2643 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2644 {
2645     _PyMem_DebugCheckGIL(__func__);
2646     return _PyMem_DebugRawMalloc(ctx, nbytes);
2647 }
2648 
2649 static void *
2650 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2651 {
2652     _PyMem_DebugCheckGIL(__func__);
2653     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2654 }
2655 
2656 
2657 static void
2658 _PyMem_DebugFree(void *ctx, void *ptr)
2659 {
2660     _PyMem_DebugCheckGIL(__func__);
2661     _PyMem_DebugRawFree(ctx, ptr);
2662 }
2663 
2664 
2665 static void *
2666 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2667 {
2668     _PyMem_DebugCheckGIL(__func__);
2669     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2670 }
2671 
2672 /* Check the forbidden bytes on both ends of the memory allocated for p.
2673  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2674  * and call Py_FatalError to kill the program.
2675  * The API id is also checked.
2676  */
2677 static void
2678 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2679 {
2680     assert(p != NULL);
2681 
2682     const uint8_t *q = (const uint8_t *)p;
2683     size_t nbytes;
2684     const uint8_t *tail;
2685     int i;
2686     char id;
2687 
2688     /* Check the API id */
2689     id = (char)q[-SST];
2690     if (id != api) {
2691         _PyObject_DebugDumpAddress(p);
2692         _Py_FatalErrorFormat(func,
2693                              "bad ID: Allocated using API '%c', "
2694                              "verified using API '%c'",
2695                              id, api);
2696     }
2697 
2698     /* Check the stuff at the start of p first:  if there's underwrite
2699      * corruption, the number-of-bytes field may be nuts, and checking
2700      * the tail could lead to a segfault then.
2701      */
2702     for (i = SST-1; i >= 1; --i) {
2703         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2704             _PyObject_DebugDumpAddress(p);
2705             _Py_FatalErrorFunc(func, "bad leading pad byte");
2706         }
2707     }
2708 
2709     nbytes = read_size_t(q - 2*SST);
2710     tail = q + nbytes;
2711     for (i = 0; i < SST; ++i) {
2712         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2713             _PyObject_DebugDumpAddress(p);
2714             _Py_FatalErrorFunc(func, "bad trailing pad byte");
2715         }
2716     }
2717 }
2718 
2719 /* Display info to stderr about the memory block at p. */
2720 static void
2721 _PyObject_DebugDumpAddress(const void *p)
2722 {
2723     const uint8_t *q = (const uint8_t *)p;
2724     const uint8_t *tail;
2725     size_t nbytes;
2726     int i;
2727     int ok;
2728     char id;
2729 
2730     fprintf(stderr, "Debug memory block at address p=%p:", p);
2731     if (p == NULL) {
2732         fprintf(stderr, "\n");
2733         return;
2734     }
2735     id = (char)q[-SST];
2736     fprintf(stderr, " API '%c'\n", id);
2737 
2738     nbytes = read_size_t(q - 2*SST);
2739     fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
2740 
2741     /* In case this is nuts, check the leading pad bytes first. */
2742     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2743     ok = 1;
2744     for (i = 1; i <= SST-1; ++i) {
2745         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2746             ok = 0;
2747             break;
2748         }
2749     }
2750     if (ok)
2751         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2752     else {
2753         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2754             PYMEM_FORBIDDENBYTE);
2755         for (i = SST-1; i >= 1; --i) {
2756             const uint8_t byte = *(q-i);
2757             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2758             if (byte != PYMEM_FORBIDDENBYTE)
2759                 fputs(" *** OUCH", stderr);
2760             fputc('\n', stderr);
2761         }
2762 
2763         fputs("    Because memory is corrupted at the start, the "
2764               "count of bytes requested\n"
2765               "       may be bogus, and checking the trailing pad "
2766               "bytes may segfault.\n", stderr);
2767     }
2768 
2769     tail = q + nbytes;
2770     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2771     ok = 1;
2772     for (i = 0; i < SST; ++i) {
2773         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2774             ok = 0;
2775             break;
2776         }
2777     }
2778     if (ok)
2779         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2780     else {
2781         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2782                 PYMEM_FORBIDDENBYTE);
2783         for (i = 0; i < SST; ++i) {
2784             const uint8_t byte = tail[i];
2785             fprintf(stderr, "        at tail+%d: 0x%02x",
2786                     i, byte);
2787             if (byte != PYMEM_FORBIDDENBYTE)
2788                 fputs(" *** OUCH", stderr);
2789             fputc('\n', stderr);
2790         }
2791     }
2792 
2793 #ifdef PYMEM_DEBUG_SERIALNO
2794     size_t serial = read_size_t(tail + SST);
2795     fprintf(stderr,
2796             "    The block was made by call #%zu to debug malloc/realloc.\n",
2797             serial);
2798 #endif
2799 
2800     if (nbytes > 0) {
2801         i = 0;
2802         fputs("    Data at p:", stderr);
2803         /* print up to 8 bytes at the start */
2804         while (q < tail && i < 8) {
2805             fprintf(stderr, " %02x", *q);
2806             ++i;
2807             ++q;
2808         }
2809         /* and up to 8 at the end */
2810         if (q < tail) {
2811             if (tail - q > 8) {
2812                 fputs(" ...", stderr);
2813                 q = tail - 8;
2814             }
2815             while (q < tail) {
2816                 fprintf(stderr, " %02x", *q);
2817                 ++q;
2818             }
2819         }
2820         fputc('\n', stderr);
2821     }
2822     fputc('\n', stderr);
2823 
2824     fflush(stderr);
2825     _PyMem_DumpTraceback(fileno(stderr), p);
2826 }
2827 
2828 
2829 static size_t
2830 printone(FILE *out, const char* msg, size_t value)
2831 {
2832     int i, k;
2833     char buf[100];
2834     size_t origvalue = value;
2835 
2836     fputs(msg, out);
2837     for (i = (int)strlen(msg); i < 35; ++i)
2838         fputc(' ', out);
2839     fputc('=', out);
2840 
2841     /* Write the value with commas. */
2842     i = 22;
2843     buf[i--] = '\0';
2844     buf[i--] = '\n';
2845     k = 3;
2846     do {
2847         size_t nextvalue = value / 10;
2848         unsigned int digit = (unsigned int)(value - nextvalue * 10);
2849         value = nextvalue;
2850         buf[i--] = (char)(digit + '0');
2851         --k;
2852         if (k == 0 && value && i >= 0) {
2853             k = 3;
2854             buf[i--] = ',';
2855         }
2856     } while (value && i >= 0);
2857 
2858     while (i >= 0)
2859         buf[i--] = ' ';
2860     fputs(buf, out);
2861 
2862     return origvalue;
2863 }
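/* An output sketch for printone(), using a hypothetical label:
 *
 *     printone(out, "# example counter", 1234567);
 *
 * writes the label padded out to 35 columns, an '=', and then the value
 * right-aligned in a 21-character field with thousands separators, roughly:
 *
 *     # example counter                  =              1,234,567
 */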
2864 
2865 void
2866 _PyDebugAllocatorStats(FILE *out,
2867                        const char *block_name, int num_blocks, size_t sizeof_block)
2868 {
2869     char buf1[128];
2870     char buf2[128];
2871     PyOS_snprintf(buf1, sizeof(buf1),
2872                   "%d %ss * %zd bytes each",
2873                   num_blocks, block_name, sizeof_block);
2874     PyOS_snprintf(buf2, sizeof(buf2),
2875                   "%48s ", buf1);
2876     (void)printone(out, buf2, num_blocks * sizeof_block);
2877 }
2878 
2879 
2880 #ifdef WITH_PYMALLOC
2881 
2882 #ifdef Py_DEBUG
2883 /* Is target in the list?  The list is traversed via the nextpool pointers.
2884  * The list may be NULL-terminated, or circular.  Return 1 if target is in
2885  * list, else 0.
2886  */
2887 static int
2888 pool_is_in_list(const poolp target, poolp list)
2889 {
2890     poolp origlist = list;
2891     assert(target != NULL);
2892     if (list == NULL)
2893         return 0;
2894     do {
2895         if (target == list)
2896             return 1;
2897         list = list->nextpool;
2898     } while (list != NULL && list != origlist);
2899     return 0;
2900 }
2901 #endif
2902 
2903 /* Print summary info to "out" about the state of pymalloc's structures.
2904  * In Py_DEBUG mode, also perform some expensive internal consistency
2905  * checks.
2906  *
2907  * Return 0 if the memory debug hooks are not installed or no statistics were
2908  * written into out; return 1 otherwise.
2909  */
2910 int
2911 _PyObject_DebugMallocStats(FILE *out)
2912 {
2913     if (!_PyMem_PymallocEnabled()) {
2914         return 0;
2915     }
2916 
2917     uint i;
2918     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2919     /* # of pools, allocated blocks, and free blocks per class index */
2920     size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2921     size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2922     size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2923     /* total # of allocated bytes in used and full pools */
2924     size_t allocated_bytes = 0;
2925     /* total # of available bytes in used pools */
2926     size_t available_bytes = 0;
2927     /* # of free pools + pools not yet carved out of current arena */
2928     uint numfreepools = 0;
2929     /* # of bytes for arena alignment padding */
2930     size_t arena_alignment = 0;
2931     /* # of bytes in used and full pools used for pool_headers */
2932     size_t pool_header_bytes = 0;
2933     /* # of bytes in used and full pools wasted due to quantization,
2934      * i.e. the necessarily leftover space at the ends of used and
2935      * full pools.
2936      */
2937     size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uint j;
        uintptr_t base = arenas[i].address;

        /* Skip arenas which are not allocated. */
        if (arenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += arenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) arenas[i].pool_address);
        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
             ++j, base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, arenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Account for what all of those arena bytes are being used for. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    assert(narenas * ARENA_SIZE == total);
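    /* The assert above checks that every arena byte has been attributed to
     * exactly one of the categories printed:
     *
     *   narenas * ARENA_SIZE == allocated blocks + available blocks
     *                           + unused pools * POOL_SIZE
     *                           + pool headers + quantization
     *                           + arena alignment padding
     */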

#if WITH_PYMALLOC_RADIX_TREE
    fputs("\narena map counts\n", out);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
    (void)printone(out, "Total", total);
#endif
#endif

    return 1;
}
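
/* Usage note: from Python code this report is reachable through
 * sys._debugmallocstats(), which passes stderr for "out".  A C caller can
 * invoke it directly and check whether anything was written:
 *
 *     if (!_PyObject_DebugMallocStats(stderr)) {
 *         // pymalloc is not the active allocator (e.g. PYTHONMALLOC=malloc),
 *         // so no report was produced.
 *     }
 */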

#endif /* #ifdef WITH_PYMALLOC */