1 #include "Python.h"
2 #include "pycore_pymem.h" // _PyTraceMalloc_Config
3
4 #include <stdbool.h>
5
6
7 /* Defined in tracemalloc.c */
8 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
9
10
11 /* Python's malloc wrappers (see pymem.h) */
12
13 #undef uint
14 #define uint unsigned int /* assuming >= 16 bits */
15
16 /* Forward declaration */
17 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
18 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
19 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
20 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
21
22 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
23 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
24 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
25 static void _PyMem_DebugFree(void *ctx, void *p);
26
27 static void _PyObject_DebugDumpAddress(const void *p);
28 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
29
30 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
31
32 #if defined(__has_feature) /* Clang */
33 # if __has_feature(address_sanitizer) /* is ASAN enabled? */
34 # define _Py_NO_SANITIZE_ADDRESS \
35 __attribute__((no_sanitize("address")))
36 # endif
37 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */
38 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
39 # endif
40 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */
41 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
42 # endif
43 #elif defined(__GNUC__)
44 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */
45 # define _Py_NO_SANITIZE_ADDRESS \
46 __attribute__((no_sanitize_address))
47 # endif
48 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
49 // is provided only since GCC 7.
50 # if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
51 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
52 # endif
53 #endif
54
55 #ifndef _Py_NO_SANITIZE_ADDRESS
56 # define _Py_NO_SANITIZE_ADDRESS
57 #endif
58 #ifndef _Py_NO_SANITIZE_THREAD
59 # define _Py_NO_SANITIZE_THREAD
60 #endif
61 #ifndef _Py_NO_SANITIZE_MEMORY
62 # define _Py_NO_SANITIZE_MEMORY
63 #endif
64
65 #ifdef WITH_PYMALLOC
66
67 #ifdef MS_WINDOWS
68 # include <windows.h>
69 #elif defined(HAVE_MMAP)
70 # include <sys/mman.h>
71 # ifdef MAP_ANONYMOUS
72 # define ARENAS_USE_MMAP
73 # endif
74 #endif
75
76 /* Forward declaration */
77 static void* _PyObject_Malloc(void *ctx, size_t size);
78 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
79 static void _PyObject_Free(void *ctx, void *p);
80 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
81 #endif
82
83
84 /* bpo-35053: Declare tracemalloc configuration here rather than
85 Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic
86 library, whereas _Py_NewReference() requires it. */
87 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
88
89
90 static void *
_PyMem_RawMalloc(void * ctx,size_t size)91 _PyMem_RawMalloc(void *ctx, size_t size)
92 {
93 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
94 for malloc(0), which would be treated as an error. Some platforms would
95 return a pointer with no memory behind it, which would break pymalloc.
96 To solve these problems, allocate an extra byte. */
97 if (size == 0)
98 size = 1;
99 return malloc(size);
100 }
101
102 static void *
_PyMem_RawCalloc(void * ctx,size_t nelem,size_t elsize)103 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
104 {
105 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
106 for calloc(0, 0), which would be treated as an error. Some platforms
107 would return a pointer with no memory behind it, which would break
108 pymalloc. To solve these problems, allocate an extra byte. */
109 if (nelem == 0 || elsize == 0) {
110 nelem = 1;
111 elsize = 1;
112 }
113 return calloc(nelem, elsize);
114 }
115
116 static void *
_PyMem_RawRealloc(void * ctx,void * ptr,size_t size)117 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
118 {
119 if (size == 0)
120 size = 1;
121 return realloc(ptr, size);
122 }
123
124 static void
_PyMem_RawFree(void * ctx,void * ptr)125 _PyMem_RawFree(void *ctx, void *ptr)
126 {
127 free(ptr);
128 }
129
130
131 #ifdef MS_WINDOWS
132 static void *
_PyObject_ArenaVirtualAlloc(void * ctx,size_t size)133 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
134 {
135 return VirtualAlloc(NULL, size,
136 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
137 }
138
139 static void
_PyObject_ArenaVirtualFree(void * ctx,void * ptr,size_t size)140 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
141 {
142 VirtualFree(ptr, 0, MEM_RELEASE);
143 }
144
145 #elif defined(ARENAS_USE_MMAP)
146 static void *
_PyObject_ArenaMmap(void * ctx,size_t size)147 _PyObject_ArenaMmap(void *ctx, size_t size)
148 {
149 void *ptr;
150 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
151 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
152 if (ptr == MAP_FAILED)
153 return NULL;
154 assert(ptr != NULL);
155 return ptr;
156 }
157
158 static void
_PyObject_ArenaMunmap(void * ctx,void * ptr,size_t size)159 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
160 {
161 munmap(ptr, size);
162 }
163
164 #else
165 static void *
_PyObject_ArenaMalloc(void * ctx,size_t size)166 _PyObject_ArenaMalloc(void *ctx, size_t size)
167 {
168 return malloc(size);
169 }
170
171 static void
_PyObject_ArenaFree(void * ctx,void * ptr,size_t size)172 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
173 {
174 free(ptr);
175 }
176 #endif
177
178 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
179 #ifdef WITH_PYMALLOC
180 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
181 #endif
182
183 #define PYRAW_ALLOC MALLOC_ALLOC
184 #ifdef WITH_PYMALLOC
185 # define PYOBJ_ALLOC PYMALLOC_ALLOC
186 #else
187 # define PYOBJ_ALLOC MALLOC_ALLOC
188 #endif
189 #define PYMEM_ALLOC PYOBJ_ALLOC
190
191 typedef struct {
192 /* We tag each block with an API ID in order to tag API violations */
193 char api_id;
194 PyMemAllocatorEx alloc;
195 } debug_alloc_api_t;
196 static struct {
197 debug_alloc_api_t raw;
198 debug_alloc_api_t mem;
199 debug_alloc_api_t obj;
200 } _PyMem_Debug = {
201 {'r', PYRAW_ALLOC},
202 {'m', PYMEM_ALLOC},
203 {'o', PYOBJ_ALLOC}
204 };
205
206 #define PYDBGRAW_ALLOC \
207 {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
208 #define PYDBGMEM_ALLOC \
209 {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
210 #define PYDBGOBJ_ALLOC \
211 {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212
213 #ifdef Py_DEBUG
214 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
215 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
216 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
217 #else
218 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
219 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
220 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
221 #endif
222
223
224 static int
pymem_set_default_allocator(PyMemAllocatorDomain domain,int debug,PyMemAllocatorEx * old_alloc)225 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
226 PyMemAllocatorEx *old_alloc)
227 {
228 if (old_alloc != NULL) {
229 PyMem_GetAllocator(domain, old_alloc);
230 }
231
232
233 PyMemAllocatorEx new_alloc;
234 switch(domain)
235 {
236 case PYMEM_DOMAIN_RAW:
237 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
238 break;
239 case PYMEM_DOMAIN_MEM:
240 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
241 break;
242 case PYMEM_DOMAIN_OBJ:
243 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
244 break;
245 default:
246 /* unknown domain */
247 return -1;
248 }
249 PyMem_SetAllocator(domain, &new_alloc);
250 if (debug) {
251 _PyMem_SetupDebugHooksDomain(domain);
252 }
253 return 0;
254 }
255
256
257 int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * old_alloc)258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259 PyMemAllocatorEx *old_alloc)
260 {
261 #ifdef Py_DEBUG
262 const int debug = 1;
263 #else
264 const int debug = 0;
265 #endif
266 return pymem_set_default_allocator(domain, debug, old_alloc);
267 }
268
269
270 int
_PyMem_GetAllocatorName(const char * name,PyMemAllocatorName * allocator)271 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
272 {
273 if (name == NULL || *name == '\0') {
274 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
275 nameions): use default memory allocators */
276 *allocator = PYMEM_ALLOCATOR_DEFAULT;
277 }
278 else if (strcmp(name, "default") == 0) {
279 *allocator = PYMEM_ALLOCATOR_DEFAULT;
280 }
281 else if (strcmp(name, "debug") == 0) {
282 *allocator = PYMEM_ALLOCATOR_DEBUG;
283 }
284 #ifdef WITH_PYMALLOC
285 else if (strcmp(name, "pymalloc") == 0) {
286 *allocator = PYMEM_ALLOCATOR_PYMALLOC;
287 }
288 else if (strcmp(name, "pymalloc_debug") == 0) {
289 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
290 }
291 #endif
292 else if (strcmp(name, "malloc") == 0) {
293 *allocator = PYMEM_ALLOCATOR_MALLOC;
294 }
295 else if (strcmp(name, "malloc_debug") == 0) {
296 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
297 }
298 else {
299 /* unknown allocator */
300 return -1;
301 }
302 return 0;
303 }
304
305
306 int
_PyMem_SetupAllocators(PyMemAllocatorName allocator)307 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
308 {
309 switch (allocator) {
310 case PYMEM_ALLOCATOR_NOT_SET:
311 /* do nothing */
312 break;
313
314 case PYMEM_ALLOCATOR_DEFAULT:
315 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
316 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
317 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
318 break;
319
320 case PYMEM_ALLOCATOR_DEBUG:
321 (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
322 (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
323 (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
324 break;
325
326 #ifdef WITH_PYMALLOC
327 case PYMEM_ALLOCATOR_PYMALLOC:
328 case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
329 {
330 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
331 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
332
333 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
334 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
335 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
336
337 if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
338 PyMem_SetupDebugHooks();
339 }
340 break;
341 }
342 #endif
343
344 case PYMEM_ALLOCATOR_MALLOC:
345 case PYMEM_ALLOCATOR_MALLOC_DEBUG:
346 {
347 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
348 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
349 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
350 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
351
352 if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
353 PyMem_SetupDebugHooks();
354 }
355 break;
356 }
357
358 default:
359 /* unknown allocator */
360 return -1;
361 }
362 return 0;
363 }
364
365
366 static int
pymemallocator_eq(PyMemAllocatorEx * a,PyMemAllocatorEx * b)367 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
368 {
369 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
370 }
371
372
373 const char*
_PyMem_GetCurrentAllocatorName(void)374 _PyMem_GetCurrentAllocatorName(void)
375 {
376 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
377 #ifdef WITH_PYMALLOC
378 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
379 #endif
380
381 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
382 pymemallocator_eq(&_PyMem, &malloc_alloc) &&
383 pymemallocator_eq(&_PyObject, &malloc_alloc))
384 {
385 return "malloc";
386 }
387 #ifdef WITH_PYMALLOC
388 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
389 pymemallocator_eq(&_PyMem, &pymalloc) &&
390 pymemallocator_eq(&_PyObject, &pymalloc))
391 {
392 return "pymalloc";
393 }
394 #endif
395
396 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
397 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
398 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
399
400 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
401 pymemallocator_eq(&_PyMem, &dbg_mem) &&
402 pymemallocator_eq(&_PyObject, &dbg_obj))
403 {
404 /* Debug hooks installed */
405 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
406 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
407 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
408 {
409 return "malloc_debug";
410 }
411 #ifdef WITH_PYMALLOC
412 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
413 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
414 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
415 {
416 return "pymalloc_debug";
417 }
418 #endif
419 }
420 return NULL;
421 }
422
423
424 #undef MALLOC_ALLOC
425 #undef PYMALLOC_ALLOC
426 #undef PYRAW_ALLOC
427 #undef PYMEM_ALLOC
428 #undef PYOBJ_ALLOC
429 #undef PYDBGRAW_ALLOC
430 #undef PYDBGMEM_ALLOC
431 #undef PYDBGOBJ_ALLOC
432
433
434 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
435 #ifdef MS_WINDOWS
436 _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
437 #elif defined(ARENAS_USE_MMAP)
438 _PyObject_ArenaMmap, _PyObject_ArenaMunmap
439 #else
440 _PyObject_ArenaMalloc, _PyObject_ArenaFree
441 #endif
442 };
443
444 #ifdef WITH_PYMALLOC
445 static int
_PyMem_DebugEnabled(void)446 _PyMem_DebugEnabled(void)
447 {
448 return (_PyObject.malloc == _PyMem_DebugMalloc);
449 }
450
451 static int
_PyMem_PymallocEnabled(void)452 _PyMem_PymallocEnabled(void)
453 {
454 if (_PyMem_DebugEnabled()) {
455 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
456 }
457 else {
458 return (_PyObject.malloc == _PyObject_Malloc);
459 }
460 }
461 #endif
462
463
464 static void
_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)465 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
466 {
467 PyMemAllocatorEx alloc;
468
469 if (domain == PYMEM_DOMAIN_RAW) {
470 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
471 return;
472 }
473
474 PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
475 alloc.ctx = &_PyMem_Debug.raw;
476 alloc.malloc = _PyMem_DebugRawMalloc;
477 alloc.calloc = _PyMem_DebugRawCalloc;
478 alloc.realloc = _PyMem_DebugRawRealloc;
479 alloc.free = _PyMem_DebugRawFree;
480 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
481 }
482 else if (domain == PYMEM_DOMAIN_MEM) {
483 if (_PyMem.malloc == _PyMem_DebugMalloc) {
484 return;
485 }
486
487 PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
488 alloc.ctx = &_PyMem_Debug.mem;
489 alloc.malloc = _PyMem_DebugMalloc;
490 alloc.calloc = _PyMem_DebugCalloc;
491 alloc.realloc = _PyMem_DebugRealloc;
492 alloc.free = _PyMem_DebugFree;
493 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
494 }
495 else if (domain == PYMEM_DOMAIN_OBJ) {
496 if (_PyObject.malloc == _PyMem_DebugMalloc) {
497 return;
498 }
499
500 PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
501 alloc.ctx = &_PyMem_Debug.obj;
502 alloc.malloc = _PyMem_DebugMalloc;
503 alloc.calloc = _PyMem_DebugCalloc;
504 alloc.realloc = _PyMem_DebugRealloc;
505 alloc.free = _PyMem_DebugFree;
506 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
507 }
508 }
509
510
511 void
PyMem_SetupDebugHooks(void)512 PyMem_SetupDebugHooks(void)
513 {
514 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
515 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
516 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
517 }
518
519 void
PyMem_GetAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * allocator)520 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
521 {
522 switch(domain)
523 {
524 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
525 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
526 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
527 default:
528 /* unknown domain: set all attributes to NULL */
529 allocator->ctx = NULL;
530 allocator->malloc = NULL;
531 allocator->calloc = NULL;
532 allocator->realloc = NULL;
533 allocator->free = NULL;
534 }
535 }
536
537 void
PyMem_SetAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * allocator)538 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
539 {
540 switch(domain)
541 {
542 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
543 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
544 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
545 /* ignore unknown domain */
546 }
547 }
548
549 void
PyObject_GetArenaAllocator(PyObjectArenaAllocator * allocator)550 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
551 {
552 *allocator = _PyObject_Arena;
553 }
554
555 void
PyObject_SetArenaAllocator(PyObjectArenaAllocator * allocator)556 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
557 {
558 _PyObject_Arena = *allocator;
559 }
560
561 void *
PyMem_RawMalloc(size_t size)562 PyMem_RawMalloc(size_t size)
563 {
564 /*
565 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
566 * Most python internals blindly use a signed Py_ssize_t to track
567 * things without checking for overflows or negatives.
568 * As size_t is unsigned, checking for size < 0 is not required.
569 */
570 if (size > (size_t)PY_SSIZE_T_MAX)
571 return NULL;
572 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
573 }
574
575 void *
PyMem_RawCalloc(size_t nelem,size_t elsize)576 PyMem_RawCalloc(size_t nelem, size_t elsize)
577 {
578 /* see PyMem_RawMalloc() */
579 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
580 return NULL;
581 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
582 }
583
584 void*
PyMem_RawRealloc(void * ptr,size_t new_size)585 PyMem_RawRealloc(void *ptr, size_t new_size)
586 {
587 /* see PyMem_RawMalloc() */
588 if (new_size > (size_t)PY_SSIZE_T_MAX)
589 return NULL;
590 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
591 }
592
PyMem_RawFree(void * ptr)593 void PyMem_RawFree(void *ptr)
594 {
595 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
596 }
597
598
599 void *
PyMem_Malloc(size_t size)600 PyMem_Malloc(size_t size)
601 {
602 /* see PyMem_RawMalloc() */
603 if (size > (size_t)PY_SSIZE_T_MAX)
604 return NULL;
605 return _PyMem.malloc(_PyMem.ctx, size);
606 }
607
608 void *
PyMem_Calloc(size_t nelem,size_t elsize)609 PyMem_Calloc(size_t nelem, size_t elsize)
610 {
611 /* see PyMem_RawMalloc() */
612 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
613 return NULL;
614 return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
615 }
616
617 void *
PyMem_Realloc(void * ptr,size_t new_size)618 PyMem_Realloc(void *ptr, size_t new_size)
619 {
620 /* see PyMem_RawMalloc() */
621 if (new_size > (size_t)PY_SSIZE_T_MAX)
622 return NULL;
623 return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
624 }
625
626 void
PyMem_Free(void * ptr)627 PyMem_Free(void *ptr)
628 {
629 _PyMem.free(_PyMem.ctx, ptr);
630 }
631
632
633 wchar_t*
_PyMem_RawWcsdup(const wchar_t * str)634 _PyMem_RawWcsdup(const wchar_t *str)
635 {
636 assert(str != NULL);
637
638 size_t len = wcslen(str);
639 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
640 return NULL;
641 }
642
643 size_t size = (len + 1) * sizeof(wchar_t);
644 wchar_t *str2 = PyMem_RawMalloc(size);
645 if (str2 == NULL) {
646 return NULL;
647 }
648
649 memcpy(str2, str, size);
650 return str2;
651 }
652
653 char *
_PyMem_RawStrdup(const char * str)654 _PyMem_RawStrdup(const char *str)
655 {
656 assert(str != NULL);
657 size_t size = strlen(str) + 1;
658 char *copy = PyMem_RawMalloc(size);
659 if (copy == NULL) {
660 return NULL;
661 }
662 memcpy(copy, str, size);
663 return copy;
664 }
665
666 char *
_PyMem_Strdup(const char * str)667 _PyMem_Strdup(const char *str)
668 {
669 assert(str != NULL);
670 size_t size = strlen(str) + 1;
671 char *copy = PyMem_Malloc(size);
672 if (copy == NULL) {
673 return NULL;
674 }
675 memcpy(copy, str, size);
676 return copy;
677 }
678
679 void *
PyObject_Malloc(size_t size)680 PyObject_Malloc(size_t size)
681 {
682 /* see PyMem_RawMalloc() */
683 if (size > (size_t)PY_SSIZE_T_MAX)
684 return NULL;
685 return _PyObject.malloc(_PyObject.ctx, size);
686 }
687
688 void *
PyObject_Calloc(size_t nelem,size_t elsize)689 PyObject_Calloc(size_t nelem, size_t elsize)
690 {
691 /* see PyMem_RawMalloc() */
692 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
693 return NULL;
694 return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
695 }
696
697 void *
PyObject_Realloc(void * ptr,size_t new_size)698 PyObject_Realloc(void *ptr, size_t new_size)
699 {
700 /* see PyMem_RawMalloc() */
701 if (new_size > (size_t)PY_SSIZE_T_MAX)
702 return NULL;
703 return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
704 }
705
706 void
PyObject_Free(void * ptr)707 PyObject_Free(void *ptr)
708 {
709 _PyObject.free(_PyObject.ctx, ptr);
710 }
711
712
713 /* If we're using GCC, use __builtin_expect() to reduce overhead of
714 the valgrind checks */
715 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
716 # define UNLIKELY(value) __builtin_expect((value), 0)
717 # define LIKELY(value) __builtin_expect((value), 1)
718 #else
719 # define UNLIKELY(value) (value)
720 # define LIKELY(value) (value)
721 #endif
722
723 #ifdef WITH_PYMALLOC
724
725 #ifdef WITH_VALGRIND
726 #include <valgrind/valgrind.h>
727
728 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
729 static int running_on_valgrind = -1;
730 #endif
731
732
733 /* An object allocator for Python.
734
735 Here is an introduction to the layers of the Python memory architecture,
736 showing where the object allocator is actually used (layer +2), It is
737 called for every object allocation and deallocation (PyObject_New/Del),
738 unless the object-specific allocators implement a proprietary allocation
739 scheme (ex.: ints use a simple free list). This is also the place where
740 the cyclic garbage collector operates selectively on container objects.
741
742
743 Object-specific allocators
744 _____ ______ ______ ________
745 [ int ] [ dict ] [ list ] ... [ string ] Python core |
746 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
747 _______________________________ | |
748 [ Python's object allocator ] | |
749 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
750 ______________________________________________________________ |
751 [ Python's raw memory allocator (PyMem_ API) ] |
752 +1 | <----- Python memory (under PyMem manager's control) ------> | |
753 __________________________________________________________________
754 [ Underlying general-purpose allocator (ex: C library malloc) ]
755 0 | <------ Virtual memory allocated for the python process -------> |
756
757 =========================================================================
758 _______________________________________________________________________
759 [ OS-specific Virtual Memory Manager (VMM) ]
760 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
761 __________________________________ __________________________________
762 [ ] [ ]
763 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
764
765 */
766 /*==========================================================================*/
767
768 /* A fast, special-purpose memory allocator for small blocks, to be used
769 on top of a general-purpose malloc -- heavily based on previous art. */
770
771 /* Vladimir Marangozov -- August 2000 */
772
773 /*
774 * "Memory management is where the rubber meets the road -- if we do the wrong
775 * thing at any level, the results will not be good. And if we don't make the
776 * levels work well together, we are in serious trouble." (1)
777 *
778 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
779 * "Dynamic Storage Allocation: A Survey and Critical Review",
780 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
781 */
782
783 /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */
784
785 /*==========================================================================*/
786
787 /*
788 * Allocation strategy abstract:
789 *
790 * For small requests, the allocator sub-allocates <Big> blocks of memory.
791 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
792 * system's allocator.
793 *
794 * Small requests are grouped in size classes spaced 8 bytes apart, due
795 * to the required valid alignment of the returned address. Requests of
796 * a particular size are serviced from memory pools of 4K (one VMM page).
797 * Pools are fragmented on demand and contain free lists of blocks of one
798 * particular size class. In other words, there is a fixed-size allocator
799 * for each size class. Free pools are shared by the different allocators
800 * thus minimizing the space reserved for a particular size class.
801 *
802 * This allocation strategy is a variant of what is known as "simple
803 * segregated storage based on array of free lists". The main drawback of
804 * simple segregated storage is that we might end up with lot of reserved
805 * memory for the different free lists, which degenerate in time. To avoid
806 * this, we partition each free list in pools and we share dynamically the
807 * reserved space between all free lists. This technique is quite efficient
808 * for memory intensive programs which allocate mainly small-sized blocks.
809 *
810 * For small requests we have the following table:
811 *
812 * Request in bytes Size of allocated block Size class idx
813 * ----------------------------------------------------------------
814 * 1-8 8 0
815 * 9-16 16 1
816 * 17-24 24 2
817 * 25-32 32 3
818 * 33-40 40 4
819 * 41-48 48 5
820 * 49-56 56 6
821 * 57-64 64 7
822 * 65-72 72 8
823 * ... ... ...
824 * 497-504 504 62
825 * 505-512 512 63
826 *
827 * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
828 * allocator.
829 */
830
831 /*==========================================================================*/
832
833 /*
834 * -- Main tunable settings section --
835 */
836
837 /*
838 * Alignment of addresses returned to the user. 8-bytes alignment works
839 * on most current architectures (with 32-bit or 64-bit address buses).
840 * The alignment value is also used for grouping small requests in size
841 * classes spaced ALIGNMENT bytes apart.
842 *
843 * You shouldn't change this unless you know what you are doing.
844 */
845
846 #if SIZEOF_VOID_P > 4
847 #define ALIGNMENT 16 /* must be 2^N */
848 #define ALIGNMENT_SHIFT 4
849 #else
850 #define ALIGNMENT 8 /* must be 2^N */
851 #define ALIGNMENT_SHIFT 3
852 #endif
853
854 /* Return the number of bytes in size class I, as a uint. */
855 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
856
857 /*
858 * Max size threshold below which malloc requests are considered to be
859 * small enough in order to use preallocated memory pools. You can tune
860 * this value according to your application behaviour and memory needs.
861 *
862 * Note: a size threshold of 512 guarantees that newly created dictionaries
863 * will be allocated from preallocated memory pools on 64-bit.
864 *
865 * The following invariants must hold:
866 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
867 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
868 *
869 * Although not required, for better performance and space efficiency,
870 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
871 */
872 #define SMALL_REQUEST_THRESHOLD 512
873 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
874
875 /*
876 * The system's VMM page size can be obtained on most unices with a
877 * getpagesize() call or deduced from various header files. To make
878 * things simpler, we assume that it is 4K, which is OK for most systems.
879 * It is probably better if this is the native page size, but it doesn't
880 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
881 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
882 * violation fault. 4K is apparently OK for all the platforms that python
883 * currently targets.
884 */
885 #define SYSTEM_PAGE_SIZE (4 * 1024)
886 #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1)
887
888 /*
889 * Maximum amount of memory managed by the allocator for small requests.
890 */
891 #ifdef WITH_MEMORY_LIMITS
892 #ifndef SMALL_MEMORY_LIMIT
893 #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */
894 #endif
895 #endif
896
897 #if !defined(WITH_PYMALLOC_RADIX_TREE)
898 /* Use radix-tree to track arena memory regions, for address_in_range().
899 * Enable by default since it allows larger pool sizes. Can be disabled
900 * using -DWITH_PYMALLOC_RADIX_TREE=0 */
901 #define WITH_PYMALLOC_RADIX_TREE 1
902 #endif
903
904 #if SIZEOF_VOID_P > 4
905 /* on 64-bit platforms use larger pools and arenas if we can */
906 #define USE_LARGE_ARENAS
907 #if WITH_PYMALLOC_RADIX_TREE
908 /* large pools only supported if radix-tree is enabled */
909 #define USE_LARGE_POOLS
910 #endif
911 #endif
912
913 /*
914 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
915 * on a page boundary. This is a reserved virtual address space for the
916 * current process (obtained through a malloc()/mmap() call). In no way this
917 * means that the memory arenas will be used entirely. A malloc(<Big>) is
918 * usually an address range reservation for <Big> bytes, unless all pages within
919 * this space are referenced subsequently. So malloc'ing big blocks and not
920 * using them does not mean "wasting memory". It's an addressable range
921 * wastage...
922 *
923 * Arenas are allocated with mmap() on systems supporting anonymous memory
924 * mappings to reduce heap fragmentation.
925 */
926 #ifdef USE_LARGE_ARENAS
927 #define ARENA_BITS 20 /* 1 MiB */
928 #else
929 #define ARENA_BITS 18 /* 256 KiB */
930 #endif
931 #define ARENA_SIZE (1 << ARENA_BITS)
932 #define ARENA_SIZE_MASK (ARENA_SIZE - 1)
933
934 #ifdef WITH_MEMORY_LIMITS
935 #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
936 #endif
937
938 /*
939 * Size of the pools used for small blocks. Must be a power of 2.
940 */
941 #ifdef USE_LARGE_POOLS
942 #define POOL_BITS 14 /* 16 KiB */
943 #else
944 #define POOL_BITS 12 /* 4 KiB */
945 #endif
946 #define POOL_SIZE (1 << POOL_BITS)
947 #define POOL_SIZE_MASK (POOL_SIZE - 1)
948
949 #if !WITH_PYMALLOC_RADIX_TREE
950 #if POOL_SIZE != SYSTEM_PAGE_SIZE
951 # error "pool size must be equal to system page size"
952 #endif
953 #endif
954
955 #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE)
956 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
957 # error "arena size not an exact multiple of pool size"
958 #endif
959
960 /*
961 * -- End of tunable settings section --
962 */
963
964 /*==========================================================================*/
965
966 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
967 typedef uint8_t block;
968
969 /* Pool for small blocks. */
970 struct pool_header {
971 union { block *_padding;
972 uint count; } ref; /* number of allocated blocks */
973 block *freeblock; /* pool's free list head */
974 struct pool_header *nextpool; /* next pool of this size class */
975 struct pool_header *prevpool; /* previous pool "" */
976 uint arenaindex; /* index into arenas of base adr */
977 uint szidx; /* block size class index */
978 uint nextoffset; /* bytes to virgin block */
979 uint maxnextoffset; /* largest valid nextoffset */
980 };
981
982 typedef struct pool_header *poolp;
983
984 /* Record keeping for arenas. */
985 struct arena_object {
986 /* The address of the arena, as returned by malloc. Note that 0
987 * will never be returned by a successful malloc, and is used
988 * here to mark an arena_object that doesn't correspond to an
989 * allocated arena.
990 */
991 uintptr_t address;
992
993 /* Pool-aligned pointer to the next pool to be carved off. */
994 block* pool_address;
995
996 /* The number of available pools in the arena: free pools + never-
997 * allocated pools.
998 */
999 uint nfreepools;
1000
1001 /* The total number of pools in the arena, whether or not available. */
1002 uint ntotalpools;
1003
1004 /* Singly-linked list of available pools. */
1005 struct pool_header* freepools;
1006
1007 /* Whenever this arena_object is not associated with an allocated
1008 * arena, the nextarena member is used to link all unassociated
1009 * arena_objects in the singly-linked `unused_arena_objects` list.
1010 * The prevarena member is unused in this case.
1011 *
1012 * When this arena_object is associated with an allocated arena
1013 * with at least one available pool, both members are used in the
1014 * doubly-linked `usable_arenas` list, which is maintained in
1015 * increasing order of `nfreepools` values.
1016 *
1017 * Else this arena_object is associated with an allocated arena
1018 * all of whose pools are in use. `nextarena` and `prevarena`
1019 * are both meaningless in this case.
1020 */
1021 struct arena_object* nextarena;
1022 struct arena_object* prevarena;
1023 };
1024
1025 #define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1026
1027 #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */
1028
1029 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1030 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1031
1032 /* Return total number of blocks in pool of size index I, as a uint. */
1033 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
1034
1035 /*==========================================================================*/
1036
1037 /*
1038 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1039
1040 This is involved. For an index i, usedpools[i+i] is the header for a list of
1041 all partially used pools holding small blocks with "size class idx" i. So
1042 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1043 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1044
1045 Pools are carved off an arena's highwater mark (an arena_object's pool_address
1046 member) as needed. Once carved off, a pool is in one of three states forever
1047 after:
1048
1049 used == partially used, neither empty nor full
1050 At least one block in the pool is currently allocated, and at least one
1051 block in the pool is not currently allocated (note this implies a pool
1052 has room for at least two blocks).
1053 This is a pool's initial state, as a pool is created only when malloc
1054 needs space.
1055 The pool holds blocks of a fixed size, and is in the circular list headed
1056 at usedpools[i] (see above). It's linked to the other used pools of the
1057 same size class via the pool_header's nextpool and prevpool members.
1058 If all but one block is currently allocated, a malloc can cause a
1059 transition to the full state. If all but one block is not currently
1060 allocated, a free can cause a transition to the empty state.
1061
1062 full == all the pool's blocks are currently allocated
1063 On transition to full, a pool is unlinked from its usedpools[] list.
1064 It's not linked to from anything then anymore, and its nextpool and
1065 prevpool members are meaningless until it transitions back to used.
1066 A free of a block in a full pool puts the pool back in the used state.
1067 Then it's linked in at the front of the appropriate usedpools[] list, so
1068 that the next allocation for its size class will reuse the freed block.
1069
1070 empty == all the pool's blocks are currently available for allocation
1071 On transition to empty, a pool is unlinked from its usedpools[] list,
1072 and linked to the front of its arena_object's singly-linked freepools list,
1073 via its nextpool member. The prevpool member has no meaning in this case.
1074 Empty pools have no inherent size class: the next time a malloc finds
1075 an empty list in usedpools[], it takes the first pool off of freepools.
1076 If the size class needed happens to be the same as the size class the pool
1077 last had, some pool initialization can be skipped.
1078
1079
1080 Block Management
1081
1082 Blocks within pools are again carved out as needed. pool->freeblock points to
1083 the start of a singly-linked list of free blocks within the pool. When a
1084 block is freed, it's inserted at the front of its pool's freeblock list. Note
1085 that the available blocks in a pool are *not* linked all together when a pool
1086 is initialized. Instead only "the first two" (lowest addresses) blocks are
1087 set up, returning the first such block, and setting pool->freeblock to a
1088 one-block list holding the second such block. This is consistent with that
1089 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1090 of memory until it's actually needed.
1091
1092 So long as a pool is in the used state, we're certain there *is* a block
1093 available for allocating, and pool->freeblock is not NULL. If pool->freeblock
1094 points to the end of the free list before we've carved the entire pool into
1095 blocks, that means we simply haven't yet gotten to one of the higher-address
1096 blocks. The offset from the pool_header to the start of "the next" virgin
1097 block is stored in the pool_header nextoffset member, and the largest value
1098 of nextoffset that makes sense is stored in the maxnextoffset member when a
1099 pool is initialized. All the blocks in a pool have been passed out at least
1100 once when and only when nextoffset > maxnextoffset.
1101
1102
1103 Major obscurity: While the usedpools vector is declared to have poolp
1104 entries, it doesn't really. It really contains two pointers per (conceptual)
1105 poolp entry, the nextpool and prevpool members of a pool_header. The
1106 excruciating initialization code below fools C so that
1107
1108 usedpool[i+i]
1109
1110 "acts like" a genuine poolp, but only so long as you only reference its
1111 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
1112 compensating for that a pool_header's nextpool and prevpool members
1113 immediately follow a pool_header's first two members:
1114
1115 union { block *_padding;
1116 uint count; } ref;
1117 block *freeblock;
1118
1119 each of which consume sizeof(block *) bytes. So what usedpools[i+i] really
1120 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1121 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1122 circular list is empty).
1123
1124 It's unclear why the usedpools setup is so convoluted. It could be to
1125 minimize the amount of cache required to hold this heavily-referenced table
1126 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1127 referencing code has to remember to "double the index" and doing so isn't
1128 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1129 on that C doesn't insert any padding anywhere in a pool_header at or before
1130 the prevpool member.
1131 **************************************************************************** */
1132
1133 #define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1134 #define PT(x) PTA(x), PTA(x)
1135
1136 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1137 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1138 #if NB_SMALL_SIZE_CLASSES > 8
1139 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1140 #if NB_SMALL_SIZE_CLASSES > 16
1141 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1142 #if NB_SMALL_SIZE_CLASSES > 24
1143 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1144 #if NB_SMALL_SIZE_CLASSES > 32
1145 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1146 #if NB_SMALL_SIZE_CLASSES > 40
1147 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1148 #if NB_SMALL_SIZE_CLASSES > 48
1149 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1150 #if NB_SMALL_SIZE_CLASSES > 56
1151 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1152 #if NB_SMALL_SIZE_CLASSES > 64
1153 #error "NB_SMALL_SIZE_CLASSES should be less than 64"
1154 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1155 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1156 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1157 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1158 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1159 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1160 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1161 #endif /* NB_SMALL_SIZE_CLASSES > 8 */
1162 };
1163
1164 /*==========================================================================
1165 Arena management.
1166
1167 `arenas` is a vector of arena_objects. It contains maxarenas entries, some of
1168 which may not be currently used (== they're arena_objects that aren't
1169 currently associated with an allocated arena). Note that arenas proper are
1170 separately malloc'ed.
1171
1172 Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
1173 we do try to free() arenas, and use some mild heuristic strategies to increase
1174 the likelihood that arenas eventually can be freed.
1175
1176 unused_arena_objects
1177
1178 This is a singly-linked list of the arena_objects that are currently not
1179 being used (no arena is associated with them). Objects are taken off the
1180 head of the list in new_arena(), and are pushed on the head of the list in
1181 PyObject_Free() when the arena is empty. Key invariant: an arena_object
1182 is on this list if and only if its .address member is 0.
1183
1184 usable_arenas
1185
1186 This is a doubly-linked list of the arena_objects associated with arenas
1187 that have pools available. These pools are either waiting to be reused,
1188 or have not been used before. The list is sorted to have the most-
1189 allocated arenas first (ascending order based on the nfreepools member).
1190 This means that the next allocation will come from a heavily used arena,
1191 which gives the nearly empty arenas a chance to be returned to the system.
1192 In my unscientific tests this dramatically improved the number of arenas
1193 that could be freed.
1194
1195 Note that an arena_object associated with an arena all of whose pools are
1196 currently in use isn't on either list.
1197
1198 Changed in Python 3.8: keeping usable_arenas sorted by number of free pools
1199 used to be done by one-at-a-time linear search when an arena's number of
1200 free pools changed. That could, overall, consume time quadratic in the
1201 number of arenas. That didn't really matter when there were only a few
1202 hundred arenas (typical!), but could be a timing disaster when there were
1203 hundreds of thousands. See bpo-37029.
1204
1205 Now we have a vector of "search fingers" to eliminate the need to search:
1206 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1207 with nfp free pools. This is NULL if and only if there is no arena with
1208 nfp free pools in usable_arenas.
1209 */
1210
1211 /* Array of objects used to track chunks of memory (arenas). */
1212 static struct arena_object* arenas = NULL;
1213 /* Number of slots currently allocated in the `arenas` vector. */
1214 static uint maxarenas = 0;
1215
1216 /* The head of the singly-linked, NULL-terminated list of available
1217 * arena_objects.
1218 */
1219 static struct arena_object* unused_arena_objects = NULL;
1220
1221 /* The head of the doubly-linked, NULL-terminated at each end, list of
1222 * arena_objects associated with arenas that have pools available.
1223 */
1224 static struct arena_object* usable_arenas = NULL;
1225
1226 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1227 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1228
1229 /* How many arena_objects do we initially allocate?
1230 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1231 * `arenas` vector.
1232 */
1233 #define INITIAL_ARENA_OBJECTS 16
1234
1235 /* Number of arenas allocated that haven't been free()'d. */
1236 static size_t narenas_currently_allocated = 0;
1237
1238 /* Total number of times malloc() called to allocate an arena. */
1239 static size_t ntimes_arena_allocated = 0;
1240 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1241 static size_t narenas_highwater = 0;
1242
1243 static Py_ssize_t raw_allocated_blocks;
1244
1245 Py_ssize_t
_Py_GetAllocatedBlocks(void)1246 _Py_GetAllocatedBlocks(void)
1247 {
1248 Py_ssize_t n = raw_allocated_blocks;
1249 /* add up allocated blocks for used pools */
1250 for (uint i = 0; i < maxarenas; ++i) {
1251 /* Skip arenas which are not allocated. */
1252 if (arenas[i].address == 0) {
1253 continue;
1254 }
1255
1256 uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1257
1258 /* visit every pool in the arena */
1259 assert(base <= (uintptr_t) arenas[i].pool_address);
1260 for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1261 poolp p = (poolp)base;
1262 n += p->ref.count;
1263 }
1264 }
1265 return n;
1266 }
1267
1268 #if WITH_PYMALLOC_RADIX_TREE
1269 /*==========================================================================*/
1270 /* radix tree for tracking arena usage
1271
1272 bit allocation for keys
1273
1274 64-bit pointers and 2^20 arena size:
1275 16 -> ignored (POINTER_BITS - ADDRESS_BITS)
1276 10 -> MAP_TOP
1277 10 -> MAP_MID
1278 8 -> MAP_BOT
1279 20 -> ideal aligned arena
1280 ----
1281 64
1282
1283 32-bit pointers and 2^18 arena size:
1284 14 -> MAP_BOT
1285 18 -> ideal aligned arena
1286 ----
1287 32
1288
1289 */
1290
1291 #if SIZEOF_VOID_P == 8
1292
1293 /* number of bits in a pointer */
1294 #define POINTER_BITS 64
1295
1296 /* Current 64-bit processors are limited to 48-bit physical addresses. For
1297 * now, the top 17 bits of addresses will all be equal to bit 2**47. If that
1298 * changes in the future, this must be adjusted upwards.
1299 */
1300 #define ADDRESS_BITS 48
1301
1302 /* use the top and mid layers of the radix tree */
1303 #define USE_INTERIOR_NODES
1304
1305 #elif SIZEOF_VOID_P == 4
1306
1307 #define POINTER_BITS 32
1308 #define ADDRESS_BITS 32
1309
1310 #else
1311
1312 /* Currently this code works for 64-bit or 32-bit pointers only. */
1313 #error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1314
1315 #endif /* SIZEOF_VOID_P */
1316
1317 /* arena_coverage_t members require this to be true */
1318 #if ARENA_BITS >= 32
1319 # error "arena size must be < 2^32"
1320 #endif
1321
1322 #ifdef USE_INTERIOR_NODES
1323 /* number of bits used for MAP_TOP and MAP_MID nodes */
1324 #define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1325 #else
1326 #define INTERIOR_BITS 0
1327 #endif
1328
1329 #define MAP_TOP_BITS INTERIOR_BITS
1330 #define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1331 #define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1332
1333 #define MAP_MID_BITS INTERIOR_BITS
1334 #define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1335 #define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1336
1337 #define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1338 #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1339 #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1340
1341 #define MAP_BOT_SHIFT ARENA_BITS
1342 #define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1343 #define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1344
1345 #define AS_UINT(p) ((uintptr_t)(p))
1346 #define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1347 #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1348 #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
1349
1350 #if ADDRESS_BITS > POINTER_BITS
1351 /* Return non-physical address bits of a pointer. Those bits should be same
1352 * for all valid pointers if ADDRESS_BITS set correctly. Linux has support for
1353 * 57-bit address space (Intel 5-level paging) but will not currently give
1354 * those addresses to user space.
1355 */
1356 #define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1357 #else
1358 #define HIGH_BITS(p) 0
1359 #endif
1360
1361
1362 /* This is the leaf of the radix tree. See arena_map_mark_used() for the
1363 * meaning of these members. */
1364 typedef struct {
1365 int32_t tail_hi;
1366 int32_t tail_lo;
1367 } arena_coverage_t;
1368
1369 typedef struct arena_map_bot {
1370 /* The members tail_hi and tail_lo are accessed together. So, it
1371 * better to have them as an array of structs, rather than two
1372 * arrays.
1373 */
1374 arena_coverage_t arenas[MAP_BOT_LENGTH];
1375 } arena_map_bot_t;
1376
1377 #ifdef USE_INTERIOR_NODES
1378 typedef struct arena_map_mid {
1379 struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1380 } arena_map_mid_t;
1381
1382 typedef struct arena_map_top {
1383 struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1384 } arena_map_top_t;
1385 #endif
1386
1387 /* The root of radix tree. Note that by initializing like this, the memory
1388 * should be in the BSS. The OS will only memory map pages as the MAP_MID
1389 * nodes get used (OS pages are demand loaded as needed).
1390 */
1391 #ifdef USE_INTERIOR_NODES
1392 static arena_map_top_t arena_map_root;
1393 /* accounting for number of used interior nodes */
1394 static int arena_map_mid_count;
1395 static int arena_map_bot_count;
1396 #else
1397 static arena_map_bot_t arena_map_root;
1398 #endif
1399
1400 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1401 * it cannot be created */
1402 static arena_map_bot_t *
arena_map_get(block * p,int create)1403 arena_map_get(block *p, int create)
1404 {
1405 #ifdef USE_INTERIOR_NODES
1406 /* sanity check that ADDRESS_BITS is correct */
1407 assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1408 int i1 = MAP_TOP_INDEX(p);
1409 if (arena_map_root.ptrs[i1] == NULL) {
1410 if (!create) {
1411 return NULL;
1412 }
1413 arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1414 if (n == NULL) {
1415 return NULL;
1416 }
1417 arena_map_root.ptrs[i1] = n;
1418 arena_map_mid_count++;
1419 }
1420 int i2 = MAP_MID_INDEX(p);
1421 if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1422 if (!create) {
1423 return NULL;
1424 }
1425 arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1426 if (n == NULL) {
1427 return NULL;
1428 }
1429 arena_map_root.ptrs[i1]->ptrs[i2] = n;
1430 arena_map_bot_count++;
1431 }
1432 return arena_map_root.ptrs[i1]->ptrs[i2];
1433 #else
1434 return &arena_map_root;
1435 #endif
1436 }
1437
1438
1439 /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw
1440 * away 24 bits of the address. That reduces the space requirement of
1441 * the tree compared to similar radix tree page-map schemes. In
1442 * exchange for slashing the space requirement, it needs more
1443 * computation to check an address.
1444 *
1445 * Tracking coverage is done by "ideal" arena address. It is easier to
1446 * explain in decimal so let's say that the arena size is 100 bytes.
1447 * Then, ideal addresses are 100, 200, 300, etc. For checking if a
1448 * pointer address is inside an actual arena, we have to check two ideal
1449 * arena addresses. E.g. if pointer is 357, we need to check 200 and
1450 * 300. In the rare case that an arena is aligned in the ideal way
1451 * (e.g. base address of arena is 200) then we only have to check one
1452 * ideal address.
1453 *
1454 * The tree nodes for 200 and 300 both store the address of arena.
1455 * There are two cases: the arena starts at a lower ideal arena and
1456 * extends to this one, or the arena starts in this arena and extends to
1457 * the next ideal arena. The tail_lo and tail_hi members correspond to
1458 * these two cases.
1459 */
1460
1461
1462 /* mark or unmark addresses covered by arena */
1463 static int
arena_map_mark_used(uintptr_t arena_base,int is_used)1464 arena_map_mark_used(uintptr_t arena_base, int is_used)
1465 {
1466 /* sanity check that ADDRESS_BITS is correct */
1467 assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1468 arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1469 if (n_hi == NULL) {
1470 assert(is_used); /* otherwise node should already exist */
1471 return 0; /* failed to allocate space for node */
1472 }
1473 int i3 = MAP_BOT_INDEX((block *)arena_base);
1474 int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1475 if (tail == 0) {
1476 /* is ideal arena address */
1477 n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1478 }
1479 else {
1480 /* arena_base address is not ideal (aligned to arena size) and
1481 * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node
1482 * for the next arena. Note that it might be in different MAP_TOP
1483 * and MAP_MID nodes as well so we need to call arena_map_get()
1484 * again (do the full tree traversal).
1485 */
1486 n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1487 uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1488 /* If arena_base is a legit arena address, so is arena_base_next - 1
1489 * (last address in arena). If arena_base_next overflows then it
1490 * must overflow to 0. However, that would mean arena_base was
1491 * "ideal" and we should not be in this case. */
1492 assert(arena_base < arena_base_next);
1493 arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1494 if (n_lo == NULL) {
1495 assert(is_used); /* otherwise should already exist */
1496 n_hi->arenas[i3].tail_hi = 0;
1497 return 0; /* failed to allocate space for node */
1498 }
1499 int i3_next = MAP_BOT_INDEX(arena_base_next);
1500 n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1501 }
1502 return 1;
1503 }
1504
1505 /* Return true if 'p' is a pointer inside an obmalloc arena.
1506 * _PyObject_Free() calls this so it needs to be very fast. */
1507 static int
arena_map_is_used(block * p)1508 arena_map_is_used(block *p)
1509 {
1510 arena_map_bot_t *n = arena_map_get(p, 0);
1511 if (n == NULL) {
1512 return 0;
1513 }
1514 int i3 = MAP_BOT_INDEX(p);
1515 /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1516 int32_t hi = n->arenas[i3].tail_hi;
1517 int32_t lo = n->arenas[i3].tail_lo;
1518 int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1519 return (tail < lo) || (tail >= hi && hi != 0);
1520 }
1521
1522 /* end of radix tree logic */
1523 /*==========================================================================*/
1524 #endif /* WITH_PYMALLOC_RADIX_TREE */
1525
1526
1527 /* Allocate a new arena. If we run out of memory, return NULL. Else
1528 * allocate a new arena, and return the address of an arena_object
1529 * describing the new arena. It's expected that the caller will set
1530 * `usable_arenas` to the return value.
1531 */
1532 static struct arena_object*
new_arena(void)1533 new_arena(void)
1534 {
1535 struct arena_object* arenaobj;
1536 uint excess; /* number of bytes above pool alignment */
1537 void *address;
1538 static int debug_stats = -1;
1539
1540 if (debug_stats == -1) {
1541 const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1542 debug_stats = (opt != NULL && *opt != '\0');
1543 }
1544 if (debug_stats)
1545 _PyObject_DebugMallocStats(stderr);
1546
1547 if (unused_arena_objects == NULL) {
1548 uint i;
1549 uint numarenas;
1550 size_t nbytes;
1551
1552 /* Double the number of arena objects on each allocation.
1553 * Note that it's possible for `numarenas` to overflow.
1554 */
1555 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1556 if (numarenas <= maxarenas)
1557 return NULL; /* overflow */
1558 #if SIZEOF_SIZE_T <= SIZEOF_INT
1559 if (numarenas > SIZE_MAX / sizeof(*arenas))
1560 return NULL; /* overflow */
1561 #endif
1562 nbytes = numarenas * sizeof(*arenas);
1563 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1564 if (arenaobj == NULL)
1565 return NULL;
1566 arenas = arenaobj;
1567
1568 /* We might need to fix pointers that were copied. However,
1569 * new_arena only gets called when all the pages in the
1570 * previous arenas are full. Thus, there are *no* pointers
1571 * into the old array. Thus, we don't have to worry about
1572 * invalid pointers. Just to be sure, some asserts:
1573 */
1574 assert(usable_arenas == NULL);
1575 assert(unused_arena_objects == NULL);
1576
1577 /* Put the new arenas on the unused_arena_objects list. */
1578 for (i = maxarenas; i < numarenas; ++i) {
1579 arenas[i].address = 0; /* mark as unassociated */
1580 arenas[i].nextarena = i < numarenas - 1 ?
1581 &arenas[i+1] : NULL;
1582 }
1583
1584 /* Update globals. */
1585 unused_arena_objects = &arenas[maxarenas];
1586 maxarenas = numarenas;
1587 }
1588
1589 /* Take the next available arena object off the head of the list. */
1590 assert(unused_arena_objects != NULL);
1591 arenaobj = unused_arena_objects;
1592 unused_arena_objects = arenaobj->nextarena;
1593 assert(arenaobj->address == 0);
1594 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1595 #if WITH_PYMALLOC_RADIX_TREE
1596 if (address != NULL) {
1597 if (!arena_map_mark_used((uintptr_t)address, 1)) {
1598 /* marking arena in radix tree failed, abort */
1599 _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1600 address = NULL;
1601 }
1602 }
1603 #endif
1604 if (address == NULL) {
1605 /* The allocation failed: return NULL after putting the
1606 * arenaobj back.
1607 */
1608 arenaobj->nextarena = unused_arena_objects;
1609 unused_arena_objects = arenaobj;
1610 return NULL;
1611 }
1612 arenaobj->address = (uintptr_t)address;
1613
1614 ++narenas_currently_allocated;
1615 ++ntimes_arena_allocated;
1616 if (narenas_currently_allocated > narenas_highwater)
1617 narenas_highwater = narenas_currently_allocated;
1618 arenaobj->freepools = NULL;
1619 /* pool_address <- first pool-aligned address in the arena
1620 nfreepools <- number of whole pools that fit after alignment */
1621 arenaobj->pool_address = (block*)arenaobj->address;
1622 arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1623 excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1624 if (excess != 0) {
1625 --arenaobj->nfreepools;
1626 arenaobj->pool_address += POOL_SIZE - excess;
1627 }
1628 arenaobj->ntotalpools = arenaobj->nfreepools;
1629
1630 return arenaobj;
1631 }
1632
1633
1634
1635 #if WITH_PYMALLOC_RADIX_TREE
1636 /* Return true if and only if P is an address that was allocated by
1637 pymalloc. When the radix tree is used, 'poolp' is unused.
1638 */
1639 static bool
address_in_range(void * p,poolp pool)1640 address_in_range(void *p, poolp pool)
1641 {
1642 return arena_map_is_used(p);
1643 }
1644 #else
1645 /*
1646 address_in_range(P, POOL)
1647
1648 Return true if and only if P is an address that was allocated by pymalloc.
1649 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1650 (the caller is asked to compute this because the macro expands POOL more than
1651 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1652 variable and pass the latter to the macro; because address_in_range is
1653 called on every alloc/realloc/free, micro-efficiency is important here).
1654
1655 Tricky: Let B be the arena base address associated with the pool, B =
1656 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1657
1658 B <= P < B + ARENA_SIZE
1659
1660 Subtracting B throughout, this is true iff
1661
1662 0 <= P-B < ARENA_SIZE
1663
1664 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1665
1666 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1667 before the first arena has been allocated. `arenas` is still NULL in that
1668 case. We're relying on that maxarenas is also 0 in that case, so that
1669 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1670 into a NULL arenas.
1671
1672 Details: given P and POOL, the arena_object corresponding to P is AO =
1673 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1674 stores, etc), POOL is the correct address of P's pool, AO.address is the
1675 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1676 AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1677 (NULL)). Therefore address_in_range correctly reports that obmalloc
1678 controls P.
1679
1680 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1681 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1682 in this case -- it may even be uninitialized trash. If the trash arenaindex
1683 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1684 control P.
1685
1686 Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an
1687 allocated arena, obmalloc controls all the memory in slice AO.address :
1688 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1689 so P doesn't lie in that slice, so the macro correctly reports that P is not
1690 controlled by obmalloc.
1691
1692 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1693 arena_object (one not currently associated with an allocated arena),
1694 AO.address is 0, and the second test in the macro reduces to:
1695
1696 P < ARENA_SIZE
1697
1698 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1699 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1700 of the test still passes, and the third clause (AO.address != 0) is necessary
1701 to get the correct result: AO.address is 0 in this case, so the macro
1702 correctly reports that P is not controlled by obmalloc (despite that P lies in
1703 slice AO.address : AO.address + ARENA_SIZE).
1704
1705 Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1706 2.5, arenas were never free()'ed, and an arenaindex < maxarena always
1707 corresponded to a currently-allocated arena, so the "P is not controlled by
1708 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1709 was impossible.
1710
1711 Note that the logic is excruciating, and reading up possibly uninitialized
1712 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1713 creates problems for some memory debuggers. The overwhelming advantage is
1714 that this test determines whether an arbitrary address is controlled by
1715 obmalloc in a small constant time, independent of the number of arenas
1716 obmalloc controls. Since this test is needed at every entry point, it's
1717 extremely desirable that it be this fast.
1718 */
1719
1720 static bool _Py_NO_SANITIZE_ADDRESS
1721 _Py_NO_SANITIZE_THREAD
1722 _Py_NO_SANITIZE_MEMORY
address_in_range(void * p,poolp pool)1723 address_in_range(void *p, poolp pool)
1724 {
1725 // Since address_in_range may be reading from memory which was not allocated
1726 // by Python, it is important that pool->arenaindex is read only once, as
1727 // another thread may be concurrently modifying the value without holding
1728 // the GIL. The following dance forces the compiler to read pool->arenaindex
1729 // only once.
1730 uint arenaindex = *((volatile uint *)&pool->arenaindex);
1731 return arenaindex < maxarenas &&
1732 (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1733 arenas[arenaindex].address != 0;
1734 }
1735
1736 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1737
1738 /*==========================================================================*/
1739
1740 // Called when freelist is exhausted. Extend the freelist if there is
1741 // space for a block. Otherwise, remove this pool from usedpools.
1742 static void
pymalloc_pool_extend(poolp pool,uint size)1743 pymalloc_pool_extend(poolp pool, uint size)
1744 {
1745 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1746 /* There is room for another block. */
1747 pool->freeblock = (block*)pool + pool->nextoffset;
1748 pool->nextoffset += INDEX2SIZE(size);
1749 *(block **)(pool->freeblock) = NULL;
1750 return;
1751 }
1752
1753 /* Pool is full, unlink from used pools. */
1754 poolp next;
1755 next = pool->nextpool;
1756 pool = pool->prevpool;
1757 next->prevpool = pool;
1758 pool->nextpool = next;
1759 }
1760
1761 /* called when pymalloc_alloc can not allocate a block from usedpool.
1762 * This function takes new pool and allocate a block from it.
1763 */
1764 static void*
allocate_from_new_pool(uint size)1765 allocate_from_new_pool(uint size)
1766 {
1767 /* There isn't a pool of the right size class immediately
1768 * available: use a free pool.
1769 */
1770 if (UNLIKELY(usable_arenas == NULL)) {
1771 /* No arena has a free pool: allocate a new arena. */
1772 #ifdef WITH_MEMORY_LIMITS
1773 if (narenas_currently_allocated >= MAX_ARENAS) {
1774 return NULL;
1775 }
1776 #endif
1777 usable_arenas = new_arena();
1778 if (usable_arenas == NULL) {
1779 return NULL;
1780 }
1781 usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1782 assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1783 nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1784 }
1785 assert(usable_arenas->address != 0);
1786
1787 /* This arena already had the smallest nfreepools value, so decreasing
1788 * nfreepools doesn't change that, and we don't need to rearrange the
1789 * usable_arenas list. However, if the arena becomes wholly allocated,
1790 * we need to remove its arena_object from usable_arenas.
1791 */
1792 assert(usable_arenas->nfreepools > 0);
1793 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1794 /* It's the last of this size, so there won't be any. */
1795 nfp2lasta[usable_arenas->nfreepools] = NULL;
1796 }
1797 /* If any free pools will remain, it will be the new smallest. */
1798 if (usable_arenas->nfreepools > 1) {
1799 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1800 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1801 }
1802
1803 /* Try to get a cached free pool. */
1804 poolp pool = usable_arenas->freepools;
1805 if (LIKELY(pool != NULL)) {
1806 /* Unlink from cached pools. */
1807 usable_arenas->freepools = pool->nextpool;
1808 usable_arenas->nfreepools--;
1809 if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1810 /* Wholly allocated: remove. */
1811 assert(usable_arenas->freepools == NULL);
1812 assert(usable_arenas->nextarena == NULL ||
1813 usable_arenas->nextarena->prevarena ==
1814 usable_arenas);
1815 usable_arenas = usable_arenas->nextarena;
1816 if (usable_arenas != NULL) {
1817 usable_arenas->prevarena = NULL;
1818 assert(usable_arenas->address != 0);
1819 }
1820 }
1821 else {
1822 /* nfreepools > 0: it must be that freepools
1823 * isn't NULL, or that we haven't yet carved
1824 * off all the arena's pools for the first
1825 * time.
1826 */
1827 assert(usable_arenas->freepools != NULL ||
1828 usable_arenas->pool_address <=
1829 (block*)usable_arenas->address +
1830 ARENA_SIZE - POOL_SIZE);
1831 }
1832 }
1833 else {
1834 /* Carve off a new pool. */
1835 assert(usable_arenas->nfreepools > 0);
1836 assert(usable_arenas->freepools == NULL);
1837 pool = (poolp)usable_arenas->pool_address;
1838 assert((block*)pool <= (block*)usable_arenas->address +
1839 ARENA_SIZE - POOL_SIZE);
1840 pool->arenaindex = (uint)(usable_arenas - arenas);
1841 assert(&arenas[pool->arenaindex] == usable_arenas);
1842 pool->szidx = DUMMY_SIZE_IDX;
1843 usable_arenas->pool_address += POOL_SIZE;
1844 --usable_arenas->nfreepools;
1845
1846 if (usable_arenas->nfreepools == 0) {
1847 assert(usable_arenas->nextarena == NULL ||
1848 usable_arenas->nextarena->prevarena ==
1849 usable_arenas);
1850 /* Unlink the arena: it is completely allocated. */
1851 usable_arenas = usable_arenas->nextarena;
1852 if (usable_arenas != NULL) {
1853 usable_arenas->prevarena = NULL;
1854 assert(usable_arenas->address != 0);
1855 }
1856 }
1857 }
1858
1859 /* Frontlink to used pools. */
1860 block *bp;
1861 poolp next = usedpools[size + size]; /* == prev */
1862 pool->nextpool = next;
1863 pool->prevpool = next;
1864 next->nextpool = pool;
1865 next->prevpool = pool;
1866 pool->ref.count = 1;
1867 if (pool->szidx == size) {
1868 /* Luckily, this pool last contained blocks
1869 * of the same size class, so its header
1870 * and free list are already initialized.
1871 */
1872 bp = pool->freeblock;
1873 assert(bp != NULL);
1874 pool->freeblock = *(block **)bp;
1875 return bp;
1876 }
1877 /*
1878 * Initialize the pool header, set up the free list to
1879 * contain just the second block, and return the first
1880 * block.
1881 */
1882 pool->szidx = size;
1883 size = INDEX2SIZE(size);
1884 bp = (block *)pool + POOL_OVERHEAD;
1885 pool->nextoffset = POOL_OVERHEAD + (size << 1);
1886 pool->maxnextoffset = POOL_SIZE - size;
1887 pool->freeblock = bp + size;
1888 *(block **)(pool->freeblock) = NULL;
1889 return bp;
1890 }
1891
1892 /* pymalloc allocator
1893
1894 Return a pointer to newly allocated memory if pymalloc allocated memory.
1895
1896 Return NULL if pymalloc failed to allocate the memory block: on bigger
1897 requests, on error in the code below (as a last chance to serve the request)
1898 or when the max memory limit has been reached.
1899 */
1900 static inline void*
pymalloc_alloc(void * ctx,size_t nbytes)1901 pymalloc_alloc(void *ctx, size_t nbytes)
1902 {
1903 #ifdef WITH_VALGRIND
1904 if (UNLIKELY(running_on_valgrind == -1)) {
1905 running_on_valgrind = RUNNING_ON_VALGRIND;
1906 }
1907 if (UNLIKELY(running_on_valgrind)) {
1908 return NULL;
1909 }
1910 #endif
1911
1912 if (UNLIKELY(nbytes == 0)) {
1913 return NULL;
1914 }
1915 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1916 return NULL;
1917 }
1918
1919 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1920 poolp pool = usedpools[size + size];
1921 block *bp;
1922
1923 if (LIKELY(pool != pool->nextpool)) {
1924 /*
1925 * There is a used pool for this size class.
1926 * Pick up the head block of its free list.
1927 */
1928 ++pool->ref.count;
1929 bp = pool->freeblock;
1930 assert(bp != NULL);
1931
1932 if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1933 // Reached the end of the free list, try to extend it.
1934 pymalloc_pool_extend(pool, size);
1935 }
1936 }
1937 else {
1938 /* There isn't a pool of the right size class immediately
1939 * available: use a free pool.
1940 */
1941 bp = allocate_from_new_pool(size);
1942 }
1943
1944 return (void *)bp;
1945 }
1946
1947
1948 static void *
_PyObject_Malloc(void * ctx,size_t nbytes)1949 _PyObject_Malloc(void *ctx, size_t nbytes)
1950 {
1951 void* ptr = pymalloc_alloc(ctx, nbytes);
1952 if (LIKELY(ptr != NULL)) {
1953 return ptr;
1954 }
1955
1956 ptr = PyMem_RawMalloc(nbytes);
1957 if (ptr != NULL) {
1958 raw_allocated_blocks++;
1959 }
1960 return ptr;
1961 }
1962
1963
1964 static void *
_PyObject_Calloc(void * ctx,size_t nelem,size_t elsize)1965 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1966 {
1967 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1968 size_t nbytes = nelem * elsize;
1969
1970 void* ptr = pymalloc_alloc(ctx, nbytes);
1971 if (LIKELY(ptr != NULL)) {
1972 memset(ptr, 0, nbytes);
1973 return ptr;
1974 }
1975
1976 ptr = PyMem_RawCalloc(nelem, elsize);
1977 if (ptr != NULL) {
1978 raw_allocated_blocks++;
1979 }
1980 return ptr;
1981 }
1982
1983
1984 static void
insert_to_usedpool(poolp pool)1985 insert_to_usedpool(poolp pool)
1986 {
1987 assert(pool->ref.count > 0); /* else the pool is empty */
1988
1989 uint size = pool->szidx;
1990 poolp next = usedpools[size + size];
1991 poolp prev = next->prevpool;
1992
1993 /* insert pool before next: prev <-> pool <-> next */
1994 pool->nextpool = next;
1995 pool->prevpool = prev;
1996 next->prevpool = pool;
1997 prev->nextpool = pool;
1998 }
1999
2000 static void
insert_to_freepool(poolp pool)2001 insert_to_freepool(poolp pool)
2002 {
2003 poolp next = pool->nextpool;
2004 poolp prev = pool->prevpool;
2005 next->prevpool = prev;
2006 prev->nextpool = next;
2007
2008 /* Link the pool to freepools. This is a singly-linked
2009 * list, and pool->prevpool isn't used there.
2010 */
2011 struct arena_object *ao = &arenas[pool->arenaindex];
2012 pool->nextpool = ao->freepools;
2013 ao->freepools = pool;
2014 uint nf = ao->nfreepools;
2015 /* If this is the rightmost arena with this number of free pools,
2016 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there
2017 * are no arenas in usable_arenas with that value.
2018 */
2019 struct arena_object* lastnf = nfp2lasta[nf];
2020 assert((nf == 0 && lastnf == NULL) ||
2021 (nf > 0 &&
2022 lastnf != NULL &&
2023 lastnf->nfreepools == nf &&
2024 (lastnf->nextarena == NULL ||
2025 nf < lastnf->nextarena->nfreepools)));
2026 if (lastnf == ao) { /* it is the rightmost */
2027 struct arena_object* p = ao->prevarena;
2028 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2029 }
2030 ao->nfreepools = ++nf;
2031
2032 /* All the rest is arena management. We just freed
2033 * a pool, and there are 4 cases for arena mgmt:
2034 * 1. If all the pools are free, return the arena to
2035 * the system free(). Except if this is the last
2036 * arena in the list, keep it to avoid thrashing:
2037 * keeping one wholly free arena in the list avoids
2038 * pathological cases where a simple loop would
2039 * otherwise provoke needing to allocate and free an
2040 * arena on every iteration. See bpo-37257.
2041 * 2. If this is the only free pool in the arena,
2042 * add the arena back to the `usable_arenas` list.
2043 * 3. If the "next" arena has a smaller count of free
2044 * pools, we have to "slide this arena right" to
2045 * restore that usable_arenas is sorted in order of
2046 * nfreepools.
2047 * 4. Else there's nothing more to do.
2048 */
2049 if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2050 /* Case 1. First unlink ao from usable_arenas.
2051 */
2052 assert(ao->prevarena == NULL ||
2053 ao->prevarena->address != 0);
2054 assert(ao ->nextarena == NULL ||
2055 ao->nextarena->address != 0);
2056
2057 /* Fix the pointer in the prevarena, or the
2058 * usable_arenas pointer.
2059 */
2060 if (ao->prevarena == NULL) {
2061 usable_arenas = ao->nextarena;
2062 assert(usable_arenas == NULL ||
2063 usable_arenas->address != 0);
2064 }
2065 else {
2066 assert(ao->prevarena->nextarena == ao);
2067 ao->prevarena->nextarena =
2068 ao->nextarena;
2069 }
2070 /* Fix the pointer in the nextarena. */
2071 if (ao->nextarena != NULL) {
2072 assert(ao->nextarena->prevarena == ao);
2073 ao->nextarena->prevarena =
2074 ao->prevarena;
2075 }
2076 /* Record that this arena_object slot is
2077 * available to be reused.
2078 */
2079 ao->nextarena = unused_arena_objects;
2080 unused_arena_objects = ao;
2081
2082 #if WITH_PYMALLOC_RADIX_TREE
2083 /* mark arena region as not under control of obmalloc */
2084 arena_map_mark_used(ao->address, 0);
2085 #endif
2086
2087 /* Free the entire arena. */
2088 _PyObject_Arena.free(_PyObject_Arena.ctx,
2089 (void *)ao->address, ARENA_SIZE);
2090 ao->address = 0; /* mark unassociated */
2091 --narenas_currently_allocated;
2092
2093 return;
2094 }
2095
2096 if (nf == 1) {
2097 /* Case 2. Put ao at the head of
2098 * usable_arenas. Note that because
2099 * ao->nfreepools was 0 before, ao isn't
2100 * currently on the usable_arenas list.
2101 */
2102 ao->nextarena = usable_arenas;
2103 ao->prevarena = NULL;
2104 if (usable_arenas)
2105 usable_arenas->prevarena = ao;
2106 usable_arenas = ao;
2107 assert(usable_arenas->address != 0);
2108 if (nfp2lasta[1] == NULL) {
2109 nfp2lasta[1] = ao;
2110 }
2111
2112 return;
2113 }
2114
2115 /* If this arena is now out of order, we need to keep
2116 * the list sorted. The list is kept sorted so that
2117 * the "most full" arenas are used first, which allows
2118 * the nearly empty arenas to be completely freed. In
2119 * a few un-scientific tests, it seems like this
2120 * approach allowed a lot more memory to be freed.
2121 */
2122 /* If this is the only arena with nf, record that. */
2123 if (nfp2lasta[nf] == NULL) {
2124 nfp2lasta[nf] = ao;
2125 } /* else the rightmost with nf doesn't change */
2126 /* If this was the rightmost of the old size, it remains in place. */
2127 if (ao == lastnf) {
2128 /* Case 4. Nothing to do. */
2129 return;
2130 }
2131 /* If ao were the only arena in the list, the last block would have
2132 * gotten us out.
2133 */
2134 assert(ao->nextarena != NULL);
2135
2136 /* Case 3: We have to move the arena towards the end of the list,
2137 * because it has more free pools than the arena to its right. It needs
2138 * to move to follow lastnf.
2139 * First unlink ao from usable_arenas.
2140 */
2141 if (ao->prevarena != NULL) {
2142 /* ao isn't at the head of the list */
2143 assert(ao->prevarena->nextarena == ao);
2144 ao->prevarena->nextarena = ao->nextarena;
2145 }
2146 else {
2147 /* ao is at the head of the list */
2148 assert(usable_arenas == ao);
2149 usable_arenas = ao->nextarena;
2150 }
2151 ao->nextarena->prevarena = ao->prevarena;
2152 /* And insert after lastnf. */
2153 ao->prevarena = lastnf;
2154 ao->nextarena = lastnf->nextarena;
2155 if (ao->nextarena != NULL) {
2156 ao->nextarena->prevarena = ao;
2157 }
2158 lastnf->nextarena = ao;
2159 /* Verify that the swaps worked. */
2160 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2161 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2162 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2163 assert((usable_arenas == ao && ao->prevarena == NULL)
2164 || ao->prevarena->nextarena == ao);
2165 }
2166
2167 /* Free a memory block allocated by pymalloc_alloc().
2168 Return 1 if it was freed.
2169 Return 0 if the block was not allocated by pymalloc_alloc(). */
2170 static inline int
pymalloc_free(void * ctx,void * p)2171 pymalloc_free(void *ctx, void *p)
2172 {
2173 assert(p != NULL);
2174
2175 #ifdef WITH_VALGRIND
2176 if (UNLIKELY(running_on_valgrind > 0)) {
2177 return 0;
2178 }
2179 #endif
2180
2181 poolp pool = POOL_ADDR(p);
2182 if (UNLIKELY(!address_in_range(p, pool))) {
2183 return 0;
2184 }
2185 /* We allocated this address. */
2186
2187 /* Link p to the start of the pool's freeblock list. Since
2188 * the pool had at least the p block outstanding, the pool
2189 * wasn't empty (so it's already in a usedpools[] list, or
2190 * was full and is in no list -- it's not in the freeblocks
2191 * list in any case).
2192 */
2193 assert(pool->ref.count > 0); /* else it was empty */
2194 block *lastfree = pool->freeblock;
2195 *(block **)p = lastfree;
2196 pool->freeblock = (block *)p;
2197 pool->ref.count--;
2198
2199 if (UNLIKELY(lastfree == NULL)) {
2200 /* Pool was full, so doesn't currently live in any list:
2201 * link it to the front of the appropriate usedpools[] list.
2202 * This mimics LRU pool usage for new allocations and
2203 * targets optimal filling when several pools contain
2204 * blocks of the same size class.
2205 */
2206 insert_to_usedpool(pool);
2207 return 1;
2208 }
2209
2210 /* freeblock wasn't NULL, so the pool wasn't full,
2211 * and the pool is in a usedpools[] list.
2212 */
2213 if (LIKELY(pool->ref.count != 0)) {
2214 /* pool isn't empty: leave it in usedpools */
2215 return 1;
2216 }
2217
2218 /* Pool is now empty: unlink from usedpools, and
2219 * link to the front of freepools. This ensures that
2220 * previously freed pools will be allocated later
2221 * (being not referenced, they are perhaps paged out).
2222 */
2223 insert_to_freepool(pool);
2224 return 1;
2225 }
2226
2227
2228 static void
_PyObject_Free(void * ctx,void * p)2229 _PyObject_Free(void *ctx, void *p)
2230 {
2231 /* PyObject_Free(NULL) has no effect */
2232 if (p == NULL) {
2233 return;
2234 }
2235
2236 if (UNLIKELY(!pymalloc_free(ctx, p))) {
2237 /* pymalloc didn't allocate this address */
2238 PyMem_RawFree(p);
2239 raw_allocated_blocks--;
2240 }
2241 }
2242
2243
2244 /* pymalloc realloc.
2245
2246 If nbytes==0, then as the Python docs promise, we do not treat this like
2247 free(p), and return a non-NULL result.
2248
2249 Return 1 if pymalloc reallocated memory and wrote the new pointer into
2250 newptr_p.
2251
2252 Return 0 if pymalloc didn't allocated p. */
2253 static int
pymalloc_realloc(void * ctx,void ** newptr_p,void * p,size_t nbytes)2254 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2255 {
2256 void *bp;
2257 poolp pool;
2258 size_t size;
2259
2260 assert(p != NULL);
2261
2262 #ifdef WITH_VALGRIND
2263 /* Treat running_on_valgrind == -1 the same as 0 */
2264 if (UNLIKELY(running_on_valgrind > 0)) {
2265 return 0;
2266 }
2267 #endif
2268
2269 pool = POOL_ADDR(p);
2270 if (!address_in_range(p, pool)) {
2271 /* pymalloc is not managing this block.
2272
2273 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2274 over this block. However, if we do, we need to copy the valid data
2275 from the C-managed block to one of our blocks, and there's no
2276 portable way to know how much of the memory space starting at p is
2277 valid.
2278
2279 As bug 1185883 pointed out the hard way, it's possible that the
2280 C-managed block is "at the end" of allocated VM space, so that a
2281 memory fault can occur if we try to copy nbytes bytes starting at p.
2282 Instead we punt: let C continue to manage this block. */
2283 return 0;
2284 }
2285
2286 /* pymalloc is in charge of this block */
2287 size = INDEX2SIZE(pool->szidx);
2288 if (nbytes <= size) {
2289 /* The block is staying the same or shrinking.
2290
2291 If it's shrinking, there's a tradeoff: it costs cycles to copy the
2292 block to a smaller size class, but it wastes memory not to copy it.
2293
2294 The compromise here is to copy on shrink only if at least 25% of
2295 size can be shaved off. */
2296 if (4 * nbytes > 3 * size) {
2297 /* It's the same, or shrinking and new/old > 3/4. */
2298 *newptr_p = p;
2299 return 1;
2300 }
2301 size = nbytes;
2302 }
2303
2304 bp = _PyObject_Malloc(ctx, nbytes);
2305 if (bp != NULL) {
2306 memcpy(bp, p, size);
2307 _PyObject_Free(ctx, p);
2308 }
2309 *newptr_p = bp;
2310 return 1;
2311 }
2312
2313
2314 static void *
_PyObject_Realloc(void * ctx,void * ptr,size_t nbytes)2315 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2316 {
2317 void *ptr2;
2318
2319 if (ptr == NULL) {
2320 return _PyObject_Malloc(ctx, nbytes);
2321 }
2322
2323 if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2324 return ptr2;
2325 }
2326
2327 return PyMem_RawRealloc(ptr, nbytes);
2328 }
2329
2330 #else /* ! WITH_PYMALLOC */
2331
2332 /*==========================================================================*/
2333 /* pymalloc not enabled: Redirect the entry points to malloc. These will
2334 * only be used by extensions that are compiled with pymalloc enabled. */
2335
2336 Py_ssize_t
_Py_GetAllocatedBlocks(void)2337 _Py_GetAllocatedBlocks(void)
2338 {
2339 return 0;
2340 }
2341
2342 #endif /* WITH_PYMALLOC */
2343
2344
2345 /*==========================================================================*/
2346 /* A x-platform debugging allocator. This doesn't manage memory directly,
2347 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2348 */
2349
2350 /* Uncomment this define to add the "serialno" field */
2351 /* #define PYMEM_DEBUG_SERIALNO */
2352
2353 #ifdef PYMEM_DEBUG_SERIALNO
2354 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
2355
2356 /* serialno is always incremented via calling this routine. The point is
2357 * to supply a single place to set a breakpoint.
2358 */
2359 static void
bumpserialno(void)2360 bumpserialno(void)
2361 {
2362 ++serialno;
2363 }
2364 #endif
2365
2366 #define SST SIZEOF_SIZE_T
2367
2368 #ifdef PYMEM_DEBUG_SERIALNO
2369 # define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2370 #else
2371 # define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2372 #endif
2373
2374 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2375 static size_t
read_size_t(const void * p)2376 read_size_t(const void *p)
2377 {
2378 const uint8_t *q = (const uint8_t *)p;
2379 size_t result = *q++;
2380 int i;
2381
2382 for (i = SST; --i > 0; ++q)
2383 result = (result << 8) | *q;
2384 return result;
2385 }
2386
2387 /* Write n as a big-endian size_t, MSB at address p, LSB at
2388 * p + sizeof(size_t) - 1.
2389 */
2390 static void
write_size_t(void * p,size_t n)2391 write_size_t(void *p, size_t n)
2392 {
2393 uint8_t *q = (uint8_t *)p + SST - 1;
2394 int i;
2395
2396 for (i = SST; --i >= 0; --q) {
2397 *q = (uint8_t)(n & 0xff);
2398 n >>= 8;
2399 }
2400 }
2401
2402 /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and
2403 fills them with useful stuff, here calling the underlying malloc's result p:
2404
2405 p[0: S]
2406 Number of bytes originally asked for. This is a size_t, big-endian (easier
2407 to read in a memory dump).
2408 p[S]
2409 API ID. See PEP 445. This is a character, but seems undocumented.
2410 p[S+1: 2*S]
2411 Copies of PYMEM_FORBIDDENBYTE. Used to catch under- writes and reads.
2412 p[2*S: 2*S+n]
2413 The requested memory, filled with copies of PYMEM_CLEANBYTE.
2414 Used to catch reference to uninitialized memory.
2415 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
2416 handled the request itself.
2417 p[2*S+n: 2*S+n+S]
2418 Copies of PYMEM_FORBIDDENBYTE. Used to catch over- writes and reads.
2419 p[2*S+n+S: 2*S+n+2*S]
2420 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2421 and _PyMem_DebugRealloc.
2422 This is a big-endian size_t.
2423 If "bad memory" is detected later, the serial number gives an
2424 excellent way to set a breakpoint on the next run, to capture the
2425 instant at which this block was passed out.
2426
2427 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2428 for 3 * S extra bytes, and omits the last serialno field.
2429 */
2430
2431 static void *
_PyMem_DebugRawAlloc(int use_calloc,void * ctx,size_t nbytes)2432 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2433 {
2434 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2435 uint8_t *p; /* base address of malloc'ed pad block */
2436 uint8_t *data; /* p + 2*SST == pointer to data bytes */
2437 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2438 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2439
2440 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2441 /* integer overflow: can't represent total as a Py_ssize_t */
2442 return NULL;
2443 }
2444 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2445
2446 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2447 ^--- p ^--- data ^--- tail
2448 S: nbytes stored as size_t
2449 I: API identifier (1 byte)
2450 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2451 C: Clean bytes used later to store actual data
2452 N: Serial number stored as size_t
2453
2454 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2455 is omitted. */
2456
2457 if (use_calloc) {
2458 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2459 }
2460 else {
2461 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2462 }
2463 if (p == NULL) {
2464 return NULL;
2465 }
2466 data = p + 2*SST;
2467
2468 #ifdef PYMEM_DEBUG_SERIALNO
2469 bumpserialno();
2470 #endif
2471
2472 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2473 write_size_t(p, nbytes);
2474 p[SST] = (uint8_t)api->api_id;
2475 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2476
2477 if (nbytes > 0 && !use_calloc) {
2478 memset(data, PYMEM_CLEANBYTE, nbytes);
2479 }
2480
2481 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2482 tail = data + nbytes;
2483 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2484 #ifdef PYMEM_DEBUG_SERIALNO
2485 write_size_t(tail + SST, serialno);
2486 #endif
2487
2488 return data;
2489 }
2490
2491 static void *
_PyMem_DebugRawMalloc(void * ctx,size_t nbytes)2492 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2493 {
2494 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2495 }
2496
2497 static void *
_PyMem_DebugRawCalloc(void * ctx,size_t nelem,size_t elsize)2498 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2499 {
2500 size_t nbytes;
2501 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2502 nbytes = nelem * elsize;
2503 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2504 }
2505
2506
2507 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2508 particular, that the FORBIDDENBYTEs with the api ID are still intact).
2509 Then fills the original bytes with PYMEM_DEADBYTE.
2510 Then calls the underlying free.
2511 */
2512 static void
_PyMem_DebugRawFree(void * ctx,void * p)2513 _PyMem_DebugRawFree(void *ctx, void *p)
2514 {
2515 /* PyMem_Free(NULL) has no effect */
2516 if (p == NULL) {
2517 return;
2518 }
2519
2520 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2521 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
2522 size_t nbytes;
2523
2524 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2525 nbytes = read_size_t(q);
2526 nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2527 memset(q, PYMEM_DEADBYTE, nbytes);
2528 api->alloc.free(api->alloc.ctx, q);
2529 }
2530
2531
2532 static void *
_PyMem_DebugRawRealloc(void * ctx,void * p,size_t nbytes)2533 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2534 {
2535 if (p == NULL) {
2536 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2537 }
2538
2539 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2540 uint8_t *head; /* base address of malloc'ed pad block */
2541 uint8_t *data; /* pointer to data bytes */
2542 uint8_t *r;
2543 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2544 size_t total; /* 2 * SST + nbytes + 2 * SST */
2545 size_t original_nbytes;
2546 #define ERASED_SIZE 64
2547 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
2548
2549 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2550
2551 data = (uint8_t *)p;
2552 head = data - 2*SST;
2553 original_nbytes = read_size_t(head);
2554 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2555 /* integer overflow: can't represent total as a Py_ssize_t */
2556 return NULL;
2557 }
2558 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2559
2560 tail = data + original_nbytes;
2561 #ifdef PYMEM_DEBUG_SERIALNO
2562 size_t block_serialno = read_size_t(tail + SST);
2563 #endif
2564 /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
2565 ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
2566 */
2567 if (original_nbytes <= sizeof(save)) {
2568 memcpy(save, data, original_nbytes);
2569 memset(data - 2 * SST, PYMEM_DEADBYTE,
2570 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2571 }
2572 else {
2573 memcpy(save, data, ERASED_SIZE);
2574 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2575 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2576 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2577 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2578 }
2579
2580 /* Resize and add decorations. */
2581 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2582 if (r == NULL) {
2583 /* if realloc() failed: rewrite header and footer which have
2584 just been erased */
2585 nbytes = original_nbytes;
2586 }
2587 else {
2588 head = r;
2589 #ifdef PYMEM_DEBUG_SERIALNO
2590 bumpserialno();
2591 block_serialno = serialno;
2592 #endif
2593 }
2594 data = head + 2*SST;
2595
2596 write_size_t(head, nbytes);
2597 head[SST] = (uint8_t)api->api_id;
2598 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2599
2600 tail = data + nbytes;
2601 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2602 #ifdef PYMEM_DEBUG_SERIALNO
2603 write_size_t(tail + SST, block_serialno);
2604 #endif
2605
2606 /* Restore saved bytes. */
2607 if (original_nbytes <= sizeof(save)) {
2608 memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2609 }
2610 else {
2611 size_t i = original_nbytes - ERASED_SIZE;
2612 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2613 if (nbytes > i) {
2614 memcpy(data + i, &save[ERASED_SIZE],
2615 Py_MIN(nbytes - i, ERASED_SIZE));
2616 }
2617 }
2618
2619 if (r == NULL) {
2620 return NULL;
2621 }
2622
2623 if (nbytes > original_nbytes) {
2624 /* growing: mark new extra memory clean */
2625 memset(data + original_nbytes, PYMEM_CLEANBYTE,
2626 nbytes - original_nbytes);
2627 }
2628
2629 return data;
2630 }
2631
2632 static inline void
_PyMem_DebugCheckGIL(const char * func)2633 _PyMem_DebugCheckGIL(const char *func)
2634 {
2635 if (!PyGILState_Check()) {
2636 _Py_FatalErrorFunc(func,
2637 "Python memory allocator called "
2638 "without holding the GIL");
2639 }
2640 }
2641
2642 static void *
_PyMem_DebugMalloc(void * ctx,size_t nbytes)2643 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2644 {
2645 _PyMem_DebugCheckGIL(__func__);
2646 return _PyMem_DebugRawMalloc(ctx, nbytes);
2647 }
2648
2649 static void *
_PyMem_DebugCalloc(void * ctx,size_t nelem,size_t elsize)2650 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2651 {
2652 _PyMem_DebugCheckGIL(__func__);
2653 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2654 }
2655
2656
2657 static void
_PyMem_DebugFree(void * ctx,void * ptr)2658 _PyMem_DebugFree(void *ctx, void *ptr)
2659 {
2660 _PyMem_DebugCheckGIL(__func__);
2661 _PyMem_DebugRawFree(ctx, ptr);
2662 }
2663
2664
2665 static void *
_PyMem_DebugRealloc(void * ctx,void * ptr,size_t nbytes)2666 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2667 {
2668 _PyMem_DebugCheckGIL(__func__);
2669 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2670 }
2671
2672 /* Check the forbidden bytes on both ends of the memory allocated for p.
2673 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2674 * and call Py_FatalError to kill the program.
2675 * The API id, is also checked.
2676 */
2677 static void
_PyMem_DebugCheckAddress(const char * func,char api,const void * p)2678 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2679 {
2680 assert(p != NULL);
2681
2682 const uint8_t *q = (const uint8_t *)p;
2683 size_t nbytes;
2684 const uint8_t *tail;
2685 int i;
2686 char id;
2687
2688 /* Check the API id */
2689 id = (char)q[-SST];
2690 if (id != api) {
2691 _PyObject_DebugDumpAddress(p);
2692 _Py_FatalErrorFormat(func,
2693 "bad ID: Allocated using API '%c', "
2694 "verified using API '%c'",
2695 id, api);
2696 }
2697
2698 /* Check the stuff at the start of p first: if there's underwrite
2699 * corruption, the number-of-bytes field may be nuts, and checking
2700 * the tail could lead to a segfault then.
2701 */
2702 for (i = SST-1; i >= 1; --i) {
2703 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2704 _PyObject_DebugDumpAddress(p);
2705 _Py_FatalErrorFunc(func, "bad leading pad byte");
2706 }
2707 }
2708
2709 nbytes = read_size_t(q - 2*SST);
2710 tail = q + nbytes;
2711 for (i = 0; i < SST; ++i) {
2712 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2713 _PyObject_DebugDumpAddress(p);
2714 _Py_FatalErrorFunc(func, "bad trailing pad byte");
2715 }
2716 }
2717 }
2718
2719 /* Display info to stderr about the memory block at p. */
2720 static void
_PyObject_DebugDumpAddress(const void * p)2721 _PyObject_DebugDumpAddress(const void *p)
2722 {
2723 const uint8_t *q = (const uint8_t *)p;
2724 const uint8_t *tail;
2725 size_t nbytes;
2726 int i;
2727 int ok;
2728 char id;
2729
2730 fprintf(stderr, "Debug memory block at address p=%p:", p);
2731 if (p == NULL) {
2732 fprintf(stderr, "\n");
2733 return;
2734 }
2735 id = (char)q[-SST];
2736 fprintf(stderr, " API '%c'\n", id);
2737
2738 nbytes = read_size_t(q - 2*SST);
2739 fprintf(stderr, " %zu bytes originally requested\n", nbytes);
2740
2741 /* In case this is nuts, check the leading pad bytes first. */
2742 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2743 ok = 1;
2744 for (i = 1; i <= SST-1; ++i) {
2745 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2746 ok = 0;
2747 break;
2748 }
2749 }
2750 if (ok)
2751 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2752 else {
2753 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2754 PYMEM_FORBIDDENBYTE);
2755 for (i = SST-1; i >= 1; --i) {
2756 const uint8_t byte = *(q-i);
2757 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2758 if (byte != PYMEM_FORBIDDENBYTE)
2759 fputs(" *** OUCH", stderr);
2760 fputc('\n', stderr);
2761 }
2762
2763 fputs(" Because memory is corrupted at the start, the "
2764 "count of bytes requested\n"
2765 " may be bogus, and checking the trailing pad "
2766 "bytes may segfault.\n", stderr);
2767 }
2768
2769 tail = q + nbytes;
2770 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail);
2771 ok = 1;
2772 for (i = 0; i < SST; ++i) {
2773 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2774 ok = 0;
2775 break;
2776 }
2777 }
2778 if (ok)
2779 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2780 else {
2781 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2782 PYMEM_FORBIDDENBYTE);
2783 for (i = 0; i < SST; ++i) {
2784 const uint8_t byte = tail[i];
2785 fprintf(stderr, " at tail+%d: 0x%02x",
2786 i, byte);
2787 if (byte != PYMEM_FORBIDDENBYTE)
2788 fputs(" *** OUCH", stderr);
2789 fputc('\n', stderr);
2790 }
2791 }
2792
2793 #ifdef PYMEM_DEBUG_SERIALNO
2794 size_t serial = read_size_t(tail + SST);
2795 fprintf(stderr,
2796 " The block was made by call #%zu to debug malloc/realloc.\n",
2797 serial);
2798 #endif
2799
2800 if (nbytes > 0) {
2801 i = 0;
2802 fputs(" Data at p:", stderr);
2803 /* print up to 8 bytes at the start */
2804 while (q < tail && i < 8) {
2805 fprintf(stderr, " %02x", *q);
2806 ++i;
2807 ++q;
2808 }
2809 /* and up to 8 at the end */
2810 if (q < tail) {
2811 if (tail - q > 8) {
2812 fputs(" ...", stderr);
2813 q = tail - 8;
2814 }
2815 while (q < tail) {
2816 fprintf(stderr, " %02x", *q);
2817 ++q;
2818 }
2819 }
2820 fputc('\n', stderr);
2821 }
2822 fputc('\n', stderr);
2823
2824 fflush(stderr);
2825 _PyMem_DumpTraceback(fileno(stderr), p);
2826 }
2827
2828
2829 static size_t
printone(FILE * out,const char * msg,size_t value)2830 printone(FILE *out, const char* msg, size_t value)
2831 {
2832 int i, k;
2833 char buf[100];
2834 size_t origvalue = value;
2835
2836 fputs(msg, out);
2837 for (i = (int)strlen(msg); i < 35; ++i)
2838 fputc(' ', out);
2839 fputc('=', out);
2840
2841 /* Write the value with commas. */
2842 i = 22;
2843 buf[i--] = '\0';
2844 buf[i--] = '\n';
2845 k = 3;
2846 do {
2847 size_t nextvalue = value / 10;
2848 unsigned int digit = (unsigned int)(value - nextvalue * 10);
2849 value = nextvalue;
2850 buf[i--] = (char)(digit + '0');
2851 --k;
2852 if (k == 0 && value && i >= 0) {
2853 k = 3;
2854 buf[i--] = ',';
2855 }
2856 } while (value && i >= 0);
2857
2858 while (i >= 0)
2859 buf[i--] = ' ';
2860 fputs(buf, out);
2861
2862 return origvalue;
2863 }
2864
2865 void
_PyDebugAllocatorStats(FILE * out,const char * block_name,int num_blocks,size_t sizeof_block)2866 _PyDebugAllocatorStats(FILE *out,
2867 const char *block_name, int num_blocks, size_t sizeof_block)
2868 {
2869 char buf1[128];
2870 char buf2[128];
2871 PyOS_snprintf(buf1, sizeof(buf1),
2872 "%d %ss * %zd bytes each",
2873 num_blocks, block_name, sizeof_block);
2874 PyOS_snprintf(buf2, sizeof(buf2),
2875 "%48s ", buf1);
2876 (void)printone(out, buf2, num_blocks * sizeof_block);
2877 }
2878
2879
2880 #ifdef WITH_PYMALLOC
2881
2882 #ifdef Py_DEBUG
2883 /* Is target in the list? The list is traversed via the nextpool pointers.
2884 * The list may be NULL-terminated, or circular. Return 1 if target is in
2885 * list, else 0.
2886 */
2887 static int
pool_is_in_list(const poolp target,poolp list)2888 pool_is_in_list(const poolp target, poolp list)
2889 {
2890 poolp origlist = list;
2891 assert(target != NULL);
2892 if (list == NULL)
2893 return 0;
2894 do {
2895 if (target == list)
2896 return 1;
2897 list = list->nextpool;
2898 } while (list != NULL && list != origlist);
2899 return 0;
2900 }
2901 #endif
2902
2903 /* Print summary info to "out" about the state of pymalloc's structures.
2904 * In Py_DEBUG mode, also perform some expensive internal consistency
2905 * checks.
2906 *
2907 * Return 0 if the memory debug hooks are not installed or no statistics was
2908 * written into out, return 1 otherwise.
2909 */
2910 int
_PyObject_DebugMallocStats(FILE * out)2911 _PyObject_DebugMallocStats(FILE *out)
2912 {
2913 if (!_PyMem_PymallocEnabled()) {
2914 return 0;
2915 }
2916
2917 uint i;
2918 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2919 /* # of pools, allocated blocks, and free blocks per class index */
2920 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2921 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2922 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2923 /* total # of allocated bytes in used and full pools */
2924 size_t allocated_bytes = 0;
2925 /* total # of available bytes in used pools */
2926 size_t available_bytes = 0;
2927 /* # of free pools + pools not yet carved out of current arena */
2928 uint numfreepools = 0;
2929 /* # of bytes for arena alignment padding */
2930 size_t arena_alignment = 0;
2931 /* # of bytes in used and full pools used for pool_headers */
2932 size_t pool_header_bytes = 0;
2933 /* # of bytes in used and full pools wasted due to quantization,
2934 * i.e. the necessarily leftover space at the ends of used and
2935 * full pools.
2936 */
2937 size_t quantization = 0;
2938 /* # of arenas actually allocated. */
2939 size_t narenas = 0;
2940 /* running total -- should equal narenas * ARENA_SIZE */
2941 size_t total;
2942 char buf[128];
2943
2944 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2945 SMALL_REQUEST_THRESHOLD, numclasses);
2946
2947 for (i = 0; i < numclasses; ++i)
2948 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2949
2950 /* Because full pools aren't linked to from anything, it's easiest
2951 * to march over all the arenas. If we're lucky, most of the memory
2952 * will be living in full pools -- would be a shame to miss them.
2953 */
2954 for (i = 0; i < maxarenas; ++i) {
2955 uint j;
2956 uintptr_t base = arenas[i].address;
2957
2958 /* Skip arenas which are not allocated. */
2959 if (arenas[i].address == (uintptr_t)NULL)
2960 continue;
2961 narenas += 1;
2962
2963 numfreepools += arenas[i].nfreepools;
2964
2965 /* round up to pool alignment */
2966 if (base & (uintptr_t)POOL_SIZE_MASK) {
2967 arena_alignment += POOL_SIZE;
2968 base &= ~(uintptr_t)POOL_SIZE_MASK;
2969 base += POOL_SIZE;
2970 }
2971
2972 /* visit every pool in the arena */
2973 assert(base <= (uintptr_t) arenas[i].pool_address);
2974 for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2975 ++j, base += POOL_SIZE) {
2976 poolp p = (poolp)base;
2977 const uint sz = p->szidx;
2978 uint freeblocks;
2979
2980 if (p->ref.count == 0) {
2981 /* currently unused */
2982 #ifdef Py_DEBUG
2983 assert(pool_is_in_list(p, arenas[i].freepools));
2984 #endif
2985 continue;
2986 }
2987 ++numpools[sz];
2988 numblocks[sz] += p->ref.count;
2989 freeblocks = NUMBLOCKS(sz) - p->ref.count;
2990 numfreeblocks[sz] += freeblocks;
2991 #ifdef Py_DEBUG
2992 if (freeblocks > 0)
2993 assert(pool_is_in_list(p, usedpools[sz + sz]));
2994 #endif
2995 }
2996 }
2997 assert(narenas == narenas_currently_allocated);
2998
2999 fputc('\n', out);
3000 fputs("class size num pools blocks in use avail blocks\n"
3001 "----- ---- --------- ------------- ------------\n",
3002 out);
3003
3004 for (i = 0; i < numclasses; ++i) {
3005 size_t p = numpools[i];
3006 size_t b = numblocks[i];
3007 size_t f = numfreeblocks[i];
3008 uint size = INDEX2SIZE(i);
3009 if (p == 0) {
3010 assert(b == 0 && f == 0);
3011 continue;
3012 }
3013 fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3014 i, size, p, b, f);
3015 allocated_bytes += b * size;
3016 available_bytes += f * size;
3017 pool_header_bytes += p * POOL_OVERHEAD;
3018 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3019 }
3020 fputc('\n', out);
3021 #ifdef PYMEM_DEBUG_SERIALNO
3022 if (_PyMem_DebugEnabled()) {
3023 (void)printone(out, "# times object malloc called", serialno);
3024 }
3025 #endif
3026 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3027 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3028 (void)printone(out, "# arenas highwater mark", narenas_highwater);
3029 (void)printone(out, "# arenas allocated current", narenas);
3030
3031 PyOS_snprintf(buf, sizeof(buf),
3032 "%zu arenas * %d bytes/arena",
3033 narenas, ARENA_SIZE);
3034 (void)printone(out, buf, narenas * ARENA_SIZE);
3035
3036 fputc('\n', out);
3037
3038 /* Account for what all of those arena bytes are being used for. */
3039 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3040 total += printone(out, "# bytes in available blocks", available_bytes);
3041
3042 PyOS_snprintf(buf, sizeof(buf),
3043 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3044 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3045
3046 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3047 total += printone(out, "# bytes lost to quantization", quantization);
3048 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3049 (void)printone(out, "Total", total);
3050 assert(narenas * ARENA_SIZE == total);
3051
3052 #if WITH_PYMALLOC_RADIX_TREE
3053 fputs("\narena map counts\n", out);
3054 #ifdef USE_INTERIOR_NODES
3055 (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3056 (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3057 fputc('\n', out);
3058 #endif
3059 total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3060 #ifdef USE_INTERIOR_NODES
3061 total += printone(out, "# bytes lost to arena map mid",
3062 sizeof(arena_map_mid_t) * arena_map_mid_count);
3063 total += printone(out, "# bytes lost to arena map bot",
3064 sizeof(arena_map_bot_t) * arena_map_bot_count);
3065 (void)printone(out, "Total", total);
3066 #endif
3067 #endif
3068
3069 return 1;
3070 }
3071
3072 #endif /* #ifdef WITH_PYMALLOC */
3073