1 #include "Python.h"
2
3 #include <stdbool.h>
4
5
6 /* Defined in tracemalloc.c */
7 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
8
9
10 /* Python's malloc wrappers (see pymem.h) */
11
12 #undef uint
13 #define uint unsigned int /* assuming >= 16 bits */
14
15 /* Forward declaration */
16 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
17 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
18 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
19 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
20
21 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
22 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
23 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
24 static void _PyMem_DebugFree(void *ctx, void *p);
25
26 static void _PyObject_DebugDumpAddress(const void *p);
27 static void _PyMem_DebugCheckAddress(char api_id, const void *p);
28
29 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
30
31 #if defined(__has_feature) /* Clang */
32 # if __has_feature(address_sanitizer) /* is ASAN enabled? */
33 # define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
34 __attribute__((no_address_safety_analysis))
35 # endif
36 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */
37 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
38 # endif
39 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */
40 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
41 # endif
42 #elif defined(__GNUC__)
43 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */
44 # define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
45 __attribute__((no_address_safety_analysis))
46 # endif
47 // TSAN is supported since GCC 4.8, but the __SANITIZE_THREAD__ macro
48 // is provided only since GCC 7.
49 # if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
50 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
51 # endif
52 #endif
53
54 #ifndef _Py_NO_ADDRESS_SAFETY_ANALYSIS
55 # define _Py_NO_ADDRESS_SAFETY_ANALYSIS
56 #endif
57 #ifndef _Py_NO_SANITIZE_THREAD
58 # define _Py_NO_SANITIZE_THREAD
59 #endif
60 #ifndef _Py_NO_SANITIZE_MEMORY
61 # define _Py_NO_SANITIZE_MEMORY
62 #endif
63
64 #ifdef WITH_PYMALLOC
65
66 #ifdef MS_WINDOWS
67 # include <windows.h>
68 #elif defined(HAVE_MMAP)
69 # include <sys/mman.h>
70 # ifdef MAP_ANONYMOUS
71 # define ARENAS_USE_MMAP
72 # endif
73 #endif
74
75 /* Forward declaration */
76 static void* _PyObject_Malloc(void *ctx, size_t size);
77 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
78 static void _PyObject_Free(void *ctx, void *p);
79 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
80 #endif
81
82
83 static void *
84 _PyMem_RawMalloc(void *ctx, size_t size)
85 {
86 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
87 for malloc(0), which would be treated as an error. Some platforms would
88 return a pointer with no memory behind it, which would break pymalloc.
89 To solve these problems, allocate an extra byte. */
90 if (size == 0)
91 size = 1;
92 return malloc(size);
93 }
94
95 static void *
96 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
97 {
98 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
99 for calloc(0, 0), which would be treated as an error. Some platforms
100 would return a pointer with no memory behind it, which would break
101 pymalloc. To solve these problems, allocate an extra byte. */
102 if (nelem == 0 || elsize == 0) {
103 nelem = 1;
104 elsize = 1;
105 }
106 return calloc(nelem, elsize);
107 }
108
109 static void *
110 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
111 {
112 if (size == 0)
113 size = 1;
114 return realloc(ptr, size);
115 }
116
117 static void
118 _PyMem_RawFree(void *ctx, void *ptr)
119 {
120 free(ptr);
121 }
122
123
124 #ifdef MS_WINDOWS
125 static void *
126 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
127 {
128 return VirtualAlloc(NULL, size,
129 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
130 }
131
132 static void
133 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
134 {
135 VirtualFree(ptr, 0, MEM_RELEASE);
136 }
137
138 #elif defined(ARENAS_USE_MMAP)
139 static void *
140 _PyObject_ArenaMmap(void *ctx, size_t size)
141 {
142 void *ptr;
143 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
144 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
145 if (ptr == MAP_FAILED)
146 return NULL;
147 assert(ptr != NULL);
148 return ptr;
149 }
150
151 static void
152 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
153 {
154 munmap(ptr, size);
155 }
156
157 #else
158 static void *
159 _PyObject_ArenaMalloc(void *ctx, size_t size)
160 {
161 return malloc(size);
162 }
163
164 static void
165 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
166 {
167 free(ptr);
168 }
169 #endif
170
171 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
172 #ifdef WITH_PYMALLOC
173 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
174 #endif
175
176 #define PYRAW_ALLOC MALLOC_ALLOC
177 #ifdef WITH_PYMALLOC
178 # define PYOBJ_ALLOC PYMALLOC_ALLOC
179 #else
180 # define PYOBJ_ALLOC MALLOC_ALLOC
181 #endif
182 #define PYMEM_ALLOC PYOBJ_ALLOC
183
184 typedef struct {
185 /* We tag each block with an API ID in order to tag API violations */
186 char api_id;
187 PyMemAllocatorEx alloc;
188 } debug_alloc_api_t;
189 static struct {
190 debug_alloc_api_t raw;
191 debug_alloc_api_t mem;
192 debug_alloc_api_t obj;
193 } _PyMem_Debug = {
194 {'r', PYRAW_ALLOC},
195 {'m', PYMEM_ALLOC},
196 {'o', PYOBJ_ALLOC}
197 };
198
199 #define PYDBGRAW_ALLOC \
200 {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
201 #define PYDBGMEM_ALLOC \
202 {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
203 #define PYDBGOBJ_ALLOC \
204 {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
205
206 #ifdef Py_DEBUG
207 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
208 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
209 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
210 #else
211 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
212 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
213 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
214 #endif
215
216
217 static int
218 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
219 PyMemAllocatorEx *old_alloc)
220 {
221 if (old_alloc != NULL) {
222 PyMem_GetAllocator(domain, old_alloc);
223 }
224
225
226 PyMemAllocatorEx new_alloc;
227 switch(domain)
228 {
229 case PYMEM_DOMAIN_RAW:
230 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
231 break;
232 case PYMEM_DOMAIN_MEM:
233 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
234 break;
235 case PYMEM_DOMAIN_OBJ:
236 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
237 break;
238 default:
239 /* unknown domain */
240 return -1;
241 }
242 PyMem_SetAllocator(domain, &new_alloc);
243 if (debug) {
244 _PyMem_SetupDebugHooksDomain(domain);
245 }
246 return 0;
247 }
248
249
250 int
251 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
252 PyMemAllocatorEx *old_alloc)
253 {
254 #ifdef Py_DEBUG
255 const int debug = 1;
256 #else
257 const int debug = 0;
258 #endif
259 return pymem_set_default_allocator(domain, debug, old_alloc);
260 }
261
262
263 int
264 _PyMem_SetupAllocators(const char *opt)
265 {
266 if (opt == NULL || *opt == '\0') {
267 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
268 options): use default memory allocators */
269 opt = "default";
270 }
271
272 if (strcmp(opt, "default") == 0) {
273 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
274 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
275 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
276 }
277 else if (strcmp(opt, "debug") == 0) {
278 (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
279 (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
280 (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
281 }
282 #ifdef WITH_PYMALLOC
283 else if (strcmp(opt, "pymalloc") == 0 || strcmp(opt, "pymalloc_debug") == 0) {
284 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
285 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
286
287 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
288 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
289 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
290
291 if (strcmp(opt, "pymalloc_debug") == 0) {
292 PyMem_SetupDebugHooks();
293 }
294 }
295 #endif
296 else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0) {
297 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
298 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
299 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
300 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
301
302 if (strcmp(opt, "malloc_debug") == 0) {
303 PyMem_SetupDebugHooks();
304 }
305 }
306 else {
307 /* unknown allocator */
308 return -1;
309 }
310 return 0;
311 }
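
/* Illustrative usage (not part of this file): _PyMem_SetupAllocators() is
 * driven by the PYTHONMALLOC environment variable at interpreter startup, so
 * the accepted values handled above can be selected from a shell, e.g.:
 *
 *     PYTHONMALLOC=malloc ./python script.py          # force libc malloc
 *     PYTHONMALLOC=pymalloc_debug ./python script.py  # pymalloc + debug hooks
 *
 * "script.py" is only a placeholder for any invocation; an empty or unset
 * PYTHONMALLOC falls back to the "default" branch above.
 */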
312
313
314 static int
315 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
316 {
317 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
318 }
319
320
321 const char*
322 _PyMem_GetAllocatorsName(void)
323 {
324 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
325 #ifdef WITH_PYMALLOC
326 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
327 #endif
328
329 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
330 pymemallocator_eq(&_PyMem, &malloc_alloc) &&
331 pymemallocator_eq(&_PyObject, &malloc_alloc))
332 {
333 return "malloc";
334 }
335 #ifdef WITH_PYMALLOC
336 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
337 pymemallocator_eq(&_PyMem, &pymalloc) &&
338 pymemallocator_eq(&_PyObject, &pymalloc))
339 {
340 return "pymalloc";
341 }
342 #endif
343
344 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
345 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
346 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
347
348 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
349 pymemallocator_eq(&_PyMem, &dbg_mem) &&
350 pymemallocator_eq(&_PyObject, &dbg_obj))
351 {
352 /* Debug hooks installed */
353 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
354 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
355 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
356 {
357 return "malloc_debug";
358 }
359 #ifdef WITH_PYMALLOC
360 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
361 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
362 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
363 {
364 return "pymalloc_debug";
365 }
366 #endif
367 }
368 return NULL;
369 }
370
371
372 #undef MALLOC_ALLOC
373 #undef PYMALLOC_ALLOC
374 #undef PYRAW_ALLOC
375 #undef PYMEM_ALLOC
376 #undef PYOBJ_ALLOC
377 #undef PYDBGRAW_ALLOC
378 #undef PYDBGMEM_ALLOC
379 #undef PYDBGOBJ_ALLOC
380
381
382 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
383 #ifdef MS_WINDOWS
384 _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
385 #elif defined(ARENAS_USE_MMAP)
386 _PyObject_ArenaMmap, _PyObject_ArenaMunmap
387 #else
388 _PyObject_ArenaMalloc, _PyObject_ArenaFree
389 #endif
390 };
391
392 #ifdef WITH_PYMALLOC
393 static int
394 _PyMem_DebugEnabled(void)
395 {
396 return (_PyObject.malloc == _PyMem_DebugMalloc);
397 }
398
399 static int
400 _PyMem_PymallocEnabled(void)
401 {
402 if (_PyMem_DebugEnabled()) {
403 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
404 }
405 else {
406 return (_PyObject.malloc == _PyObject_Malloc);
407 }
408 }
409 #endif
410
411
412 static void
413 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
414 {
415 PyMemAllocatorEx alloc;
416
417 if (domain == PYMEM_DOMAIN_RAW) {
418 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
419 return;
420 }
421
422 PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
423 alloc.ctx = &_PyMem_Debug.raw;
424 alloc.malloc = _PyMem_DebugRawMalloc;
425 alloc.calloc = _PyMem_DebugRawCalloc;
426 alloc.realloc = _PyMem_DebugRawRealloc;
427 alloc.free = _PyMem_DebugRawFree;
428 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
429 }
430 else if (domain == PYMEM_DOMAIN_MEM) {
431 if (_PyMem.malloc == _PyMem_DebugMalloc) {
432 return;
433 }
434
435 PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
436 alloc.ctx = &_PyMem_Debug.mem;
437 alloc.malloc = _PyMem_DebugMalloc;
438 alloc.calloc = _PyMem_DebugCalloc;
439 alloc.realloc = _PyMem_DebugRealloc;
440 alloc.free = _PyMem_DebugFree;
441 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
442 }
443 else if (domain == PYMEM_DOMAIN_OBJ) {
444 if (_PyObject.malloc == _PyMem_DebugMalloc) {
445 return;
446 }
447
448 PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
449 alloc.ctx = &_PyMem_Debug.obj;
450 alloc.malloc = _PyMem_DebugMalloc;
451 alloc.calloc = _PyMem_DebugCalloc;
452 alloc.realloc = _PyMem_DebugRealloc;
453 alloc.free = _PyMem_DebugFree;
454 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
455 }
456 }
457
458
459 void
460 PyMem_SetupDebugHooks(void)
461 {
462 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
463 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
464 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
465 }
466
467 void
468 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
469 {
470 switch(domain)
471 {
472 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
473 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
474 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
475 default:
476 /* unknown domain: set all attributes to NULL */
477 allocator->ctx = NULL;
478 allocator->malloc = NULL;
479 allocator->calloc = NULL;
480 allocator->realloc = NULL;
481 allocator->free = NULL;
482 }
483 }
484
485 void
486 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
487 {
488 switch(domain)
489 {
490 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
491 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
492 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
493 /* ignore unknown domain */
494 }
495 }
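
/* Illustrative sketch (hypothetical, not part of CPython): an embedding
 * application can wrap a domain with its own hooks through the two public
 * functions above, following the same pattern _PyMem_SetupDebugHooksDomain()
 * uses. The names hook_ctx, my_malloc, etc. are made up for this example.
 *
 *     typedef struct { PyMemAllocatorEx orig; size_t nallocs; } hook_ctx_t;
 *     static hook_ctx_t hook_ctx;
 *
 *     static void *my_malloc(void *ctx, size_t size)
 *     { hook_ctx_t *c = ctx; c->nallocs++; return c->orig.malloc(c->orig.ctx, size); }
 *
 *     static void *my_calloc(void *ctx, size_t nelem, size_t elsize)
 *     { hook_ctx_t *c = ctx; c->nallocs++; return c->orig.calloc(c->orig.ctx, nelem, elsize); }
 *
 *     static void *my_realloc(void *ctx, void *ptr, size_t size)
 *     { hook_ctx_t *c = ctx; return c->orig.realloc(c->orig.ctx, ptr, size); }
 *
 *     static void my_free(void *ctx, void *ptr)
 *     { hook_ctx_t *c = ctx; c->orig.free(c->orig.ctx, ptr); }
 *
 *     static void install_counting_hook(void)
 *     {
 *         PyMemAllocatorEx alloc = {&hook_ctx, my_malloc, my_calloc,
 *                                   my_realloc, my_free};
 *         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &hook_ctx.orig);
 *         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
 *     }
 *
 * The saved allocator is kept in hook_ctx.orig so every call is forwarded to
 * whatever allocator was previously installed for the domain.
 */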
496
497 void
498 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
499 {
500 *allocator = _PyObject_Arena;
501 }
502
503 void
504 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
505 {
506 _PyObject_Arena = *allocator;
507 }
508
509 void *
510 PyMem_RawMalloc(size_t size)
511 {
512 /*
513 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
514 * Most python internals blindly use a signed Py_ssize_t to track
515 * things without checking for overflows or negatives.
516 * As size_t is unsigned, checking for size < 0 is not required.
517 */
518 if (size > (size_t)PY_SSIZE_T_MAX)
519 return NULL;
520 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
521 }
522
523 void *
524 PyMem_RawCalloc(size_t nelem, size_t elsize)
525 {
526 /* see PyMem_RawMalloc() */
527 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
528 return NULL;
529 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
530 }
531
532 void*
533 PyMem_RawRealloc(void *ptr, size_t new_size)
534 {
535 /* see PyMem_RawMalloc() */
536 if (new_size > (size_t)PY_SSIZE_T_MAX)
537 return NULL;
538 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
539 }
540
541 void PyMem_RawFree(void *ptr)
542 {
543 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
544 }
545
546
547 void *
548 PyMem_Malloc(size_t size)
549 {
550 /* see PyMem_RawMalloc() */
551 if (size > (size_t)PY_SSIZE_T_MAX)
552 return NULL;
553 return _PyMem.malloc(_PyMem.ctx, size);
554 }
555
556 void *
557 PyMem_Calloc(size_t nelem, size_t elsize)
558 {
559 /* see PyMem_RawMalloc() */
560 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
561 return NULL;
562 return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
563 }
564
565 void *
566 PyMem_Realloc(void *ptr, size_t new_size)
567 {
568 /* see PyMem_RawMalloc() */
569 if (new_size > (size_t)PY_SSIZE_T_MAX)
570 return NULL;
571 return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
572 }
573
574 void
575 PyMem_Free(void *ptr)
576 {
577 _PyMem.free(_PyMem.ctx, ptr);
578 }
579
580
581 wchar_t*
582 _PyMem_RawWcsdup(const wchar_t *str)
583 {
584 assert(str != NULL);
585
586 size_t len = wcslen(str);
587 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
588 return NULL;
589 }
590
591 size_t size = (len + 1) * sizeof(wchar_t);
592 wchar_t *str2 = PyMem_RawMalloc(size);
593 if (str2 == NULL) {
594 return NULL;
595 }
596
597 memcpy(str2, str, size);
598 return str2;
599 }
600
601 char *
602 _PyMem_RawStrdup(const char *str)
603 {
604 assert(str != NULL);
605 size_t size = strlen(str) + 1;
606 char *copy = PyMem_RawMalloc(size);
607 if (copy == NULL) {
608 return NULL;
609 }
610 memcpy(copy, str, size);
611 return copy;
612 }
613
614 char *
615 _PyMem_Strdup(const char *str)
616 {
617 assert(str != NULL);
618 size_t size = strlen(str) + 1;
619 char *copy = PyMem_Malloc(size);
620 if (copy == NULL) {
621 return NULL;
622 }
623 memcpy(copy, str, size);
624 return copy;
625 }
626
627 void *
628 PyObject_Malloc(size_t size)
629 {
630 /* see PyMem_RawMalloc() */
631 if (size > (size_t)PY_SSIZE_T_MAX)
632 return NULL;
633 return _PyObject.malloc(_PyObject.ctx, size);
634 }
635
636 void *
637 PyObject_Calloc(size_t nelem, size_t elsize)
638 {
639 /* see PyMem_RawMalloc() */
640 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
641 return NULL;
642 return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
643 }
644
645 void *
646 PyObject_Realloc(void *ptr, size_t new_size)
647 {
648 /* see PyMem_RawMalloc() */
649 if (new_size > (size_t)PY_SSIZE_T_MAX)
650 return NULL;
651 return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
652 }
653
654 void
655 PyObject_Free(void *ptr)
656 {
657 _PyObject.free(_PyObject.ctx, ptr);
658 }
659
660
661 #ifdef WITH_PYMALLOC
662
663 #ifdef WITH_VALGRIND
664 #include <valgrind/valgrind.h>
665
666 /* If we're using GCC, use __builtin_expect() to reduce overhead of
667 the valgrind checks */
668 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
669 # define UNLIKELY(value) __builtin_expect((value), 0)
670 #else
671 # define UNLIKELY(value) (value)
672 #endif
673
674 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
675 static int running_on_valgrind = -1;
676 #endif
677
678
679 /* An object allocator for Python.
680
681 Here is an introduction to the layers of the Python memory architecture,
682 showing where the object allocator is actually used (layer +2). It is
683 called for every object allocation and deallocation (PyObject_New/Del),
684 unless the object-specific allocators implement a proprietary allocation
685 scheme (ex.: ints use a simple free list). This is also the place where
686 the cyclic garbage collector operates selectively on container objects.
687
688
689 Object-specific allocators
690 _____ ______ ______ ________
691 [ int ] [ dict ] [ list ] ... [ string ] Python core |
692 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
693 _______________________________ | |
694 [ Python's object allocator ] | |
695 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
696 ______________________________________________________________ |
697 [ Python's raw memory allocator (PyMem_ API) ] |
698 +1 | <----- Python memory (under PyMem manager's control) ------> | |
699 __________________________________________________________________
700 [ Underlying general-purpose allocator (ex: C library malloc) ]
701 0 | <------ Virtual memory allocated for the python process -------> |
702
703 =========================================================================
704 _______________________________________________________________________
705 [ OS-specific Virtual Memory Manager (VMM) ]
706 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
707 __________________________________ __________________________________
708 [ ] [ ]
709 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
710
711 */
712 /*==========================================================================*/
713
714 /* A fast, special-purpose memory allocator for small blocks, to be used
715 on top of a general-purpose malloc -- heavily based on previous art. */
716
717 /* Vladimir Marangozov -- August 2000 */
718
719 /*
720 * "Memory management is where the rubber meets the road -- if we do the wrong
721 * thing at any level, the results will not be good. And if we don't make the
722 * levels work well together, we are in serious trouble." (1)
723 *
724 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
725 * "Dynamic Storage Allocation: A Survey and Critical Review",
726 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
727 */
728
729 /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */
730
731 /*==========================================================================*/
732
733 /*
734 * Allocation strategy abstract:
735 *
736 * For small requests, the allocator sub-allocates <Big> blocks of memory.
737 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
738 * system's allocator.
739 *
740 * Small requests are grouped in size classes spaced 8 bytes apart, due
741 * to the required valid alignment of the returned address. Requests of
742 * a particular size are serviced from memory pools of 4K (one VMM page).
743 * Pools are fragmented on demand and contain free lists of blocks of one
744 * particular size class. In other words, there is a fixed-size allocator
745 * for each size class. Free pools are shared by the different allocators
746 * thus minimizing the space reserved for a particular size class.
747 *
748 * This allocation strategy is a variant of what is known as "simple
749 * segregated storage based on array of free lists". The main drawback of
750 * simple segregated storage is that we might end up with a lot of reserved
751 * memory for the different free lists, which degenerate in time. To avoid
752 * this, we partition each free list in pools and we share dynamically the
753 * reserved space between all free lists. This technique is quite efficient
754 * for memory intensive programs which allocate mainly small-sized blocks.
755 *
756 * For small requests we have the following table:
757 *
758 * Request in bytes Size of allocated block Size class idx
759 * ----------------------------------------------------------------
760 * 1-8 8 0
761 * 9-16 16 1
762 * 17-24 24 2
763 * 25-32 32 3
764 * 33-40 40 4
765 * 41-48 48 5
766 * 49-56 56 6
767 * 57-64 64 7
768 * 65-72 72 8
769 * ... ... ...
770 * 497-504 504 62
771 * 505-512 512 63
772 *
773 * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
774 * allocator.
775 */
776
777 /*==========================================================================*/
778
779 /*
780 * -- Main tunable settings section --
781 */
782
783 /*
784 * Alignment of addresses returned to the user. 8-bytes alignment works
785 * on most current architectures (with 32-bit or 64-bit address busses).
786 * The alignment value is also used for grouping small requests in size
787 * classes spaced ALIGNMENT bytes apart.
788 *
789 * You shouldn't change this unless you know what you are doing.
790 */
791 #define ALIGNMENT 8 /* must be 2^N */
792 #define ALIGNMENT_SHIFT 3
793
794 /* Return the number of bytes in size class I, as a uint. */
795 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
796
797 /*
798 * Max size threshold below which malloc requests are considered to be
799 * small enough in order to use preallocated memory pools. You can tune
800 * this value according to your application behaviour and memory needs.
801 *
802 * Note: a size threshold of 512 guarantees that newly created dictionaries
803 * will be allocated from preallocated memory pools on 64-bit.
804 *
805 * The following invariants must hold:
806 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
807 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
808 *
809 * Although not required, for better performance and space efficiency,
810 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
811 */
812 #define SMALL_REQUEST_THRESHOLD 512
813 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
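
/* Worked example (illustration only): with ALIGNMENT == 8, the size class
 * index of a small request is (nbytes - 1) >> ALIGNMENT_SHIFT, which is
 * exactly what pymalloc_alloc() computes below, and INDEX2SIZE() maps the
 * index back to the block size actually handed out:
 *
 *     nbytes = 1    ->  idx = 0,   INDEX2SIZE(0)  = 8
 *     nbytes = 42   ->  idx = 5,   INDEX2SIZE(5)  = 48
 *     nbytes = 512  ->  idx = 63,  INDEX2SIZE(63) = 512
 *
 * NB_SMALL_SIZE_CLASSES is therefore 512 / 8 == 64 size classes (0..63).
 */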
814
815 /*
816 * The system's VMM page size can be obtained on most unices with a
817 * getpagesize() call or deduced from various header files. To make
818 * things simpler, we assume that it is 4K, which is OK for most systems.
819 * It is probably better if this is the native page size, but it doesn't
820 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
821 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
822 * violation fault. 4K is apparently OK for all the platforms that python
823 * currently targets.
824 */
825 #define SYSTEM_PAGE_SIZE (4 * 1024)
826 #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1)
827
828 /*
829 * Maximum amount of memory managed by the allocator for small requests.
830 */
831 #ifdef WITH_MEMORY_LIMITS
832 #ifndef SMALL_MEMORY_LIMIT
833 #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */
834 #endif
835 #endif
836
837 /*
838 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
839 * on a page boundary. This is a reserved virtual address space for the
840  * current process (obtained through a malloc()/mmap() call). In no way does
841  * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
842 * usually an address range reservation for <Big> bytes, unless all pages within
843 * this space are referenced subsequently. So malloc'ing big blocks and not
844 * using them does not mean "wasting memory". It's an addressable range
845 * wastage...
846 *
847 * Arenas are allocated with mmap() on systems supporting anonymous memory
848 * mappings to reduce heap fragmentation.
849 */
850 #define ARENA_SIZE (256 << 10) /* 256KB */
851
852 #ifdef WITH_MEMORY_LIMITS
853 #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
854 #endif
855
856 /*
857 * Size of the pools used for small blocks. Should be a power of 2,
858 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
859 */
860 #define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
861 #define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
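
/* Worked example (illustration only): with ARENA_SIZE == 256 KB and
 * POOL_SIZE == 4 KB, new_arena() below carves at most
 *
 *     ARENA_SIZE / POOL_SIZE == 64
 *
 * pools out of one arena; one pool is lost when the arena's start address is
 * not already pool-aligned (see the `excess` handling in new_arena()).
 */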
862
863 /*
864 * -- End of tunable settings section --
865 */
866
867 /*==========================================================================*/
868
869 /*
870 * Locking
871 *
872 * To reduce lock contention, it would probably be better to refine the
873 * crude function locking with per size class locking. I'm not positive
874 * however, whether it's worth switching to such locking policy because
875 * of the performance penalty it might introduce.
876 *
877 * The following macros describe the simplest (should also be the fastest)
878 * lock object on a particular platform and the init/fini/lock/unlock
879 * operations on it. The locks defined here are not expected to be recursive
880 * because it is assumed that they will always be called in the order:
881 * INIT, [LOCK, UNLOCK]*, FINI.
882 */
883
884 /*
885 * Python's threads are serialized, so object malloc locking is disabled.
886 */
887 #define SIMPLELOCK_DECL(lock) /* simple lock declaration */
888 #define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */
889 #define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */
890 #define SIMPLELOCK_LOCK(lock) /* acquire released lock */
891 #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
892
893 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
894 typedef uint8_t block;
895
896 /* Pool for small blocks. */
897 struct pool_header {
898 union { block *_padding;
899 uint count; } ref; /* number of allocated blocks */
900 block *freeblock; /* pool's free list head */
901 struct pool_header *nextpool; /* next pool of this size class */
902 struct pool_header *prevpool; /* previous pool "" */
903 uint arenaindex; /* index into arenas of base adr */
904 uint szidx; /* block size class index */
905 uint nextoffset; /* bytes to virgin block */
906 uint maxnextoffset; /* largest valid nextoffset */
907 };
908
909 typedef struct pool_header *poolp;
910
911 /* Record keeping for arenas. */
912 struct arena_object {
913 /* The address of the arena, as returned by malloc. Note that 0
914 * will never be returned by a successful malloc, and is used
915 * here to mark an arena_object that doesn't correspond to an
916 * allocated arena.
917 */
918 uintptr_t address;
919
920 /* Pool-aligned pointer to the next pool to be carved off. */
921 block* pool_address;
922
923 /* The number of available pools in the arena: free pools + never-
924 * allocated pools.
925 */
926 uint nfreepools;
927
928 /* The total number of pools in the arena, whether or not available. */
929 uint ntotalpools;
930
931 /* Singly-linked list of available pools. */
932 struct pool_header* freepools;
933
934 /* Whenever this arena_object is not associated with an allocated
935 * arena, the nextarena member is used to link all unassociated
936 * arena_objects in the singly-linked `unused_arena_objects` list.
937 * The prevarena member is unused in this case.
938 *
939 * When this arena_object is associated with an allocated arena
940 * with at least one available pool, both members are used in the
941 * doubly-linked `usable_arenas` list, which is maintained in
942 * increasing order of `nfreepools` values.
943 *
944 * Else this arena_object is associated with an allocated arena
945 * all of whose pools are in use. `nextarena` and `prevarena`
946 * are both meaningless in this case.
947 */
948 struct arena_object* nextarena;
949 struct arena_object* prevarena;
950 };
951
952 #define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
953
954 #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */
955
956 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
957 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
958
959 /* Return total number of blocks in pool of size index I, as a uint. */
960 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
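
/* Worked example (illustration only, assuming a typical LP64 build where
 * sizeof(struct pool_header) == 48): POOL_OVERHEAD rounds the header size up
 * to a multiple of ALIGNMENT, so a 4 KB pool of the smallest size class holds
 *
 *     NUMBLOCKS(0)  == (4096 - 48) / 8   == 506 blocks,
 *
 * while the largest class holds NUMBLOCKS(63) == (4096 - 48) / 512 == 7.
 */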
961
962 /*==========================================================================*/
963
964 /*
965 * This malloc lock
966 */
967 SIMPLELOCK_DECL(_malloc_lock)
968 #define LOCK() SIMPLELOCK_LOCK(_malloc_lock)
969 #define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock)
970 #define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock)
971 #define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock)
972
973 /*
974 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
975
976 This is involved. For an index i, usedpools[i+i] is the header for a list of
977 all partially used pools holding small blocks with "size class idx" i. So
978 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
979 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
980
981 Pools are carved off an arena's highwater mark (an arena_object's pool_address
982 member) as needed. Once carved off, a pool is in one of three states forever
983 after:
984
985 used == partially used, neither empty nor full
986 At least one block in the pool is currently allocated, and at least one
987 block in the pool is not currently allocated (note this implies a pool
988 has room for at least two blocks).
989 This is a pool's initial state, as a pool is created only when malloc
990 needs space.
991 The pool holds blocks of a fixed size, and is in the circular list headed
992 at usedpools[i] (see above). It's linked to the other used pools of the
993 same size class via the pool_header's nextpool and prevpool members.
994 If all but one block is currently allocated, a malloc can cause a
995 transition to the full state. If all but one block is not currently
996 allocated, a free can cause a transition to the empty state.
997
998 full == all the pool's blocks are currently allocated
999 On transition to full, a pool is unlinked from its usedpools[] list.
1000 It's not linked to from anything then anymore, and its nextpool and
1001 prevpool members are meaningless until it transitions back to used.
1002 A free of a block in a full pool puts the pool back in the used state.
1003 Then it's linked in at the front of the appropriate usedpools[] list, so
1004 that the next allocation for its size class will reuse the freed block.
1005
1006 empty == all the pool's blocks are currently available for allocation
1007 On transition to empty, a pool is unlinked from its usedpools[] list,
1008 and linked to the front of its arena_object's singly-linked freepools list,
1009 via its nextpool member. The prevpool member has no meaning in this case.
1010 Empty pools have no inherent size class: the next time a malloc finds
1011 an empty list in usedpools[], it takes the first pool off of freepools.
1012 If the size class needed happens to be the same as the size class the pool
1013 last had, some pool initialization can be skipped.
1014
1015
1016 Block Management
1017
1018 Blocks within pools are again carved out as needed. pool->freeblock points to
1019 the start of a singly-linked list of free blocks within the pool. When a
1020 block is freed, it's inserted at the front of its pool's freeblock list. Note
1021 that the available blocks in a pool are *not* linked all together when a pool
1022 is initialized. Instead only "the first two" (lowest addresses) blocks are
1023 set up, returning the first such block, and setting pool->freeblock to a
1024 one-block list holding the second such block. This is consistent with the
1025 fact that pymalloc strives at all levels (arena, pool, and block) never to
1026 touch a piece of memory until it's actually needed.
1027
1028 So long as a pool is in the used state, we're certain there *is* a block
1029 available for allocating, and pool->freeblock is not NULL. If pool->freeblock
1030 points to the end of the free list before we've carved the entire pool into
1031 blocks, that means we simply haven't yet gotten to one of the higher-address
1032 blocks. The offset from the pool_header to the start of "the next" virgin
1033 block is stored in the pool_header nextoffset member, and the largest value
1034 of nextoffset that makes sense is stored in the maxnextoffset member when a
1035 pool is initialized. All the blocks in a pool have been passed out at least
1036 once when and only when nextoffset > maxnextoffset.
1037
1038
1039 Major obscurity: While the usedpools vector is declared to have poolp
1040 entries, it doesn't really. It really contains two pointers per (conceptual)
1041 poolp entry, the nextpool and prevpool members of a pool_header. The
1042 excruciating initialization code below fools C so that
1043
1044     usedpools[i+i]
1045
1046 "acts like" a genuine poolp, but only so long as you only reference its
1047 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
1048 compensating for that a pool_header's nextpool and prevpool members
1049 immediately follow a pool_header's first two members:
1050
1051 union { block *_padding;
1052 uint count; } ref;
1053 block *freeblock;
1054
1055 each of which consume sizeof(block *) bytes. So what usedpools[i+i] really
1056 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1057 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1058 circular list is empty).
1059
1060 It's unclear why the usedpools setup is so convoluted. It could be to
1061 minimize the amount of cache required to hold this heavily-referenced table
1062 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1063 referencing code has to remember to "double the index" and doing so isn't
1064 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1065 on the fact that C doesn't insert any padding anywhere in a pool_header at or
1066 before the prevpool member.
1067 **************************************************************************** */
1068
1069 #define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1070 #define PT(x) PTA(x), PTA(x)
1071
1072 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1073 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1074 #if NB_SMALL_SIZE_CLASSES > 8
1075 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1076 #if NB_SMALL_SIZE_CLASSES > 16
1077 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1078 #if NB_SMALL_SIZE_CLASSES > 24
1079 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1080 #if NB_SMALL_SIZE_CLASSES > 32
1081 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1082 #if NB_SMALL_SIZE_CLASSES > 40
1083 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1084 #if NB_SMALL_SIZE_CLASSES > 48
1085 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1086 #if NB_SMALL_SIZE_CLASSES > 56
1087 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1088 #if NB_SMALL_SIZE_CLASSES > 64
1089 #error "NB_SMALL_SIZE_CLASSES should be less than or equal to 64"
1090 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1091 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1092 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1093 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1094 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1095 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1096 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1097 #endif /* NB_SMALL_SIZE_CLASSES > 8 */
1098 };
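
/* Illustrative sketch (not part of the allocator): because of the PTA() trick
 * above, the "is this size class empty?" test used throughout the code is a
 * plain pointer comparison. A hypothetical helper would look like:
 *
 *     static int
 *     size_class_is_empty(uint size)       // size == size class index
 *     {
 *         poolp head = usedpools[size + size];
 *         return head->nextpool == head;   // header points back at itself
 *     }
 *
 * pymalloc_alloc() below performs exactly this comparison inline
 * ("if (pool != pool->nextpool)") before taking the head pool's free block.
 */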
1099
1100 /*==========================================================================
1101 Arena management.
1102
1103 `arenas` is a vector of arena_objects. It contains maxarenas entries, some of
1104 which may not be currently used (== they're arena_objects that aren't
1105 currently associated with an allocated arena). Note that arenas proper are
1106 separately malloc'ed.
1107
1108 Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
1109 we do try to free() arenas, and use some mild heuristic strategies to increase
1110 the likelihood that arenas eventually can be freed.
1111
1112 unused_arena_objects
1113
1114 This is a singly-linked list of the arena_objects that are currently not
1115 being used (no arena is associated with them). Objects are taken off the
1116 head of the list in new_arena(), and are pushed on the head of the list in
1117 PyObject_Free() when the arena is empty. Key invariant: an arena_object
1118 is on this list if and only if its .address member is 0.
1119
1120 usable_arenas
1121
1122 This is a doubly-linked list of the arena_objects associated with arenas
1123 that have pools available. These pools are either waiting to be reused,
1124 or have not been used before. The list is sorted to have the most-
1125 allocated arenas first (ascending order based on the nfreepools member).
1126 This means that the next allocation will come from a heavily used arena,
1127 which gives the nearly empty arenas a chance to be returned to the system.
1128 In my unscientific tests this dramatically improved the number of arenas
1129 that could be freed.
1130
1131 Note that an arena_object associated with an arena all of whose pools are
1132 currently in use isn't on either list.
1133 */
1134
1135 /* Array of objects used to track chunks of memory (arenas). */
1136 static struct arena_object* arenas = NULL;
1137 /* Number of slots currently allocated in the `arenas` vector. */
1138 static uint maxarenas = 0;
1139
1140 /* The head of the singly-linked, NULL-terminated list of available
1141 * arena_objects.
1142 */
1143 static struct arena_object* unused_arena_objects = NULL;
1144
1145 /* The head of the doubly-linked, NULL-terminated at each end, list of
1146 * arena_objects associated with arenas that have pools available.
1147 */
1148 static struct arena_object* usable_arenas = NULL;
1149
1150 /* How many arena_objects do we initially allocate?
1151 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1152 * `arenas` vector.
1153 */
1154 #define INITIAL_ARENA_OBJECTS 16
1155
1156 /* Number of arenas allocated that haven't been free()'d. */
1157 static size_t narenas_currently_allocated = 0;
1158
1159 /* Total number of times malloc() called to allocate an arena. */
1160 static size_t ntimes_arena_allocated = 0;
1161 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1162 static size_t narenas_highwater = 0;
1163
1164 static Py_ssize_t _Py_AllocatedBlocks = 0;
1165
1166 Py_ssize_t
1167 _Py_GetAllocatedBlocks(void)
1168 {
1169 return _Py_AllocatedBlocks;
1170 }
1171
1172
1173 /* Allocate a new arena. If we run out of memory, return NULL. Else
1174 * allocate a new arena, and return the address of an arena_object
1175 * describing the new arena. It's expected that the caller will set
1176 * `usable_arenas` to the return value.
1177 */
1178 static struct arena_object*
1179 new_arena(void)
1180 {
1181 struct arena_object* arenaobj;
1182 uint excess; /* number of bytes above pool alignment */
1183 void *address;
1184 static int debug_stats = -1;
1185
1186 if (debug_stats == -1) {
1187 const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1188 debug_stats = (opt != NULL && *opt != '\0');
1189 }
1190 if (debug_stats)
1191 _PyObject_DebugMallocStats(stderr);
1192
1193 if (unused_arena_objects == NULL) {
1194 uint i;
1195 uint numarenas;
1196 size_t nbytes;
1197
1198 /* Double the number of arena objects on each allocation.
1199 * Note that it's possible for `numarenas` to overflow.
1200 */
1201 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1202 if (numarenas <= maxarenas)
1203 return NULL; /* overflow */
1204 #if SIZEOF_SIZE_T <= SIZEOF_INT
1205 if (numarenas > SIZE_MAX / sizeof(*arenas))
1206 return NULL; /* overflow */
1207 #endif
1208 nbytes = numarenas * sizeof(*arenas);
1209 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1210 if (arenaobj == NULL)
1211 return NULL;
1212 arenas = arenaobj;
1213
1214 /* We might need to fix pointers that were copied. However,
1215 * new_arena only gets called when all the pages in the
1216 * previous arenas are full. Thus, there are *no* pointers
1217 * into the old array. Thus, we don't have to worry about
1218 * invalid pointers. Just to be sure, some asserts:
1219 */
1220 assert(usable_arenas == NULL);
1221 assert(unused_arena_objects == NULL);
1222
1223 /* Put the new arenas on the unused_arena_objects list. */
1224 for (i = maxarenas; i < numarenas; ++i) {
1225 arenas[i].address = 0; /* mark as unassociated */
1226 arenas[i].nextarena = i < numarenas - 1 ?
1227 &arenas[i+1] : NULL;
1228 }
1229
1230 /* Update globals. */
1231 unused_arena_objects = &arenas[maxarenas];
1232 maxarenas = numarenas;
1233 }
1234
1235 /* Take the next available arena object off the head of the list. */
1236 assert(unused_arena_objects != NULL);
1237 arenaobj = unused_arena_objects;
1238 unused_arena_objects = arenaobj->nextarena;
1239 assert(arenaobj->address == 0);
1240 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1241 if (address == NULL) {
1242 /* The allocation failed: return NULL after putting the
1243 * arenaobj back.
1244 */
1245 arenaobj->nextarena = unused_arena_objects;
1246 unused_arena_objects = arenaobj;
1247 return NULL;
1248 }
1249 arenaobj->address = (uintptr_t)address;
1250
1251 ++narenas_currently_allocated;
1252 ++ntimes_arena_allocated;
1253 if (narenas_currently_allocated > narenas_highwater)
1254 narenas_highwater = narenas_currently_allocated;
1255 arenaobj->freepools = NULL;
1256 /* pool_address <- first pool-aligned address in the arena
1257 nfreepools <- number of whole pools that fit after alignment */
1258 arenaobj->pool_address = (block*)arenaobj->address;
1259 arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
1260 assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
1261 excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1262 if (excess != 0) {
1263 --arenaobj->nfreepools;
1264 arenaobj->pool_address += POOL_SIZE - excess;
1265 }
1266 arenaobj->ntotalpools = arenaobj->nfreepools;
1267
1268 return arenaobj;
1269 }
1270
1271
1272 /*
1273 address_in_range(P, POOL)
1274
1275 Return true if and only if P is an address that was allocated by pymalloc.
1276 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1277 (the caller is asked to compute this because the macro expands POOL more than
1278 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1279 variable and pass the latter to the macro; because address_in_range is
1280 called on every alloc/realloc/free, micro-efficiency is important here).
1281
1282 Tricky: Let B be the arena base address associated with the pool, B =
1283 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1284
1285 B <= P < B + ARENA_SIZE
1286
1287 Subtracting B throughout, this is true iff
1288
1289 0 <= P-B < ARENA_SIZE
1290
1291 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1292
1293 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1294 before the first arena has been allocated. `arenas` is still NULL in that
1295 case. We're relying on the fact that maxarenas is also 0 in that case, so that
1296 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1297 into a NULL arenas.
1298
1299 Details: given P and POOL, the arena_object corresponding to P is AO =
1300 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1301 stores, etc), POOL is the correct address of P's pool, AO.address is the
1302 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1303 AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1304 (NULL)). Therefore address_in_range correctly reports that obmalloc
1305 controls P.
1306
1307 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1308 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1309 in this case -- it may even be uninitialized trash. If the trash arenaindex
1310 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1311 control P.
1312
1313 Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
1314 allocated arena, obmalloc controls all the memory in slice AO.address :
1315 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1316 so P doesn't lie in that slice, so the macro correctly reports that P is not
1317 controlled by obmalloc.
1318
1319 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1320 arena_object (one not currently associated with an allocated arena),
1321 AO.address is 0, and the second test in the macro reduces to:
1322
1323 P < ARENA_SIZE
1324
1325 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1326 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1327 of the test still passes, and the third clause (AO.address != 0) is necessary
1328 to get the correct result: AO.address is 0 in this case, so the macro
1329 correctly reports that P is not controlled by obmalloc (despite that P lies in
1330 slice AO.address : AO.address + ARENA_SIZE).
1331
1332 Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1333 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1334 corresponded to a currently-allocated arena, so the "P is not controlled by
1335 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1336 was impossible.
1337
1338 Note that the logic is excruciating, and reading up possibly uninitialized
1339 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1340 creates problems for some memory debuggers. The overwhelming advantage is
1341 that this test determines whether an arbitrary address is controlled by
1342 obmalloc in a small constant time, independent of the number of arenas
1343 obmalloc controls. Since this test is needed at every entry point, it's
1344 extremely desirable that it be this fast.
1345 */
1346
1347 static bool _Py_NO_ADDRESS_SAFETY_ANALYSIS
1348 _Py_NO_SANITIZE_THREAD
1349 _Py_NO_SANITIZE_MEMORY
1350 address_in_range(void *p, poolp pool)
1351 {
1352 // Since address_in_range may be reading from memory which was not allocated
1353 // by Python, it is important that pool->arenaindex is read only once, as
1354 // another thread may be concurrently modifying the value without holding
1355 // the GIL. The following dance forces the compiler to read pool->arenaindex
1356 // only once.
1357 uint arenaindex = *((volatile uint *)&pool->arenaindex);
1358 return arenaindex < maxarenas &&
1359 (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1360 arenas[arenaindex].address != 0;
1361 }
1362
1363
1364 /*==========================================================================*/
1365
1366 /* pymalloc allocator
1367
1368 The basic blocks are ordered by decreasing execution frequency,
1369 which minimizes the number of jumps in the most common cases,
1370 improves branching prediction and instruction scheduling (small
1371 block allocations typically result in a couple of instructions).
1372 Unless the optimizer reorders everything, being too smart...
1373
1374 Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
1375
1376 Return 0 if pymalloc failed to allocate the memory block: on bigger
1377 requests, on error in the code below (as a last chance to serve the request)
1378 or when the max memory limit has been reached. */
1379 static int
1380 pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
1381 {
1382 block *bp;
1383 poolp pool;
1384 poolp next;
1385 uint size;
1386
1387 #ifdef WITH_VALGRIND
1388 if (UNLIKELY(running_on_valgrind == -1)) {
1389 running_on_valgrind = RUNNING_ON_VALGRIND;
1390 }
1391 if (UNLIKELY(running_on_valgrind)) {
1392 return 0;
1393 }
1394 #endif
1395
1396 if (nbytes == 0) {
1397 return 0;
1398 }
1399 if (nbytes > SMALL_REQUEST_THRESHOLD) {
1400 return 0;
1401 }
1402
1403 LOCK();
1404 /*
1405 * Most frequent paths first
1406 */
1407 size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1408 pool = usedpools[size + size];
1409 if (pool != pool->nextpool) {
1410 /*
1411 * There is a used pool for this size class.
1412 * Pick up the head block of its free list.
1413 */
1414 ++pool->ref.count;
1415 bp = pool->freeblock;
1416 assert(bp != NULL);
1417 if ((pool->freeblock = *(block **)bp) != NULL) {
1418 goto success;
1419 }
1420
1421 /*
1422 * Reached the end of the free list, try to extend it.
1423 */
1424 if (pool->nextoffset <= pool->maxnextoffset) {
1425 /* There is room for another block. */
1426 pool->freeblock = (block*)pool +
1427 pool->nextoffset;
1428 pool->nextoffset += INDEX2SIZE(size);
1429 *(block **)(pool->freeblock) = NULL;
1430 goto success;
1431 }
1432
1433 /* Pool is full, unlink from used pools. */
1434 next = pool->nextpool;
1435 pool = pool->prevpool;
1436 next->prevpool = pool;
1437 pool->nextpool = next;
1438 goto success;
1439 }
1440
1441 /* There isn't a pool of the right size class immediately
1442 * available: use a free pool.
1443 */
1444 if (usable_arenas == NULL) {
1445 /* No arena has a free pool: allocate a new arena. */
1446 #ifdef WITH_MEMORY_LIMITS
1447 if (narenas_currently_allocated >= MAX_ARENAS) {
1448 goto failed;
1449 }
1450 #endif
1451 usable_arenas = new_arena();
1452 if (usable_arenas == NULL) {
1453 goto failed;
1454 }
1455 usable_arenas->nextarena =
1456 usable_arenas->prevarena = NULL;
1457 }
1458 assert(usable_arenas->address != 0);
1459
1460 /* Try to get a cached free pool. */
1461 pool = usable_arenas->freepools;
1462 if (pool != NULL) {
1463 /* Unlink from cached pools. */
1464 usable_arenas->freepools = pool->nextpool;
1465
1466 /* This arena already had the smallest nfreepools
1467 * value, so decreasing nfreepools doesn't change
1468 * that, and we don't need to rearrange the
1469 * usable_arenas list. However, if the arena has
1470 * become wholly allocated, we need to remove its
1471 * arena_object from usable_arenas.
1472 */
1473 --usable_arenas->nfreepools;
1474 if (usable_arenas->nfreepools == 0) {
1475 /* Wholly allocated: remove. */
1476 assert(usable_arenas->freepools == NULL);
1477 assert(usable_arenas->nextarena == NULL ||
1478 usable_arenas->nextarena->prevarena ==
1479 usable_arenas);
1480
1481 usable_arenas = usable_arenas->nextarena;
1482 if (usable_arenas != NULL) {
1483 usable_arenas->prevarena = NULL;
1484 assert(usable_arenas->address != 0);
1485 }
1486 }
1487 else {
1488 /* nfreepools > 0: it must be that freepools
1489 * isn't NULL, or that we haven't yet carved
1490 * off all the arena's pools for the first
1491 * time.
1492 */
1493 assert(usable_arenas->freepools != NULL ||
1494 usable_arenas->pool_address <=
1495 (block*)usable_arenas->address +
1496 ARENA_SIZE - POOL_SIZE);
1497 }
1498
1499 init_pool:
1500 /* Frontlink to used pools. */
1501 next = usedpools[size + size]; /* == prev */
1502 pool->nextpool = next;
1503 pool->prevpool = next;
1504 next->nextpool = pool;
1505 next->prevpool = pool;
1506 pool->ref.count = 1;
1507 if (pool->szidx == size) {
1508 /* Luckily, this pool last contained blocks
1509 * of the same size class, so its header
1510 * and free list are already initialized.
1511 */
1512 bp = pool->freeblock;
1513 assert(bp != NULL);
1514 pool->freeblock = *(block **)bp;
1515 goto success;
1516 }
1517 /*
1518 * Initialize the pool header, set up the free list to
1519 * contain just the second block, and return the first
1520 * block.
1521 */
1522 pool->szidx = size;
1523 size = INDEX2SIZE(size);
1524 bp = (block *)pool + POOL_OVERHEAD;
1525 pool->nextoffset = POOL_OVERHEAD + (size << 1);
1526 pool->maxnextoffset = POOL_SIZE - size;
1527 pool->freeblock = bp + size;
1528 *(block **)(pool->freeblock) = NULL;
1529 goto success;
1530 }
1531
1532 /* Carve off a new pool. */
1533 assert(usable_arenas->nfreepools > 0);
1534 assert(usable_arenas->freepools == NULL);
1535 pool = (poolp)usable_arenas->pool_address;
1536 assert((block*)pool <= (block*)usable_arenas->address +
1537 ARENA_SIZE - POOL_SIZE);
1538 pool->arenaindex = (uint)(usable_arenas - arenas);
1539 assert(&arenas[pool->arenaindex] == usable_arenas);
1540 pool->szidx = DUMMY_SIZE_IDX;
1541 usable_arenas->pool_address += POOL_SIZE;
1542 --usable_arenas->nfreepools;
1543
1544 if (usable_arenas->nfreepools == 0) {
1545 assert(usable_arenas->nextarena == NULL ||
1546 usable_arenas->nextarena->prevarena ==
1547 usable_arenas);
1548 /* Unlink the arena: it is completely allocated. */
1549 usable_arenas = usable_arenas->nextarena;
1550 if (usable_arenas != NULL) {
1551 usable_arenas->prevarena = NULL;
1552 assert(usable_arenas->address != 0);
1553 }
1554 }
1555
1556 goto init_pool;
1557
1558 success:
1559 UNLOCK();
1560 assert(bp != NULL);
1561 *ptr_p = (void *)bp;
1562 return 1;
1563
1564 failed:
1565 UNLOCK();
1566 return 0;
1567 }
1568
1569
1570 static void *
1571 _PyObject_Malloc(void *ctx, size_t nbytes)
1572 {
1573 void* ptr;
1574 if (pymalloc_alloc(ctx, &ptr, nbytes)) {
1575 _Py_AllocatedBlocks++;
1576 return ptr;
1577 }
1578
1579 ptr = PyMem_RawMalloc(nbytes);
1580 if (ptr != NULL) {
1581 _Py_AllocatedBlocks++;
1582 }
1583 return ptr;
1584 }
1585
1586
1587 static void *
1588 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1589 {
1590 void* ptr;
1591
1592 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1593 size_t nbytes = nelem * elsize;
1594
1595 if (pymalloc_alloc(ctx, &ptr, nbytes)) {
1596 memset(ptr, 0, nbytes);
1597 _Py_AllocatedBlocks++;
1598 return ptr;
1599 }
1600
1601 ptr = PyMem_RawCalloc(nelem, elsize);
1602 if (ptr != NULL) {
1603 _Py_AllocatedBlocks++;
1604 }
1605 return ptr;
1606 }
1607
1608
1609 /* Free a memory block allocated by pymalloc_alloc().
1610 Return 1 if it was freed.
1611 Return 0 if the block was not allocated by pymalloc_alloc(). */
1612 static int
1613 pymalloc_free(void *ctx, void *p)
1614 {
1615 poolp pool;
1616 block *lastfree;
1617 poolp next, prev;
1618 uint size;
1619
1620 assert(p != NULL);
1621
1622 #ifdef WITH_VALGRIND
1623 if (UNLIKELY(running_on_valgrind > 0)) {
1624 return 0;
1625 }
1626 #endif
1627
1628 pool = POOL_ADDR(p);
1629 if (!address_in_range(p, pool)) {
1630 return 0;
1631 }
1632 /* We allocated this address. */
1633
1634 LOCK();
1635
1636 /* Link p to the start of the pool's freeblock list. Since
1637 * the pool had at least the p block outstanding, the pool
1638 * wasn't empty (so it's already in a usedpools[] list, or
1639 * was full and is in no list -- it's not in the freeblocks
1640 * list in any case).
1641 */
1642 assert(pool->ref.count > 0); /* else it was empty */
1643 *(block **)p = lastfree = pool->freeblock;
1644 pool->freeblock = (block *)p;
1645 if (!lastfree) {
1646 /* Pool was full, so doesn't currently live in any list:
1647 * link it to the front of the appropriate usedpools[] list.
1648 * This mimics LRU pool usage for new allocations and
1649 * targets optimal filling when several pools contain
1650 * blocks of the same size class.
1651 */
1652 --pool->ref.count;
1653 assert(pool->ref.count > 0); /* else the pool is empty */
1654 size = pool->szidx;
1655 next = usedpools[size + size];
1656 prev = next->prevpool;
1657
1658 /* insert pool before next: prev <-> pool <-> next */
1659 pool->nextpool = next;
1660 pool->prevpool = prev;
1661 next->prevpool = pool;
1662 prev->nextpool = pool;
1663 goto success;
1664 }
1665
1666 struct arena_object* ao;
1667 uint nf; /* ao->nfreepools */
1668
1669 /* freeblock wasn't NULL, so the pool wasn't full,
1670 * and the pool is in a usedpools[] list.
1671 */
1672 if (--pool->ref.count != 0) {
1673 /* pool isn't empty: leave it in usedpools */
1674 goto success;
1675 }
1676 /* Pool is now empty: unlink from usedpools, and
1677 * link to the front of freepools. This ensures that
1678 * previously freed pools will be allocated later
1679 * (being not referenced, they are perhaps paged out).
1680 */
1681 next = pool->nextpool;
1682 prev = pool->prevpool;
1683 next->prevpool = prev;
1684 prev->nextpool = next;
1685
1686 /* Link the pool to freepools. This is a singly-linked
1687 * list, and pool->prevpool isn't used there.
1688 */
1689 ao = &arenas[pool->arenaindex];
1690 pool->nextpool = ao->freepools;
1691 ao->freepools = pool;
1692 nf = ++ao->nfreepools;
1693
1694 /* All the rest is arena management. We just freed
1695 * a pool, and there are 4 cases for arena mgmt:
1696 * 1. If all the pools are free, return the arena to
1697 * the system free().
1698 * 2. If this is the only free pool in the arena,
1699 * add the arena back to the `usable_arenas` list.
1700 * 3. If the "next" arena has a smaller count of free
1701 * pools, we have to "slide this arena right" so
1702 * that usable_arenas stays sorted in order of
1703 * nfreepools.
1704 * 4. Else there's nothing more to do.
1705 */
1706 if (nf == ao->ntotalpools) {
1707 /* Case 1. First unlink ao from usable_arenas.
1708 */
1709 assert(ao->prevarena == NULL ||
1710 ao->prevarena->address != 0);
1711 assert(ao->nextarena == NULL ||
1712 ao->nextarena->address != 0);
1713
1714 /* Fix the pointer in the prevarena, or the
1715 * usable_arenas pointer.
1716 */
1717 if (ao->prevarena == NULL) {
1718 usable_arenas = ao->nextarena;
1719 assert(usable_arenas == NULL ||
1720 usable_arenas->address != 0);
1721 }
1722 else {
1723 assert(ao->prevarena->nextarena == ao);
1724 ao->prevarena->nextarena =
1725 ao->nextarena;
1726 }
1727 /* Fix the pointer in the nextarena. */
1728 if (ao->nextarena != NULL) {
1729 assert(ao->nextarena->prevarena == ao);
1730 ao->nextarena->prevarena =
1731 ao->prevarena;
1732 }
1733 /* Record that this arena_object slot is
1734 * available to be reused.
1735 */
1736 ao->nextarena = unused_arena_objects;
1737 unused_arena_objects = ao;
1738
1739 /* Free the entire arena. */
1740 _PyObject_Arena.free(_PyObject_Arena.ctx,
1741 (void *)ao->address, ARENA_SIZE);
1742 ao->address = 0; /* mark unassociated */
1743 --narenas_currently_allocated;
1744
1745 goto success;
1746 }
1747
1748 if (nf == 1) {
1749 /* Case 2. Put ao at the head of
1750 * usable_arenas. Note that because
1751 * ao->nfreepools was 0 before, ao isn't
1752 * currently on the usable_arenas list.
1753 */
1754 ao->nextarena = usable_arenas;
1755 ao->prevarena = NULL;
1756 if (usable_arenas)
1757 usable_arenas->prevarena = ao;
1758 usable_arenas = ao;
1759 assert(usable_arenas->address != 0);
1760
1761 goto success;
1762 }
1763
1764 /* If this arena is now out of order, we need to keep
1765 * the list sorted. The list is kept sorted so that
1766 * the "most full" arenas are used first, which allows
1767 * the nearly empty arenas to be completely freed. In
1768 * a few unscientific tests, this approach seemed to
1769 * allow a lot more memory to be freed.
1770 */
1771 if (ao->nextarena == NULL ||
1772 nf <= ao->nextarena->nfreepools) {
1773 /* Case 4. Nothing to do. */
1774 goto success;
1775 }
1776 /* Case 3: We have to move the arena towards the end
1777 * of the list, because it has more free pools than
1778 * the arena to its right.
1779 * First unlink ao from usable_arenas.
1780 */
1781 if (ao->prevarena != NULL) {
1782 /* ao isn't at the head of the list */
1783 assert(ao->prevarena->nextarena == ao);
1784 ao->prevarena->nextarena = ao->nextarena;
1785 }
1786 else {
1787 /* ao is at the head of the list */
1788 assert(usable_arenas == ao);
1789 usable_arenas = ao->nextarena;
1790 }
1791 ao->nextarena->prevarena = ao->prevarena;
1792
1793 /* Locate the new insertion point by iterating over
1794 * the list, using our nextarena pointer.
1795 */
1796 while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
1797 ao->prevarena = ao->nextarena;
1798 ao->nextarena = ao->nextarena->nextarena;
1799 }
1800
1801 /* Insert ao at this point. */
1802 assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
1803 assert(ao->prevarena->nextarena == ao->nextarena);
1804
1805 ao->prevarena->nextarena = ao;
1806 if (ao->nextarena != NULL) {
1807 ao->nextarena->prevarena = ao;
1808 }
1809
1810 /* Verify that the swaps worked. */
1811 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
1812 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
1813 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
1814 assert((usable_arenas == ao && ao->prevarena == NULL)
1815 || ao->prevarena->nextarena == ao);
1816
1817 goto success;
1818
1819 success:
1820 UNLOCK();
1821 return 1;
1822 }
1823
1824
1825 static void
1826 _PyObject_Free(void *ctx, void *p)
1827 {
1828 /* PyObject_Free(NULL) has no effect */
1829 if (p == NULL) {
1830 return;
1831 }
1832
1833 _Py_AllocatedBlocks--;
1834 if (!pymalloc_free(ctx, p)) {
1835 /* pymalloc didn't allocate this address */
1836 PyMem_RawFree(p);
1837 }
1838 }
1839
1840
1841 /* pymalloc realloc.
1842
1843 If nbytes==0, then as the Python docs promise, we do not treat this like
1844 free(p), and return a non-NULL result.
1845
1846 Return 1 if pymalloc reallocated memory and wrote the new pointer into
1847 newptr_p.
1848
1849 Return 0 if pymalloc didn't allocate p. */
1850 static int
1851 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
1852 {
1853 void *bp;
1854 poolp pool;
1855 size_t size;
1856
1857 assert(p != NULL);
1858
1859 #ifdef WITH_VALGRIND
1860 /* Treat running_on_valgrind == -1 the same as 0 */
1861 if (UNLIKELY(running_on_valgrind > 0)) {
1862 return 0;
1863 }
1864 #endif
1865
1866 pool = POOL_ADDR(p);
1867 if (!address_in_range(p, pool)) {
1868 /* pymalloc is not managing this block.
1869
1870 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
1871 over this block. However, if we do, we need to copy the valid data
1872 from the C-managed block to one of our blocks, and there's no
1873 portable way to know how much of the memory space starting at p is
1874 valid.
1875
1876 As bug 1185883 pointed out the hard way, it's possible that the
1877 C-managed block is "at the end" of allocated VM space, so that a
1878 memory fault can occur if we try to copy nbytes bytes starting at p.
1879 Instead we punt: let C continue to manage this block. */
1880 return 0;
1881 }
1882
1883 /* pymalloc is in charge of this block */
1884 size = INDEX2SIZE(pool->szidx);
1885 if (nbytes <= size) {
1886 /* The block is staying the same or shrinking.
1887
1888 If it's shrinking, there's a tradeoff: it costs cycles to copy the
1889 block to a smaller size class, but it wastes memory not to copy it.
1890
1891 The compromise here is to copy on shrink only if at least 25% of
1892 size can be shaved off. */
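/* Worked example (illustration only): with size == 512, nbytes == 400 gives
   4*400 == 1600 > 3*512 == 1536, so the block is kept where it is; with
   nbytes == 384 (shaving exactly 25%) the test below is false, so the data
   is copied into a smaller size class. */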
1893 if (4 * nbytes > 3 * size) {
1894 /* It's the same, or shrinking and new/old > 3/4. */
1895 *newptr_p = p;
1896 return 1;
1897 }
1898 size = nbytes;
1899 }
1900
1901 bp = _PyObject_Malloc(ctx, nbytes);
1902 if (bp != NULL) {
1903 memcpy(bp, p, size);
1904 _PyObject_Free(ctx, p);
1905 }
1906 *newptr_p = bp;
1907 return 1;
1908 }
1909
1910
1911 static void *
1912 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
1913 {
1914 void *ptr2;
1915
1916 if (ptr == NULL) {
1917 return _PyObject_Malloc(ctx, nbytes);
1918 }
1919
1920 if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
1921 return ptr2;
1922 }
1923
1924 return PyMem_RawRealloc(ptr, nbytes);
1925 }
1926
1927 #else /* ! WITH_PYMALLOC */
1928
1929 /*==========================================================================*/
1930 /* pymalloc not enabled: Redirect the entry points to malloc. These will
1931 * only be used by extensions that are compiled with pymalloc enabled. */
1932
1933 Py_ssize_t
1934 _Py_GetAllocatedBlocks(void)
1935 {
1936 return 0;
1937 }
1938
1939 #endif /* WITH_PYMALLOC */
1940
1941
1942 /*==========================================================================*/
1943 /* A cross-platform debugging allocator. This doesn't manage memory directly;
1944 * it wraps a real allocator, adding extra debugging info to the memory blocks.
1945 */
1946
1947 /* Special bytes broadcast into debug memory blocks at appropriate times.
1948 * Strings of these are unlikely to be valid addresses, floats, ints or
1949 * 7-bit ASCII.
1950 */
1951 #undef CLEANBYTE
1952 #undef DEADBYTE
1953 #undef FORBIDDENBYTE
1954 #define CLEANBYTE 0xCB /* clean (newly allocated) memory */
1955 #define DEADBYTE 0xDB /* dead (newly freed) memory */
1956 #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */
1957
1958 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
1959
1960 /* serialno is always incremented via calling this routine. The point is
1961 * to supply a single place to set a breakpoint.
1962 */
1963 static void
1964 bumpserialno(void)
1965 {
1966 ++serialno;
1967 }
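/* Illustrative debugging recipe (not part of the allocator): if a bad block is
   later reported with serial number N, a conditional breakpoint here, e.g. in
   gdb

       break bumpserialno if serialno == N - 1

   stops the next deterministic run just before the increment that stamps that
   block (blocks record the post-increment value). Any debugger with
   conditional breakpoints can be used the same way. */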
1968
1969 #define SST SIZEOF_SIZE_T
1970
1971 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
1972 static size_t
1973 read_size_t(const void *p)
1974 {
1975 const uint8_t *q = (const uint8_t *)p;
1976 size_t result = *q++;
1977 int i;
1978
1979 for (i = SST; --i > 0; ++q)
1980 result = (result << 8) | *q;
1981 return result;
1982 }
1983
1984 /* Write n as a big-endian size_t, MSB at address p, LSB at
1985 * p + sizeof(size_t) - 1.
1986 */
1987 static void
1988 write_size_t(void *p, size_t n)
1989 {
1990 uint8_t *q = (uint8_t *)p + SST - 1;
1991 int i;
1992
1993 for (i = SST; --i >= 0; --q) {
1994 *q = (uint8_t)(n & 0xff);
1995 n >>= 8;
1996 }
1997 }
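/* Worked example (illustration only): with SST == 8, write_size_t(p, 0x1234)
   stores the bytes 00 00 00 00 00 00 12 34 at p, most significant byte first,
   and read_size_t(p) then returns 0x1234 regardless of the host's native byte
   order. */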
1998
1999 /* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and
2000 fills them with useful stuff, here calling the underlying malloc's result p:
2001
2002 p[0: S]
2003 Number of bytes originally asked for. This is a size_t, big-endian (easier
2004 to read in a memory dump).
2005 p[S]
2006 API ID. See PEP 445. This is a character, but seems undocumented.
2007 p[S+1: 2*S]
2008 Copies of FORBIDDENBYTE. Used to catch under-writes and under-reads.
2009 p[2*S: 2*S+n]
2010 The requested memory, filled with copies of CLEANBYTE.
2011 Used to catch reference to uninitialized memory.
2012 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
2013 handled the request itself.
2014 p[2*S+n: 2*S+n+S]
2015 Copies of FORBIDDENBYTE. Used to catch over-writes and over-reads.
2016 p[2*S+n+S: 2*S+n+2*S]
2017 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2018 and _PyMem_DebugRealloc.
2019 This is a big-endian size_t.
2020 If "bad memory" is detected later, the serial number gives an
2021 excellent way to set a breakpoint on the next run, to capture the
2022 instant at which this block was passed out.
2023 */
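/* Worked example (a sketch, not normative): assuming SST == 8 and a request of
   nbytes == 3 through the object domain (tagged 'o' elsewhere in this file),
   the pad block obtained from the underlying allocator looks like:

       p[0:8]    00 00 00 00 00 00 00 03   requested size, big-endian
       p[8]      'o'                       API id
       p[9:16]   FB FB FB FB FB FB FB      leading FORBIDDENBYTEs
       p[16:19]  CB CB CB                  the 3 usable bytes (zeroed for calloc)
       p[19:27]  FB FB FB FB FB FB FB FB   trailing FORBIDDENBYTEs
       p[27:35]  serial number, big-endian

   and the caller receives &p[16]. */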
2024
2025 static void *
2026 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2027 {
2028 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2029 uint8_t *p; /* base address of malloc'ed pad block */
2030 uint8_t *data; /* p + 2*SST == pointer to data bytes */
2031 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2032 size_t total; /* 2 * SST + nbytes + 2 * SST */
2033
2034 if (nbytes > (size_t)PY_SSIZE_T_MAX - 4 * SST) {
2035 /* integer overflow: can't represent total as a Py_ssize_t */
2036 return NULL;
2037 }
2038 total = nbytes + 4 * SST;
2039
2040 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2041 * ^--- p ^--- data ^--- tail
2042 S: nbytes stored as size_t
2043 I: API identifier (1 byte)
2044 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2045 C: Clean bytes used later to store actual data
2046 N: Serial number stored as size_t */
2047
2048 if (use_calloc) {
2049 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2050 }
2051 else {
2052 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2053 }
2054 if (p == NULL) {
2055 return NULL;
2056 }
2057 data = p + 2*SST;
2058
2059 bumpserialno();
2060
2061 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2062 write_size_t(p, nbytes);
2063 p[SST] = (uint8_t)api->api_id;
2064 memset(p + SST + 1, FORBIDDENBYTE, SST-1);
2065
2066 if (nbytes > 0 && !use_calloc) {
2067 memset(data, CLEANBYTE, nbytes);
2068 }
2069
2070 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2071 tail = data + nbytes;
2072 memset(tail, FORBIDDENBYTE, SST);
2073 write_size_t(tail + SST, serialno);
2074
2075 return data;
2076 }
2077
2078 static void *
2079 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2080 {
2081 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2082 }
2083
2084 static void *
2085 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2086 {
2087 size_t nbytes;
2088 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2089 nbytes = nelem * elsize;
2090 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2091 }
2092
2093
2094 /* Heuristic check for whether the memory has been freed. It relies on the
2095 debug hooks on Python memory allocators, which fill the memory with
2096 DEADBYTE (0xDB) when memory is deallocated. */
2097 int
2098 _PyMem_IsFreed(void *ptr, size_t size)
2099 {
2100 unsigned char *bytes = ptr;
2101 for (size_t i=0; i < size; i++) {
2102 if (bytes[i] != DEADBYTE) {
2103 return 0;
2104 }
2105 }
2106 return 1;
2107 }
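/* Illustrative use (a sketch; nothing in this file calls it): a caller that
   suspects a dangling pointer can, when the debug hooks are installed, check

       if (_PyMem_IsFreed(ptr, size)) {
           Py_FatalError("memory appears to have been freed already");
       }

   The check is only heuristic: a live block whose bytes all happen to equal
   DEADBYTE (0xDB) is misreported as freed, and the result means nothing when
   the debug hooks are not installed. */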
2108
2109
2110 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2111 particular, that the FORBIDDENBYTEs with the api ID are still intact).
2112 Then fills the original bytes with DEADBYTE.
2113 Then calls the underlying free.
2114 */
2115 static void
2116 _PyMem_DebugRawFree(void *ctx, void *p)
2117 {
2118 /* PyMem_Free(NULL) has no effect */
2119 if (p == NULL) {
2120 return;
2121 }
2122
2123 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2124 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
2125 size_t nbytes;
2126
2127 _PyMem_DebugCheckAddress(api->api_id, p);
2128 nbytes = read_size_t(q);
2129 nbytes += 4 * SST;
2130 memset(q, DEADBYTE, nbytes);
2131 api->alloc.free(api->alloc.ctx, q);
2132 }
2133
2134
2135 static void *
2136 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2137 {
2138 if (p == NULL) {
2139 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2140 }
2141
2142 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2143 uint8_t *head; /* base address of malloc'ed pad block */
2144 uint8_t *data; /* pointer to data bytes */
2145 uint8_t *r;
2146 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2147 size_t total; /* 2 * SST + nbytes + 2 * SST */
2148 size_t original_nbytes;
2149 size_t block_serialno;
2150 #define ERASED_SIZE 64
2151 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
2152
2153 _PyMem_DebugCheckAddress(api->api_id, p);
2154
2155 data = (uint8_t *)p;
2156 head = data - 2*SST;
2157 original_nbytes = read_size_t(head);
2158 if (nbytes > (size_t)PY_SSIZE_T_MAX - 4*SST) {
2159 /* integer overflow: can't represent total as a Py_ssize_t */
2160 return NULL;
2161 }
2162 total = nbytes + 4*SST;
2163
2164 tail = data + original_nbytes;
2165 block_serialno = read_size_t(tail + SST);
2166 /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2167 ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2168 */
2169 if (original_nbytes <= sizeof(save)) {
2170 memcpy(save, data, original_nbytes);
2171 memset(data - 2*SST, DEADBYTE, original_nbytes + 4*SST);
2172 }
2173 else {
2174 memcpy(save, data, ERASED_SIZE);
2175 memset(head, DEADBYTE, ERASED_SIZE + 2*SST);
2176 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2177 memset(tail - ERASED_SIZE, DEADBYTE, ERASED_SIZE + 2*SST);
2178 }
2179
2180 /* Resize and add decorations. */
2181 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2182 if (r == NULL) {
2183 nbytes = original_nbytes;
2184 }
2185 else {
2186 head = r;
2187 bumpserialno();
2188 block_serialno = serialno;
2189 }
2190
2191 write_size_t(head, nbytes);
2192 head[SST] = (uint8_t)api->api_id;
2193 memset(head + SST + 1, FORBIDDENBYTE, SST-1);
2194 data = head + 2*SST;
2195
2196 tail = data + nbytes;
2197 memset(tail, FORBIDDENBYTE, SST);
2198 write_size_t(tail + SST, block_serialno);
2199
2200 /* Restore saved bytes. */
2201 if (original_nbytes <= sizeof(save)) {
2202 memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2203 }
2204 else {
2205 size_t i = original_nbytes - ERASED_SIZE;
2206 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2207 if (nbytes > i) {
2208 memcpy(data + i, &save[ERASED_SIZE],
2209 Py_MIN(nbytes - i, ERASED_SIZE));
2210 }
2211 }
2212
2213 if (r == NULL) {
2214 return NULL;
2215 }
2216
2217 if (nbytes > original_nbytes) {
2218 /* growing: mark new extra memory clean */
2219 memset(data + original_nbytes, CLEANBYTE, nbytes - original_nbytes);
2220 }
2221
2222 return data;
2223 }
2224
2225 static void
2226 _PyMem_DebugCheckGIL(void)
2227 {
2228 if (!PyGILState_Check())
2229 Py_FatalError("Python memory allocator called "
2230 "without holding the GIL");
2231 }
2232
2233 static void *
2234 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2235 {
2236 _PyMem_DebugCheckGIL();
2237 return _PyMem_DebugRawMalloc(ctx, nbytes);
2238 }
2239
2240 static void *
2241 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2242 {
2243 _PyMem_DebugCheckGIL();
2244 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2245 }
2246
2247
2248 static void
2249 _PyMem_DebugFree(void *ctx, void *ptr)
2250 {
2251 _PyMem_DebugCheckGIL();
2252 _PyMem_DebugRawFree(ctx, ptr);
2253 }
2254
2255
2256 static void *
2257 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2258 {
2259 _PyMem_DebugCheckGIL();
2260 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2261 }
2262
2263 /* Check the forbidden bytes on both ends of the memory allocated for p.
2264 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2265 * and call Py_FatalError to kill the program.
2266 * The API id is also checked.
2267 */
2268 static void
2269 _PyMem_DebugCheckAddress(char api, const void *p)
2270 {
2271 const uint8_t *q = (const uint8_t *)p;
2272 char msgbuf[64];
2273 const char *msg;
2274 size_t nbytes;
2275 const uint8_t *tail;
2276 int i;
2277 char id;
2278
2279 if (p == NULL) {
2280 msg = "didn't expect a NULL pointer";
2281 goto error;
2282 }
2283
2284 /* Check the API id */
2285 id = (char)q[-SST];
2286 if (id != api) {
2287 msg = msgbuf;
2288 snprintf(msgbuf, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
2289 msgbuf[sizeof(msgbuf)-1] = 0;
2290 goto error;
2291 }
2292
2293 /* Check the stuff at the start of p first: if there's underwrite
2294 * corruption, the number-of-bytes field may be nuts, and checking
2295 * the tail could lead to a segfault then.
2296 */
2297 for (i = SST-1; i >= 1; --i) {
2298 if (*(q-i) != FORBIDDENBYTE) {
2299 msg = "bad leading pad byte";
2300 goto error;
2301 }
2302 }
2303
2304 nbytes = read_size_t(q - 2*SST);
2305 tail = q + nbytes;
2306 for (i = 0; i < SST; ++i) {
2307 if (tail[i] != FORBIDDENBYTE) {
2308 msg = "bad trailing pad byte";
2309 goto error;
2310 }
2311 }
2312
2313 return;
2314
2315 error:
2316 _PyObject_DebugDumpAddress(p);
2317 Py_FatalError(msg);
2318 }
2319
2320 /* Display info to stderr about the memory block at p. */
2321 static void
2322 _PyObject_DebugDumpAddress(const void *p)
2323 {
2324 const uint8_t *q = (const uint8_t *)p;
2325 const uint8_t *tail;
2326 size_t nbytes, serial;
2327 int i;
2328 int ok;
2329 char id;
2330
2331 fprintf(stderr, "Debug memory block at address p=%p:", p);
2332 if (p == NULL) {
2333 fprintf(stderr, "\n");
2334 return;
2335 }
2336 id = (char)q[-SST];
2337 fprintf(stderr, " API '%c'\n", id);
2338
2339 nbytes = read_size_t(q - 2*SST);
2340 fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally "
2341 "requested\n", nbytes);
2342
2343 /* In case this is nuts, check the leading pad bytes first. */
2344 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2345 ok = 1;
2346 for (i = 1; i <= SST-1; ++i) {
2347 if (*(q-i) != FORBIDDENBYTE) {
2348 ok = 0;
2349 break;
2350 }
2351 }
2352 if (ok)
2353 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2354 else {
2355 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2356 FORBIDDENBYTE);
2357 for (i = SST-1; i >= 1; --i) {
2358 const uint8_t byte = *(q-i);
2359 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2360 if (byte != FORBIDDENBYTE)
2361 fputs(" *** OUCH", stderr);
2362 fputc('\n', stderr);
2363 }
2364
2365 fputs(" Because memory is corrupted at the start, the "
2366 "count of bytes requested\n"
2367 " may be bogus, and checking the trailing pad "
2368 "bytes may segfault.\n", stderr);
2369 }
2370
2371 tail = q + nbytes;
2372 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail);
2373 ok = 1;
2374 for (i = 0; i < SST; ++i) {
2375 if (tail[i] != FORBIDDENBYTE) {
2376 ok = 0;
2377 break;
2378 }
2379 }
2380 if (ok)
2381 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2382 else {
2383 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2384 FORBIDDENBYTE);
2385 for (i = 0; i < SST; ++i) {
2386 const uint8_t byte = tail[i];
2387 fprintf(stderr, " at tail+%d: 0x%02x",
2388 i, byte);
2389 if (byte != FORBIDDENBYTE)
2390 fputs(" *** OUCH", stderr);
2391 fputc('\n', stderr);
2392 }
2393 }
2394
2395 serial = read_size_t(tail + SST);
2396 fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T
2397 "u to debug malloc/realloc.\n", serial);
2398
2399 if (nbytes > 0) {
2400 i = 0;
2401 fputs(" Data at p:", stderr);
2402 /* print up to 8 bytes at the start */
2403 while (q < tail && i < 8) {
2404 fprintf(stderr, " %02x", *q);
2405 ++i;
2406 ++q;
2407 }
2408 /* and up to 8 at the end */
2409 if (q < tail) {
2410 if (tail - q > 8) {
2411 fputs(" ...", stderr);
2412 q = tail - 8;
2413 }
2414 while (q < tail) {
2415 fprintf(stderr, " %02x", *q);
2416 ++q;
2417 }
2418 }
2419 fputc('\n', stderr);
2420 }
2421 fputc('\n', stderr);
2422
2423 fflush(stderr);
2424 _PyMem_DumpTraceback(fileno(stderr), p);
2425 }
2426
2427
2428 static size_t
2429 printone(FILE *out, const char* msg, size_t value)
2430 {
2431 int i, k;
2432 char buf[100];
2433 size_t origvalue = value;
2434
2435 fputs(msg, out);
2436 for (i = (int)strlen(msg); i < 35; ++i)
2437 fputc(' ', out);
2438 fputc('=', out);
2439
2440 /* Write the value with commas. */
2441 i = 22;
2442 buf[i--] = '\0';
2443 buf[i--] = '\n';
2444 k = 3;
2445 do {
2446 size_t nextvalue = value / 10;
2447 unsigned int digit = (unsigned int)(value - nextvalue * 10);
2448 value = nextvalue;
2449 buf[i--] = (char)(digit + '0');
2450 --k;
2451 if (k == 0 && value && i >= 0) {
2452 k = 3;
2453 buf[i--] = ',';
2454 }
2455 } while (value && i >= 0);
2456
2457 while (i >= 0)
2458 buf[i--] = ' ';
2459 fputs(buf, out);
2460
2461 return origvalue;
2462 }
2463
2464 void
2465 _PyDebugAllocatorStats(FILE *out,
2466 const char *block_name, int num_blocks, size_t sizeof_block)
2467 {
2468 char buf1[128];
2469 char buf2[128];
2470 PyOS_snprintf(buf1, sizeof(buf1),
2471 "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2472 num_blocks, block_name, sizeof_block);
2473 PyOS_snprintf(buf2, sizeof(buf2),
2474 "%48s ", buf1);
2475 (void)printone(out, buf2, num_blocks * sizeof_block);
2476 }
2477
2478
2479 #ifdef WITH_PYMALLOC
2480
2481 #ifdef Py_DEBUG
2482 /* Is target in the list? The list is traversed via the nextpool pointers.
2483 * The list may be NULL-terminated, or circular. Return 1 if target is in
2484 * list, else 0.
2485 */
2486 static int
2487 pool_is_in_list(const poolp target, poolp list)
2488 {
2489 poolp origlist = list;
2490 assert(target != NULL);
2491 if (list == NULL)
2492 return 0;
2493 do {
2494 if (target == list)
2495 return 1;
2496 list = list->nextpool;
2497 } while (list != NULL && list != origlist);
2498 return 0;
2499 }
2500 #endif
2501
2502 /* Print summary info to "out" about the state of pymalloc's structures.
2503 * In Py_DEBUG mode, also perform some expensive internal consistency
2504 * checks.
2505 *
2506 * Return 0 if the memory debug hooks are not installed or no statistics were
2507 * written into out; return 1 otherwise.
2508 */
2509 int
2510 _PyObject_DebugMallocStats(FILE *out)
2511 {
2512 if (!_PyMem_PymallocEnabled()) {
2513 return 0;
2514 }
2515
2516 uint i;
2517 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2518 /* # of pools, allocated blocks, and free blocks per class index */
2519 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2520 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2521 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2522 /* total # of allocated bytes in used and full pools */
2523 size_t allocated_bytes = 0;
2524 /* total # of available bytes in used pools */
2525 size_t available_bytes = 0;
2526 /* # of free pools + pools not yet carved out of current arena */
2527 uint numfreepools = 0;
2528 /* # of bytes for arena alignment padding */
2529 size_t arena_alignment = 0;
2530 /* # of bytes in used and full pools used for pool_headers */
2531 size_t pool_header_bytes = 0;
2532 /* # of bytes in used and full pools wasted due to quantization,
2533 * i.e. the necessarily leftover space at the ends of used and
2534 * full pools.
2535 */
2536 size_t quantization = 0;
2537 /* # of arenas actually allocated. */
2538 size_t narenas = 0;
2539 /* running total -- should equal narenas * ARENA_SIZE */
2540 size_t total;
2541 char buf[128];
2542
2543 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2544 SMALL_REQUEST_THRESHOLD, numclasses);
2545
2546 for (i = 0; i < numclasses; ++i)
2547 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2548
2549 /* Because full pools aren't linked to from anything, it's easiest
2550 * to march over all the arenas. If we're lucky, most of the memory
2551 * will be living in full pools -- would be a shame to miss them.
2552 */
2553 for (i = 0; i < maxarenas; ++i) {
2554 uint j;
2555 uintptr_t base = arenas[i].address;
2556
2557 /* Skip arenas which are not allocated. */
2558 if (arenas[i].address == (uintptr_t)NULL)
2559 continue;
2560 narenas += 1;
2561
2562 numfreepools += arenas[i].nfreepools;
2563
2564 /* round up to pool alignment */
2565 if (base & (uintptr_t)POOL_SIZE_MASK) {
2566 arena_alignment += POOL_SIZE;
2567 base &= ~(uintptr_t)POOL_SIZE_MASK;
2568 base += POOL_SIZE;
2569 }
2570
2571 /* visit every pool in the arena */
2572 assert(base <= (uintptr_t) arenas[i].pool_address);
2573 for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2574 ++j, base += POOL_SIZE) {
2575 poolp p = (poolp)base;
2576 const uint sz = p->szidx;
2577 uint freeblocks;
2578
2579 if (p->ref.count == 0) {
2580 /* currently unused */
2581 #ifdef Py_DEBUG
2582 assert(pool_is_in_list(p, arenas[i].freepools));
2583 #endif
2584 continue;
2585 }
2586 ++numpools[sz];
2587 numblocks[sz] += p->ref.count;
2588 freeblocks = NUMBLOCKS(sz) - p->ref.count;
2589 numfreeblocks[sz] += freeblocks;
2590 #ifdef Py_DEBUG
2591 if (freeblocks > 0)
2592 assert(pool_is_in_list(p, usedpools[sz + sz]));
2593 #endif
2594 }
2595 }
2596 assert(narenas == narenas_currently_allocated);
2597
2598 fputc('\n', out);
2599 fputs("class size num pools blocks in use avail blocks\n"
2600 "----- ---- --------- ------------- ------------\n",
2601 out);
2602
2603 for (i = 0; i < numclasses; ++i) {
2604 size_t p = numpools[i];
2605 size_t b = numblocks[i];
2606 size_t f = numfreeblocks[i];
2607 uint size = INDEX2SIZE(i);
2608 if (p == 0) {
2609 assert(b == 0 && f == 0);
2610 continue;
2611 }
2612 fprintf(out, "%5u %6u "
2613 "%11" PY_FORMAT_SIZE_T "u "
2614 "%15" PY_FORMAT_SIZE_T "u "
2615 "%13" PY_FORMAT_SIZE_T "u\n",
2616 i, size, p, b, f);
2617 allocated_bytes += b * size;
2618 available_bytes += f * size;
2619 pool_header_bytes += p * POOL_OVERHEAD;
2620 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2621 }
2622 fputc('\n', out);
2623 if (_PyMem_DebugEnabled())
2624 (void)printone(out, "# times object malloc called", serialno);
2625 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2626 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2627 (void)printone(out, "# arenas highwater mark", narenas_highwater);
2628 (void)printone(out, "# arenas allocated current", narenas);
2629
2630 PyOS_snprintf(buf, sizeof(buf),
2631 "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2632 narenas, ARENA_SIZE);
2633 (void)printone(out, buf, narenas * ARENA_SIZE);
2634
2635 fputc('\n', out);
2636
2637 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2638 total += printone(out, "# bytes in available blocks", available_bytes);
2639
2640 PyOS_snprintf(buf, sizeof(buf),
2641 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2642 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2643
2644 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2645 total += printone(out, "# bytes lost to quantization", quantization);
2646 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2647 (void)printone(out, "Total", total);
2648 return 1;
2649 }
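/* Usage note (illustration; the Python-level wiring lives outside this file):
   in CPython builds with pymalloc, these statistics can be dumped from Python
   via sys._debugmallocstats(), which ends up calling
   _PyObject_DebugMallocStats(stderr):

       >>> import sys
       >>> sys._debugmallocstats()
*/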
2650
2651 #endif /* #ifdef WITH_PYMALLOC */
2652