1 #include "Python.h"
2 #include "pycore_pymem.h" // _PyTraceMalloc_Config
3
4 #include <stdbool.h>
5
6
7 /* Defined in tracemalloc.c */
8 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
9
10
11 /* Python's malloc wrappers (see pymem.h) */
12
13 #undef uint
14 #define uint unsigned int /* assuming >= 16 bits */
15
16 /* Forward declaration */
17 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
18 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
19 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
20 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
21
22 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
23 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
24 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
25 static void _PyMem_DebugFree(void *ctx, void *p);
26
27 static void _PyObject_DebugDumpAddress(const void *p);
28 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
29
30 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
31
32 #if defined(__has_feature) /* Clang */
33 # if __has_feature(address_sanitizer) /* is ASAN enabled? */
34 # define _Py_NO_SANITIZE_ADDRESS \
35 __attribute__((no_sanitize("address")))
36 # endif
37 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */
38 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
39 # endif
40 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */
41 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
42 # endif
43 #elif defined(__GNUC__)
44 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */
45 # define _Py_NO_SANITIZE_ADDRESS \
46 __attribute__((no_sanitize_address))
47 # endif
48 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
49 // is provided only since GCC 7.
50 # if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
51 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
52 # endif
53 #endif
54
55 #ifndef _Py_NO_SANITIZE_ADDRESS
56 # define _Py_NO_SANITIZE_ADDRESS
57 #endif
58 #ifndef _Py_NO_SANITIZE_THREAD
59 # define _Py_NO_SANITIZE_THREAD
60 #endif
61 #ifndef _Py_NO_SANITIZE_MEMORY
62 # define _Py_NO_SANITIZE_MEMORY
63 #endif
64
65 #ifdef WITH_PYMALLOC
66
67 #ifdef MS_WINDOWS
68 # include <windows.h>
69 #elif defined(HAVE_MMAP)
70 # include <sys/mman.h>
71 # ifdef MAP_ANONYMOUS
72 # define ARENAS_USE_MMAP
73 # endif
74 #endif
75
76 /* Forward declaration */
77 static void* _PyObject_Malloc(void *ctx, size_t size);
78 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
79 static void _PyObject_Free(void *ctx, void *p);
80 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
81 #endif
82
83
84 /* bpo-35053: Declare tracemalloc configuration here rather than
85 Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic
86 library, whereas _Py_NewReference() requires it. */
87 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
88
89
90 static void *
_PyMem_RawMalloc(void * ctx,size_t size)91 _PyMem_RawMalloc(void *ctx, size_t size)
92 {
93 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
94 for malloc(0), which would be treated as an error. Some platforms would
95 return a pointer with no memory behind it, which would break pymalloc.
96 To solve these problems, allocate an extra byte. */
97 if (size == 0)
98 size = 1;
99 return malloc(size);
100 }
101
102 static void *
_PyMem_RawCalloc(void * ctx,size_t nelem,size_t elsize)103 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
104 {
105 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
106 for calloc(0, 0), which would be treated as an error. Some platforms
107 would return a pointer with no memory behind it, which would break
108 pymalloc. To solve these problems, allocate an extra byte. */
109 if (nelem == 0 || elsize == 0) {
110 nelem = 1;
111 elsize = 1;
112 }
113 return calloc(nelem, elsize);
114 }
115
116 static void *
_PyMem_RawRealloc(void * ctx,void * ptr,size_t size)117 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
118 {
119 if (size == 0)
120 size = 1;
121 return realloc(ptr, size);
122 }
123
124 static void
_PyMem_RawFree(void * ctx,void * ptr)125 _PyMem_RawFree(void *ctx, void *ptr)
126 {
127 free(ptr);
128 }
129
130
131 #ifdef MS_WINDOWS
132 static void *
_PyObject_ArenaVirtualAlloc(void * ctx,size_t size)133 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
134 {
135 return VirtualAlloc(NULL, size,
136 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
137 }
138
139 static void
_PyObject_ArenaVirtualFree(void * ctx,void * ptr,size_t size)140 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
141 {
142 VirtualFree(ptr, 0, MEM_RELEASE);
143 }
144
145 #elif defined(ARENAS_USE_MMAP)
146 static void *
_PyObject_ArenaMmap(void * ctx,size_t size)147 _PyObject_ArenaMmap(void *ctx, size_t size)
148 {
149 void *ptr;
150 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
151 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
152 if (ptr == MAP_FAILED)
153 return NULL;
154 assert(ptr != NULL);
155 return ptr;
156 }
157
158 static void
_PyObject_ArenaMunmap(void * ctx,void * ptr,size_t size)159 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
160 {
161 munmap(ptr, size);
162 }
163
164 #else
165 static void *
_PyObject_ArenaMalloc(void * ctx,size_t size)166 _PyObject_ArenaMalloc(void *ctx, size_t size)
167 {
168 return malloc(size);
169 }
170
171 static void
_PyObject_ArenaFree(void * ctx,void * ptr,size_t size)172 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
173 {
174 free(ptr);
175 }
176 #endif
177
178 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
179 #ifdef WITH_PYMALLOC
180 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
181 #endif
182
183 #define PYRAW_ALLOC MALLOC_ALLOC
184 #ifdef WITH_PYMALLOC
185 # define PYOBJ_ALLOC PYMALLOC_ALLOC
186 #else
187 # define PYOBJ_ALLOC MALLOC_ALLOC
188 #endif
189 #define PYMEM_ALLOC PYOBJ_ALLOC
190
191 typedef struct {
192 /* We tag each block with an API ID in order to tag API violations */
193 char api_id;
194 PyMemAllocatorEx alloc;
195 } debug_alloc_api_t;
196 static struct {
197 debug_alloc_api_t raw;
198 debug_alloc_api_t mem;
199 debug_alloc_api_t obj;
200 } _PyMem_Debug = {
201 {'r', PYRAW_ALLOC},
202 {'m', PYMEM_ALLOC},
203 {'o', PYOBJ_ALLOC}
204 };
205
206 #define PYDBGRAW_ALLOC \
207 {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
208 #define PYDBGMEM_ALLOC \
209 {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
210 #define PYDBGOBJ_ALLOC \
211 {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212
213 #ifdef Py_DEBUG
214 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
215 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
216 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
217 #else
218 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
219 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
220 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
221 #endif
222
223
224 static int
pymem_set_default_allocator(PyMemAllocatorDomain domain,int debug,PyMemAllocatorEx * old_alloc)225 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
226 PyMemAllocatorEx *old_alloc)
227 {
228 if (old_alloc != NULL) {
229 PyMem_GetAllocator(domain, old_alloc);
230 }
231
232
233 PyMemAllocatorEx new_alloc;
234 switch(domain)
235 {
236 case PYMEM_DOMAIN_RAW:
237 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
238 break;
239 case PYMEM_DOMAIN_MEM:
240 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
241 break;
242 case PYMEM_DOMAIN_OBJ:
243 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
244 break;
245 default:
246 /* unknown domain */
247 return -1;
248 }
249 PyMem_SetAllocator(domain, &new_alloc);
250 if (debug) {
251 _PyMem_SetupDebugHooksDomain(domain);
252 }
253 return 0;
254 }
255
256
257 int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * old_alloc)258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259 PyMemAllocatorEx *old_alloc)
260 {
261 #ifdef Py_DEBUG
262 const int debug = 1;
263 #else
264 const int debug = 0;
265 #endif
266 return pymem_set_default_allocator(domain, debug, old_alloc);
267 }
268
269
270 int
_PyMem_GetAllocatorName(const char * name,PyMemAllocatorName * allocator)271 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
272 {
273 if (name == NULL || *name == '\0') {
274 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
275 nameions): use default memory allocators */
276 *allocator = PYMEM_ALLOCATOR_DEFAULT;
277 }
278 else if (strcmp(name, "default") == 0) {
279 *allocator = PYMEM_ALLOCATOR_DEFAULT;
280 }
281 else if (strcmp(name, "debug") == 0) {
282 *allocator = PYMEM_ALLOCATOR_DEBUG;
283 }
284 #ifdef WITH_PYMALLOC
285 else if (strcmp(name, "pymalloc") == 0) {
286 *allocator = PYMEM_ALLOCATOR_PYMALLOC;
287 }
288 else if (strcmp(name, "pymalloc_debug") == 0) {
289 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
290 }
291 #endif
292 else if (strcmp(name, "malloc") == 0) {
293 *allocator = PYMEM_ALLOCATOR_MALLOC;
294 }
295 else if (strcmp(name, "malloc_debug") == 0) {
296 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
297 }
298 else {
299 /* unknown allocator */
300 return -1;
301 }
302 return 0;
303 }
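
/* Illustrative sketch (not part of the build): how a caller might combine
   _PyMem_GetAllocatorName() with _PyMem_SetupAllocators() (defined below) to
   honor the PYTHONMALLOC environment variable.  The function name and the
   simplified error handling are hypothetical; the real logic lives in the
   preconfiguration code. */
#if 0
static int
apply_pythonmalloc_env(void)
{
    PyMemAllocatorName allocator;
    /* _PyMem_GetAllocatorName() accepts NULL/empty and maps it to
       PYMEM_ALLOCATOR_DEFAULT. */
    if (_PyMem_GetAllocatorName(getenv("PYTHONMALLOC"), &allocator) < 0) {
        return -1;   /* unknown allocator name */
    }
    return _PyMem_SetupAllocators(allocator);
}
#endif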


int
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
{
    switch (allocator) {
    case PYMEM_ALLOCATOR_NOT_SET:
        /* do nothing */
        break;

    case PYMEM_ALLOCATOR_DEFAULT:
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
        break;

    case PYMEM_ALLOCATOR_DEBUG:
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
        break;

#ifdef WITH_PYMALLOC
    case PYMEM_ALLOCATOR_PYMALLOC:
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
    {
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);

        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);

        if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
            PyMem_SetupDebugHooks();
        }
        break;
    }
#endif

    case PYMEM_ALLOCATOR_MALLOC:
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
    {
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);

        if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
            PyMem_SetupDebugHooks();
        }
        break;
    }

    default:
        /* unknown allocator */
        return -1;
    }
    return 0;
}


static int
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
{
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
}


const char*
_PyMem_GetCurrentAllocatorName(void)
{
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
#ifdef WITH_PYMALLOC
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
#endif

    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
        pymemallocator_eq(&_PyObject, &malloc_alloc))
    {
        return "malloc";
    }
#ifdef WITH_PYMALLOC
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &pymalloc) &&
        pymemallocator_eq(&_PyObject, &pymalloc))
    {
        return "pymalloc";
    }
#endif

    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;

    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
        pymemallocator_eq(&_PyObject, &dbg_obj))
    {
        /* Debug hooks installed */
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
        {
            return "malloc_debug";
        }
#ifdef WITH_PYMALLOC
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
        {
            return "pymalloc_debug";
        }
#endif
    }
    return NULL;
}


#undef MALLOC_ALLOC
#undef PYMALLOC_ALLOC
#undef PYRAW_ALLOC
#undef PYMEM_ALLOC
#undef PYOBJ_ALLOC
#undef PYDBGRAW_ALLOC
#undef PYDBGMEM_ALLOC
#undef PYDBGOBJ_ALLOC


static PyObjectArenaAllocator _PyObject_Arena = {NULL,
#ifdef MS_WINDOWS
    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
#elif defined(ARENAS_USE_MMAP)
    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
#else
    _PyObject_ArenaMalloc, _PyObject_ArenaFree
#endif
    };

#ifdef WITH_PYMALLOC
static int
_PyMem_DebugEnabled(void)
{
    return (_PyObject.malloc == _PyMem_DebugMalloc);
}

static int
_PyMem_PymallocEnabled(void)
{
    if (_PyMem_DebugEnabled()) {
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
    }
    else {
        return (_PyObject.malloc == _PyObject_Malloc);
    }
}
#endif


static void
_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
{
    PyMemAllocatorEx alloc;

    if (domain == PYMEM_DOMAIN_RAW) {
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
        alloc.ctx = &_PyMem_Debug.raw;
        alloc.malloc = _PyMem_DebugRawMalloc;
        alloc.calloc = _PyMem_DebugRawCalloc;
        alloc.realloc = _PyMem_DebugRawRealloc;
        alloc.free = _PyMem_DebugRawFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_MEM) {
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
        alloc.ctx = &_PyMem_Debug.mem;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_OBJ) {
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
        alloc.ctx = &_PyMem_Debug.obj;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    }
}


void
PyMem_SetupDebugHooks(void)
{
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
}

void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    default:
        /* unknown domain: set all attributes to NULL */
        allocator->ctx = NULL;
        allocator->malloc = NULL;
        allocator->calloc = NULL;
        allocator->realloc = NULL;
        allocator->free = NULL;
    }
}

void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    /* ignore unknown domain */
    }
}
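
/* Illustrative sketch (not part of the build): PyMem_GetAllocator() and
   PyMem_SetAllocator() are the documented public API for hooking a domain.
   This sketch wraps the current "mem" domain malloc with a hook that counts
   calls and forwards to the saved allocator.  The hook and counter names are
   hypothetical. */
#if 0
static PyMemAllocatorEx orig_mem_alloc;
static size_t nmalloc_calls = 0;

static void *
counting_malloc(void *ctx, size_t size)
{
    /* `ctx` is unused: we forward to the saved allocator with its own ctx. */
    nmalloc_calls++;
    return orig_mem_alloc.malloc(orig_mem_alloc.ctx, size);
}

static void
install_counting_hook(void)
{
    PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &orig_mem_alloc);
    PyMemAllocatorEx alloc = orig_mem_alloc;  /* keep ctx/calloc/realloc/free */
    alloc.malloc = counting_malloc;
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
}
#endif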

void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    *allocator = _PyObject_Arena;
}

void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    _PyObject_Arena = *allocator;
}

void *
PyMem_RawMalloc(size_t size)
{
    /*
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
     * Most python internals blindly use a signed Py_ssize_t to track
     * things without checking for overflows or negatives.
     * As size_t is unsigned, checking for size < 0 is not required.
     */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}

void *
PyMem_RawCalloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
}

void*
PyMem_RawRealloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
}

void
PyMem_RawFree(void *ptr)
{
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}


void *
PyMem_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.malloc(_PyMem.ctx, size);
}

void *
PyMem_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
}

void *
PyMem_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
}

void
PyMem_Free(void *ptr)
{
    _PyMem.free(_PyMem.ctx, ptr);
}


wchar_t*
_PyMem_RawWcsdup(const wchar_t *str)
{
    assert(str != NULL);

    size_t len = wcslen(str);
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
        return NULL;
    }

    size_t size = (len + 1) * sizeof(wchar_t);
    wchar_t *str2 = PyMem_RawMalloc(size);
    if (str2 == NULL) {
        return NULL;
    }

    memcpy(str2, str, size);
    return str2;
}

char *
_PyMem_RawStrdup(const char *str)
{
    assert(str != NULL);
    size_t size = strlen(str) + 1;
    char *copy = PyMem_RawMalloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, size);
    return copy;
}

char *
_PyMem_Strdup(const char *str)
{
    assert(str != NULL);
    size_t size = strlen(str) + 1;
    char *copy = PyMem_Malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, size);
    return copy;
}

void *
PyObject_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.malloc(_PyObject.ctx, size);
}

void *
PyObject_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
}

void *
PyObject_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
}

void
PyObject_Free(void *ptr)
{
    _PyObject.free(_PyObject.ctx, ptr);
}


/* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#  define UNLIKELY(value) __builtin_expect((value), 0)
#  define LIKELY(value) __builtin_expect((value), 1)
#else
#  define UNLIKELY(value) (value)
#  define LIKELY(value) (value)
#endif

#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>

/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif


/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.


    Object-specific allocators
    _____   ______   ______       ________
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    _______________________________       |                           |
   [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
    ______________________________________________________________    |
   [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
    __________________________________________________________________
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
    _______________________________________________________________________
   [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    __________________________________   __________________________________
   [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
 * system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which degenerates over time. To avoid
 * this, we partition each free list in pools and we share dynamically the
 * reserved space between all free lists. This technique is quite efficient
 * for memory intensive programs which allocate mainly small-sized blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      497-504                 504                      62
 *      505-512                 512                      63
 *
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
 *      allocator.
 */

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-bytes alignment works
 * on most current architectures (with 32-bit or 64-bit address busses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */

#if SIZEOF_VOID_P > 4
#define ALIGNMENT              16               /* must be 2^N */
#define ALIGNMENT_SHIFT         4
#else
#define ALIGNMENT               8               /* must be 2^N */
#define ALIGNMENT_SHIFT         3
#endif

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * Note: a size threshold of 512 guarantees that newly created dictionaries
 * will be allocated from preallocated memory pools on 64-bit.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 512
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
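
/* Illustrative sketch (not part of the build): the mapping between a request
   size and its size class, using the same arithmetic as INDEX2SIZE() and the
   allocator below.  The function name is hypothetical.  E.g. on a 64-bit
   build (ALIGNMENT == 16), a 25-byte request maps to class idx 1, which is
   served by a 32-byte block. */
#if 0
static uint
size_class_of(size_t nbytes)
{
    assert(nbytes > 0 && nbytes <= SMALL_REQUEST_THRESHOLD);
    uint idx = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;   /* size class idx */
    assert(INDEX2SIZE(idx) >= nbytes);             /* block is big enough */
    assert(INDEX2SIZE(idx) - nbytes < ALIGNMENT);  /* and no class is closer */
    return idx;
}
#endif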

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
 * violation fault. 4K is apparently OK for all the platforms that python
 * currently targets.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc()/mmap() call). In no way does
 * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
 * usually an address range reservation for <Big> bytes, unless all pages within
 * this space are referenced subsequently. So malloc'ing big blocks and not
 * using them does not mean "wasting memory". It's an addressable range
 * wastage...
 *
 * Arenas are allocated with mmap() on systems supporting anonymous memory
 * mappings to reduce heap fragmentation.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK

#define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
#   error "arena size not an exact multiple of pool size"
#endif

/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uint8_t block;

/* Pool for small blocks. */
struct pool_header {
    union { block *_padding;
            uint count; } ref;          /* number of allocated blocks    */
    block *freeblock;                   /* pool's free list head         */
    struct pool_header *nextpool;       /* next pool of this size class  */
    struct pool_header *prevpool;       /* previous pool       ""        */
    uint arenaindex;                    /* index into arenas of base adr */
    uint szidx;                         /* block size class index        */
    uint nextoffset;                    /* bytes to virgin block         */
    uint maxnextoffset;                 /* largest valid nextoffset      */
};

typedef struct pool_header *poolp;

/* Record keeping for arenas. */
struct arena_object {
    /* The address of the arena, as returned by malloc. Note that 0
     * will never be returned by a successful malloc, and is used
     * here to mark an arena_object that doesn't correspond to an
     * allocated arena.
     */
    uintptr_t address;

    /* Pool-aligned pointer to the next pool to be carved off. */
    block* pool_address;

    /* The number of available pools in the arena: free pools + never-
     * allocated pools.
     */
    uint nfreepools;

    /* The total number of pools in the arena, whether or not available. */
    uint ntotalpools;

    /* Singly-linked list of available pools. */
    struct pool_header* freepools;

    /* Whenever this arena_object is not associated with an allocated
     * arena, the nextarena member is used to link all unassociated
     * arena_objects in the singly-linked `unused_arena_objects` list.
     * The prevarena member is unused in this case.
     *
     * When this arena_object is associated with an allocated arena
     * with at least one available pool, both members are used in the
     * doubly-linked `usable_arenas` list, which is maintained in
     * increasing order of `nfreepools` values.
     *
     * Else this arena_object is associated with an allocated arena
     * all of whose pools are in use. `nextarena` and `prevarena`
     * are both meaningless in this case.
     */
    struct arena_object* nextarena;
    struct arena_object* prevarena;
};

#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)

#define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))

/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
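
/* Illustrative sketch (not part of the build): what NUMBLOCKS() works out to
   on a typical 64-bit build, assuming sizeof(struct pool_header) == 48 and
   ALIGNMENT == 16, hence POOL_OVERHEAD == 48 (assumptions, not checked). */
#if 0
static void
numblocks_example(void)
{
    /* smallest class, 16-byte blocks: (4096 - 48) / 16 == 253 blocks */
    assert(NUMBLOCKS(0) == 253);
    /* largest class, 512-byte blocks: (4096 - 48) / 512 == 7 blocks */
    assert(NUMBLOCKS(NB_SMALL_SIZE_CLASSES - 1) == 7);
}
#endif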

/*==========================================================================*/

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved. For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off an arena's highwater mark (an arena_object's pool_address
member) as needed. Once carved off, a pool is in one of three states forever
after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i] (see above). It's linked to the other used pools of the
    same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state. If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's not linked to from anything then anymore, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of its arena_object's singly-linked freepools list,
    via its nextpool member. The prevpool member has no meaning in this case.
    Empty pools have no inherent size class: the next time a malloc finds
    an empty list in usedpools[], it takes the first pool off of freepools.
    If the size class needed happens to be the same as the size class the pool
    last had, some pool initialization can be skipped.


Block Management

Blocks within pools are again carved out as needed. pool->freeblock points to
the start of a singly-linked list of free blocks within the pool. When a
block is freed, it's inserted at the front of its pool's freeblock list. Note
that the available blocks in a pool are *not* linked all together when a pool
is initialized. Instead only "the first two" (lowest addresses) blocks are
set up, returning the first such block, and setting pool->freeblock to a
one-block list holding the second such block. This is consistent with
pymalloc striving, at all levels (arena, pool, and block), never to touch a
piece of memory until it's actually needed.

So long as a pool is in the used state, we're certain there *is* a block
available for allocating, and pool->freeblock is not NULL. If pool->freeblock
points to the end of the free list before we've carved the entire pool into
blocks, that means we simply haven't yet gotten to one of the higher-address
blocks. The offset from the pool_header to the start of "the next" virgin
block is stored in the pool_header nextoffset member, and the largest value
of nextoffset that makes sense is stored in the maxnextoffset member when a
pool is initialized. All the blocks in a pool have been passed out at least
once when and only when nextoffset > maxnextoffset.


Major obscurity: While the usedpools vector is declared to have poolp
entries, it doesn't really. It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header. The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
compensating for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consumes sizeof(block *) bytes. So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted. It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */

#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#if NB_SMALL_SIZE_CLASSES > 64
#error "NB_SMALL_SIZE_CLASSES must not be greater than 64"
#endif /* NB_SMALL_SIZE_CLASSES > 64 */
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};
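
/* Illustrative sketch (not part of the build): what the PTA() trick buys us.
   Immediately after initialization, before any pool has been linked in, each
   conceptual usedpools[i+i] header, viewed as a poolp, is an empty circular
   list whose nextpool and prevpool point back at the header itself, exactly
   as the "Major obscurity" comment above describes. */
#if 0
static void
usedpools_invariant_example(void)
{
    for (uint i = 0; i < NB_SMALL_SIZE_CLASSES; i++) {
        poolp header = usedpools[i + i];
        /* Only nextpool/prevpool may be dereferenced through `header`. */
        assert(header->nextpool == header);
        assert(header->prevpool == header);
    }
}
#endif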

/*==========================================================================
Arena management.

`arenas` is a vector of arena_objects. It contains maxarenas entries, some of
which may not be currently used (== they're arena_objects that aren't
currently associated with an allocated arena). Note that arenas proper are
separately malloc'ed.

Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
we do try to free() arenas, and use some mild heuristic strategies to increase
the likelihood that arenas eventually can be freed.

unused_arena_objects

    This is a singly-linked list of the arena_objects that are currently not
    being used (no arena is associated with them). Objects are taken off the
    head of the list in new_arena(), and are pushed on the head of the list in
    PyObject_Free() when the arena is empty. Key invariant: an arena_object
    is on this list if and only if its .address member is 0.

usable_arenas

    This is a doubly-linked list of the arena_objects associated with arenas
    that have pools available. These pools are either waiting to be reused,
    or have not been used before. The list is sorted to have the most-
    allocated arenas first (ascending order based on the nfreepools member).
    This means that the next allocation will come from a heavily used arena,
    which gives the nearly empty arenas a chance to be returned to the system.
    In my unscientific tests this dramatically improved the number of arenas
    that could be freed.

Note that an arena_object associated with an arena all of whose pools are
currently in use isn't on either list.

Changed in Python 3.8: keeping usable_arenas sorted by number of free pools
used to be done by one-at-a-time linear search when an arena's number of
free pools changed. That could, overall, consume time quadratic in the
number of arenas. That didn't really matter when there were only a few
hundred arenas (typical!), but could be a timing disaster when there were
hundreds of thousands. See bpo-37029.

Now we have a vector of "search fingers" to eliminate the need to search:
nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
with nfp free pools. This is NULL if and only if there is no arena with
nfp free pools in usable_arenas.
*/

/* Array of objects used to track chunks of memory (arenas). */
static struct arena_object* arenas = NULL;
/* Number of slots currently allocated in the `arenas` vector. */
static uint maxarenas = 0;

/* The head of the singly-linked, NULL-terminated list of available
 * arena_objects.
 */
static struct arena_object* unused_arena_objects = NULL;

/* The head of the doubly-linked, NULL-terminated at each end, list of
 * arena_objects associated with arenas that have pools available.
 */
static struct arena_object* usable_arenas = NULL;

/* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };

/* How many arena_objects do we initially allocate?
 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
 * `arenas` vector.
 */
#define INITIAL_ARENA_OBJECTS 16

/* Number of arenas allocated that haven't been free()'d. */
static size_t narenas_currently_allocated = 0;

/* Total number of times malloc() called to allocate an arena. */
static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;

static Py_ssize_t raw_allocated_blocks;

Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    Py_ssize_t n = raw_allocated_blocks;
    /* add up allocated blocks for used pools */
    for (uint i = 0; i < maxarenas; ++i) {
        /* Skip arenas which are not allocated. */
        if (arenas[i].address == 0) {
            continue;
        }

        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) arenas[i].pool_address);
        for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            n += p->ref.count;
        }
    }
    return n;
}


/* Allocate a new arena. If we run out of memory, return NULL. Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena. It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(void)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;
    static int debug_stats = -1;

    if (debug_stats == -1) {
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
    }
    if (debug_stats)
        _PyObject_DebugMallocStats(stderr);

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*arenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*arenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        arenas = arenaobj;

        /* We might need to fix pointers that were copied. However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full. Thus, there are *no* pointers
         * into the old array. Thus, we don't have to worry about
         * invalid pointers. Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            arenas[i].address = 0;              /* mark as unassociated */
            arenas[i].nextarena = i < numarenas - 1 ?
                                   &arenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &arenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (block*)arenaobj->address;
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}


/*
address_in_range(P, POOL)

Return true if and only if P is an address that was allocated by pymalloc.
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
(the caller is asked to compute this because the macro expands POOL more than
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
variable and pass the latter to the macro; because address_in_range is
called on every alloc/realloc/free, micro-efficiency is important here).

Tricky: Let B be the arena base address associated with the pool, B =
arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if

    B <= P < B + ARENA_SIZE

Subtracting B throughout, this is true iff

    0 <= P-B < ARENA_SIZE

By using unsigned arithmetic, the "0 <=" half of the test can be skipped.

Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
before the first arena has been allocated. `arenas` is still NULL in that
case. We rely on maxarenas also being 0 in that case, so that
(POOL)->arenaindex < maxarenas must be false, saving us from trying to index
into a NULL arenas.

Details: given P and POOL, the arena_object corresponding to P is AO =
arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
stores, etc), POOL is the correct address of P's pool, AO.address is the
correct base address of the pool's arena, and P must be within ARENA_SIZE of
AO.address. In addition, AO.address is not 0 (no arena can start at address 0
(NULL)). Therefore address_in_range correctly reports that obmalloc
controls P.

Now suppose obmalloc does not control P (e.g., P was obtained via a direct
call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
in this case -- it may even be uninitialized trash. If the trash arenaindex
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
control P.

Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
allocated arena, obmalloc controls all the memory in slice AO.address :
AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
so P doesn't lie in that slice, so the macro correctly reports that P is not
controlled by obmalloc.

Finally, if P is not controlled by obmalloc and AO corresponds to an unused
arena_object (one not currently associated with an allocated arena),
AO.address is 0, and the second test in the macro reduces to:

    P < ARENA_SIZE

If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
of the test still passes, and the third clause (AO.address != 0) is necessary
to get the correct result: AO.address is 0 in this case, so the macro
correctly reports that P is not controlled by obmalloc (despite that P lies in
slice AO.address : AO.address + ARENA_SIZE).

Note: The third (AO.address != 0) clause was added in Python 2.5. Before
2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
corresponded to a currently-allocated arena, so the "P is not controlled by
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
was impossible.

Note that the logic is excruciating, and reading up possibly uninitialized
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
creates problems for some memory debuggers. The overwhelming advantage is
that this test determines whether an arbitrary address is controlled by
obmalloc in a small constant time, independent of the number of arenas
obmalloc controls. Since this test is needed at every entry point, it's
extremely desirable that it be this fast.
*/

static bool _Py_NO_SANITIZE_ADDRESS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    return arenaindex < maxarenas &&
        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
        arenas[arenaindex].address != 0;
}
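
/* Illustrative sketch (not part of the build): the unsigned-arithmetic trick
   used above. If p is below the arena base, the unsigned subtraction wraps
   around to a huge value, so a single comparison covers both "p >= base" and
   "p < base + ARENA_SIZE". The base address below is made up for the
   example. */
#if 0
static void
range_check_example(void)
{
    uintptr_t base = 0x100000;
    /* first byte of the arena: in range */
    assert((uintptr_t)0x100000 - base < ARENA_SIZE);
    /* one byte below the base: subtraction wraps, so out of range */
    assert(!((uintptr_t)0x0FFFFF - base < ARENA_SIZE));
    /* first byte past the arena: out of range */
    assert(!(((uintptr_t)0x100000 + ARENA_SIZE) - base < ARENA_SIZE));
}
#endif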
1425
1426
1427 /*==========================================================================*/
1428
1429 // Called when freelist is exhausted. Extend the freelist if there is
1430 // space for a block. Otherwise, remove this pool from usedpools.
1431 static void
pymalloc_pool_extend(poolp pool,uint size)1432 pymalloc_pool_extend(poolp pool, uint size)
1433 {
1434 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1435 /* There is room for another block. */
1436 pool->freeblock = (block*)pool + pool->nextoffset;
1437 pool->nextoffset += INDEX2SIZE(size);
1438 *(block **)(pool->freeblock) = NULL;
1439 return;
1440 }
1441
1442 /* Pool is full, unlink from used pools. */
1443 poolp next;
1444 next = pool->nextpool;
1445 pool = pool->prevpool;
1446 next->prevpool = pool;
1447 pool->nextpool = next;
1448 }
1449
1450 /* called when pymalloc_alloc can not allocate a block from usedpool.
1451 * This function takes new pool and allocate a block from it.
1452 */
1453 static void*
allocate_from_new_pool(uint size)1454 allocate_from_new_pool(uint size)
1455 {
1456 /* There isn't a pool of the right size class immediately
1457 * available: use a free pool.
1458 */
1459 if (UNLIKELY(usable_arenas == NULL)) {
1460 /* No arena has a free pool: allocate a new arena. */
1461 #ifdef WITH_MEMORY_LIMITS
1462 if (narenas_currently_allocated >= MAX_ARENAS) {
1463 return NULL;
1464 }
1465 #endif
1466 usable_arenas = new_arena();
1467 if (usable_arenas == NULL) {
1468 return NULL;
1469 }
1470 usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1471 assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1472 nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1473 }
1474 assert(usable_arenas->address != 0);
1475
1476 /* This arena already had the smallest nfreepools value, so decreasing
1477 * nfreepools doesn't change that, and we don't need to rearrange the
1478 * usable_arenas list. However, if the arena becomes wholly allocated,
1479 * we need to remove its arena_object from usable_arenas.
1480 */
1481 assert(usable_arenas->nfreepools > 0);
1482 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1483 /* It's the last of this size, so there won't be any. */
1484 nfp2lasta[usable_arenas->nfreepools] = NULL;
1485 }
1486 /* If any free pools will remain, it will be the new smallest. */
1487 if (usable_arenas->nfreepools > 1) {
1488 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1489 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1490 }
1491
1492 /* Try to get a cached free pool. */
1493 poolp pool = usable_arenas->freepools;
1494 if (LIKELY(pool != NULL)) {
1495 /* Unlink from cached pools. */
1496 usable_arenas->freepools = pool->nextpool;
1497 usable_arenas->nfreepools--;
1498 if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1499 /* Wholly allocated: remove. */
1500 assert(usable_arenas->freepools == NULL);
1501 assert(usable_arenas->nextarena == NULL ||
1502 usable_arenas->nextarena->prevarena ==
1503 usable_arenas);
1504 usable_arenas = usable_arenas->nextarena;
1505 if (usable_arenas != NULL) {
1506 usable_arenas->prevarena = NULL;
1507 assert(usable_arenas->address != 0);
1508 }
1509 }
1510 else {
1511 /* nfreepools > 0: it must be that freepools
1512 * isn't NULL, or that we haven't yet carved
1513 * off all the arena's pools for the first
1514 * time.
1515 */
1516 assert(usable_arenas->freepools != NULL ||
1517 usable_arenas->pool_address <=
1518 (block*)usable_arenas->address +
1519 ARENA_SIZE - POOL_SIZE);
1520 }
1521 }
1522 else {
1523 /* Carve off a new pool. */
1524 assert(usable_arenas->nfreepools > 0);
1525 assert(usable_arenas->freepools == NULL);
1526 pool = (poolp)usable_arenas->pool_address;
1527 assert((block*)pool <= (block*)usable_arenas->address +
1528 ARENA_SIZE - POOL_SIZE);
1529 pool->arenaindex = (uint)(usable_arenas - arenas);
1530 assert(&arenas[pool->arenaindex] == usable_arenas);
1531 pool->szidx = DUMMY_SIZE_IDX;
1532 usable_arenas->pool_address += POOL_SIZE;
1533 --usable_arenas->nfreepools;
1534
1535 if (usable_arenas->nfreepools == 0) {
1536 assert(usable_arenas->nextarena == NULL ||
1537 usable_arenas->nextarena->prevarena ==
1538 usable_arenas);
1539 /* Unlink the arena: it is completely allocated. */
1540 usable_arenas = usable_arenas->nextarena;
1541 if (usable_arenas != NULL) {
1542 usable_arenas->prevarena = NULL;
1543 assert(usable_arenas->address != 0);
1544 }
1545 }
1546 }
1547
1548 /* Frontlink to used pools. */
1549 block *bp;
1550 poolp next = usedpools[size + size]; /* == prev */
1551 pool->nextpool = next;
1552 pool->prevpool = next;
1553 next->nextpool = pool;
1554 next->prevpool = pool;
1555 pool->ref.count = 1;
1556 if (pool->szidx == size) {
1557 /* Luckily, this pool last contained blocks
1558 * of the same size class, so its header
1559 * and free list are already initialized.
1560 */
1561 bp = pool->freeblock;
1562 assert(bp != NULL);
1563 pool->freeblock = *(block **)bp;
1564 return bp;
1565 }
1566 /*
1567 * Initialize the pool header, set up the free list to
1568 * contain just the second block, and return the first
1569 * block.
1570 */
1571 pool->szidx = size;
1572 size = INDEX2SIZE(size);
1573 bp = (block *)pool + POOL_OVERHEAD;
1574 pool->nextoffset = POOL_OVERHEAD + (size << 1);
1575 pool->maxnextoffset = POOL_SIZE - size;
1576 pool->freeblock = bp + size;
1577 *(block **)(pool->freeblock) = NULL;
1578 return bp;
1579 }
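/* Illustrative sketch (not part of the allocator): each pool threads its
 * free blocks through the blocks themselves.  The first sizeof(block *)
 * bytes of a free block hold the address of the next free block, so
 * popping the head of the list is just:
 *
 *     block *bp = pool->freeblock;       // head of the free list
 *     pool->freeblock = *(block **)bp;   // next free block, or NULL
 *
 * Pushing a freed block back is the mirror image, as done in
 * pymalloc_free() below.  A NULL link means the list is exhausted, though
 * the pool may still have never-carved blocks past `nextoffset`.
 */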
1580
1581 /* pymalloc allocator
1582
1583 Return a pointer to newly allocated memory if pymalloc allocated memory.
1584
1585 Return NULL if pymalloc did not allocate the memory block: the request was
1586 larger than SMALL_REQUEST_THRESHOLD, an error occurred below (the caller then
1587 falls back to the raw allocator), or the max memory limit has been reached.
1588 */
1589 static inline void*
1590 pymalloc_alloc(void *ctx, size_t nbytes)
1591 {
1592 #ifdef WITH_VALGRIND
1593 if (UNLIKELY(running_on_valgrind == -1)) {
1594 running_on_valgrind = RUNNING_ON_VALGRIND;
1595 }
1596 if (UNLIKELY(running_on_valgrind)) {
1597 return NULL;
1598 }
1599 #endif
1600
1601 if (UNLIKELY(nbytes == 0)) {
1602 return NULL;
1603 }
1604 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1605 return NULL;
1606 }
1607
1608 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1609 poolp pool = usedpools[size + size];
1610 block *bp;
1611
1612 if (LIKELY(pool != pool->nextpool)) {
1613 /*
1614 * There is a used pool for this size class.
1615 * Pick up the head block of its free list.
1616 */
1617 ++pool->ref.count;
1618 bp = pool->freeblock;
1619 assert(bp != NULL);
1620
1621 if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1622 // Reached the end of the free list, try to extend it.
1623 pymalloc_pool_extend(pool, size);
1624 }
1625 }
1626 else {
1627 /* There isn't a pool of the right size class immediately
1628 * available: use a free pool.
1629 */
1630 bp = allocate_from_new_pool(size);
1631 }
1632
1633 return (void *)bp;
1634 }
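/* Worked example (a sketch, assuming ALIGNMENT == 16 and so
 * ALIGNMENT_SHIFT == 4, as on typical 64-bit builds): a request of
 * nbytes == 20 computes
 *
 *     size = (20 - 1) >> 4 == 1
 *
 * which selects the 32-byte size class (INDEX2SIZE(1) == 32), and
 * usedpools[1 + 1] is then the header of the circular list of
 * partially-used pools for that class.  Requests above
 * SMALL_REQUEST_THRESHOLD never get this far and fall through to the
 * raw allocator instead.
 */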
1635
1636
1637 static void *
1638 _PyObject_Malloc(void *ctx, size_t nbytes)
1639 {
1640 void* ptr = pymalloc_alloc(ctx, nbytes);
1641 if (LIKELY(ptr != NULL)) {
1642 return ptr;
1643 }
1644
1645 ptr = PyMem_RawMalloc(nbytes);
1646 if (ptr != NULL) {
1647 raw_allocated_blocks++;
1648 }
1649 return ptr;
1650 }
1651
1652
1653 static void *
1654 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1655 {
1656 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1657 size_t nbytes = nelem * elsize;
1658
1659 void* ptr = pymalloc_alloc(ctx, nbytes);
1660 if (LIKELY(ptr != NULL)) {
1661 memset(ptr, 0, nbytes);
1662 return ptr;
1663 }
1664
1665 ptr = PyMem_RawCalloc(nelem, elsize);
1666 if (ptr != NULL) {
1667 raw_allocated_blocks++;
1668 }
1669 return ptr;
1670 }
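/* The assert in _PyObject_Calloc() encodes a contract rather than a runtime
 * check: public callers are expected to reject overflowing requests before
 * getting here.  A minimal sketch of such a caller-side guard:
 *
 *     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) {
 *         return NULL;   // nelem * elsize would overflow
 *     }
 */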
1671
1672
1673 static void
1674 insert_to_usedpool(poolp pool)
1675 {
1676 assert(pool->ref.count > 0); /* else the pool is empty */
1677
1678 uint size = pool->szidx;
1679 poolp next = usedpools[size + size];
1680 poolp prev = next->prevpool;
1681
1682 /* insert pool before next: prev <-> pool <-> next */
1683 pool->nextpool = next;
1684 pool->prevpool = prev;
1685 next->prevpool = pool;
1686 prev->nextpool = pool;
1687 }
1688
1689 static void
1690 insert_to_freepool(poolp pool)
1691 {
1692 poolp next = pool->nextpool;
1693 poolp prev = pool->prevpool;
1694 next->prevpool = prev;
1695 prev->nextpool = next;
1696
1697 /* Link the pool to freepools. This is a singly-linked
1698 * list, and pool->prevpool isn't used there.
1699 */
1700 struct arena_object *ao = &arenas[pool->arenaindex];
1701 pool->nextpool = ao->freepools;
1702 ao->freepools = pool;
1703 uint nf = ao->nfreepools;
1704 /* If this is the rightmost arena with this number of free pools,
1705 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there
1706 * are no arenas in usable_arenas with that value.
1707 */
1708 struct arena_object* lastnf = nfp2lasta[nf];
1709 assert((nf == 0 && lastnf == NULL) ||
1710 (nf > 0 &&
1711 lastnf != NULL &&
1712 lastnf->nfreepools == nf &&
1713 (lastnf->nextarena == NULL ||
1714 nf < lastnf->nextarena->nfreepools)));
1715 if (lastnf == ao) { /* it is the rightmost */
1716 struct arena_object* p = ao->prevarena;
1717 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
1718 }
1719 ao->nfreepools = ++nf;
1720
1721 /* All the rest is arena management. We just freed
1722 * a pool, and there are 4 cases for arena mgmt:
1723 * 1. If all the pools are free, return the arena to
1724 * the system free(). Except if this is the last
1725 * arena in the list, keep it to avoid thrashing:
1726 * keeping one wholly free arena in the list avoids
1727 * pathological cases where a simple loop would
1728 * otherwise provoke needing to allocate and free an
1729 * arena on every iteration. See bpo-37257.
1730 * 2. If this is the only free pool in the arena,
1731 * add the arena back to the `usable_arenas` list.
1732 * 3. If the "next" arena has a smaller count of free
1733 * pools, we have to "slide this arena right" to
1734 * restore that usable_arenas is sorted in order of
1735 * nfreepools.
1736 * 4. Else there's nothing more to do.
1737 */
1738 if (nf == ao->ntotalpools && ao->nextarena != NULL) {
1739 /* Case 1. First unlink ao from usable_arenas.
1740 */
1741 assert(ao->prevarena == NULL ||
1742 ao->prevarena->address != 0);
1743 assert(ao->nextarena == NULL ||
1744 ao->nextarena->address != 0);
1745
1746 /* Fix the pointer in the prevarena, or the
1747 * usable_arenas pointer.
1748 */
1749 if (ao->prevarena == NULL) {
1750 usable_arenas = ao->nextarena;
1751 assert(usable_arenas == NULL ||
1752 usable_arenas->address != 0);
1753 }
1754 else {
1755 assert(ao->prevarena->nextarena == ao);
1756 ao->prevarena->nextarena =
1757 ao->nextarena;
1758 }
1759 /* Fix the pointer in the nextarena. */
1760 if (ao->nextarena != NULL) {
1761 assert(ao->nextarena->prevarena == ao);
1762 ao->nextarena->prevarena =
1763 ao->prevarena;
1764 }
1765 /* Record that this arena_object slot is
1766 * available to be reused.
1767 */
1768 ao->nextarena = unused_arena_objects;
1769 unused_arena_objects = ao;
1770
1771 /* Free the entire arena. */
1772 _PyObject_Arena.free(_PyObject_Arena.ctx,
1773 (void *)ao->address, ARENA_SIZE);
1774 ao->address = 0; /* mark unassociated */
1775 --narenas_currently_allocated;
1776
1777 return;
1778 }
1779
1780 if (nf == 1) {
1781 /* Case 2. Put ao at the head of
1782 * usable_arenas. Note that because
1783 * ao->nfreepools was 0 before, ao isn't
1784 * currently on the usable_arenas list.
1785 */
1786 ao->nextarena = usable_arenas;
1787 ao->prevarena = NULL;
1788 if (usable_arenas)
1789 usable_arenas->prevarena = ao;
1790 usable_arenas = ao;
1791 assert(usable_arenas->address != 0);
1792 if (nfp2lasta[1] == NULL) {
1793 nfp2lasta[1] = ao;
1794 }
1795
1796 return;
1797 }
1798
1799 /* If this arena is now out of order, we need to keep
1800 * the list sorted. The list is kept sorted so that
1801 * the "most full" arenas are used first, which allows
1802 * the nearly empty arenas to be completely freed. In
1803 * a few un-scientific tests, it seems like this
1804 * approach allowed a lot more memory to be freed.
1805 */
1806 /* If this is the only arena with nf, record that. */
1807 if (nfp2lasta[nf] == NULL) {
1808 nfp2lasta[nf] = ao;
1809 } /* else the rightmost with nf doesn't change */
1810 /* If this was the rightmost of the old size, it remains in place. */
1811 if (ao == lastnf) {
1812 /* Case 4. Nothing to do. */
1813 return;
1814 }
1815 /* If ao were the only arena in the list, the last block would have
1816 * gotten us out.
1817 */
1818 assert(ao->nextarena != NULL);
1819
1820 /* Case 3: We have to move the arena towards the end of the list,
1821 * because it has more free pools than the arena to its right. It needs
1822 * to move to follow lastnf.
1823 * First unlink ao from usable_arenas.
1824 */
1825 if (ao->prevarena != NULL) {
1826 /* ao isn't at the head of the list */
1827 assert(ao->prevarena->nextarena == ao);
1828 ao->prevarena->nextarena = ao->nextarena;
1829 }
1830 else {
1831 /* ao is at the head of the list */
1832 assert(usable_arenas == ao);
1833 usable_arenas = ao->nextarena;
1834 }
1835 ao->nextarena->prevarena = ao->prevarena;
1836 /* And insert after lastnf. */
1837 ao->prevarena = lastnf;
1838 ao->nextarena = lastnf->nextarena;
1839 if (ao->nextarena != NULL) {
1840 ao->nextarena->prevarena = ao;
1841 }
1842 lastnf->nextarena = ao;
1843 /* Verify that the swaps worked. */
1844 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
1845 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
1846 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
1847 assert((usable_arenas == ao && ao->prevarena == NULL)
1848 || ao->prevarena->nextarena == ao);
1849 }
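/* Illustrative invariant (a sketch): usable_arenas is kept sorted by
 * ascending nfreepools, and nfp2lasta[k] points at the rightmost arena
 * with exactly k free pools, e.g.:
 *
 *     usable_arenas -> A(nf=1) -> B(nf=1) -> C(nf=3) -> NULL
 *     nfp2lasta[1] == B, nfp2lasta[2] == NULL, nfp2lasta[3] == C
 *
 * Because freeing a pool changes an arena's count by exactly one, the
 * arena can be relinked after nfp2lasta[nf] in O(1) rather than by
 * searching the list.
 */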
1850
1851 /* Free a memory block allocated by pymalloc_alloc().
1852 Return 1 if it was freed.
1853 Return 0 if the block was not allocated by pymalloc_alloc(). */
1854 static inline int
1855 pymalloc_free(void *ctx, void *p)
1856 {
1857 assert(p != NULL);
1858
1859 #ifdef WITH_VALGRIND
1860 if (UNLIKELY(running_on_valgrind > 0)) {
1861 return 0;
1862 }
1863 #endif
1864
1865 poolp pool = POOL_ADDR(p);
1866 if (UNLIKELY(!address_in_range(p, pool))) {
1867 return 0;
1868 }
1869 /* We allocated this address. */
1870
1871 /* Link p to the start of the pool's freeblock list. Since
1872 * the pool had at least the p block outstanding, the pool
1873 * wasn't empty (so it's already in a usedpools[] list, or
1874 * was full and is in no list -- it's not in the freeblocks
1875 * list in any case).
1876 */
1877 assert(pool->ref.count > 0); /* else it was empty */
1878 block *lastfree = pool->freeblock;
1879 *(block **)p = lastfree;
1880 pool->freeblock = (block *)p;
1881 pool->ref.count--;
1882
1883 if (UNLIKELY(lastfree == NULL)) {
1884 /* Pool was full, so doesn't currently live in any list:
1885 * link it to the front of the appropriate usedpools[] list.
1886 * This mimics LRU pool usage for new allocations and
1887 * targets optimal filling when several pools contain
1888 * blocks of the same size class.
1889 */
1890 insert_to_usedpool(pool);
1891 return 1;
1892 }
1893
1894 /* freeblock wasn't NULL, so the pool wasn't full,
1895 * and the pool is in a usedpools[] list.
1896 */
1897 if (LIKELY(pool->ref.count != 0)) {
1898 /* pool isn't empty: leave it in usedpools */
1899 return 1;
1900 }
1901
1902 /* Pool is now empty: unlink from usedpools, and
1903  * link to the front of freepools. This ensures that
1904  * pools freed longest ago are reused last (being
1905  * unreferenced, they may have been paged out).
1906 */
1907 insert_to_freepool(pool);
1908 return 1;
1909 }
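/* Summary of the state transitions above (a sketch), with ref.count being
 * the number of blocks still allocated from the pool:
 *
 *     full -> used : lastfree == NULL, frontlink via insert_to_usedpool()
 *     used -> used : ref.count still nonzero, nothing to relink
 *     used -> free : ref.count drops to 0, insert_to_freepool()
 */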
1910
1911
1912 static void
1913 _PyObject_Free(void *ctx, void *p)
1914 {
1915 /* PyObject_Free(NULL) has no effect */
1916 if (p == NULL) {
1917 return;
1918 }
1919
1920 if (UNLIKELY(!pymalloc_free(ctx, p))) {
1921 /* pymalloc didn't allocate this address */
1922 PyMem_RawFree(p);
1923 raw_allocated_blocks--;
1924 }
1925 }
1926
1927
1928 /* pymalloc realloc.
1929
1930 If nbytes==0, then as the Python docs promise, we do not treat this like
1931 free(p), and return a non-NULL result.
1932
1933 Return 1 if pymalloc reallocated memory and wrote the new pointer into
1934 newptr_p.
1935
1936 Return 0 if pymalloc didn't allocate p. */
1937 static int
1938 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
1939 {
1940 void *bp;
1941 poolp pool;
1942 size_t size;
1943
1944 assert(p != NULL);
1945
1946 #ifdef WITH_VALGRIND
1947 /* Treat running_on_valgrind == -1 the same as 0 */
1948 if (UNLIKELY(running_on_valgrind > 0)) {
1949 return 0;
1950 }
1951 #endif
1952
1953 pool = POOL_ADDR(p);
1954 if (!address_in_range(p, pool)) {
1955 /* pymalloc is not managing this block.
1956
1957 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
1958 over this block. However, if we do, we need to copy the valid data
1959 from the C-managed block to one of our blocks, and there's no
1960 portable way to know how much of the memory space starting at p is
1961 valid.
1962
1963 As bug 1185883 pointed out the hard way, it's possible that the
1964 C-managed block is "at the end" of allocated VM space, so that a
1965 memory fault can occur if we try to copy nbytes bytes starting at p.
1966 Instead we punt: let C continue to manage this block. */
1967 return 0;
1968 }
1969
1970 /* pymalloc is in charge of this block */
1971 size = INDEX2SIZE(pool->szidx);
1972 if (nbytes <= size) {
1973 /* The block is staying the same or shrinking.
1974
1975 If it's shrinking, there's a tradeoff: it costs cycles to copy the
1976 block to a smaller size class, but it wastes memory not to copy it.
1977
1978 The compromise here is to copy on shrink only if at least 25% of
1979 size can be shaved off. */
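        /* Worked example (hypothetical size class): for size == 512,
           requests of 385..512 bytes stay in place, while 384 bytes or
           fewer copy to a smaller class, since 4*384 <= 3*512. */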
1980 if (4 * nbytes > 3 * size) {
1981 /* It's the same, or shrinking and new/old > 3/4. */
1982 *newptr_p = p;
1983 return 1;
1984 }
1985 size = nbytes;
1986 }
1987
1988 bp = _PyObject_Malloc(ctx, nbytes);
1989 if (bp != NULL) {
1990 memcpy(bp, p, size);
1991 _PyObject_Free(ctx, p);
1992 }
1993 *newptr_p = bp;
1994 return 1;
1995 }
1996
1997
1998 static void *
1999 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2000 {
2001 void *ptr2;
2002
2003 if (ptr == NULL) {
2004 return _PyObject_Malloc(ctx, nbytes);
2005 }
2006
2007 if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2008 return ptr2;
2009 }
2010
2011 return PyMem_RawRealloc(ptr, nbytes);
2012 }
2013
2014 #else /* ! WITH_PYMALLOC */
2015
2016 /*==========================================================================*/
2017 /* pymalloc not enabled: Redirect the entry points to malloc. These will
2018 * only be used by extensions that are compiled with pymalloc enabled. */
2019
2020 Py_ssize_t
2021 _Py_GetAllocatedBlocks(void)
2022 {
2023 return 0;
2024 }
2025
2026 #endif /* WITH_PYMALLOC */
2027
2028
2029 /*==========================================================================*/
2030 /* A cross-platform debugging allocator. This doesn't manage memory directly;
2031 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2032 */
2033
2034 /* Uncomment this define to add the "serialno" field */
2035 /* #define PYMEM_DEBUG_SERIALNO */
2036
2037 #ifdef PYMEM_DEBUG_SERIALNO
2038 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
2039
2040 /* serialno is always incremented via calling this routine. The point is
2041 * to supply a single place to set a breakpoint.
2042 */
2043 static void
2044 bumpserialno(void)
2045 {
2046 ++serialno;
2047 }
2048 #endif
2049
2050 #define SST SIZEOF_SIZE_T
2051
2052 #ifdef PYMEM_DEBUG_SERIALNO
2053 # define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2054 #else
2055 # define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2056 #endif
2057
2058 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2059 static size_t
2060 read_size_t(const void *p)
2061 {
2062 const uint8_t *q = (const uint8_t *)p;
2063 size_t result = *q++;
2064 int i;
2065
2066 for (i = SST; --i > 0; ++q)
2067 result = (result << 8) | *q;
2068 return result;
2069 }
2070
2071 /* Write n as a big-endian size_t, MSB at address p, LSB at
2072 * p + sizeof(size_t) - 1.
2073 */
2074 static void
2075 write_size_t(void *p, size_t n)
2076 {
2077 uint8_t *q = (uint8_t *)p + SST - 1;
2078 int i;
2079
2080 for (i = SST; --i >= 0; --q) {
2081 *q = (uint8_t)(n & 0xff);
2082 n >>= 8;
2083 }
2084 }
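/* Round-trip example (a sketch, assuming SST == 8): write_size_t() stores
 * n MSB-first, so n == 0x0102 becomes the bytes
 *
 *     00 00 00 00 00 00 01 02
 *
 * at p, and read_size_t(p) folds them back with (result << 8) | *q to
 * recover 0x0102.  Big-endian order is used so the size reads left to
 * right in a hex memory dump regardless of host endianness.
 */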
2085
2086 /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and
2087 fills them with useful stuff, here calling the underlying malloc's result p:
2088
2089 p[0: S]
2090 Number of bytes originally asked for. This is a size_t, big-endian (easier
2091 to read in a memory dump).
2092 p[S]
2093 API ID. See PEP 445. This is a character, but seems undocumented.
2094 p[S+1: 2*S]
2095 Copies of PYMEM_FORBIDDENBYTE. Used to catch under-writes and under-reads.
2096 p[2*S: 2*S+n]
2097 The requested memory, filled with copies of PYMEM_CLEANBYTE.
2098 Used to catch reference to uninitialized memory.
2099 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
2100 handled the request itself.
2101 p[2*S+n: 2*S+n+S]
2102 Copies of PYMEM_FORBIDDENBYTE. Used to catch over-writes and over-reads.
2103 p[2*S+n+S: 2*S+n+2*S]
2104 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2105 and _PyMem_DebugRealloc.
2106 This is a big-endian size_t.
2107 If "bad memory" is detected later, the serial number gives an
2108 excellent way to set a breakpoint on the next run, to capture the
2109 instant at which this block was passed out.
2110
2111 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2112 for 3 * S extra bytes, and omits the last serialno field.
2113 */
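/* Concrete example (illustrative; assumes S == 8, PYMEM_DEBUG_SERIALNO unset,
 * PYMEM_FORBIDDENBYTE == 0xFD and PYMEM_CLEANBYTE == 0xCD): a 4-byte malloc
 * request takes 4 + 3*8 == 28 bytes from the wrapped allocator, laid out as
 *
 *     p[0:8]    00 00 00 00 00 00 00 04    requested size, big-endian
 *     p[8]      'o'                        API id (object domain, say)
 *     p[9:16]   FD FD FD FD FD FD FD       leading forbidden bytes
 *     p[16:20]  CD CD CD CD                caller's bytes, initially clean
 *     p[20:28]  FD FD FD FD FD FD FD FD    trailing forbidden bytes
 *
 * and &p[16] is returned to the caller.
 */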
2114
2115 static void *
2116 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2117 {
2118 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2119 uint8_t *p; /* base address of malloc'ed pad block */
2120 uint8_t *data; /* p + 2*SST == pointer to data bytes */
2121 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2122 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2123
2124 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2125 /* integer overflow: can't represent total as a Py_ssize_t */
2126 return NULL;
2127 }
2128 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2129
2130 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2131 ^--- p ^--- data ^--- tail
2132 S: nbytes stored as size_t
2133 I: API identifier (1 byte)
2134 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2135 C: Clean bytes used later to store actual data
2136 N: Serial number stored as size_t
2137
2138 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2139 is omitted. */
2140
2141 if (use_calloc) {
2142 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2143 }
2144 else {
2145 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2146 }
2147 if (p == NULL) {
2148 return NULL;
2149 }
2150 data = p + 2*SST;
2151
2152 #ifdef PYMEM_DEBUG_SERIALNO
2153 bumpserialno();
2154 #endif
2155
2156 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2157 write_size_t(p, nbytes);
2158 p[SST] = (uint8_t)api->api_id;
2159 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2160
2161 if (nbytes > 0 && !use_calloc) {
2162 memset(data, PYMEM_CLEANBYTE, nbytes);
2163 }
2164
2165 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2166 tail = data + nbytes;
2167 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2168 #ifdef PYMEM_DEBUG_SERIALNO
2169 write_size_t(tail + SST, serialno);
2170 #endif
2171
2172 return data;
2173 }
2174
2175 static void *
2176 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2177 {
2178 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2179 }
2180
2181 static void *
2182 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2183 {
2184 size_t nbytes;
2185 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2186 nbytes = nelem * elsize;
2187 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2188 }
2189
2190
2191 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2192 particular, that the FORBIDDENBYTEs with the api ID are still intact).
2193 Then fills the original bytes with PYMEM_DEADBYTE.
2194 Then calls the underlying free.
2195 */
2196 static void
2197 _PyMem_DebugRawFree(void *ctx, void *p)
2198 {
2199 /* PyMem_Free(NULL) has no effect */
2200 if (p == NULL) {
2201 return;
2202 }
2203
2204 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2205 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
2206 size_t nbytes;
2207
2208 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2209 nbytes = read_size_t(q);
2210 nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2211 memset(q, PYMEM_DEADBYTE, nbytes);
2212 api->alloc.free(api->alloc.ctx, q);
2213 }
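/* Effect in practice (a sketch, assuming PYMEM_DEADBYTE == 0xDD): after
 * _PyMem_DebugRawFree(ctx, p), every byte of the pad block, including the
 * caller's former data, reads back as 0xDD, so a use-after-free typically
 * surfaces as pointers and sizes full of 0xDDDDDDDD... patterns.
 */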
2214
2215
2216 static void *
2217 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2218 {
2219 if (p == NULL) {
2220 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2221 }
2222
2223 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2224 uint8_t *head; /* base address of malloc'ed pad block */
2225 uint8_t *data; /* pointer to data bytes */
2226 uint8_t *r;
2227 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2228 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2229 size_t original_nbytes;
2230 #define ERASED_SIZE 64
2231 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
2232
2233 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2234
2235 data = (uint8_t *)p;
2236 head = data - 2*SST;
2237 original_nbytes = read_size_t(head);
2238 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2239 /* integer overflow: can't represent total as a Py_ssize_t */
2240 return NULL;
2241 }
2242 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2243
2244 tail = data + original_nbytes;
2245 #ifdef PYMEM_DEBUG_SERIALNO
2246 size_t block_serialno = read_size_t(tail + SST);
2247 #endif
2248 /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning, and
2249 ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2250 */
2251 if (original_nbytes <= sizeof(save)) {
2252 memcpy(save, data, original_nbytes);
2253 memset(data - 2 * SST, PYMEM_DEADBYTE,
2254 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2255 }
2256 else {
2257 memcpy(save, data, ERASED_SIZE);
2258 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2259 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2260 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2261 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2262 }
2263
2264 /* Resize and add decorations. */
2265 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2266 if (r == NULL) {
2267 /* if realloc() failed: rewrite header and footer which have
2268 just been erased */
2269 nbytes = original_nbytes;
2270 }
2271 else {
2272 head = r;
2273 #ifdef PYMEM_DEBUG_SERIALNO
2274 bumpserialno();
2275 block_serialno = serialno;
2276 #endif
2277 }
2278 data = head + 2*SST;
2279
2280 write_size_t(head, nbytes);
2281 head[SST] = (uint8_t)api->api_id;
2282 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2283
2284 tail = data + nbytes;
2285 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2286 #ifdef PYMEM_DEBUG_SERIALNO
2287 write_size_t(tail + SST, block_serialno);
2288 #endif
2289
2290 /* Restore saved bytes. */
2291 if (original_nbytes <= sizeof(save)) {
2292 memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2293 }
2294 else {
2295 size_t i = original_nbytes - ERASED_SIZE;
2296 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2297 if (nbytes > i) {
2298 memcpy(data + i, &save[ERASED_SIZE],
2299 Py_MIN(nbytes - i, ERASED_SIZE));
2300 }
2301 }
2302
2303 if (r == NULL) {
2304 return NULL;
2305 }
2306
2307 if (nbytes > original_nbytes) {
2308 /* growing: mark new extra memory clean */
2309 memset(data + original_nbytes, PYMEM_CLEANBYTE,
2310 nbytes - original_nbytes);
2311 }
2312
2313 return data;
2314 }
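/* Why the save/erase dance above (a sketch): the underlying realloc() may
 * free the old block, and a buggy caller could still read it through a
 * stale pointer.  Painting the header, the trailer, and up to ERASED_SIZE
 * bytes at each end with PYMEM_DEADBYTE before the call makes such
 * use-after-realloc reads show up as dead-byte patterns, while the saved
 * copy lets the live data be restored into the resized block afterwards.
 */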
2315
2316 static inline void
2317 _PyMem_DebugCheckGIL(const char *func)
2318 {
2319 if (!PyGILState_Check()) {
2320 _Py_FatalErrorFunc(func,
2321 "Python memory allocator called "
2322 "without holding the GIL");
2323 }
2324 }
2325
2326 static void *
2327 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2328 {
2329 _PyMem_DebugCheckGIL(__func__);
2330 return _PyMem_DebugRawMalloc(ctx, nbytes);
2331 }
2332
2333 static void *
2334 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2335 {
2336 _PyMem_DebugCheckGIL(__func__);
2337 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2338 }
2339
2340
2341 static void
2342 _PyMem_DebugFree(void *ctx, void *ptr)
2343 {
2344 _PyMem_DebugCheckGIL(__func__);
2345 _PyMem_DebugRawFree(ctx, ptr);
2346 }
2347
2348
2349 static void *
2350 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2351 {
2352 _PyMem_DebugCheckGIL(__func__);
2353 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2354 }
2355
2356 /* Check the forbidden bytes on both ends of the memory allocated for p.
2357 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2358 * and call Py_FatalError to kill the program.
2359 * The API id is also checked.
2360 */
2361 static void
2362 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2363 {
2364 assert(p != NULL);
2365
2366 const uint8_t *q = (const uint8_t *)p;
2367 size_t nbytes;
2368 const uint8_t *tail;
2369 int i;
2370 char id;
2371
2372 /* Check the API id */
2373 id = (char)q[-SST];
2374 if (id != api) {
2375 _PyObject_DebugDumpAddress(p);
2376 _Py_FatalErrorFormat(func,
2377 "bad ID: Allocated using API '%c', "
2378 "verified using API '%c'",
2379 id, api);
2380 }
2381
2382 /* Check the stuff at the start of p first: if there's underwrite
2383 * corruption, the number-of-bytes field may be nuts, and checking
2384 * the tail could lead to a segfault then.
2385 */
2386 for (i = SST-1; i >= 1; --i) {
2387 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2388 _PyObject_DebugDumpAddress(p);
2389 _Py_FatalErrorFunc(func, "bad leading pad byte");
2390 }
2391 }
2392
2393 nbytes = read_size_t(q - 2*SST);
2394 tail = q + nbytes;
2395 for (i = 0; i < SST; ++i) {
2396 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2397 _PyObject_DebugDumpAddress(p);
2398 _Py_FatalErrorFunc(func, "bad trailing pad byte");
2399 }
2400 }
2401 }
2402
2403 /* Display info to stderr about the memory block at p. */
2404 static void
2405 _PyObject_DebugDumpAddress(const void *p)
2406 {
2407 const uint8_t *q = (const uint8_t *)p;
2408 const uint8_t *tail;
2409 size_t nbytes;
2410 int i;
2411 int ok;
2412 char id;
2413
2414 fprintf(stderr, "Debug memory block at address p=%p:", p);
2415 if (p == NULL) {
2416 fprintf(stderr, "\n");
2417 return;
2418 }
2419 id = (char)q[-SST];
2420 fprintf(stderr, " API '%c'\n", id);
2421
2422 nbytes = read_size_t(q - 2*SST);
2423 fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally "
2424 "requested\n", nbytes);
2425
2426 /* In case this is nuts, check the leading pad bytes first. */
2427 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2428 ok = 1;
2429 for (i = 1; i <= SST-1; ++i) {
2430 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2431 ok = 0;
2432 break;
2433 }
2434 }
2435 if (ok)
2436 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2437 else {
2438 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2439 PYMEM_FORBIDDENBYTE);
2440 for (i = SST-1; i >= 1; --i) {
2441 const uint8_t byte = *(q-i);
2442 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2443 if (byte != PYMEM_FORBIDDENBYTE)
2444 fputs(" *** OUCH", stderr);
2445 fputc('\n', stderr);
2446 }
2447
2448 fputs(" Because memory is corrupted at the start, the "
2449 "count of bytes requested\n"
2450 " may be bogus, and checking the trailing pad "
2451 "bytes may segfault.\n", stderr);
2452 }
2453
2454 tail = q + nbytes;
2455 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail);
2456 ok = 1;
2457 for (i = 0; i < SST; ++i) {
2458 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2459 ok = 0;
2460 break;
2461 }
2462 }
2463 if (ok)
2464 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2465 else {
2466 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2467 PYMEM_FORBIDDENBYTE);
2468 for (i = 0; i < SST; ++i) {
2469 const uint8_t byte = tail[i];
2470 fprintf(stderr, " at tail+%d: 0x%02x",
2471 i, byte);
2472 if (byte != PYMEM_FORBIDDENBYTE)
2473 fputs(" *** OUCH", stderr);
2474 fputc('\n', stderr);
2475 }
2476 }
2477
2478 #ifdef PYMEM_DEBUG_SERIALNO
2479 size_t serial = read_size_t(tail + SST);
2480 fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T
2481 "u to debug malloc/realloc.\n", serial);
2482 #endif
2483
2484 if (nbytes > 0) {
2485 i = 0;
2486 fputs(" Data at p:", stderr);
2487 /* print up to 8 bytes at the start */
2488 while (q < tail && i < 8) {
2489 fprintf(stderr, " %02x", *q);
2490 ++i;
2491 ++q;
2492 }
2493 /* and up to 8 at the end */
2494 if (q < tail) {
2495 if (tail - q > 8) {
2496 fputs(" ...", stderr);
2497 q = tail - 8;
2498 }
2499 while (q < tail) {
2500 fprintf(stderr, " %02x", *q);
2501 ++q;
2502 }
2503 }
2504 fputc('\n', stderr);
2505 }
2506 fputc('\n', stderr);
2507
2508 fflush(stderr);
2509 _PyMem_DumpTraceback(fileno(stderr), p);
2510 }
2511
2512
2513 static size_t
2514 printone(FILE *out, const char *msg, size_t value)
2515 {
2516 int i, k;
2517 char buf[100];
2518 size_t origvalue = value;
2519
2520 fputs(msg, out);
2521 for (i = (int)strlen(msg); i < 35; ++i)
2522 fputc(' ', out);
2523 fputc('=', out);
2524
2525 /* Write the value with commas. */
2526 i = 22;
2527 buf[i--] = '\0';
2528 buf[i--] = '\n';
2529 k = 3;
2530 do {
2531 size_t nextvalue = value / 10;
2532 unsigned int digit = (unsigned int)(value - nextvalue * 10);
2533 value = nextvalue;
2534 buf[i--] = (char)(digit + '0');
2535 --k;
2536 if (k == 0 && value && i >= 0) {
2537 k = 3;
2538 buf[i--] = ',';
2539 }
2540 } while (value && i >= 0);
2541
2542 while (i >= 0)
2543 buf[i--] = ' ';
2544 fputs(buf, out);
2545
2546 return origvalue;
2547 }
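/* Example output (a sketch): printone(out, "# arenas allocated total",
 * 1234567) pads the message to column 35, writes '=', then the value
 * right-justified with comma grouping, roughly:
 *
 *     # arenas allocated total           =            1,234,567
 */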
2548
2549 void
2550 _PyDebugAllocatorStats(FILE *out,
2551 const char *block_name, int num_blocks, size_t sizeof_block)
2552 {
2553 char buf1[128];
2554 char buf2[128];
2555 PyOS_snprintf(buf1, sizeof(buf1),
2556 "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2557 num_blocks, block_name, sizeof_block);
2558 PyOS_snprintf(buf2, sizeof(buf2),
2559 "%48s ", buf1);
2560 (void)printone(out, buf2, num_blocks * sizeof_block);
2561 }
2562
2563
2564 #ifdef WITH_PYMALLOC
2565
2566 #ifdef Py_DEBUG
2567 /* Is target in the list? The list is traversed via the nextpool pointers.
2568 * The list may be NULL-terminated, or circular. Return 1 if target is in
2569 * list, else 0.
2570 */
2571 static int
2572 pool_is_in_list(const poolp target, poolp list)
2573 {
2574 poolp origlist = list;
2575 assert(target != NULL);
2576 if (list == NULL)
2577 return 0;
2578 do {
2579 if (target == list)
2580 return 1;
2581 list = list->nextpool;
2582 } while (list != NULL && list != origlist);
2583 return 0;
2584 }
2585 #endif
2586
2587 /* Print summary info to "out" about the state of pymalloc's structures.
2588 * In Py_DEBUG mode, also perform some expensive internal consistency
2589 * checks.
2590 *
2591 * Return 0 if the memory debug hooks are not installed or no statistics were
2592 * written into out; return 1 otherwise.
2593 */
2594 int
2595 _PyObject_DebugMallocStats(FILE *out)
2596 {
2597 if (!_PyMem_PymallocEnabled()) {
2598 return 0;
2599 }
2600
2601 uint i;
2602 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2603 /* # of pools, allocated blocks, and free blocks per class index */
2604 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2605 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2606 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2607 /* total # of allocated bytes in used and full pools */
2608 size_t allocated_bytes = 0;
2609 /* total # of available bytes in used pools */
2610 size_t available_bytes = 0;
2611 /* # of free pools + pools not yet carved out of current arena */
2612 uint numfreepools = 0;
2613 /* # of bytes for arena alignment padding */
2614 size_t arena_alignment = 0;
2615 /* # of bytes in used and full pools used for pool_headers */
2616 size_t pool_header_bytes = 0;
2617 /* # of bytes in used and full pools wasted due to quantization,
2618 * i.e. the necessarily leftover space at the ends of used and
2619 * full pools.
2620 */
2621 size_t quantization = 0;
2622 /* # of arenas actually allocated. */
2623 size_t narenas = 0;
2624 /* running total -- should equal narenas * ARENA_SIZE */
2625 size_t total;
2626 char buf[128];
2627
2628 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2629 SMALL_REQUEST_THRESHOLD, numclasses);
2630
2631 for (i = 0; i < numclasses; ++i)
2632 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2633
2634 /* Because full pools aren't linked to from anything, it's easiest
2635 * to march over all the arenas. If we're lucky, most of the memory
2636 * will be living in full pools -- would be a shame to miss them.
2637 */
2638 for (i = 0; i < maxarenas; ++i) {
2639 uint j;
2640 uintptr_t base = arenas[i].address;
2641
2642 /* Skip arenas which are not allocated. */
2643 if (arenas[i].address == (uintptr_t)NULL)
2644 continue;
2645 narenas += 1;
2646
2647 numfreepools += arenas[i].nfreepools;
2648
2649 /* round up to pool alignment */
2650 if (base & (uintptr_t)POOL_SIZE_MASK) {
2651 arena_alignment += POOL_SIZE;
2652 base &= ~(uintptr_t)POOL_SIZE_MASK;
2653 base += POOL_SIZE;
2654 }
2655
2656 /* visit every pool in the arena */
2657 assert(base <= (uintptr_t) arenas[i].pool_address);
2658 for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2659 ++j, base += POOL_SIZE) {
2660 poolp p = (poolp)base;
2661 const uint sz = p->szidx;
2662 uint freeblocks;
2663
2664 if (p->ref.count == 0) {
2665 /* currently unused */
2666 #ifdef Py_DEBUG
2667 assert(pool_is_in_list(p, arenas[i].freepools));
2668 #endif
2669 continue;
2670 }
2671 ++numpools[sz];
2672 numblocks[sz] += p->ref.count;
2673 freeblocks = NUMBLOCKS(sz) - p->ref.count;
2674 numfreeblocks[sz] += freeblocks;
2675 #ifdef Py_DEBUG
2676 if (freeblocks > 0)
2677 assert(pool_is_in_list(p, usedpools[sz + sz]));
2678 #endif
2679 }
2680 }
2681 assert(narenas == narenas_currently_allocated);
2682
2683 fputc('\n', out);
2684 fputs("class size num pools blocks in use avail blocks\n"
2685 "----- ---- --------- ------------- ------------\n",
2686 out);
2687
2688 for (i = 0; i < numclasses; ++i) {
2689 size_t p = numpools[i];
2690 size_t b = numblocks[i];
2691 size_t f = numfreeblocks[i];
2692 uint size = INDEX2SIZE(i);
2693 if (p == 0) {
2694 assert(b == 0 && f == 0);
2695 continue;
2696 }
2697 fprintf(out, "%5u %6u "
2698 "%11" PY_FORMAT_SIZE_T "u "
2699 "%15" PY_FORMAT_SIZE_T "u "
2700 "%13" PY_FORMAT_SIZE_T "u\n",
2701 i, size, p, b, f);
2702 allocated_bytes += b * size;
2703 available_bytes += f * size;
2704 pool_header_bytes += p * POOL_OVERHEAD;
2705 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2706 }
2707 fputc('\n', out);
2708 #ifdef PYMEM_DEBUG_SERIALNO
2709 if (_PyMem_DebugEnabled()) {
2710 (void)printone(out, "# times object malloc called", serialno);
2711 }
2712 #endif
2713 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2714 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2715 (void)printone(out, "# arenas highwater mark", narenas_highwater);
2716 (void)printone(out, "# arenas allocated current", narenas);
2717
2718 PyOS_snprintf(buf, sizeof(buf),
2719 "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2720 narenas, ARENA_SIZE);
2721 (void)printone(out, buf, narenas * ARENA_SIZE);
2722
2723 fputc('\n', out);
2724
2725 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2726 total += printone(out, "# bytes in available blocks", available_bytes);
2727
2728 PyOS_snprintf(buf, sizeof(buf),
2729 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2730 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2731
2732 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2733 total += printone(out, "# bytes lost to quantization", quantization);
2734 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2735 (void)printone(out, "Total", total);
2736 return 1;
2737 }
2738
2739 #endif /* #ifdef WITH_PYMALLOC */
2740