#include "Python.h"
#include "pycore_pymem.h"

#include <stdbool.h>


/* Defined in tracemalloc.c */
extern void _PyMem_DumpTraceback(int fd, const void *ptr);


/* Python's malloc wrappers (see pymem.h) */

#undef  uint
#define uint    unsigned int    /* assuming >= 16 bits */

/* Forward declaration */
static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
static void _PyMem_DebugRawFree(void *ctx, void *ptr);

static void* _PyMem_DebugMalloc(void *ctx, size_t size);
static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
static void _PyMem_DebugFree(void *ctx, void *p);

static void _PyObject_DebugDumpAddress(const void *p);
static void _PyMem_DebugCheckAddress(char api_id, const void *p);

static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);

#if defined(__has_feature)  /* Clang */
#  if __has_feature(address_sanitizer)  /* is ASAN enabled? */
#    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
        __attribute__((no_address_safety_analysis))
#  endif
#  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
#  endif
#  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
#    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
#  endif
#elif defined(__GNUC__)
#  if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8+, is ASAN enabled? */
#    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
        __attribute__((no_address_safety_analysis))
#  endif
   // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
   // is provided only since GCC 7.
#  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
#  endif
#endif

#ifndef _Py_NO_ADDRESS_SAFETY_ANALYSIS
#  define _Py_NO_ADDRESS_SAFETY_ANALYSIS
#endif
#ifndef _Py_NO_SANITIZE_THREAD
#  define _Py_NO_SANITIZE_THREAD
#endif
#ifndef _Py_NO_SANITIZE_MEMORY
#  define _Py_NO_SANITIZE_MEMORY
#endif

#ifdef WITH_PYMALLOC

#ifdef MS_WINDOWS
#  include <windows.h>
#elif defined(HAVE_MMAP)
#  include <sys/mman.h>
#  ifdef MAP_ANONYMOUS
#    define ARENAS_USE_MMAP
#  endif
#endif

/* Forward declaration */
static void* _PyObject_Malloc(void *ctx, size_t size);
static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
static void _PyObject_Free(void *ctx, void *p);
static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
#endif


/* bpo-35053: Declare tracemalloc configuration here rather than
   Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic
   library, whereas _Py_NewReference() requires it. */
struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;


static void *
_PyMem_RawMalloc(void *ctx, size_t size)
{
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
       for malloc(0), which would be treated as an error. Some platforms would
       return a pointer with no memory behind it, which would break pymalloc.
       To solve these problems, allocate an extra byte. */
    if (size == 0)
        size = 1;
    return malloc(size);
}

static void *
_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
{
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
       for calloc(0, 0), which would be treated as an error. Some platforms
       would return a pointer with no memory behind it, which would break
       pymalloc. To solve these problems, allocate an extra byte. */
    if (nelem == 0 || elsize == 0) {
        nelem = 1;
        elsize = 1;
    }
    return calloc(nelem, elsize);
}

static void *
_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
{
    if (size == 0)
        size = 1;
    return realloc(ptr, size);
}

static void
_PyMem_RawFree(void *ctx, void *ptr)
{
    free(ptr);
}


#ifdef MS_WINDOWS
static void *
_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
{
    return VirtualAlloc(NULL, size,
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
}

static void
_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
{
    VirtualFree(ptr, 0, MEM_RELEASE);
}

#elif defined(ARENAS_USE_MMAP)
static void *
_PyObject_ArenaMmap(void *ctx, size_t size)
{
    void *ptr;
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED)
        return NULL;
    assert(ptr != NULL);
    return ptr;
}

static void
_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
{
    munmap(ptr, size);
}

#else
static void *
_PyObject_ArenaMalloc(void *ctx, size_t size)
{
    return malloc(size);
}

static void
_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
{
    free(ptr);
}
#endif

#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
#ifdef WITH_PYMALLOC
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
#endif

#define PYRAW_ALLOC MALLOC_ALLOC
#ifdef WITH_PYMALLOC
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
#else
#  define PYOBJ_ALLOC MALLOC_ALLOC
#endif
#define PYMEM_ALLOC PYOBJ_ALLOC

typedef struct {
    /* We tag each block with an API ID in order to tag API violations */
    char api_id;
    PyMemAllocatorEx alloc;
} debug_alloc_api_t;
static struct {
    debug_alloc_api_t raw;
    debug_alloc_api_t mem;
    debug_alloc_api_t obj;
} _PyMem_Debug = {
    {'r', PYRAW_ALLOC},
    {'m', PYMEM_ALLOC},
    {'o', PYOBJ_ALLOC}
};

#define PYDBGRAW_ALLOC \
    {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
#define PYDBGMEM_ALLOC \
    {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
#define PYDBGOBJ_ALLOC \
    {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}

#ifdef Py_DEBUG
static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
#else
static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
#endif


static int
pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
                            PyMemAllocatorEx *old_alloc)
{
    if (old_alloc != NULL) {
        PyMem_GetAllocator(domain, old_alloc);
    }


    PyMemAllocatorEx new_alloc;
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW:
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
        break;
    case PYMEM_DOMAIN_MEM:
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
        break;
    case PYMEM_DOMAIN_OBJ:
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
        break;
    default:
        /* unknown domain */
        return -1;
    }
    PyMem_SetAllocator(domain, &new_alloc);
    if (debug) {
        _PyMem_SetupDebugHooksDomain(domain);
    }
    return 0;
}


int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
                           PyMemAllocatorEx *old_alloc)
{
#ifdef Py_DEBUG
    const int debug = 1;
#else
    const int debug = 0;
#endif
    return pymem_set_default_allocator(domain, debug, old_alloc);
}


int
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
{
    if (name == NULL || *name == '\0') {
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
           options): use default memory allocators */
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
    }
    else if (strcmp(name, "default") == 0) {
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
    }
    else if (strcmp(name, "debug") == 0) {
        *allocator = PYMEM_ALLOCATOR_DEBUG;
    }
#ifdef WITH_PYMALLOC
    else if (strcmp(name, "pymalloc") == 0) {
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
    }
    else if (strcmp(name, "pymalloc_debug") == 0) {
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
    }
#endif
    else if (strcmp(name, "malloc") == 0) {
        *allocator = PYMEM_ALLOCATOR_MALLOC;
    }
    else if (strcmp(name, "malloc_debug") == 0) {
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
    }
    else {
        /* unknown allocator */
        return -1;
    }
    return 0;
}
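
/* Illustrative sketch (editorial, not part of CPython): how a PYTHONMALLOC
 * value is resolved and applied at interpreter startup.  The real startup
 * machinery reads the value from its configuration; it is simplified to a
 * plain getenv() here.
 *
 *     PyMemAllocatorName name;
 *     if (_PyMem_GetAllocatorName(getenv("PYTHONMALLOC"), &name) < 0) {
 *         // unknown allocator name: startup reports an error
 *     }
 *     else {
 *         (void)_PyMem_SetupAllocators(name);   // defined below
 *     }
 */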


int
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
{
    switch (allocator) {
    case PYMEM_ALLOCATOR_NOT_SET:
        /* do nothing */
        break;

    case PYMEM_ALLOCATOR_DEFAULT:
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
        break;

    case PYMEM_ALLOCATOR_DEBUG:
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
        break;

#ifdef WITH_PYMALLOC
    case PYMEM_ALLOCATOR_PYMALLOC:
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
    {
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);

        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);

        if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
            PyMem_SetupDebugHooks();
        }
        break;
    }
#endif

    case PYMEM_ALLOCATOR_MALLOC:
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
    {
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);

        if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
            PyMem_SetupDebugHooks();
        }
        break;
    }

    default:
        /* unknown allocator */
        return -1;
    }
    return 0;
}


static int
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
{
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
}


const char*
_PyMem_GetCurrentAllocatorName(void)
{
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
#ifdef WITH_PYMALLOC
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
#endif

    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
        pymemallocator_eq(&_PyObject, &malloc_alloc))
    {
        return "malloc";
    }
#ifdef WITH_PYMALLOC
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &pymalloc) &&
        pymemallocator_eq(&_PyObject, &pymalloc))
    {
        return "pymalloc";
    }
#endif

    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;

    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
        pymemallocator_eq(&_PyObject, &dbg_obj))
    {
        /* Debug hooks installed */
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
        {
            return "malloc_debug";
        }
#ifdef WITH_PYMALLOC
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
        {
            return "pymalloc_debug";
        }
#endif
    }
    return NULL;
}


#undef MALLOC_ALLOC
#undef PYMALLOC_ALLOC
#undef PYRAW_ALLOC
#undef PYMEM_ALLOC
#undef PYOBJ_ALLOC
#undef PYDBGRAW_ALLOC
#undef PYDBGMEM_ALLOC
#undef PYDBGOBJ_ALLOC


static PyObjectArenaAllocator _PyObject_Arena = {NULL,
#ifdef MS_WINDOWS
    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
#elif defined(ARENAS_USE_MMAP)
    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
#else
    _PyObject_ArenaMalloc, _PyObject_ArenaFree
#endif
};

#ifdef WITH_PYMALLOC
static int
_PyMem_DebugEnabled(void)
{
    return (_PyObject.malloc == _PyMem_DebugMalloc);
}

static int
_PyMem_PymallocEnabled(void)
{
    if (_PyMem_DebugEnabled()) {
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
    }
    else {
        return (_PyObject.malloc == _PyObject_Malloc);
    }
}
#endif


static void
_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
{
    PyMemAllocatorEx alloc;

    if (domain == PYMEM_DOMAIN_RAW) {
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
        alloc.ctx = &_PyMem_Debug.raw;
        alloc.malloc = _PyMem_DebugRawMalloc;
        alloc.calloc = _PyMem_DebugRawCalloc;
        alloc.realloc = _PyMem_DebugRawRealloc;
        alloc.free = _PyMem_DebugRawFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_MEM) {
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
        alloc.ctx = &_PyMem_Debug.mem;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_OBJ) {
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
        alloc.ctx = &_PyMem_Debug.obj;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    }
}


void
PyMem_SetupDebugHooks(void)
{
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
}

void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    default:
        /* unknown domain: set all attributes to NULL */
        allocator->ctx = NULL;
        allocator->malloc = NULL;
        allocator->calloc = NULL;
        allocator->realloc = NULL;
        allocator->free = NULL;
    }
}

void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    /* ignore unknown domain */
    }
}
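
/* Illustrative sketch (editorial, not part of CPython): how an embedder can
 * wrap a domain with custom hooks using the two functions above.  The names
 * my_malloc/my_calloc/my_realloc/my_free are hypothetical; each hook is
 * expected to delegate to the previously installed allocator saved in ctx.
 *
 *     static PyMemAllocatorEx orig;   // saved original allocator
 *
 *     static void *my_malloc(void *ctx, size_t size)
 *     {
 *         PyMemAllocatorEx *prev = (PyMemAllocatorEx *)ctx;
 *         // ... custom accounting could go here ...
 *         return prev->malloc(prev->ctx, size);
 *     }
 *
 *     PyMemAllocatorEx hook = {&orig, my_malloc, my_calloc, my_realloc, my_free};
 *     PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &orig);
 *     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook);
 */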

void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    *allocator = _PyObject_Arena;
}

void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    _PyObject_Arena = *allocator;
}

void *
PyMem_RawMalloc(size_t size)
{
    /*
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
     * Most python internals blindly use a signed Py_ssize_t to track
     * things without checking for overflows or negatives.
     * As size_t is unsigned, checking for size < 0 is not required.
     */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}

void *
PyMem_RawCalloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
}
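
/* A note on the overflow guard above (editorial): computing nelem * elsize
 * directly could wrap around before the comparison, so the check divides
 * instead.  For example, on a 64-bit build where PY_SSIZE_T_MAX is
 * 2**63 - 1, a request of nelem = 2**62 and elsize = 4 would wrap to 0 if
 * multiplied, but 2**62 > (2**63 - 1) / 4 correctly rejects it. */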

void*
PyMem_RawRealloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
}

void
PyMem_RawFree(void *ptr)
{
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}


void *
PyMem_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.malloc(_PyMem.ctx, size);
}

void *
PyMem_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
}

void *
PyMem_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
}

void
PyMem_Free(void *ptr)
{
    _PyMem.free(_PyMem.ctx, ptr);
}


wchar_t*
_PyMem_RawWcsdup(const wchar_t *str)
{
    assert(str != NULL);

    size_t len = wcslen(str);
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
        return NULL;
    }

    size_t size = (len + 1) * sizeof(wchar_t);
    wchar_t *str2 = PyMem_RawMalloc(size);
    if (str2 == NULL) {
        return NULL;
    }

    memcpy(str2, str, size);
    return str2;
}

char *
_PyMem_RawStrdup(const char *str)
{
    assert(str != NULL);
    size_t size = strlen(str) + 1;
    char *copy = PyMem_RawMalloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, size);
    return copy;
}

char *
_PyMem_Strdup(const char *str)
{
    assert(str != NULL);
    size_t size = strlen(str) + 1;
    char *copy = PyMem_Malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, size);
    return copy;
}

void *
PyObject_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.malloc(_PyObject.ctx, size);
}

void *
PyObject_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
}

void *
PyObject_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
}

void
PyObject_Free(void *ptr)
{
    _PyObject.free(_PyObject.ctx, ptr);
}


#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>

/* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#  define UNLIKELY(value) __builtin_expect((value), 0)
#else
#  define UNLIKELY(value) (value)
#endif

/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif


/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.


    Object-specific allocators
        _____   ______   ______       ________
       [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
     _______________________________       |                           |
    [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
     ______________________________________________________________    |
    [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
     __________________________________________________________________
    [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
     _______________________________________________________________________
    [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
     __________________________________   __________________________________
    [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
 * system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which degenerate in time. To avoid
 * this, we partition each free list in pools and we share dynamically the
 * reserved space between all free lists. This technique is quite efficient
 * for memory intensive programs which allocate mainly small-sized blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      497-504                 504                      62
 *      505-512                 512                      63
 *
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
 *      allocator.
 */
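
/* Editorial worked example (using the 8-byte spacing shown in the table
 * above): a request of nbytes = 20 falls in the 17-24 row, so it is served
 * from a 24-byte block; the allocator computes the class index as
 *
 *     size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;   // (20-1) >> 3 == 2
 *
 * and INDEX2SIZE(2) == (2+1) << 3 == 24 recovers the block size.  On 64-bit
 * builds ALIGNMENT is 16 (see below), so classes are spaced 16 bytes apart
 * instead. */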

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-bytes alignment works
 * on most current architectures (with 32-bit or 64-bit address busses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */

#if SIZEOF_VOID_P > 4
#define ALIGNMENT              16               /* must be 2^N */
#define ALIGNMENT_SHIFT         4
#else
#define ALIGNMENT               8               /* must be 2^N */
#define ALIGNMENT_SHIFT         3
#endif

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
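
/* For instance (editorial note): on a 64-bit build (ALIGNMENT_SHIFT == 4)
 * INDEX2SIZE(0) == 16 and INDEX2SIZE(31) == 512, the largest small-block
 * size; on 32-bit (ALIGNMENT_SHIFT == 3) the largest class is
 * INDEX2SIZE(63) == 512. */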

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * Note: a size threshold of 512 guarantees that newly created dictionaries
 * will be allocated from preallocated memory pools on 64-bit.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 512
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
 * violation fault. 4K is apparently OK for all the platforms that python
 * currently targets.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc()/mmap() call). In no way does
 * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
 * usually an address range reservation for <Big> bytes, unless all pages within
 * this space are referenced subsequently. So malloc'ing big blocks and not
 * using them does not mean "wasting memory". It's an addressable range
 * wastage...
 *
 * Arenas are allocated with mmap() on systems supporting anonymous memory
 * mappings to reduce heap fragmentation.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK

#define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
#   error "arena size not an exact multiple of pool size"
#endif
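
/* With the defaults above (editorial note): MAX_POOLS_IN_ARENA is
 * (256 << 10) / (4 * 1024) == 64, so each 256 KiB arena is carved into at
 * most 64 pools of 4 KiB each (one fewer if the arena base address is not
 * pool-aligned; see new_arena() below). */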

/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uint8_t block;

/* Pool for small blocks. */
struct pool_header {
    union { block *_padding;
            uint count; } ref;          /* number of allocated blocks    */
    block *freeblock;                   /* pool's free list head         */
    struct pool_header *nextpool;       /* next pool of this size class  */
    struct pool_header *prevpool;       /* previous pool       ""        */
    uint arenaindex;                    /* index into arenas of base adr */
    uint szidx;                         /* block size class index        */
    uint nextoffset;                    /* bytes to virgin block         */
    uint maxnextoffset;                 /* largest valid nextoffset      */
};

typedef struct pool_header *poolp;

/* Record keeping for arenas. */
struct arena_object {
    /* The address of the arena, as returned by malloc.  Note that 0
     * will never be returned by a successful malloc, and is used
     * here to mark an arena_object that doesn't correspond to an
     * allocated arena.
     */
    uintptr_t address;

    /* Pool-aligned pointer to the next pool to be carved off. */
    block* pool_address;

    /* The number of available pools in the arena:  free pools + never-
     * allocated pools.
     */
    uint nfreepools;

    /* The total number of pools in the arena, whether or not available. */
    uint ntotalpools;

    /* Singly-linked list of available pools. */
    struct pool_header* freepools;

    /* Whenever this arena_object is not associated with an allocated
     * arena, the nextarena member is used to link all unassociated
     * arena_objects in the singly-linked `unused_arena_objects` list.
     * The prevarena member is unused in this case.
     *
     * When this arena_object is associated with an allocated arena
     * with at least one available pool, both members are used in the
     * doubly-linked `usable_arenas` list, which is maintained in
     * increasing order of `nfreepools` values.
     *
     * Else this arena_object is associated with an allocated arena
     * all of whose pools are in use.  `nextarena` and `prevarena`
     * are both meaningless in this case.
     */
    struct arena_object* nextarena;
    struct arena_object* prevarena;
};

#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)

#define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))

/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
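
/* Editorial worked example, assuming a typical 64-bit build: the pool_header
 * above occupies 8 (ref) + 8 (freeblock) + 16 (two pool links) + 16 (four
 * uints) == 48 bytes, so POOL_OVERHEAD rounds to 48 and the smallest size
 * class packs NUMBLOCKS(0) == (4096 - 48) / 16 == 253 blocks per pool. */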

/*==========================================================================*/

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved.  For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off an arena's highwater mark (an arena_object's pool_address
member) as needed.  Once carved off, a pool is in one of three states forever
after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i] (see above).  It's linked to the other used pools of the
    same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state.  If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's not linked to from anything then anymore, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of its arena_object's singly-linked freepools list,
    via its nextpool member.  The prevpool member has no meaning in this case.
    Empty pools have no inherent size class:  the next time a malloc finds
    an empty list in usedpools[], it takes the first pool off of freepools.
    If the size class needed happens to be the same as the size class the pool
    last had, some pool initialization can be skipped.


Block Management

Blocks within pools are again carved out as needed.  pool->freeblock points to
the start of a singly-linked list of free blocks within the pool.  When a
block is freed, it's inserted at the front of its pool's freeblock list.  Note
that the available blocks in a pool are *not* linked all together when a pool
is initialized.  Instead only "the first two" (lowest addresses) blocks are
set up, returning the first such block, and setting pool->freeblock to a
one-block list holding the second such block.  This is consistent with the
fact that pymalloc strives at all levels (arena, pool, and block) never to
touch a piece of memory until it's actually needed.

So long as a pool is in the used state, we're certain there *is* a block
available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
points to the end of the free list before we've carved the entire pool into
blocks, that means we simply haven't yet gotten to one of the higher-address
blocks.  The offset from the pool_header to the start of "the next" virgin
block is stored in the pool_header nextoffset member, and the largest value
of nextoffset that makes sense is stored in the maxnextoffset member when a
pool is initialized.  All the blocks in a pool have been passed out at least
once when and only when nextoffset > maxnextoffset.


Major obscurity:  While the usedpools vector is declared to have poolp
entries, it doesn't really.  It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header.  The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
compensating for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted.  It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */
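
/* Sketch of how the fudged headers above are used in practice (editorial
 * note, mirroring pymalloc_alloc() below): because an empty list header
 * points at itself, the allocator's fast-path emptiness test is a single
 * comparison:
 *
 *     pool = usedpools[size + size];   // size is the class index
 *     if (pool != pool->nextpool) {
 *         // non-empty list: serve the request from this pool
 *     }
 */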

#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#if NB_SMALL_SIZE_CLASSES > 64
#error "NB_SMALL_SIZE_CLASSES should be less than or equal to 64"
#endif /* NB_SMALL_SIZE_CLASSES > 64 */
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};

/*==========================================================================
Arena management.

`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
which may not be currently used (== they're arena_objects that aren't
currently associated with an allocated arena).  Note that arenas proper are
separately malloc'ed.

Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
we do try to free() arenas, and use some mild heuristic strategies to increase
the likelihood that arenas eventually can be freed.

unused_arena_objects

    This is a singly-linked list of the arena_objects that are currently not
    being used (no arena is associated with them).  Objects are taken off the
    head of the list in new_arena(), and are pushed on the head of the list in
    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
    is on this list if and only if its .address member is 0.

usable_arenas

    This is a doubly-linked list of the arena_objects associated with arenas
    that have pools available.  These pools are either waiting to be reused,
    or have not been used before.  The list is sorted to have the most-
    allocated arenas first (ascending order based on the nfreepools member).
    This means that the next allocation will come from a heavily used arena,
    which gives the nearly empty arenas a chance to be returned to the system.
    In my unscientific tests this dramatically improved the number of arenas
    that could be freed.

Note that an arena_object associated with an arena all of whose pools are
currently in use isn't on either list.

Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
used to be done by one-at-a-time linear search when an arena's number of
free pools changed.  That could, overall, consume time quadratic in the
number of arenas.  That didn't really matter when there were only a few
hundred arenas (typical!), but could be a timing disaster when there were
hundreds of thousands.  See bpo-37029.

Now we have a vector of "search fingers" to eliminate the need to search:
nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
with nfp free pools.  This is NULL if and only if there is no arena with
nfp free pools in usable_arenas.
*/
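
/* Editorial sketch of the finger update performed in pymalloc_alloc() below:
 * when a pool is about to be carved off the arena at the head of
 * usable_arenas, its nfreepools drops by one, so the finger table is kept
 * consistent first:
 *
 *     if (nfp2lasta[arena->nfreepools] == arena)
 *         nfp2lasta[arena->nfreepools] = NULL;       // no longer last with n
 *     if (arena->nfreepools > 1)
 *         nfp2lasta[arena->nfreepools - 1] = arena;  // now last with n-1
 */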

/* Array of objects used to track chunks of memory (arenas). */
static struct arena_object* arenas = NULL;
/* Number of slots currently allocated in the `arenas` vector. */
static uint maxarenas = 0;

/* The head of the singly-linked, NULL-terminated list of available
 * arena_objects.
 */
static struct arena_object* unused_arena_objects = NULL;

/* The head of the doubly-linked, NULL-terminated at each end, list of
 * arena_objects associated with arenas that have pools available.
 */
static struct arena_object* usable_arenas = NULL;

/* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };

/* How many arena_objects do we initially allocate?
 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
 * `arenas` vector.
 */
#define INITIAL_ARENA_OBJECTS 16

/* Number of arenas allocated that haven't been free()'d. */
static size_t narenas_currently_allocated = 0;

/* Total number of times malloc() called to allocate an arena. */
static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;

static Py_ssize_t _Py_AllocatedBlocks = 0;

Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return _Py_AllocatedBlocks;
}


/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena.  It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(void)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;
    static int debug_stats = -1;

    if (debug_stats == -1) {
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
    }
    if (debug_stats)
        _PyObject_DebugMallocStats(stderr);

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*arenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*arenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        arenas = arenaobj;

        /* We might need to fix pointers that were copied.  However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full.  Thus, there are *no* pointers
         * into the old array.  Thus, we don't have to worry about
         * invalid pointers.  Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            arenas[i].address = 0;              /* mark as unassociated */
            arenas[i].nextarena = i < numarenas - 1 ?
                                   &arenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &arenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (block*)arenaobj->address;
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}


/*
address_in_range(P, POOL)

Return true if and only if P is an address that was allocated by pymalloc.
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
(the caller is asked to compute this because the macro expands POOL more than
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
variable and pass the latter to the macro; because address_in_range is
called on every alloc/realloc/free, micro-efficiency is important here).

Tricky:  Let B be the arena base address associated with the pool, B =
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if

    B <= P < B + ARENA_SIZE

Subtracting B throughout, this is true iff

    0 <= P-B < ARENA_SIZE

By using unsigned arithmetic, the "0 <=" half of the test can be skipped.

Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
before the first arena has been allocated.  `arenas` is still NULL in that
case.  We're relying on the fact that maxarenas is also 0 in that case, so
that (POOL)->arenaindex < maxarenas must be false, saving us from trying to
index into a NULL arenas.

Details:  given P and POOL, the arena_object corresponding to P is AO =
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
stores, etc), POOL is the correct address of P's pool, AO.address is the
correct base address of the pool's arena, and P must be within ARENA_SIZE of
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
(NULL)).  Therefore address_in_range correctly reports that obmalloc
controls P.

Now suppose obmalloc does not control P (e.g., P was obtained via a direct
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
in this case -- it may even be uninitialized trash.  If the trash arenaindex
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
control P.

Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
allocated arena, obmalloc controls all the memory in slice AO.address :
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
so P doesn't lie in that slice, so the macro correctly reports that P is not
controlled by obmalloc.

Finally, if P is not controlled by obmalloc and AO corresponds to an unused
arena_object (one not currently associated with an allocated arena),
AO.address is 0, and the second test in the macro reduces to:

    P < ARENA_SIZE

If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
of the test still passes, and the third clause (AO.address != 0) is necessary
to get the correct result:  AO.address is 0 in this case, so the macro
correctly reports that P is not controlled by obmalloc (despite that P lies in
slice AO.address : AO.address + ARENA_SIZE).

Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
corresponded to a currently-allocated arena, so the "P is not controlled by
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
was impossible.

Note that the logic is excruciating, and reading up possibly uninitialized
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
creates problems for some memory debuggers.  The overwhelming advantage is
that this test determines whether an arbitrary address is controlled by
obmalloc in a small constant time, independent of the number of arenas
obmalloc controls.  Since this test is needed at every entry point, it's
extremely desirable that it be this fast.
*/

static bool _Py_NO_ADDRESS_SAFETY_ANALYSIS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    return arenaindex < maxarenas &&
           (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
           arenas[arenaindex].address != 0;
}
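
/* Editorial worked example of the unsigned trick above: with ARENA_SIZE ==
 * 262144, an arena base B == 0x7f0000000000 and a foreign pointer
 * P == 0x7e0000000000, the subtraction (uintptr_t)P - B wraps around to a
 * huge unsigned value far above ARENA_SIZE, so the single comparison
 * rejects P without needing a separate P >= B test. */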
1406
1407
1408 /*==========================================================================*/
1409
1410 /* pymalloc allocator
1411
1412 The basic blocks are ordered by decreasing execution frequency,
1413 which minimizes the number of jumps in the most common cases,
1414 improves branching prediction and instruction scheduling (small
1415 block allocations typically result in a couple of instructions).
1416 Unless the optimizer reorders everything, being too smart...
1417
1418 Return a pointer to newly allocated memory if pymalloc allocated memory.
1419
1420 Return NULL if pymalloc failed to allocate the memory block: on bigger
1421 requests, on error in the code below (as a last chance to serve the request)
1422 or when the max memory limit has been reached. */
1423 static void*
pymalloc_alloc(void * ctx,size_t nbytes)1424 pymalloc_alloc(void *ctx, size_t nbytes)
1425 {
1426 block *bp;
1427 poolp pool;
1428 poolp next;
1429 uint size;
1430
1431 #ifdef WITH_VALGRIND
1432 if (UNLIKELY(running_on_valgrind == -1)) {
1433 running_on_valgrind = RUNNING_ON_VALGRIND;
1434 }
1435 if (UNLIKELY(running_on_valgrind)) {
1436 return NULL;
1437 }
1438 #endif
1439
1440 if (nbytes == 0) {
1441 return NULL;
1442 }
1443 if (nbytes > SMALL_REQUEST_THRESHOLD) {
1444 return NULL;
1445 }
1446
1447 /*
1448 * Most frequent paths first
1449 */
1450 size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1451 pool = usedpools[size + size];
1452 if (pool != pool->nextpool) {
1453 /*
1454 * There is a used pool for this size class.
1455 * Pick up the head block of its free list.
1456 */
1457 ++pool->ref.count;
1458 bp = pool->freeblock;
1459 assert(bp != NULL);
1460 if ((pool->freeblock = *(block **)bp) != NULL) {
1461 goto success;
1462 }
1463
1464 /*
1465 * Reached the end of the free list, try to extend it.
1466 */
1467 if (pool->nextoffset <= pool->maxnextoffset) {
1468 /* There is room for another block. */
1469 pool->freeblock = (block*)pool +
1470 pool->nextoffset;
1471 pool->nextoffset += INDEX2SIZE(size);
1472 *(block **)(pool->freeblock) = NULL;
1473 goto success;
1474 }
1475
1476 /* Pool is full, unlink from used pools. */
1477 next = pool->nextpool;
1478 pool = pool->prevpool;
1479 next->prevpool = pool;
1480 pool->nextpool = next;
1481 goto success;
1482 }
1483
1484 /* There isn't a pool of the right size class immediately
1485 * available: use a free pool.
1486 */
1487 if (usable_arenas == NULL) {
1488 /* No arena has a free pool: allocate a new arena. */
1489 #ifdef WITH_MEMORY_LIMITS
1490 if (narenas_currently_allocated >= MAX_ARENAS) {
1491 goto failed;
1492 }
1493 #endif
1494 usable_arenas = new_arena();
1495 if (usable_arenas == NULL) {
1496 goto failed;
1497 }
1498 usable_arenas->nextarena =
1499 usable_arenas->prevarena = NULL;
1500 assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1501 nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1502 }
1503 assert(usable_arenas->address != 0);
1504
1505 /* This arena already had the smallest nfreepools value, so decreasing
1506 * nfreepools doesn't change that, and we don't need to rearrange the
1507 * usable_arenas list. However, if the arena becomes wholly allocated,
1508 * we need to remove its arena_object from usable_arenas.
1509 */
1510 assert(usable_arenas->nfreepools > 0);
1511 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1512 /* It's the last of this size, so there won't be any. */
        nfp2lasta[usable_arenas->nfreepools] = NULL;
    }
    /* If any free pools will remain, it will be the new smallest. */
    if (usable_arenas->nfreepools > 1) {
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
    }

    /* Try to get a cached free pool. */
    pool = usable_arenas->freepools;
    if (pool != NULL) {
        /* Unlink from cached pools. */
        usable_arenas->freepools = pool->nextpool;
        --usable_arenas->nfreepools;
        if (usable_arenas->nfreepools == 0) {
            /* Wholly allocated:  remove. */
            assert(usable_arenas->freepools == NULL);
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
        else {
            /* nfreepools > 0:  it must be that freepools
             * isn't NULL, or that we haven't yet carved
             * off all the arena's pools for the first
             * time.
             */
            assert(usable_arenas->freepools != NULL ||
                   usable_arenas->pool_address <=
                   (block*)usable_arenas->address +
                       ARENA_SIZE - POOL_SIZE);
        }

    init_pool:
        /* Frontlink to used pools. */
        next = usedpools[size + size]; /* == prev */
        pool->nextpool = next;
        pool->prevpool = next;
        next->nextpool = pool;
        next->prevpool = pool;
        pool->ref.count = 1;
        if (pool->szidx == size) {
            /* Luckily, this pool last contained blocks
             * of the same size class, so its header
             * and free list are already initialized.
             */
            bp = pool->freeblock;
            assert(bp != NULL);
            pool->freeblock = *(block **)bp;
            goto success;
        }
        /*
         * Initialize the pool header, set up the free list to
         * contain just the second block, and return the first
         * block.
         */
        pool->szidx = size;
        size = INDEX2SIZE(size);
        bp = (block *)pool + POOL_OVERHEAD;
        pool->nextoffset = POOL_OVERHEAD + (size << 1);
        pool->maxnextoffset = POOL_SIZE - size;
        pool->freeblock = bp + size;
        *(block **)(pool->freeblock) = NULL;
        goto success;
    }
1583
1584 /* Carve off a new pool. */
1585 assert(usable_arenas->nfreepools > 0);
1586 assert(usable_arenas->freepools == NULL);
1587 pool = (poolp)usable_arenas->pool_address;
1588 assert((block*)pool <= (block*)usable_arenas->address +
1589 ARENA_SIZE - POOL_SIZE);
1590 pool->arenaindex = (uint)(usable_arenas - arenas);
1591 assert(&arenas[pool->arenaindex] == usable_arenas);
1592 pool->szidx = DUMMY_SIZE_IDX;
1593 usable_arenas->pool_address += POOL_SIZE;
1594 --usable_arenas->nfreepools;
1595
1596 if (usable_arenas->nfreepools == 0) {
1597 assert(usable_arenas->nextarena == NULL ||
1598 usable_arenas->nextarena->prevarena ==
1599 usable_arenas);
1600 /* Unlink the arena: it is completely allocated. */
1601 usable_arenas = usable_arenas->nextarena;
1602 if (usable_arenas != NULL) {
1603 usable_arenas->prevarena = NULL;
1604 assert(usable_arenas->address != 0);
1605 }
1606 }
1607
1608 goto init_pool;
1609
1610 success:
1611 assert(bp != NULL);
1612 return (void *)bp;
1613
1614 failed:
1615 return NULL;
1616 }
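
/* Worked example of the size class computation in pymalloc_alloc, assuming
 * ALIGNMENT == 8 (so ALIGNMENT_SHIFT == 3), where INDEX2SIZE(i) is
 * ((i + 1) << ALIGNMENT_SHIFT):
 *
 *     nbytes       size = (nbytes - 1) >> 3    INDEX2SIZE(size)
 *      1 ..  8     0                            8
 *      9 .. 16     1                           16
 *     17 .. 24     2                           24
 *
 * usedpools[size + size] then yields the list head for that class; the
 * doubled index exists because each entry in usedpools is really half of a
 * two-pointer (nextpool/prevpool) dummy pool header used as a sentinel.
 */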


static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
    void* ptr = pymalloc_alloc(ctx, nbytes);
    if (ptr != NULL) {
        _Py_AllocatedBlocks++;
        return ptr;
    }

    ptr = PyMem_RawMalloc(nbytes);
    if (ptr != NULL) {
        _Py_AllocatedBlocks++;
    }
    return ptr;
}


static void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
    size_t nbytes = nelem * elsize;

    void *ptr = pymalloc_alloc(ctx, nbytes);
    if (ptr != NULL) {
        memset(ptr, 0, nbytes);
        _Py_AllocatedBlocks++;
        return ptr;
    }

    ptr = PyMem_RawCalloc(nelem, elsize);
    if (ptr != NULL) {
        _Py_AllocatedBlocks++;
    }
    return ptr;
}
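
/* The assert in _PyObject_Calloc relies on the public wrappers rejecting
 * overflowing requests before getting here.  A minimal sketch of that
 * guard, as performed by the PyObject_Calloc/PyMem_Calloc entry points:
 *
 *     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) {
 *         return NULL;   // nelem * elsize would overflow
 *     }
 *
 * so by the time control reaches this file, nelem * elsize is known to fit
 * in a Py_ssize_t.
 */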


/* Free a memory block allocated by pymalloc_alloc().
   Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
static int
pymalloc_free(void *ctx, void *p)
{
    poolp pool;
    block *lastfree;
    poolp next, prev;
    uint size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(p, pool)) {
        return 0;
    }
    /* We allocated this address. */

    /* Link p to the start of the pool's freeblock list.  Since
     * the pool had at least the p block outstanding, the pool
     * wasn't empty (so it's already in a usedpools[] list, or
     * was full and is in no list -- it's not in the freeblocks
     * list in any case).
     */
    assert(pool->ref.count > 0);            /* else it was empty */
    *(block **)p = lastfree = pool->freeblock;
    pool->freeblock = (block *)p;
    if (!lastfree) {
        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        --pool->ref.count;
        assert(pool->ref.count > 0);        /* else the pool is empty */
        size = pool->szidx;
        next = usedpools[size + size];
        prev = next->prevpool;

        /* insert pool before next:   prev <-> pool <-> next */
        pool->nextpool = next;
        pool->prevpool = prev;
        next->prevpool = pool;
        prev->nextpool = pool;
        goto success;
    }

    struct arena_object* ao;
    uint nf;  /* ao->nfreepools */

    /* freeblock wasn't NULL, so the pool wasn't full,
     * and the pool is in a usedpools[] list.
     */
    if (--pool->ref.count != 0) {
        /* pool isn't empty:  leave it in usedpools */
        goto success;
    }
    /* Pool is now empty:  unlink from usedpools, and
     * link to the front of freepools.  This ensures that
     * previously freed pools will be allocated later
     * (not being referenced, they are perhaps paged out).
     */
    next = pool->nextpool;
    prev = pool->prevpool;
    next->prevpool = prev;
    prev->nextpool = next;

    /* Link the pool to freepools.  This is a singly-linked
     * list, and pool->prevpool isn't used there.
     */
    ao = &arenas[pool->arenaindex];
    pool->nextpool = ao->freepools;
    ao->freepools = pool;
    nf = ao->nfreepools;
    /* If this is the rightmost arena with this number of free pools,
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
     * are no arenas in usable_arenas with that value.
     */
    struct arena_object* lastnf = nfp2lasta[nf];
    assert((nf == 0 && lastnf == NULL) ||
           (nf > 0 &&
            lastnf != NULL &&
            lastnf->nfreepools == nf &&
            (lastnf->nextarena == NULL ||
             nf < lastnf->nextarena->nfreepools)));
    if (lastnf == ao) {  /* it is the rightmost */
        struct arena_object* p = ao->prevarena;
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
    }
    ao->nfreepools = ++nf;

    /* All the rest is arena management.  We just freed
     * a pool, and there are 4 cases for arena mgmt:
     * 1. If all the pools are free, return the arena to
     *    the system free().
     * 2. If this is the only free pool in the arena,
     *    add the arena back to the `usable_arenas` list.
     * 3. If the "next" arena has a smaller count of free
     *    pools, we have to "slide this arena right" to
     *    restore the invariant that usable_arenas is
     *    sorted in order of nfreepools.
     * 4. Else there's nothing more to do.
     */
    if (nf == ao->ntotalpools) {
        /* Case 1.  First unlink ao from usable_arenas.
         */
        assert(ao->prevarena == NULL ||
               ao->prevarena->address != 0);
        assert(ao->nextarena == NULL ||
               ao->nextarena->address != 0);

        /* Fix the pointer in the prevarena, or the
         * usable_arenas pointer.
         */
        if (ao->prevarena == NULL) {
            usable_arenas = ao->nextarena;
            assert(usable_arenas == NULL ||
                   usable_arenas->address != 0);
        }
        else {
            assert(ao->prevarena->nextarena == ao);
            ao->prevarena->nextarena =
                ao->nextarena;
        }
        /* Fix the pointer in the nextarena. */
        if (ao->nextarena != NULL) {
            assert(ao->nextarena->prevarena == ao);
            ao->nextarena->prevarena =
                ao->prevarena;
        }
        /* Record that this arena_object slot is
         * available to be reused.
         */
        ao->nextarena = unused_arena_objects;
        unused_arena_objects = ao;

        /* Free the entire arena. */
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
        ao->address = 0;                    /* mark unassociated */
        --narenas_currently_allocated;

        goto success;
    }

    if (nf == 1) {
        /* Case 2.  Put ao at the head of
         * usable_arenas.  Note that because
         * ao->nfreepools was 0 before, ao isn't
         * currently on the usable_arenas list.
         */
        ao->nextarena = usable_arenas;
        ao->prevarena = NULL;
        if (usable_arenas)
            usable_arenas->prevarena = ao;
        usable_arenas = ao;
        assert(usable_arenas->address != 0);
        if (nfp2lasta[1] == NULL) {
            nfp2lasta[1] = ao;
        }

        goto success;
    }

    /* If this arena is now out of order, we need to keep
     * the list sorted.  The list is kept sorted so that
     * the "most full" arenas are used first, which allows
     * the nearly empty arenas to be completely freed.  In
     * a few un-scientific tests, it seems like this
     * approach allowed a lot more memory to be freed.
     */
    /* If this is the only arena with nf, record that. */
    if (nfp2lasta[nf] == NULL) {
        nfp2lasta[nf] = ao;
    } /* else the rightmost with nf doesn't change */
    /* If this was the rightmost of the old size, it remains in place. */
    if (ao == lastnf) {
        /* Case 4.  Nothing to do. */
        goto success;
    }
    /* If ao were the only arena in the list, the last block would have
     * gotten us out.
     */
    assert(ao->nextarena != NULL);

    /* Case 3:  We have to move the arena towards the end of the list,
     * because it has more free pools than the arena to its right.  It needs
     * to move to follow lastnf.
     * First unlink ao from usable_arenas.
     */
    if (ao->prevarena != NULL) {
        /* ao isn't at the head of the list */
        assert(ao->prevarena->nextarena == ao);
        ao->prevarena->nextarena = ao->nextarena;
    }
    else {
        /* ao is at the head of the list */
        assert(usable_arenas == ao);
        usable_arenas = ao->nextarena;
    }
    ao->nextarena->prevarena = ao->prevarena;
    /* And insert after lastnf. */
    ao->prevarena = lastnf;
    ao->nextarena = lastnf->nextarena;
    if (ao->nextarena != NULL) {
        ao->nextarena->prevarena = ao;
    }
    lastnf->nextarena = ao;
    /* Verify that the swaps worked. */
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
    assert((usable_arenas == ao && ao->prevarena == NULL)
           || ao->prevarena->nextarena == ao);

    goto success;

success:
    return 1;
}
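
/* Example of the ordering pymalloc_free maintains.  usable_arenas is kept
 * ascending by nfreepools, and nfp2lasta[n] points at the rightmost arena
 * with exactly n free pools (or NULL if there is none):
 *
 *     usable_arenas:  A(nf=1) <-> B(nf=1) <-> C(nf=3)
 *     nfp2lasta:      [1] = B,  [2] = NULL,  [3] = C
 *
 * Freeing one more pool in B bumps it to nf == 2: B was the rightmost of
 * the 1s, so nfp2lasta[1] falls back to A; nfp2lasta[2] was NULL, so it
 * becomes B; and since B is already in sorted position, the list itself is
 * untouched (case 4 above).
 */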


static void
_PyObject_Free(void *ctx, void *p)
{
    /* PyObject_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    _Py_AllocatedBlocks--;
    if (!pymalloc_free(ctx, p)) {
        /* pymalloc didn't allocate this address */
        PyMem_RawFree(p);
    }
}
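
/* A pointer handed out by these entry points must be released through the
 * same allocator family.  A hedged usage sketch via the public wrappers
 * that route here when pymalloc is enabled:
 *
 *     void *buf = PyObject_Malloc(64);
 *     if (buf == NULL) {
 *         return PyErr_NoMemory();
 *     }
 *     ...
 *     PyObject_Free(buf);   // not free(), and not PyMem_RawFree()
 */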


/* pymalloc realloc.

   If nbytes==0, then as the Python docs promise, we do not treat this like
   free(p), and return a non-NULL result.

   Return 1 if pymalloc reallocated memory and wrote the new pointer into
   newptr_p.

   Return 0 if pymalloc didn't allocate p. */
static int
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
{
    void *bp;
    poolp pool;
    size_t size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    /* Treat running_on_valgrind == -1 the same as 0 */
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(p, pool)) {
        /* pymalloc is not managing this block.

           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
           over this block.  However, if we do, we need to copy the valid data
           from the C-managed block to one of our blocks, and there's no
           portable way to know how much of the memory space starting at p is
           valid.

           As bug 1185883 pointed out the hard way, it's possible that the
           C-managed block is "at the end" of allocated VM space, so that a
           memory fault can occur if we try to copy nbytes bytes starting at p.
           Instead we punt:  let C continue to manage this block. */
        return 0;
    }

    /* pymalloc is in charge of this block */
    size = INDEX2SIZE(pool->szidx);
    if (nbytes <= size) {
        /* The block is staying the same or shrinking.

           If it's shrinking, there's a tradeoff: it costs cycles to copy the
           block to a smaller size class, but it wastes memory not to copy it.

           The compromise here is to copy on shrink only if at least 25% of
           size can be shaved off. */
        if (4 * nbytes > 3 * size) {
            /* It's the same, or shrinking and new/old > 3/4. */
            *newptr_p = p;
            return 1;
        }
        size = nbytes;
    }

    bp = _PyObject_Malloc(ctx, nbytes);
    if (bp != NULL) {
        memcpy(bp, p, size);
        _PyObject_Free(ctx, p);
    }
    *newptr_p = bp;
    return 1;
}
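
/* The 25% shrink rule above, worked through for size == 64 (the old size
 * class): requests of 49..64 bytes keep the block in place, since e.g.
 * 4*49 == 196 > 3*64 == 192, while a request of 48 bytes copies to a
 * smaller class (4*48 == 192, which is not > 192).  A copy therefore
 * happens only when at least a quarter of the block would otherwise be
 * wasted.
 */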


static void *
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
{
    void *ptr2;

    if (ptr == NULL) {
        return _PyObject_Malloc(ctx, nbytes);
    }

    if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
        return ptr2;
    }

    return PyMem_RawRealloc(ptr, nbytes);
}

#else   /* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
 * only be used by extensions that are compiled with pymalloc enabled. */

Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return 0;
}

#endif /* WITH_PYMALLOC */


/*==========================================================================*/
/* A cross-platform debugging allocator.  This doesn't manage memory directly;
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

/* Uncomment this define to add the "serialno" field */
/* #define PYMEM_DEBUG_SERIALNO */

#ifdef PYMEM_DEBUG_SERIALNO
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine.  The point is
 * to supply a single place to set a breakpoint.
 */
static void
bumpserialno(void)
{
    ++serialno;
}
#endif

#define SST SIZEOF_SIZE_T

#ifdef PYMEM_DEBUG_SERIALNO
# define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
#else
# define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
#endif

/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
static size_t
read_size_t(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    size_t result = *q++;
    int i;

    for (i = SST; --i > 0; ++q)
        result = (result << 8) | *q;
    return result;
}

/* Write n as a big-endian size_t, MSB at address p, LSB at
 * p + sizeof(size_t) - 1.
 */
static void
write_size_t(void *p, size_t n)
{
    uint8_t *q = (uint8_t *)p + SST - 1;
    int i;

    for (i = SST; --i >= 0; --q) {
        *q = (uint8_t)(n & 0xff);
        n >>= 8;
    }
}
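
/* Round-trip sketch for the two helpers above, assuming SST == 8.
 * write_size_t() stores the most significant byte first:
 *
 *     uint8_t buf[SST];
 *     write_size_t(buf, (size_t)0x0102030405060708);
 *     // buf == {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}
 *     assert(read_size_t(buf) == (size_t)0x0102030405060708);
 *
 * Big-endian is used purely so that the stored length is easy to read in a
 * raw memory dump, independent of host byte order.
 */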

/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
   fills them with useful stuff, here calling the underlying malloc's result p:

p[0: S]
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
    to read in a memory dump).
p[S]
    API ID.  See PEP 445.  This is a character, but seems undocumented.
p[S+1: 2*S]
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch underwrites and reads.
p[2*S: 2*S+n]
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
    Used to catch reference to uninitialized memory.
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
    handled the request itself.
p[2*S+n: 2*S+n+S]
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch overwrites and reads.
p[2*S+n+S: 2*S+n+2*S]
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
    and _PyMem_DebugRealloc.
    This is a big-endian size_t.
    If "bad memory" is detected later, the serial number gives an
    excellent way to set a breakpoint on the next run, to capture the
    instant at which this block was passed out.

If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
for 3 * S extra bytes, and omits the last serialno field.
*/
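
/* Worked example of that layout, assuming S == 8, PYMEM_DEBUG_SERIALNO
   defined, and a 4-byte request through the PYMEM_DOMAIN_MEM hooks
   (api ID 'm'); FB == PYMEM_FORBIDDENBYTE (0xFB), CB == PYMEM_CLEANBYTE
   (0xCB):

       p[0:8]     00 00 00 00 00 00 00 04    requested size, big-endian
       p[8]       'm'                        API ID
       p[9:16]    FB FB FB FB FB FB FB       leading pad
       p[16:20]   CB CB CB CB                the caller's 4 bytes
       p[20:28]   FB FB FB FB FB FB FB FB    trailing pad
       p[28:36]   serial number, big-endian

   The caller receives &p[16].
*/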

static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *p;           /* base address of malloc'ed pad block */
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */

    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
                ^--- p    ^--- data   ^--- tail
       S: nbytes stored as size_t
       I: API identifier (1 byte)
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
       C: Clean bytes used later to store actual data
       N: Serial number stored as size_t

       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
       is omitted. */

    if (use_calloc) {
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
    }
    else {
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
    }
    if (p == NULL) {
        return NULL;
    }
    data = p + 2*SST;

#ifdef PYMEM_DEBUG_SERIALNO
    bumpserialno();
#endif

    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
    write_size_t(p, nbytes);
    p[SST] = (uint8_t)api->api_id;
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    if (nbytes > 0 && !use_calloc) {
        memset(data, PYMEM_CLEANBYTE, nbytes);
    }

    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, serialno);
#endif

    return data;
}

static void *
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
{
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}

static void *
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
{
    size_t nbytes;
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
    nbytes = nelem * elsize;
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
}


/* The debug free first checks the 2*SST bytes on each end for sanity (in
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
   Then fills the original bytes with PYMEM_DEADBYTE.
   Then calls the underlying free.
*/
static void
_PyMem_DebugRawFree(void *ctx, void *p)
{
    /* PyMem_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
    size_t nbytes;

    _PyMem_DebugCheckAddress(api->api_id, p);
    nbytes = read_size_t(q);
    nbytes += PYMEM_DEBUG_EXTRA_BYTES;
    memset(q, PYMEM_DEADBYTE, nbytes);
    api->alloc.free(api->alloc.ctx, q);
}


static void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
    if (p == NULL) {
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *head;        /* base address of malloc'ed pad block */
    uint8_t *data;        /* pointer to data bytes */
    uint8_t *r;
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
    size_t original_nbytes;
#define ERASED_SIZE 64
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */

    _PyMem_DebugCheckAddress(api->api_id, p);

    data = (uint8_t *)p;
    head = data - 2*SST;
    original_nbytes = read_size_t(head);
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    tail = data + original_nbytes;
#ifdef PYMEM_DEBUG_SERIALNO
    size_t block_serialno = read_size_t(tail + SST);
#endif
    /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
       ERASED_SIZE bytes at the end as dead, and save a copy of the erased
       bytes.
     */
    if (original_nbytes <= sizeof(save)) {
        memcpy(save, data, original_nbytes);
        memset(data - 2 * SST, PYMEM_DEADBYTE,
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
    }
    else {
        memcpy(save, data, ERASED_SIZE);
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
    }

    /* Resize and add decorations. */
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
    if (r == NULL) {
        /* if realloc() failed: rewrite header and footer which have
           just been erased */
        nbytes = original_nbytes;
    }
    else {
        head = r;
#ifdef PYMEM_DEBUG_SERIALNO
        bumpserialno();
        block_serialno = serialno;
#endif
    }
    data = head + 2*SST;

    write_size_t(head, nbytes);
    head[SST] = (uint8_t)api->api_id;
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, block_serialno);
#endif

    /* Restore saved bytes. */
    if (original_nbytes <= sizeof(save)) {
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
    }
    else {
        size_t i = original_nbytes - ERASED_SIZE;
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
        if (nbytes > i) {
            memcpy(data + i, &save[ERASED_SIZE],
                   Py_MIN(nbytes - i, ERASED_SIZE));
        }
    }

    if (r == NULL) {
        return NULL;
    }

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
               nbytes - original_nbytes);
    }

    return data;
}

static void
_PyMem_DebugCheckGIL(void)
{
    if (!PyGILState_Check())
        Py_FatalError("Python memory allocator called "
                      "without holding the GIL");
}

static void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
    _PyMem_DebugCheckGIL();
    return _PyMem_DebugRawMalloc(ctx, nbytes);
}

static void *
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
{
    _PyMem_DebugCheckGIL();
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}


static void
_PyMem_DebugFree(void *ctx, void *ptr)
{
    _PyMem_DebugCheckGIL();
    _PyMem_DebugRawFree(ctx, ptr);
}


static void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
    _PyMem_DebugCheckGIL();
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
}
/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 * The API id is also checked.
 */
static void
_PyMem_DebugCheckAddress(char api, const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    char msgbuf[64];
    const char *msg;
    size_t nbytes;
    const uint8_t *tail;
    int i;
    char id;

    if (p == NULL) {
        msg = "didn't expect a NULL pointer";
        goto error;
    }

    /* Check the API id */
    id = (char)q[-SST];
    if (id != api) {
        msg = msgbuf;
        snprintf(msgbuf, sizeof(msgbuf),
                 "bad ID: Allocated using API '%c', verified using API '%c'",
                 id, api);
        msgbuf[sizeof(msgbuf)-1] = 0;
        goto error;
    }

    /* Check the stuff at the start of p first:  if there's underwrite
     * corruption, the number-of-bytes field may be nuts, and checking
     * the tail could lead to a segfault then.
     */
    for (i = SST-1; i >= 1; --i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            msg = "bad leading pad byte";
            goto error;
        }
    }

    nbytes = read_size_t(q - 2*SST);
    tail = q + nbytes;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            msg = "bad trailing pad byte";
            goto error;
        }
    }

    return;

error:
    _PyObject_DebugDumpAddress(p);
    Py_FatalError(msg);
}
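
/* Example of what this check catches -- a one-byte heap overrun under the
 * debug hooks (a hedged sketch, not part of the build):
 *
 *     char *s = (char *)PyMem_Malloc(4);
 *     memcpy(s, "abcd", 4);
 *     s[4] = '\0';        // clobbers the first trailing FORBIDDENBYTE
 *     PyMem_Free(s);      // -> "bad trailing pad byte", block dump, abort
 *
 * The free path lands in _PyMem_DebugRawFree(), which calls this function,
 * finds tail[0] != PYMEM_FORBIDDENBYTE, dumps the block via
 * _PyObject_DebugDumpAddress(), and aborts with Py_FatalError() instead of
 * letting the corruption go unnoticed.
 */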

/* Display info to stderr about the memory block at p. */
static void
_PyObject_DebugDumpAddress(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    const uint8_t *tail;
    size_t nbytes;
    int i;
    int ok;
    char id;

    fprintf(stderr, "Debug memory block at address p=%p:", p);
    if (p == NULL) {
        fprintf(stderr, "\n");
        return;
    }
    id = (char)q[-SST];
    fprintf(stderr, " API '%c'\n", id);

    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
                    "requested\n", nbytes);

    /* In case this is nuts, check the leading pad bytes first. */
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
    ok = 1;
    for (i = 1; i <= SST-1; ++i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                PYMEM_FORBIDDENBYTE);
        for (i = SST-1; i >= 1; --i) {
            const uint8_t byte = *(q-i);
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs("    Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              "       may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                PYMEM_FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uint8_t byte = tail[i];
            fprintf(stderr, "        at tail+%d: 0x%02x",
                    i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

#ifdef PYMEM_DEBUG_SERIALNO
    size_t serial = read_size_t(tail + SST);
    fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
                    "u to debug malloc/realloc.\n", serial);
#endif

    if (nbytes > 0) {
        i = 0;
        fputs("    Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            fprintf(stderr, " %02x", *q);
            ++i;
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
    fputc('\n', stderr);

    fflush(stderr);
    _PyMem_DumpTraceback(fileno(stderr), p);
}


static size_t
printone(FILE *out, const char* msg, size_t value)
{
    int i, k;
    char buf[100];
    size_t origvalue = value;

    fputs(msg, out);
    for (i = (int)strlen(msg); i < 35; ++i)
        fputc(' ', out);
    fputc('=', out);

    /* Write the value with commas. */
    i = 22;
    buf[i--] = '\0';
    buf[i--] = '\n';
    k = 3;
    do {
        size_t nextvalue = value / 10;
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
        value = nextvalue;
        buf[i--] = (char)(digit + '0');
        --k;
        if (k == 0 && value && i >= 0) {
            k = 3;
            buf[i--] = ',';
        }
    } while (value && i >= 0);

    while (i >= 0)
        buf[i--] = ' ';
    fputs(buf, out);

    return origvalue;
}
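
/* Sketch of printone()'s output: the label is padded to column 35, '=' is
 * written, and the value is right-justified with thousands separators, so
 * e.g.
 *
 *     printone(out, "# bytes in allocated blocks", 18546688);
 *
 * produces a line shaped like:
 *
 *     # bytes in allocated blocks        =           18,546,688
 */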

void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks,
                       size_t sizeof_block)
{
    char buf1[128];
    char buf2[128];
    PyOS_snprintf(buf1, sizeof(buf1),
                  "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
                  num_blocks, block_name, sizeof_block);
    PyOS_snprintf(buf2, sizeof(buf2),
                  "%48s ", buf1);
    (void)printone(out, buf2, num_blocks * sizeof_block);
}


#ifdef WITH_PYMALLOC

#ifdef Py_DEBUG
/* Is target in the list?  The list is traversed via the nextpool pointers.
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
 * list, else 0.
 */
static int
pool_is_in_list(const poolp target, poolp list)
{
    poolp origlist = list;
    assert(target != NULL);
    if (list == NULL)
        return 0;
    do {
        if (target == list)
            return 1;
        list = list->nextpool;
    } while (list != NULL && list != origlist);
    return 0;
}
#endif

/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 *
 * Return 0 if the memory debug hooks are not installed or no statistics
 * were written into out, and return 1 otherwise.
 */
int
_PyObject_DebugMallocStats(FILE *out)
{
    if (!_PyMem_PymallocEnabled()) {
        return 0;
    }

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uint j;
        uintptr_t base = arenas[i].address;

        /* Skip arenas which are not allocated. */
        if (arenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += arenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) arenas[i].pool_address);
        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
             ++j, base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, arenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u "
                     "%11" PY_FORMAT_SIZE_T "u "
                     "%15" PY_FORMAT_SIZE_T "u "
                     "%13" PY_FORMAT_SIZE_T "u\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
                  "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    return 1;
}
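
/* This report can be produced on demand; from C, after Py_Initialize():
 *
 *     (void)_PyObject_DebugMallocStats(stderr);
 *
 * (CPython also exposes it to Python code as sys._debugmallocstats().)
 */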

#endif /* #ifdef WITH_PYMALLOC */