1 #include "Python.h" 2 #include "pycore_pymem.h" // _PyTraceMalloc_Config 3 4 #include <stdbool.h> 5 6 7 /* Defined in tracemalloc.c */ 8 extern void _PyMem_DumpTraceback(int fd, const void *ptr); 9 10 11 /* Python's malloc wrappers (see pymem.h) */ 12 13 #undef uint 14 #define uint unsigned int /* assuming >= 16 bits */ 15 16 /* Forward declaration */ 17 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size); 18 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize); 19 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size); 20 static void _PyMem_DebugRawFree(void *ctx, void *ptr); 21 22 static void* _PyMem_DebugMalloc(void *ctx, size_t size); 23 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize); 24 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); 25 static void _PyMem_DebugFree(void *ctx, void *p); 26 27 static void _PyObject_DebugDumpAddress(const void *p); 28 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p); 29 30 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain); 31 32 #if defined(__has_feature) /* Clang */ 33 # if __has_feature(address_sanitizer) /* is ASAN enabled? */ 34 # define _Py_NO_SANITIZE_ADDRESS \ 35 __attribute__((no_sanitize("address"))) 36 # endif 37 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */ 38 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) 39 # endif 40 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */ 41 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) 42 # endif 43 #elif defined(__GNUC__) 44 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */ 45 # define _Py_NO_SANITIZE_ADDRESS \ 46 __attribute__((no_sanitize_address)) 47 # endif 48 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro 49 // is provided only since GCC 7. 50 # if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) 51 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) 52 # endif 53 #endif 54 55 #ifndef _Py_NO_SANITIZE_ADDRESS 56 # define _Py_NO_SANITIZE_ADDRESS 57 #endif 58 #ifndef _Py_NO_SANITIZE_THREAD 59 # define _Py_NO_SANITIZE_THREAD 60 #endif 61 #ifndef _Py_NO_SANITIZE_MEMORY 62 # define _Py_NO_SANITIZE_MEMORY 63 #endif 64 65 #ifdef WITH_PYMALLOC 66 67 #ifdef MS_WINDOWS 68 # include <windows.h> 69 #elif defined(HAVE_MMAP) 70 # include <sys/mman.h> 71 # ifdef MAP_ANONYMOUS 72 # define ARENAS_USE_MMAP 73 # endif 74 #endif 75 76 /* Forward declaration */ 77 static void* _PyObject_Malloc(void *ctx, size_t size); 78 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize); 79 static void _PyObject_Free(void *ctx, void *p); 80 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); 81 #endif 82 83 84 /* bpo-35053: Declare tracemalloc configuration here rather than 85 Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic 86 library, whereas _Py_NewReference() requires it. */ 87 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT; 88 89 90 static void * _PyMem_RawMalloc(void * ctx,size_t size)91 _PyMem_RawMalloc(void *ctx, size_t size) 92 { 93 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL 94 for malloc(0), which would be treated as an error. Some platforms would 95 return a pointer with no memory behind it, which would break pymalloc. 96 To solve these problems, allocate an extra byte. */ 97 if (size == 0) 98 size = 1; 99 return malloc(size); 100 } 101 102 static void * _PyMem_RawCalloc(void * ctx,size_t nelem,size_t elsize)103 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) 104 { 105 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL 106 for calloc(0, 0), which would be treated as an error. Some platforms 107 would return a pointer with no memory behind it, which would break 108 pymalloc. To solve these problems, allocate an extra byte. */ 109 if (nelem == 0 || elsize == 0) { 110 nelem = 1; 111 elsize = 1; 112 } 113 return calloc(nelem, elsize); 114 } 115 116 static void * _PyMem_RawRealloc(void * ctx,void * ptr,size_t size)117 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size) 118 { 119 if (size == 0) 120 size = 1; 121 return realloc(ptr, size); 122 } 123 124 static void _PyMem_RawFree(void * ctx,void * ptr)125 _PyMem_RawFree(void *ctx, void *ptr) 126 { 127 free(ptr); 128 } 129 130 131 #ifdef MS_WINDOWS 132 static void * _PyObject_ArenaVirtualAlloc(void * ctx,size_t size)133 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size) 134 { 135 return VirtualAlloc(NULL, size, 136 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); 137 } 138 139 static void _PyObject_ArenaVirtualFree(void * ctx,void * ptr,size_t size)140 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size) 141 { 142 VirtualFree(ptr, 0, MEM_RELEASE); 143 } 144 145 #elif defined(ARENAS_USE_MMAP) 146 static void * _PyObject_ArenaMmap(void * ctx,size_t size)147 _PyObject_ArenaMmap(void *ctx, size_t size) 148 { 149 void *ptr; 150 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, 151 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 152 if (ptr == MAP_FAILED) 153 return NULL; 154 assert(ptr != NULL); 155 return ptr; 156 } 157 158 static void _PyObject_ArenaMunmap(void * ctx,void * ptr,size_t size)159 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size) 160 { 161 munmap(ptr, size); 162 } 163 164 #else 165 static void * _PyObject_ArenaMalloc(void * ctx,size_t size)166 _PyObject_ArenaMalloc(void *ctx, size_t size) 167 { 168 return malloc(size); 169 } 170 171 static void _PyObject_ArenaFree(void * ctx,void * ptr,size_t size)172 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size) 173 { 174 free(ptr); 175 } 176 #endif 177 178 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree} 179 #ifdef WITH_PYMALLOC 180 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free} 181 #endif 182 183 #define PYRAW_ALLOC MALLOC_ALLOC 184 #ifdef WITH_PYMALLOC 185 # define PYOBJ_ALLOC PYMALLOC_ALLOC 186 #else 187 # define PYOBJ_ALLOC MALLOC_ALLOC 188 #endif 189 #define PYMEM_ALLOC PYOBJ_ALLOC 190 191 typedef struct { 192 /* We tag each block with an API ID in order to tag API violations */ 193 char api_id; 194 PyMemAllocatorEx alloc; 195 } debug_alloc_api_t; 196 static struct { 197 debug_alloc_api_t raw; 198 debug_alloc_api_t mem; 199 debug_alloc_api_t obj; 200 } _PyMem_Debug = { 201 {'r', PYRAW_ALLOC}, 202 {'m', PYMEM_ALLOC}, 203 {'o', PYOBJ_ALLOC} 204 }; 205 206 #define PYDBGRAW_ALLOC \ 207 {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree} 208 #define PYDBGMEM_ALLOC \ 209 {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree} 210 #define PYDBGOBJ_ALLOC \ 211 {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree} 212 213 #ifdef Py_DEBUG 214 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC; 215 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC; 216 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC; 217 #else 218 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC; 219 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC; 220 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC; 221 #endif 222 223 224 static int pymem_set_default_allocator(PyMemAllocatorDomain domain,int debug,PyMemAllocatorEx * old_alloc)225 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug, 226 PyMemAllocatorEx *old_alloc) 227 { 228 if (old_alloc != NULL) { 229 PyMem_GetAllocator(domain, old_alloc); 230 } 231 232 233 PyMemAllocatorEx new_alloc; 234 switch(domain) 235 { 236 case PYMEM_DOMAIN_RAW: 237 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC; 238 break; 239 case PYMEM_DOMAIN_MEM: 240 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC; 241 break; 242 case PYMEM_DOMAIN_OBJ: 243 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC; 244 break; 245 default: 246 /* unknown domain */ 247 return -1; 248 } 249 PyMem_SetAllocator(domain, &new_alloc); 250 if (debug) { 251 _PyMem_SetupDebugHooksDomain(domain); 252 } 253 return 0; 254 } 255 256 257 int _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * old_alloc)258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain, 259 PyMemAllocatorEx *old_alloc) 260 { 261 #ifdef Py_DEBUG 262 const int debug = 1; 263 #else 264 const int debug = 0; 265 #endif 266 return pymem_set_default_allocator(domain, debug, old_alloc); 267 } 268 269 270 int _PyMem_GetAllocatorName(const char * name,PyMemAllocatorName * allocator)271 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator) 272 { 273 if (name == NULL || *name == '\0') { 274 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line 275 nameions): use default memory allocators */ 276 *allocator = PYMEM_ALLOCATOR_DEFAULT; 277 } 278 else if (strcmp(name, "default") == 0) { 279 *allocator = PYMEM_ALLOCATOR_DEFAULT; 280 } 281 else if (strcmp(name, "debug") == 0) { 282 *allocator = PYMEM_ALLOCATOR_DEBUG; 283 } 284 #ifdef WITH_PYMALLOC 285 else if (strcmp(name, "pymalloc") == 0) { 286 *allocator = PYMEM_ALLOCATOR_PYMALLOC; 287 } 288 else if (strcmp(name, "pymalloc_debug") == 0) { 289 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG; 290 } 291 #endif 292 else if (strcmp(name, "malloc") == 0) { 293 *allocator = PYMEM_ALLOCATOR_MALLOC; 294 } 295 else if (strcmp(name, "malloc_debug") == 0) { 296 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG; 297 } 298 else { 299 /* unknown allocator */ 300 return -1; 301 } 302 return 0; 303 } 304 305 306 int _PyMem_SetupAllocators(PyMemAllocatorName allocator)307 _PyMem_SetupAllocators(PyMemAllocatorName allocator) 308 { 309 switch (allocator) { 310 case PYMEM_ALLOCATOR_NOT_SET: 311 /* do nothing */ 312 break; 313 314 case PYMEM_ALLOCATOR_DEFAULT: 315 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL); 316 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL); 317 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL); 318 break; 319 320 case PYMEM_ALLOCATOR_DEBUG: 321 (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL); 322 (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL); 323 (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL); 324 break; 325 326 #ifdef WITH_PYMALLOC 327 case PYMEM_ALLOCATOR_PYMALLOC: 328 case PYMEM_ALLOCATOR_PYMALLOC_DEBUG: 329 { 330 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC; 331 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc); 332 333 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC; 334 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc); 335 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc); 336 337 if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) { 338 PyMem_SetupDebugHooks(); 339 } 340 break; 341 } 342 #endif 343 344 case PYMEM_ALLOCATOR_MALLOC: 345 case PYMEM_ALLOCATOR_MALLOC_DEBUG: 346 { 347 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC; 348 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc); 349 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc); 350 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc); 351 352 if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) { 353 PyMem_SetupDebugHooks(); 354 } 355 break; 356 } 357 358 default: 359 /* unknown allocator */ 360 return -1; 361 } 362 return 0; 363 } 364 365 366 static int pymemallocator_eq(PyMemAllocatorEx * a,PyMemAllocatorEx * b)367 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b) 368 { 369 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0); 370 } 371 372 373 const char* _PyMem_GetCurrentAllocatorName(void)374 _PyMem_GetCurrentAllocatorName(void) 375 { 376 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC; 377 #ifdef WITH_PYMALLOC 378 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC; 379 #endif 380 381 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) && 382 pymemallocator_eq(&_PyMem, &malloc_alloc) && 383 pymemallocator_eq(&_PyObject, &malloc_alloc)) 384 { 385 return "malloc"; 386 } 387 #ifdef WITH_PYMALLOC 388 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) && 389 pymemallocator_eq(&_PyMem, &pymalloc) && 390 pymemallocator_eq(&_PyObject, &pymalloc)) 391 { 392 return "pymalloc"; 393 } 394 #endif 395 396 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC; 397 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC; 398 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC; 399 400 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) && 401 pymemallocator_eq(&_PyMem, &dbg_mem) && 402 pymemallocator_eq(&_PyObject, &dbg_obj)) 403 { 404 /* Debug hooks installed */ 405 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) && 406 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) && 407 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc)) 408 { 409 return "malloc_debug"; 410 } 411 #ifdef WITH_PYMALLOC 412 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) && 413 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) && 414 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc)) 415 { 416 return "pymalloc_debug"; 417 } 418 #endif 419 } 420 return NULL; 421 } 422 423 424 #undef MALLOC_ALLOC 425 #undef PYMALLOC_ALLOC 426 #undef PYRAW_ALLOC 427 #undef PYMEM_ALLOC 428 #undef PYOBJ_ALLOC 429 #undef PYDBGRAW_ALLOC 430 #undef PYDBGMEM_ALLOC 431 #undef PYDBGOBJ_ALLOC 432 433 434 static PyObjectArenaAllocator _PyObject_Arena = {NULL, 435 #ifdef MS_WINDOWS 436 _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree 437 #elif defined(ARENAS_USE_MMAP) 438 _PyObject_ArenaMmap, _PyObject_ArenaMunmap 439 #else 440 _PyObject_ArenaMalloc, _PyObject_ArenaFree 441 #endif 442 }; 443 444 #ifdef WITH_PYMALLOC 445 static int _PyMem_DebugEnabled(void)446 _PyMem_DebugEnabled(void) 447 { 448 return (_PyObject.malloc == _PyMem_DebugMalloc); 449 } 450 451 static int _PyMem_PymallocEnabled(void)452 _PyMem_PymallocEnabled(void) 453 { 454 if (_PyMem_DebugEnabled()) { 455 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc); 456 } 457 else { 458 return (_PyObject.malloc == _PyObject_Malloc); 459 } 460 } 461 #endif 462 463 464 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)465 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain) 466 { 467 PyMemAllocatorEx alloc; 468 469 if (domain == PYMEM_DOMAIN_RAW) { 470 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) { 471 return; 472 } 473 474 PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc); 475 alloc.ctx = &_PyMem_Debug.raw; 476 alloc.malloc = _PyMem_DebugRawMalloc; 477 alloc.calloc = _PyMem_DebugRawCalloc; 478 alloc.realloc = _PyMem_DebugRawRealloc; 479 alloc.free = _PyMem_DebugRawFree; 480 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc); 481 } 482 else if (domain == PYMEM_DOMAIN_MEM) { 483 if (_PyMem.malloc == _PyMem_DebugMalloc) { 484 return; 485 } 486 487 PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc); 488 alloc.ctx = &_PyMem_Debug.mem; 489 alloc.malloc = _PyMem_DebugMalloc; 490 alloc.calloc = _PyMem_DebugCalloc; 491 alloc.realloc = _PyMem_DebugRealloc; 492 alloc.free = _PyMem_DebugFree; 493 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc); 494 } 495 else if (domain == PYMEM_DOMAIN_OBJ) { 496 if (_PyObject.malloc == _PyMem_DebugMalloc) { 497 return; 498 } 499 500 PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc); 501 alloc.ctx = &_PyMem_Debug.obj; 502 alloc.malloc = _PyMem_DebugMalloc; 503 alloc.calloc = _PyMem_DebugCalloc; 504 alloc.realloc = _PyMem_DebugRealloc; 505 alloc.free = _PyMem_DebugFree; 506 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc); 507 } 508 } 509 510 511 void PyMem_SetupDebugHooks(void)512 PyMem_SetupDebugHooks(void) 513 { 514 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW); 515 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM); 516 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ); 517 } 518 519 void PyMem_GetAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * allocator)520 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) 521 { 522 switch(domain) 523 { 524 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break; 525 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break; 526 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break; 527 default: 528 /* unknown domain: set all attributes to NULL */ 529 allocator->ctx = NULL; 530 allocator->malloc = NULL; 531 allocator->calloc = NULL; 532 allocator->realloc = NULL; 533 allocator->free = NULL; 534 } 535 } 536 537 void PyMem_SetAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * allocator)538 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) 539 { 540 switch(domain) 541 { 542 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break; 543 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break; 544 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break; 545 /* ignore unknown domain */ 546 } 547 } 548 549 void PyObject_GetArenaAllocator(PyObjectArenaAllocator * allocator)550 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) 551 { 552 *allocator = _PyObject_Arena; 553 } 554 555 void PyObject_SetArenaAllocator(PyObjectArenaAllocator * allocator)556 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) 557 { 558 _PyObject_Arena = *allocator; 559 } 560 561 void * PyMem_RawMalloc(size_t size)562 PyMem_RawMalloc(size_t size) 563 { 564 /* 565 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. 566 * Most python internals blindly use a signed Py_ssize_t to track 567 * things without checking for overflows or negatives. 568 * As size_t is unsigned, checking for size < 0 is not required. 569 */ 570 if (size > (size_t)PY_SSIZE_T_MAX) 571 return NULL; 572 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); 573 } 574 575 void * PyMem_RawCalloc(size_t nelem,size_t elsize)576 PyMem_RawCalloc(size_t nelem, size_t elsize) 577 { 578 /* see PyMem_RawMalloc() */ 579 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) 580 return NULL; 581 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); 582 } 583 584 void* PyMem_RawRealloc(void * ptr,size_t new_size)585 PyMem_RawRealloc(void *ptr, size_t new_size) 586 { 587 /* see PyMem_RawMalloc() */ 588 if (new_size > (size_t)PY_SSIZE_T_MAX) 589 return NULL; 590 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); 591 } 592 PyMem_RawFree(void * ptr)593 void PyMem_RawFree(void *ptr) 594 { 595 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); 596 } 597 598 599 void * PyMem_Malloc(size_t size)600 PyMem_Malloc(size_t size) 601 { 602 /* see PyMem_RawMalloc() */ 603 if (size > (size_t)PY_SSIZE_T_MAX) 604 return NULL; 605 return _PyMem.malloc(_PyMem.ctx, size); 606 } 607 608 void * PyMem_Calloc(size_t nelem,size_t elsize)609 PyMem_Calloc(size_t nelem, size_t elsize) 610 { 611 /* see PyMem_RawMalloc() */ 612 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) 613 return NULL; 614 return _PyMem.calloc(_PyMem.ctx, nelem, elsize); 615 } 616 617 void * PyMem_Realloc(void * ptr,size_t new_size)618 PyMem_Realloc(void *ptr, size_t new_size) 619 { 620 /* see PyMem_RawMalloc() */ 621 if (new_size > (size_t)PY_SSIZE_T_MAX) 622 return NULL; 623 return _PyMem.realloc(_PyMem.ctx, ptr, new_size); 624 } 625 626 void PyMem_Free(void * ptr)627 PyMem_Free(void *ptr) 628 { 629 _PyMem.free(_PyMem.ctx, ptr); 630 } 631 632 633 wchar_t* _PyMem_RawWcsdup(const wchar_t * str)634 _PyMem_RawWcsdup(const wchar_t *str) 635 { 636 assert(str != NULL); 637 638 size_t len = wcslen(str); 639 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { 640 return NULL; 641 } 642 643 size_t size = (len + 1) * sizeof(wchar_t); 644 wchar_t *str2 = PyMem_RawMalloc(size); 645 if (str2 == NULL) { 646 return NULL; 647 } 648 649 memcpy(str2, str, size); 650 return str2; 651 } 652 653 char * _PyMem_RawStrdup(const char * str)654 _PyMem_RawStrdup(const char *str) 655 { 656 assert(str != NULL); 657 size_t size = strlen(str) + 1; 658 char *copy = PyMem_RawMalloc(size); 659 if (copy == NULL) { 660 return NULL; 661 } 662 memcpy(copy, str, size); 663 return copy; 664 } 665 666 char * _PyMem_Strdup(const char * str)667 _PyMem_Strdup(const char *str) 668 { 669 assert(str != NULL); 670 size_t size = strlen(str) + 1; 671 char *copy = PyMem_Malloc(size); 672 if (copy == NULL) { 673 return NULL; 674 } 675 memcpy(copy, str, size); 676 return copy; 677 } 678 679 void * PyObject_Malloc(size_t size)680 PyObject_Malloc(size_t size) 681 { 682 /* see PyMem_RawMalloc() */ 683 if (size > (size_t)PY_SSIZE_T_MAX) 684 return NULL; 685 return _PyObject.malloc(_PyObject.ctx, size); 686 } 687 688 void * PyObject_Calloc(size_t nelem,size_t elsize)689 PyObject_Calloc(size_t nelem, size_t elsize) 690 { 691 /* see PyMem_RawMalloc() */ 692 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) 693 return NULL; 694 return _PyObject.calloc(_PyObject.ctx, nelem, elsize); 695 } 696 697 void * PyObject_Realloc(void * ptr,size_t new_size)698 PyObject_Realloc(void *ptr, size_t new_size) 699 { 700 /* see PyMem_RawMalloc() */ 701 if (new_size > (size_t)PY_SSIZE_T_MAX) 702 return NULL; 703 return _PyObject.realloc(_PyObject.ctx, ptr, new_size); 704 } 705 706 void PyObject_Free(void * ptr)707 PyObject_Free(void *ptr) 708 { 709 _PyObject.free(_PyObject.ctx, ptr); 710 } 711 712 713 /* If we're using GCC, use __builtin_expect() to reduce overhead of 714 the valgrind checks */ 715 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) 716 # define UNLIKELY(value) __builtin_expect((value), 0) 717 # define LIKELY(value) __builtin_expect((value), 1) 718 #else 719 # define UNLIKELY(value) (value) 720 # define LIKELY(value) (value) 721 #endif 722 723 #ifdef WITH_PYMALLOC 724 725 #ifdef WITH_VALGRIND 726 #include <valgrind/valgrind.h> 727 728 /* -1 indicates that we haven't checked that we're running on valgrind yet. */ 729 static int running_on_valgrind = -1; 730 #endif 731 732 733 /* An object allocator for Python. 734 735 Here is an introduction to the layers of the Python memory architecture, 736 showing where the object allocator is actually used (layer +2), It is 737 called for every object allocation and deallocation (PyObject_New/Del), 738 unless the object-specific allocators implement a proprietary allocation 739 scheme (ex.: ints use a simple free list). This is also the place where 740 the cyclic garbage collector operates selectively on container objects. 741 742 743 Object-specific allocators 744 _____ ______ ______ ________ 745 [ int ] [ dict ] [ list ] ... [ string ] Python core | 746 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> | 747 _______________________________ | | 748 [ Python's object allocator ] | | 749 +2 | ####### Object memory ####### | <------ Internal buffers ------> | 750 ______________________________________________________________ | 751 [ Python's raw memory allocator (PyMem_ API) ] | 752 +1 | <----- Python memory (under PyMem manager's control) ------> | | 753 __________________________________________________________________ 754 [ Underlying general-purpose allocator (ex: C library malloc) ] 755 0 | <------ Virtual memory allocated for the python process -------> | 756 757 ========================================================================= 758 _______________________________________________________________________ 759 [ OS-specific Virtual Memory Manager (VMM) ] 760 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | 761 __________________________________ __________________________________ 762 [ ] [ ] 763 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | 764 765 */ 766 /*==========================================================================*/ 767 768 /* A fast, special-purpose memory allocator for small blocks, to be used 769 on top of a general-purpose malloc -- heavily based on previous art. */ 770 771 /* Vladimir Marangozov -- August 2000 */ 772 773 /* 774 * "Memory management is where the rubber meets the road -- if we do the wrong 775 * thing at any level, the results will not be good. And if we don't make the 776 * levels work well together, we are in serious trouble." (1) 777 * 778 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles, 779 * "Dynamic Storage Allocation: A Survey and Critical Review", 780 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995. 781 */ 782 783 /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */ 784 785 /*==========================================================================*/ 786 787 /* 788 * Allocation strategy abstract: 789 * 790 * For small requests, the allocator sub-allocates <Big> blocks of memory. 791 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the 792 * system's allocator. 793 * 794 * Small requests are grouped in size classes spaced 8 bytes apart, due 795 * to the required valid alignment of the returned address. Requests of 796 * a particular size are serviced from memory pools of 4K (one VMM page). 797 * Pools are fragmented on demand and contain free lists of blocks of one 798 * particular size class. In other words, there is a fixed-size allocator 799 * for each size class. Free pools are shared by the different allocators 800 * thus minimizing the space reserved for a particular size class. 801 * 802 * This allocation strategy is a variant of what is known as "simple 803 * segregated storage based on array of free lists". The main drawback of 804 * simple segregated storage is that we might end up with lot of reserved 805 * memory for the different free lists, which degenerate in time. To avoid 806 * this, we partition each free list in pools and we share dynamically the 807 * reserved space between all free lists. This technique is quite efficient 808 * for memory intensive programs which allocate mainly small-sized blocks. 809 * 810 * For small requests we have the following table: 811 * 812 * Request in bytes Size of allocated block Size class idx 813 * ---------------------------------------------------------------- 814 * 1-8 8 0 815 * 9-16 16 1 816 * 17-24 24 2 817 * 25-32 32 3 818 * 33-40 40 4 819 * 41-48 48 5 820 * 49-56 56 6 821 * 57-64 64 7 822 * 65-72 72 8 823 * ... ... ... 824 * 497-504 504 62 825 * 505-512 512 63 826 * 827 * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying 828 * allocator. 829 */ 830 831 /*==========================================================================*/ 832 833 /* 834 * -- Main tunable settings section -- 835 */ 836 837 /* 838 * Alignment of addresses returned to the user. 8-bytes alignment works 839 * on most current architectures (with 32-bit or 64-bit address buses). 840 * The alignment value is also used for grouping small requests in size 841 * classes spaced ALIGNMENT bytes apart. 842 * 843 * You shouldn't change this unless you know what you are doing. 844 */ 845 846 #if SIZEOF_VOID_P > 4 847 #define ALIGNMENT 16 /* must be 2^N */ 848 #define ALIGNMENT_SHIFT 4 849 #else 850 #define ALIGNMENT 8 /* must be 2^N */ 851 #define ALIGNMENT_SHIFT 3 852 #endif 853 854 /* Return the number of bytes in size class I, as a uint. */ 855 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT) 856 857 /* 858 * Max size threshold below which malloc requests are considered to be 859 * small enough in order to use preallocated memory pools. You can tune 860 * this value according to your application behaviour and memory needs. 861 * 862 * Note: a size threshold of 512 guarantees that newly created dictionaries 863 * will be allocated from preallocated memory pools on 64-bit. 864 * 865 * The following invariants must hold: 866 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512 867 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT 868 * 869 * Although not required, for better performance and space efficiency, 870 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2. 871 */ 872 #define SMALL_REQUEST_THRESHOLD 512 873 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) 874 875 /* 876 * The system's VMM page size can be obtained on most unices with a 877 * getpagesize() call or deduced from various header files. To make 878 * things simpler, we assume that it is 4K, which is OK for most systems. 879 * It is probably better if this is the native page size, but it doesn't 880 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page 881 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation 882 * violation fault. 4K is apparently OK for all the platforms that python 883 * currently targets. 884 */ 885 #define SYSTEM_PAGE_SIZE (4 * 1024) 886 #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1) 887 888 /* 889 * Maximum amount of memory managed by the allocator for small requests. 890 */ 891 #ifdef WITH_MEMORY_LIMITS 892 #ifndef SMALL_MEMORY_LIMIT 893 #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */ 894 #endif 895 #endif 896 897 #if !defined(WITH_PYMALLOC_RADIX_TREE) 898 /* Use radix-tree to track arena memory regions, for address_in_range(). 899 * Enable by default since it allows larger pool sizes. Can be disabled 900 * using -DWITH_PYMALLOC_RADIX_TREE=0 */ 901 #define WITH_PYMALLOC_RADIX_TREE 1 902 #endif 903 904 #if SIZEOF_VOID_P > 4 905 /* on 64-bit platforms use larger pools and arenas if we can */ 906 #define USE_LARGE_ARENAS 907 #if WITH_PYMALLOC_RADIX_TREE 908 /* large pools only supported if radix-tree is enabled */ 909 #define USE_LARGE_POOLS 910 #endif 911 #endif 912 913 /* 914 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned 915 * on a page boundary. This is a reserved virtual address space for the 916 * current process (obtained through a malloc()/mmap() call). In no way this 917 * means that the memory arenas will be used entirely. A malloc(<Big>) is 918 * usually an address range reservation for <Big> bytes, unless all pages within 919 * this space are referenced subsequently. So malloc'ing big blocks and not 920 * using them does not mean "wasting memory". It's an addressable range 921 * wastage... 922 * 923 * Arenas are allocated with mmap() on systems supporting anonymous memory 924 * mappings to reduce heap fragmentation. 925 */ 926 #ifdef USE_LARGE_ARENAS 927 #define ARENA_BITS 20 /* 1 MiB */ 928 #else 929 #define ARENA_BITS 18 /* 256 KiB */ 930 #endif 931 #define ARENA_SIZE (1 << ARENA_BITS) 932 #define ARENA_SIZE_MASK (ARENA_SIZE - 1) 933 934 #ifdef WITH_MEMORY_LIMITS 935 #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) 936 #endif 937 938 /* 939 * Size of the pools used for small blocks. Must be a power of 2. 940 */ 941 #ifdef USE_LARGE_POOLS 942 #define POOL_BITS 14 /* 16 KiB */ 943 #else 944 #define POOL_BITS 12 /* 4 KiB */ 945 #endif 946 #define POOL_SIZE (1 << POOL_BITS) 947 #define POOL_SIZE_MASK (POOL_SIZE - 1) 948 949 #if !WITH_PYMALLOC_RADIX_TREE 950 #if POOL_SIZE != SYSTEM_PAGE_SIZE 951 # error "pool size must be equal to system page size" 952 #endif 953 #endif 954 955 #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) 956 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE 957 # error "arena size not an exact multiple of pool size" 958 #endif 959 960 /* 961 * -- End of tunable settings section -- 962 */ 963 964 /*==========================================================================*/ 965 966 /* When you say memory, my mind reasons in terms of (pointers to) blocks */ 967 typedef uint8_t block; 968 969 /* Pool for small blocks. */ 970 struct pool_header { 971 union { block *_padding; 972 uint count; } ref; /* number of allocated blocks */ 973 block *freeblock; /* pool's free list head */ 974 struct pool_header *nextpool; /* next pool of this size class */ 975 struct pool_header *prevpool; /* previous pool "" */ 976 uint arenaindex; /* index into arenas of base adr */ 977 uint szidx; /* block size class index */ 978 uint nextoffset; /* bytes to virgin block */ 979 uint maxnextoffset; /* largest valid nextoffset */ 980 }; 981 982 typedef struct pool_header *poolp; 983 984 /* Record keeping for arenas. */ 985 struct arena_object { 986 /* The address of the arena, as returned by malloc. Note that 0 987 * will never be returned by a successful malloc, and is used 988 * here to mark an arena_object that doesn't correspond to an 989 * allocated arena. 990 */ 991 uintptr_t address; 992 993 /* Pool-aligned pointer to the next pool to be carved off. */ 994 block* pool_address; 995 996 /* The number of available pools in the arena: free pools + never- 997 * allocated pools. 998 */ 999 uint nfreepools; 1000 1001 /* The total number of pools in the arena, whether or not available. */ 1002 uint ntotalpools; 1003 1004 /* Singly-linked list of available pools. */ 1005 struct pool_header* freepools; 1006 1007 /* Whenever this arena_object is not associated with an allocated 1008 * arena, the nextarena member is used to link all unassociated 1009 * arena_objects in the singly-linked `unused_arena_objects` list. 1010 * The prevarena member is unused in this case. 1011 * 1012 * When this arena_object is associated with an allocated arena 1013 * with at least one available pool, both members are used in the 1014 * doubly-linked `usable_arenas` list, which is maintained in 1015 * increasing order of `nfreepools` values. 1016 * 1017 * Else this arena_object is associated with an allocated arena 1018 * all of whose pools are in use. `nextarena` and `prevarena` 1019 * are both meaningless in this case. 1020 */ 1021 struct arena_object* nextarena; 1022 struct arena_object* prevarena; 1023 }; 1024 1025 #define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT) 1026 1027 #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */ 1028 1029 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */ 1030 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE)) 1031 1032 /* Return total number of blocks in pool of size index I, as a uint. */ 1033 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I)) 1034 1035 /*==========================================================================*/ 1036 1037 /* 1038 * Pool table -- headed, circular, doubly-linked lists of partially used pools. 1039 1040 This is involved. For an index i, usedpools[i+i] is the header for a list of 1041 all partially used pools holding small blocks with "size class idx" i. So 1042 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size 1043 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT. 1044 1045 Pools are carved off an arena's highwater mark (an arena_object's pool_address 1046 member) as needed. Once carved off, a pool is in one of three states forever 1047 after: 1048 1049 used == partially used, neither empty nor full 1050 At least one block in the pool is currently allocated, and at least one 1051 block in the pool is not currently allocated (note this implies a pool 1052 has room for at least two blocks). 1053 This is a pool's initial state, as a pool is created only when malloc 1054 needs space. 1055 The pool holds blocks of a fixed size, and is in the circular list headed 1056 at usedpools[i] (see above). It's linked to the other used pools of the 1057 same size class via the pool_header's nextpool and prevpool members. 1058 If all but one block is currently allocated, a malloc can cause a 1059 transition to the full state. If all but one block is not currently 1060 allocated, a free can cause a transition to the empty state. 1061 1062 full == all the pool's blocks are currently allocated 1063 On transition to full, a pool is unlinked from its usedpools[] list. 1064 It's not linked to from anything then anymore, and its nextpool and 1065 prevpool members are meaningless until it transitions back to used. 1066 A free of a block in a full pool puts the pool back in the used state. 1067 Then it's linked in at the front of the appropriate usedpools[] list, so 1068 that the next allocation for its size class will reuse the freed block. 1069 1070 empty == all the pool's blocks are currently available for allocation 1071 On transition to empty, a pool is unlinked from its usedpools[] list, 1072 and linked to the front of its arena_object's singly-linked freepools list, 1073 via its nextpool member. The prevpool member has no meaning in this case. 1074 Empty pools have no inherent size class: the next time a malloc finds 1075 an empty list in usedpools[], it takes the first pool off of freepools. 1076 If the size class needed happens to be the same as the size class the pool 1077 last had, some pool initialization can be skipped. 1078 1079 1080 Block Management 1081 1082 Blocks within pools are again carved out as needed. pool->freeblock points to 1083 the start of a singly-linked list of free blocks within the pool. When a 1084 block is freed, it's inserted at the front of its pool's freeblock list. Note 1085 that the available blocks in a pool are *not* linked all together when a pool 1086 is initialized. Instead only "the first two" (lowest addresses) blocks are 1087 set up, returning the first such block, and setting pool->freeblock to a 1088 one-block list holding the second such block. This is consistent with that 1089 pymalloc strives at all levels (arena, pool, and block) never to touch a piece 1090 of memory until it's actually needed. 1091 1092 So long as a pool is in the used state, we're certain there *is* a block 1093 available for allocating, and pool->freeblock is not NULL. If pool->freeblock 1094 points to the end of the free list before we've carved the entire pool into 1095 blocks, that means we simply haven't yet gotten to one of the higher-address 1096 blocks. The offset from the pool_header to the start of "the next" virgin 1097 block is stored in the pool_header nextoffset member, and the largest value 1098 of nextoffset that makes sense is stored in the maxnextoffset member when a 1099 pool is initialized. All the blocks in a pool have been passed out at least 1100 once when and only when nextoffset > maxnextoffset. 1101 1102 1103 Major obscurity: While the usedpools vector is declared to have poolp 1104 entries, it doesn't really. It really contains two pointers per (conceptual) 1105 poolp entry, the nextpool and prevpool members of a pool_header. The 1106 excruciating initialization code below fools C so that 1107 1108 usedpool[i+i] 1109 1110 "acts like" a genuine poolp, but only so long as you only reference its 1111 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is 1112 compensating for that a pool_header's nextpool and prevpool members 1113 immediately follow a pool_header's first two members: 1114 1115 union { block *_padding; 1116 uint count; } ref; 1117 block *freeblock; 1118 1119 each of which consume sizeof(block *) bytes. So what usedpools[i+i] really 1120 contains is a fudged-up pointer p such that *if* C believes it's a poolp 1121 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed 1122 circular list is empty). 1123 1124 It's unclear why the usedpools setup is so convoluted. It could be to 1125 minimize the amount of cache required to hold this heavily-referenced table 1126 (which only *needs* the two interpool pointer members of a pool_header). OTOH, 1127 referencing code has to remember to "double the index" and doing so isn't 1128 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying 1129 on that C doesn't insert any padding anywhere in a pool_header at or before 1130 the prevpool member. 1131 **************************************************************************** */ 1132 1133 #define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *))) 1134 #define PT(x) PTA(x), PTA(x) 1135 1136 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = { 1137 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7) 1138 #if NB_SMALL_SIZE_CLASSES > 8 1139 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15) 1140 #if NB_SMALL_SIZE_CLASSES > 16 1141 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23) 1142 #if NB_SMALL_SIZE_CLASSES > 24 1143 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31) 1144 #if NB_SMALL_SIZE_CLASSES > 32 1145 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39) 1146 #if NB_SMALL_SIZE_CLASSES > 40 1147 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47) 1148 #if NB_SMALL_SIZE_CLASSES > 48 1149 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55) 1150 #if NB_SMALL_SIZE_CLASSES > 56 1151 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63) 1152 #if NB_SMALL_SIZE_CLASSES > 64 1153 #error "NB_SMALL_SIZE_CLASSES should be less than 64" 1154 #endif /* NB_SMALL_SIZE_CLASSES > 64 */ 1155 #endif /* NB_SMALL_SIZE_CLASSES > 56 */ 1156 #endif /* NB_SMALL_SIZE_CLASSES > 48 */ 1157 #endif /* NB_SMALL_SIZE_CLASSES > 40 */ 1158 #endif /* NB_SMALL_SIZE_CLASSES > 32 */ 1159 #endif /* NB_SMALL_SIZE_CLASSES > 24 */ 1160 #endif /* NB_SMALL_SIZE_CLASSES > 16 */ 1161 #endif /* NB_SMALL_SIZE_CLASSES > 8 */ 1162 }; 1163 1164 /*========================================================================== 1165 Arena management. 1166 1167 `arenas` is a vector of arena_objects. It contains maxarenas entries, some of 1168 which may not be currently used (== they're arena_objects that aren't 1169 currently associated with an allocated arena). Note that arenas proper are 1170 separately malloc'ed. 1171 1172 Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5, 1173 we do try to free() arenas, and use some mild heuristic strategies to increase 1174 the likelihood that arenas eventually can be freed. 1175 1176 unused_arena_objects 1177 1178 This is a singly-linked list of the arena_objects that are currently not 1179 being used (no arena is associated with them). Objects are taken off the 1180 head of the list in new_arena(), and are pushed on the head of the list in 1181 PyObject_Free() when the arena is empty. Key invariant: an arena_object 1182 is on this list if and only if its .address member is 0. 1183 1184 usable_arenas 1185 1186 This is a doubly-linked list of the arena_objects associated with arenas 1187 that have pools available. These pools are either waiting to be reused, 1188 or have not been used before. The list is sorted to have the most- 1189 allocated arenas first (ascending order based on the nfreepools member). 1190 This means that the next allocation will come from a heavily used arena, 1191 which gives the nearly empty arenas a chance to be returned to the system. 1192 In my unscientific tests this dramatically improved the number of arenas 1193 that could be freed. 1194 1195 Note that an arena_object associated with an arena all of whose pools are 1196 currently in use isn't on either list. 1197 1198 Changed in Python 3.8: keeping usable_arenas sorted by number of free pools 1199 used to be done by one-at-a-time linear search when an arena's number of 1200 free pools changed. That could, overall, consume time quadratic in the 1201 number of arenas. That didn't really matter when there were only a few 1202 hundred arenas (typical!), but could be a timing disaster when there were 1203 hundreds of thousands. See bpo-37029. 1204 1205 Now we have a vector of "search fingers" to eliminate the need to search: 1206 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas 1207 with nfp free pools. This is NULL if and only if there is no arena with 1208 nfp free pools in usable_arenas. 1209 */ 1210 1211 /* Array of objects used to track chunks of memory (arenas). */ 1212 static struct arena_object* arenas = NULL; 1213 /* Number of slots currently allocated in the `arenas` vector. */ 1214 static uint maxarenas = 0; 1215 1216 /* The head of the singly-linked, NULL-terminated list of available 1217 * arena_objects. 1218 */ 1219 static struct arena_object* unused_arena_objects = NULL; 1220 1221 /* The head of the doubly-linked, NULL-terminated at each end, list of 1222 * arena_objects associated with arenas that have pools available. 1223 */ 1224 static struct arena_object* usable_arenas = NULL; 1225 1226 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */ 1227 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL }; 1228 1229 /* How many arena_objects do we initially allocate? 1230 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the 1231 * `arenas` vector. 1232 */ 1233 #define INITIAL_ARENA_OBJECTS 16 1234 1235 /* Number of arenas allocated that haven't been free()'d. */ 1236 static size_t narenas_currently_allocated = 0; 1237 1238 /* Total number of times malloc() called to allocate an arena. */ 1239 static size_t ntimes_arena_allocated = 0; 1240 /* High water mark (max value ever seen) for narenas_currently_allocated. */ 1241 static size_t narenas_highwater = 0; 1242 1243 static Py_ssize_t raw_allocated_blocks; 1244 1245 Py_ssize_t _Py_GetAllocatedBlocks(void)1246 _Py_GetAllocatedBlocks(void) 1247 { 1248 Py_ssize_t n = raw_allocated_blocks; 1249 /* add up allocated blocks for used pools */ 1250 for (uint i = 0; i < maxarenas; ++i) { 1251 /* Skip arenas which are not allocated. */ 1252 if (arenas[i].address == 0) { 1253 continue; 1254 } 1255 1256 uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE); 1257 1258 /* visit every pool in the arena */ 1259 assert(base <= (uintptr_t) arenas[i].pool_address); 1260 for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) { 1261 poolp p = (poolp)base; 1262 n += p->ref.count; 1263 } 1264 } 1265 return n; 1266 } 1267 1268 #if WITH_PYMALLOC_RADIX_TREE 1269 /*==========================================================================*/ 1270 /* radix tree for tracking arena usage 1271 1272 bit allocation for keys 1273 1274 64-bit pointers and 2^20 arena size: 1275 16 -> ignored (POINTER_BITS - ADDRESS_BITS) 1276 10 -> MAP_TOP 1277 10 -> MAP_MID 1278 8 -> MAP_BOT 1279 20 -> ideal aligned arena 1280 ---- 1281 64 1282 1283 32-bit pointers and 2^18 arena size: 1284 14 -> MAP_BOT 1285 18 -> ideal aligned arena 1286 ---- 1287 32 1288 1289 */ 1290 1291 #if SIZEOF_VOID_P == 8 1292 1293 /* number of bits in a pointer */ 1294 #define POINTER_BITS 64 1295 1296 /* Current 64-bit processors are limited to 48-bit physical addresses. For 1297 * now, the top 17 bits of addresses will all be equal to bit 2**47. If that 1298 * changes in the future, this must be adjusted upwards. 1299 */ 1300 #define ADDRESS_BITS 48 1301 1302 /* use the top and mid layers of the radix tree */ 1303 #define USE_INTERIOR_NODES 1304 1305 #elif SIZEOF_VOID_P == 4 1306 1307 #define POINTER_BITS 32 1308 #define ADDRESS_BITS 32 1309 1310 #else 1311 1312 /* Currently this code works for 64-bit or 32-bit pointers only. */ 1313 #error "obmalloc radix tree requires 64-bit or 32-bit pointers." 1314 1315 #endif /* SIZEOF_VOID_P */ 1316 1317 /* arena_coverage_t members require this to be true */ 1318 #if ARENA_BITS >= 32 1319 # error "arena size must be < 2^32" 1320 #endif 1321 1322 #ifdef USE_INTERIOR_NODES 1323 /* number of bits used for MAP_TOP and MAP_MID nodes */ 1324 #define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3) 1325 #else 1326 #define INTERIOR_BITS 0 1327 #endif 1328 1329 #define MAP_TOP_BITS INTERIOR_BITS 1330 #define MAP_TOP_LENGTH (1 << MAP_TOP_BITS) 1331 #define MAP_TOP_MASK (MAP_TOP_LENGTH - 1) 1332 1333 #define MAP_MID_BITS INTERIOR_BITS 1334 #define MAP_MID_LENGTH (1 << MAP_MID_BITS) 1335 #define MAP_MID_MASK (MAP_MID_LENGTH - 1) 1336 1337 #define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS) 1338 #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS) 1339 #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1) 1340 1341 #define MAP_BOT_SHIFT ARENA_BITS 1342 #define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT) 1343 #define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT) 1344 1345 #define AS_UINT(p) ((uintptr_t)(p)) 1346 #define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK) 1347 #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK) 1348 #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK) 1349 1350 #if ADDRESS_BITS > POINTER_BITS 1351 /* Return non-physical address bits of a pointer. Those bits should be same 1352 * for all valid pointers if ADDRESS_BITS set correctly. Linux has support for 1353 * 57-bit address space (Intel 5-level paging) but will not currently give 1354 * those addresses to user space. 1355 */ 1356 #define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS) 1357 #else 1358 #define HIGH_BITS(p) 0 1359 #endif 1360 1361 1362 /* This is the leaf of the radix tree. See arena_map_mark_used() for the 1363 * meaning of these members. */ 1364 typedef struct { 1365 int32_t tail_hi; 1366 int32_t tail_lo; 1367 } arena_coverage_t; 1368 1369 typedef struct arena_map_bot { 1370 /* The members tail_hi and tail_lo are accessed together. So, it 1371 * better to have them as an array of structs, rather than two 1372 * arrays. 1373 */ 1374 arena_coverage_t arenas[MAP_BOT_LENGTH]; 1375 } arena_map_bot_t; 1376 1377 #ifdef USE_INTERIOR_NODES 1378 typedef struct arena_map_mid { 1379 struct arena_map_bot *ptrs[MAP_MID_LENGTH]; 1380 } arena_map_mid_t; 1381 1382 typedef struct arena_map_top { 1383 struct arena_map_mid *ptrs[MAP_TOP_LENGTH]; 1384 } arena_map_top_t; 1385 #endif 1386 1387 /* The root of radix tree. Note that by initializing like this, the memory 1388 * should be in the BSS. The OS will only memory map pages as the MAP_MID 1389 * nodes get used (OS pages are demand loaded as needed). 1390 */ 1391 #ifdef USE_INTERIOR_NODES 1392 static arena_map_top_t arena_map_root; 1393 /* accounting for number of used interior nodes */ 1394 static int arena_map_mid_count; 1395 static int arena_map_bot_count; 1396 #else 1397 static arena_map_bot_t arena_map_root; 1398 #endif 1399 1400 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or 1401 * it cannot be created */ 1402 static arena_map_bot_t * arena_map_get(block * p,int create)1403 arena_map_get(block *p, int create) 1404 { 1405 #ifdef USE_INTERIOR_NODES 1406 /* sanity check that ADDRESS_BITS is correct */ 1407 assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); 1408 int i1 = MAP_TOP_INDEX(p); 1409 if (arena_map_root.ptrs[i1] == NULL) { 1410 if (!create) { 1411 return NULL; 1412 } 1413 arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t)); 1414 if (n == NULL) { 1415 return NULL; 1416 } 1417 arena_map_root.ptrs[i1] = n; 1418 arena_map_mid_count++; 1419 } 1420 int i2 = MAP_MID_INDEX(p); 1421 if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { 1422 if (!create) { 1423 return NULL; 1424 } 1425 arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t)); 1426 if (n == NULL) { 1427 return NULL; 1428 } 1429 arena_map_root.ptrs[i1]->ptrs[i2] = n; 1430 arena_map_bot_count++; 1431 } 1432 return arena_map_root.ptrs[i1]->ptrs[i2]; 1433 #else 1434 return &arena_map_root; 1435 #endif 1436 } 1437 1438 1439 /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw 1440 * away 24 bits of the address. That reduces the space requirement of 1441 * the tree compared to similar radix tree page-map schemes. In 1442 * exchange for slashing the space requirement, it needs more 1443 * computation to check an address. 1444 * 1445 * Tracking coverage is done by "ideal" arena address. It is easier to 1446 * explain in decimal so let's say that the arena size is 100 bytes. 1447 * Then, ideal addresses are 100, 200, 300, etc. For checking if a 1448 * pointer address is inside an actual arena, we have to check two ideal 1449 * arena addresses. E.g. if pointer is 357, we need to check 200 and 1450 * 300. In the rare case that an arena is aligned in the ideal way 1451 * (e.g. base address of arena is 200) then we only have to check one 1452 * ideal address. 1453 * 1454 * The tree nodes for 200 and 300 both store the address of arena. 1455 * There are two cases: the arena starts at a lower ideal arena and 1456 * extends to this one, or the arena starts in this arena and extends to 1457 * the next ideal arena. The tail_lo and tail_hi members correspond to 1458 * these two cases. 1459 */ 1460 1461 1462 /* mark or unmark addresses covered by arena */ 1463 static int arena_map_mark_used(uintptr_t arena_base,int is_used)1464 arena_map_mark_used(uintptr_t arena_base, int is_used) 1465 { 1466 /* sanity check that ADDRESS_BITS is correct */ 1467 assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); 1468 arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used); 1469 if (n_hi == NULL) { 1470 assert(is_used); /* otherwise node should already exist */ 1471 return 0; /* failed to allocate space for node */ 1472 } 1473 int i3 = MAP_BOT_INDEX((block *)arena_base); 1474 int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK); 1475 if (tail == 0) { 1476 /* is ideal arena address */ 1477 n_hi->arenas[i3].tail_hi = is_used ? -1 : 0; 1478 } 1479 else { 1480 /* arena_base address is not ideal (aligned to arena size) and 1481 * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node 1482 * for the next arena. Note that it might be in different MAP_TOP 1483 * and MAP_MID nodes as well so we need to call arena_map_get() 1484 * again (do the full tree traversal). 1485 */ 1486 n_hi->arenas[i3].tail_hi = is_used ? tail : 0; 1487 uintptr_t arena_base_next = arena_base + ARENA_SIZE; 1488 /* If arena_base is a legit arena address, so is arena_base_next - 1 1489 * (last address in arena). If arena_base_next overflows then it 1490 * must overflow to 0. However, that would mean arena_base was 1491 * "ideal" and we should not be in this case. */ 1492 assert(arena_base < arena_base_next); 1493 arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used); 1494 if (n_lo == NULL) { 1495 assert(is_used); /* otherwise should already exist */ 1496 n_hi->arenas[i3].tail_hi = 0; 1497 return 0; /* failed to allocate space for node */ 1498 } 1499 int i3_next = MAP_BOT_INDEX(arena_base_next); 1500 n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; 1501 } 1502 return 1; 1503 } 1504 1505 /* Return true if 'p' is a pointer inside an obmalloc arena. 1506 * _PyObject_Free() calls this so it needs to be very fast. */ 1507 static int arena_map_is_used(block * p)1508 arena_map_is_used(block *p) 1509 { 1510 arena_map_bot_t *n = arena_map_get(p, 0); 1511 if (n == NULL) { 1512 return 0; 1513 } 1514 int i3 = MAP_BOT_INDEX(p); 1515 /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */ 1516 int32_t hi = n->arenas[i3].tail_hi; 1517 int32_t lo = n->arenas[i3].tail_lo; 1518 int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK); 1519 return (tail < lo) || (tail >= hi && hi != 0); 1520 } 1521 1522 /* end of radix tree logic */ 1523 /*==========================================================================*/ 1524 #endif /* WITH_PYMALLOC_RADIX_TREE */ 1525 1526 1527 /* Allocate a new arena. If we run out of memory, return NULL. Else 1528 * allocate a new arena, and return the address of an arena_object 1529 * describing the new arena. It's expected that the caller will set 1530 * `usable_arenas` to the return value. 1531 */ 1532 static struct arena_object* new_arena(void)1533 new_arena(void) 1534 { 1535 struct arena_object* arenaobj; 1536 uint excess; /* number of bytes above pool alignment */ 1537 void *address; 1538 static int debug_stats = -1; 1539 1540 if (debug_stats == -1) { 1541 const char *opt = Py_GETENV("PYTHONMALLOCSTATS"); 1542 debug_stats = (opt != NULL && *opt != '\0'); 1543 } 1544 if (debug_stats) 1545 _PyObject_DebugMallocStats(stderr); 1546 1547 if (unused_arena_objects == NULL) { 1548 uint i; 1549 uint numarenas; 1550 size_t nbytes; 1551 1552 /* Double the number of arena objects on each allocation. 1553 * Note that it's possible for `numarenas` to overflow. 1554 */ 1555 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; 1556 if (numarenas <= maxarenas) 1557 return NULL; /* overflow */ 1558 #if SIZEOF_SIZE_T <= SIZEOF_INT 1559 if (numarenas > SIZE_MAX / sizeof(*arenas)) 1560 return NULL; /* overflow */ 1561 #endif 1562 nbytes = numarenas * sizeof(*arenas); 1563 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes); 1564 if (arenaobj == NULL) 1565 return NULL; 1566 arenas = arenaobj; 1567 1568 /* We might need to fix pointers that were copied. However, 1569 * new_arena only gets called when all the pages in the 1570 * previous arenas are full. Thus, there are *no* pointers 1571 * into the old array. Thus, we don't have to worry about 1572 * invalid pointers. Just to be sure, some asserts: 1573 */ 1574 assert(usable_arenas == NULL); 1575 assert(unused_arena_objects == NULL); 1576 1577 /* Put the new arenas on the unused_arena_objects list. */ 1578 for (i = maxarenas; i < numarenas; ++i) { 1579 arenas[i].address = 0; /* mark as unassociated */ 1580 arenas[i].nextarena = i < numarenas - 1 ? 1581 &arenas[i+1] : NULL; 1582 } 1583 1584 /* Update globals. */ 1585 unused_arena_objects = &arenas[maxarenas]; 1586 maxarenas = numarenas; 1587 } 1588 1589 /* Take the next available arena object off the head of the list. */ 1590 assert(unused_arena_objects != NULL); 1591 arenaobj = unused_arena_objects; 1592 unused_arena_objects = arenaobj->nextarena; 1593 assert(arenaobj->address == 0); 1594 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); 1595 #if WITH_PYMALLOC_RADIX_TREE 1596 if (address != NULL) { 1597 if (!arena_map_mark_used((uintptr_t)address, 1)) { 1598 /* marking arena in radix tree failed, abort */ 1599 _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE); 1600 address = NULL; 1601 } 1602 } 1603 #endif 1604 if (address == NULL) { 1605 /* The allocation failed: return NULL after putting the 1606 * arenaobj back. 1607 */ 1608 arenaobj->nextarena = unused_arena_objects; 1609 unused_arena_objects = arenaobj; 1610 return NULL; 1611 } 1612 arenaobj->address = (uintptr_t)address; 1613 1614 ++narenas_currently_allocated; 1615 ++ntimes_arena_allocated; 1616 if (narenas_currently_allocated > narenas_highwater) 1617 narenas_highwater = narenas_currently_allocated; 1618 arenaobj->freepools = NULL; 1619 /* pool_address <- first pool-aligned address in the arena 1620 nfreepools <- number of whole pools that fit after alignment */ 1621 arenaobj->pool_address = (block*)arenaobj->address; 1622 arenaobj->nfreepools = MAX_POOLS_IN_ARENA; 1623 excess = (uint)(arenaobj->address & POOL_SIZE_MASK); 1624 if (excess != 0) { 1625 --arenaobj->nfreepools; 1626 arenaobj->pool_address += POOL_SIZE - excess; 1627 } 1628 arenaobj->ntotalpools = arenaobj->nfreepools; 1629 1630 return arenaobj; 1631 } 1632 1633 1634 1635 #if WITH_PYMALLOC_RADIX_TREE 1636 /* Return true if and only if P is an address that was allocated by 1637 pymalloc. When the radix tree is used, 'poolp' is unused. 1638 */ 1639 static bool address_in_range(void * p,poolp pool)1640 address_in_range(void *p, poolp pool) 1641 { 1642 return arena_map_is_used(p); 1643 } 1644 #else 1645 /* 1646 address_in_range(P, POOL) 1647 1648 Return true if and only if P is an address that was allocated by pymalloc. 1649 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P) 1650 (the caller is asked to compute this because the macro expands POOL more than 1651 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a 1652 variable and pass the latter to the macro; because address_in_range is 1653 called on every alloc/realloc/free, micro-efficiency is important here). 1654 1655 Tricky: Let B be the arena base address associated with the pool, B = 1656 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if 1657 1658 B <= P < B + ARENA_SIZE 1659 1660 Subtracting B throughout, this is true iff 1661 1662 0 <= P-B < ARENA_SIZE 1663 1664 By using unsigned arithmetic, the "0 <=" half of the test can be skipped. 1665 1666 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc 1667 before the first arena has been allocated. `arenas` is still NULL in that 1668 case. We're relying on that maxarenas is also 0 in that case, so that 1669 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index 1670 into a NULL arenas. 1671 1672 Details: given P and POOL, the arena_object corresponding to P is AO = 1673 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild 1674 stores, etc), POOL is the correct address of P's pool, AO.address is the 1675 correct base address of the pool's arena, and P must be within ARENA_SIZE of 1676 AO.address. In addition, AO.address is not 0 (no arena can start at address 0 1677 (NULL)). Therefore address_in_range correctly reports that obmalloc 1678 controls P. 1679 1680 Now suppose obmalloc does not control P (e.g., P was obtained via a direct 1681 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything 1682 in this case -- it may even be uninitialized trash. If the trash arenaindex 1683 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't 1684 control P. 1685 1686 Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an 1687 allocated arena, obmalloc controls all the memory in slice AO.address : 1688 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc, 1689 so P doesn't lie in that slice, so the macro correctly reports that P is not 1690 controlled by obmalloc. 1691 1692 Finally, if P is not controlled by obmalloc and AO corresponds to an unused 1693 arena_object (one not currently associated with an allocated arena), 1694 AO.address is 0, and the second test in the macro reduces to: 1695 1696 P < ARENA_SIZE 1697 1698 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes 1699 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part 1700 of the test still passes, and the third clause (AO.address != 0) is necessary 1701 to get the correct result: AO.address is 0 in this case, so the macro 1702 correctly reports that P is not controlled by obmalloc (despite that P lies in 1703 slice AO.address : AO.address + ARENA_SIZE). 1704 1705 Note: The third (AO.address != 0) clause was added in Python 2.5. Before 1706 2.5, arenas were never free()'ed, and an arenaindex < maxarena always 1707 corresponded to a currently-allocated arena, so the "P is not controlled by 1708 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case 1709 was impossible. 1710 1711 Note that the logic is excruciating, and reading up possibly uninitialized 1712 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex) 1713 creates problems for some memory debuggers. The overwhelming advantage is 1714 that this test determines whether an arbitrary address is controlled by 1715 obmalloc in a small constant time, independent of the number of arenas 1716 obmalloc controls. Since this test is needed at every entry point, it's 1717 extremely desirable that it be this fast. 1718 */ 1719 1720 static bool _Py_NO_SANITIZE_ADDRESS 1721 _Py_NO_SANITIZE_THREAD 1722 _Py_NO_SANITIZE_MEMORY address_in_range(void * p,poolp pool)1723 address_in_range(void *p, poolp pool) 1724 { 1725 // Since address_in_range may be reading from memory which was not allocated 1726 // by Python, it is important that pool->arenaindex is read only once, as 1727 // another thread may be concurrently modifying the value without holding 1728 // the GIL. The following dance forces the compiler to read pool->arenaindex 1729 // only once. 1730 uint arenaindex = *((volatile uint *)&pool->arenaindex); 1731 return arenaindex < maxarenas && 1732 (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && 1733 arenas[arenaindex].address != 0; 1734 } 1735 1736 #endif /* !WITH_PYMALLOC_RADIX_TREE */ 1737 1738 /*==========================================================================*/ 1739 1740 // Called when freelist is exhausted. Extend the freelist if there is 1741 // space for a block. Otherwise, remove this pool from usedpools. 1742 static void pymalloc_pool_extend(poolp pool,uint size)1743 pymalloc_pool_extend(poolp pool, uint size) 1744 { 1745 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) { 1746 /* There is room for another block. */ 1747 pool->freeblock = (block*)pool + pool->nextoffset; 1748 pool->nextoffset += INDEX2SIZE(size); 1749 *(block **)(pool->freeblock) = NULL; 1750 return; 1751 } 1752 1753 /* Pool is full, unlink from used pools. */ 1754 poolp next; 1755 next = pool->nextpool; 1756 pool = pool->prevpool; 1757 next->prevpool = pool; 1758 pool->nextpool = next; 1759 } 1760 1761 /* called when pymalloc_alloc can not allocate a block from usedpool. 1762 * This function takes new pool and allocate a block from it. 1763 */ 1764 static void* allocate_from_new_pool(uint size)1765 allocate_from_new_pool(uint size) 1766 { 1767 /* There isn't a pool of the right size class immediately 1768 * available: use a free pool. 1769 */ 1770 if (UNLIKELY(usable_arenas == NULL)) { 1771 /* No arena has a free pool: allocate a new arena. */ 1772 #ifdef WITH_MEMORY_LIMITS 1773 if (narenas_currently_allocated >= MAX_ARENAS) { 1774 return NULL; 1775 } 1776 #endif 1777 usable_arenas = new_arena(); 1778 if (usable_arenas == NULL) { 1779 return NULL; 1780 } 1781 usable_arenas->nextarena = usable_arenas->prevarena = NULL; 1782 assert(nfp2lasta[usable_arenas->nfreepools] == NULL); 1783 nfp2lasta[usable_arenas->nfreepools] = usable_arenas; 1784 } 1785 assert(usable_arenas->address != 0); 1786 1787 /* This arena already had the smallest nfreepools value, so decreasing 1788 * nfreepools doesn't change that, and we don't need to rearrange the 1789 * usable_arenas list. However, if the arena becomes wholly allocated, 1790 * we need to remove its arena_object from usable_arenas. 1791 */ 1792 assert(usable_arenas->nfreepools > 0); 1793 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) { 1794 /* It's the last of this size, so there won't be any. */ 1795 nfp2lasta[usable_arenas->nfreepools] = NULL; 1796 } 1797 /* If any free pools will remain, it will be the new smallest. */ 1798 if (usable_arenas->nfreepools > 1) { 1799 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL); 1800 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas; 1801 } 1802 1803 /* Try to get a cached free pool. */ 1804 poolp pool = usable_arenas->freepools; 1805 if (LIKELY(pool != NULL)) { 1806 /* Unlink from cached pools. */ 1807 usable_arenas->freepools = pool->nextpool; 1808 usable_arenas->nfreepools--; 1809 if (UNLIKELY(usable_arenas->nfreepools == 0)) { 1810 /* Wholly allocated: remove. */ 1811 assert(usable_arenas->freepools == NULL); 1812 assert(usable_arenas->nextarena == NULL || 1813 usable_arenas->nextarena->prevarena == 1814 usable_arenas); 1815 usable_arenas = usable_arenas->nextarena; 1816 if (usable_arenas != NULL) { 1817 usable_arenas->prevarena = NULL; 1818 assert(usable_arenas->address != 0); 1819 } 1820 } 1821 else { 1822 /* nfreepools > 0: it must be that freepools 1823 * isn't NULL, or that we haven't yet carved 1824 * off all the arena's pools for the first 1825 * time. 1826 */ 1827 assert(usable_arenas->freepools != NULL || 1828 usable_arenas->pool_address <= 1829 (block*)usable_arenas->address + 1830 ARENA_SIZE - POOL_SIZE); 1831 } 1832 } 1833 else { 1834 /* Carve off a new pool. */ 1835 assert(usable_arenas->nfreepools > 0); 1836 assert(usable_arenas->freepools == NULL); 1837 pool = (poolp)usable_arenas->pool_address; 1838 assert((block*)pool <= (block*)usable_arenas->address + 1839 ARENA_SIZE - POOL_SIZE); 1840 pool->arenaindex = (uint)(usable_arenas - arenas); 1841 assert(&arenas[pool->arenaindex] == usable_arenas); 1842 pool->szidx = DUMMY_SIZE_IDX; 1843 usable_arenas->pool_address += POOL_SIZE; 1844 --usable_arenas->nfreepools; 1845 1846 if (usable_arenas->nfreepools == 0) { 1847 assert(usable_arenas->nextarena == NULL || 1848 usable_arenas->nextarena->prevarena == 1849 usable_arenas); 1850 /* Unlink the arena: it is completely allocated. */ 1851 usable_arenas = usable_arenas->nextarena; 1852 if (usable_arenas != NULL) { 1853 usable_arenas->prevarena = NULL; 1854 assert(usable_arenas->address != 0); 1855 } 1856 } 1857 } 1858 1859 /* Frontlink to used pools. */ 1860 block *bp; 1861 poolp next = usedpools[size + size]; /* == prev */ 1862 pool->nextpool = next; 1863 pool->prevpool = next; 1864 next->nextpool = pool; 1865 next->prevpool = pool; 1866 pool->ref.count = 1; 1867 if (pool->szidx == size) { 1868 /* Luckily, this pool last contained blocks 1869 * of the same size class, so its header 1870 * and free list are already initialized. 1871 */ 1872 bp = pool->freeblock; 1873 assert(bp != NULL); 1874 pool->freeblock = *(block **)bp; 1875 return bp; 1876 } 1877 /* 1878 * Initialize the pool header, set up the free list to 1879 * contain just the second block, and return the first 1880 * block. 1881 */ 1882 pool->szidx = size; 1883 size = INDEX2SIZE(size); 1884 bp = (block *)pool + POOL_OVERHEAD; 1885 pool->nextoffset = POOL_OVERHEAD + (size << 1); 1886 pool->maxnextoffset = POOL_SIZE - size; 1887 pool->freeblock = bp + size; 1888 *(block **)(pool->freeblock) = NULL; 1889 return bp; 1890 } 1891 1892 /* pymalloc allocator 1893 1894 Return a pointer to newly allocated memory if pymalloc allocated memory. 1895 1896 Return NULL if pymalloc failed to allocate the memory block: on bigger 1897 requests, on error in the code below (as a last chance to serve the request) 1898 or when the max memory limit has been reached. 1899 */ 1900 static inline void* pymalloc_alloc(void * ctx,size_t nbytes)1901 pymalloc_alloc(void *ctx, size_t nbytes) 1902 { 1903 #ifdef WITH_VALGRIND 1904 if (UNLIKELY(running_on_valgrind == -1)) { 1905 running_on_valgrind = RUNNING_ON_VALGRIND; 1906 } 1907 if (UNLIKELY(running_on_valgrind)) { 1908 return NULL; 1909 } 1910 #endif 1911 1912 if (UNLIKELY(nbytes == 0)) { 1913 return NULL; 1914 } 1915 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) { 1916 return NULL; 1917 } 1918 1919 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; 1920 poolp pool = usedpools[size + size]; 1921 block *bp; 1922 1923 if (LIKELY(pool != pool->nextpool)) { 1924 /* 1925 * There is a used pool for this size class. 1926 * Pick up the head block of its free list. 1927 */ 1928 ++pool->ref.count; 1929 bp = pool->freeblock; 1930 assert(bp != NULL); 1931 1932 if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) { 1933 // Reached the end of the free list, try to extend it. 1934 pymalloc_pool_extend(pool, size); 1935 } 1936 } 1937 else { 1938 /* There isn't a pool of the right size class immediately 1939 * available: use a free pool. 1940 */ 1941 bp = allocate_from_new_pool(size); 1942 } 1943 1944 return (void *)bp; 1945 } 1946 1947 1948 static void * _PyObject_Malloc(void * ctx,size_t nbytes)1949 _PyObject_Malloc(void *ctx, size_t nbytes) 1950 { 1951 void* ptr = pymalloc_alloc(ctx, nbytes); 1952 if (LIKELY(ptr != NULL)) { 1953 return ptr; 1954 } 1955 1956 ptr = PyMem_RawMalloc(nbytes); 1957 if (ptr != NULL) { 1958 raw_allocated_blocks++; 1959 } 1960 return ptr; 1961 } 1962 1963 1964 static void * _PyObject_Calloc(void * ctx,size_t nelem,size_t elsize)1965 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize) 1966 { 1967 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize); 1968 size_t nbytes = nelem * elsize; 1969 1970 void* ptr = pymalloc_alloc(ctx, nbytes); 1971 if (LIKELY(ptr != NULL)) { 1972 memset(ptr, 0, nbytes); 1973 return ptr; 1974 } 1975 1976 ptr = PyMem_RawCalloc(nelem, elsize); 1977 if (ptr != NULL) { 1978 raw_allocated_blocks++; 1979 } 1980 return ptr; 1981 } 1982 1983 1984 static void insert_to_usedpool(poolp pool)1985 insert_to_usedpool(poolp pool) 1986 { 1987 assert(pool->ref.count > 0); /* else the pool is empty */ 1988 1989 uint size = pool->szidx; 1990 poolp next = usedpools[size + size]; 1991 poolp prev = next->prevpool; 1992 1993 /* insert pool before next: prev <-> pool <-> next */ 1994 pool->nextpool = next; 1995 pool->prevpool = prev; 1996 next->prevpool = pool; 1997 prev->nextpool = pool; 1998 } 1999 2000 static void insert_to_freepool(poolp pool)2001 insert_to_freepool(poolp pool) 2002 { 2003 poolp next = pool->nextpool; 2004 poolp prev = pool->prevpool; 2005 next->prevpool = prev; 2006 prev->nextpool = next; 2007 2008 /* Link the pool to freepools. This is a singly-linked 2009 * list, and pool->prevpool isn't used there. 2010 */ 2011 struct arena_object *ao = &arenas[pool->arenaindex]; 2012 pool->nextpool = ao->freepools; 2013 ao->freepools = pool; 2014 uint nf = ao->nfreepools; 2015 /* If this is the rightmost arena with this number of free pools, 2016 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there 2017 * are no arenas in usable_arenas with that value. 2018 */ 2019 struct arena_object* lastnf = nfp2lasta[nf]; 2020 assert((nf == 0 && lastnf == NULL) || 2021 (nf > 0 && 2022 lastnf != NULL && 2023 lastnf->nfreepools == nf && 2024 (lastnf->nextarena == NULL || 2025 nf < lastnf->nextarena->nfreepools))); 2026 if (lastnf == ao) { /* it is the rightmost */ 2027 struct arena_object* p = ao->prevarena; 2028 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL; 2029 } 2030 ao->nfreepools = ++nf; 2031 2032 /* All the rest is arena management. We just freed 2033 * a pool, and there are 4 cases for arena mgmt: 2034 * 1. If all the pools are free, return the arena to 2035 * the system free(). Except if this is the last 2036 * arena in the list, keep it to avoid thrashing: 2037 * keeping one wholly free arena in the list avoids 2038 * pathological cases where a simple loop would 2039 * otherwise provoke needing to allocate and free an 2040 * arena on every iteration. See bpo-37257. 2041 * 2. If this is the only free pool in the arena, 2042 * add the arena back to the `usable_arenas` list. 2043 * 3. If the "next" arena has a smaller count of free 2044 * pools, we have to "slide this arena right" to 2045 * restore that usable_arenas is sorted in order of 2046 * nfreepools. 2047 * 4. Else there's nothing more to do. 2048 */ 2049 if (nf == ao->ntotalpools && ao->nextarena != NULL) { 2050 /* Case 1. First unlink ao from usable_arenas. 2051 */ 2052 assert(ao->prevarena == NULL || 2053 ao->prevarena->address != 0); 2054 assert(ao ->nextarena == NULL || 2055 ao->nextarena->address != 0); 2056 2057 /* Fix the pointer in the prevarena, or the 2058 * usable_arenas pointer. 2059 */ 2060 if (ao->prevarena == NULL) { 2061 usable_arenas = ao->nextarena; 2062 assert(usable_arenas == NULL || 2063 usable_arenas->address != 0); 2064 } 2065 else { 2066 assert(ao->prevarena->nextarena == ao); 2067 ao->prevarena->nextarena = 2068 ao->nextarena; 2069 } 2070 /* Fix the pointer in the nextarena. */ 2071 if (ao->nextarena != NULL) { 2072 assert(ao->nextarena->prevarena == ao); 2073 ao->nextarena->prevarena = 2074 ao->prevarena; 2075 } 2076 /* Record that this arena_object slot is 2077 * available to be reused. 2078 */ 2079 ao->nextarena = unused_arena_objects; 2080 unused_arena_objects = ao; 2081 2082 #if WITH_PYMALLOC_RADIX_TREE 2083 /* mark arena region as not under control of obmalloc */ 2084 arena_map_mark_used(ao->address, 0); 2085 #endif 2086 2087 /* Free the entire arena. */ 2088 _PyObject_Arena.free(_PyObject_Arena.ctx, 2089 (void *)ao->address, ARENA_SIZE); 2090 ao->address = 0; /* mark unassociated */ 2091 --narenas_currently_allocated; 2092 2093 return; 2094 } 2095 2096 if (nf == 1) { 2097 /* Case 2. Put ao at the head of 2098 * usable_arenas. Note that because 2099 * ao->nfreepools was 0 before, ao isn't 2100 * currently on the usable_arenas list. 2101 */ 2102 ao->nextarena = usable_arenas; 2103 ao->prevarena = NULL; 2104 if (usable_arenas) 2105 usable_arenas->prevarena = ao; 2106 usable_arenas = ao; 2107 assert(usable_arenas->address != 0); 2108 if (nfp2lasta[1] == NULL) { 2109 nfp2lasta[1] = ao; 2110 } 2111 2112 return; 2113 } 2114 2115 /* If this arena is now out of order, we need to keep 2116 * the list sorted. The list is kept sorted so that 2117 * the "most full" arenas are used first, which allows 2118 * the nearly empty arenas to be completely freed. In 2119 * a few un-scientific tests, it seems like this 2120 * approach allowed a lot more memory to be freed. 2121 */ 2122 /* If this is the only arena with nf, record that. */ 2123 if (nfp2lasta[nf] == NULL) { 2124 nfp2lasta[nf] = ao; 2125 } /* else the rightmost with nf doesn't change */ 2126 /* If this was the rightmost of the old size, it remains in place. */ 2127 if (ao == lastnf) { 2128 /* Case 4. Nothing to do. */ 2129 return; 2130 } 2131 /* If ao were the only arena in the list, the last block would have 2132 * gotten us out. 2133 */ 2134 assert(ao->nextarena != NULL); 2135 2136 /* Case 3: We have to move the arena towards the end of the list, 2137 * because it has more free pools than the arena to its right. It needs 2138 * to move to follow lastnf. 2139 * First unlink ao from usable_arenas. 2140 */ 2141 if (ao->prevarena != NULL) { 2142 /* ao isn't at the head of the list */ 2143 assert(ao->prevarena->nextarena == ao); 2144 ao->prevarena->nextarena = ao->nextarena; 2145 } 2146 else { 2147 /* ao is at the head of the list */ 2148 assert(usable_arenas == ao); 2149 usable_arenas = ao->nextarena; 2150 } 2151 ao->nextarena->prevarena = ao->prevarena; 2152 /* And insert after lastnf. */ 2153 ao->prevarena = lastnf; 2154 ao->nextarena = lastnf->nextarena; 2155 if (ao->nextarena != NULL) { 2156 ao->nextarena->prevarena = ao; 2157 } 2158 lastnf->nextarena = ao; 2159 /* Verify that the swaps worked. */ 2160 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools); 2161 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools); 2162 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao); 2163 assert((usable_arenas == ao && ao->prevarena == NULL) 2164 || ao->prevarena->nextarena == ao); 2165 } 2166 2167 /* Free a memory block allocated by pymalloc_alloc(). 2168 Return 1 if it was freed. 2169 Return 0 if the block was not allocated by pymalloc_alloc(). */ 2170 static inline int pymalloc_free(void * ctx,void * p)2171 pymalloc_free(void *ctx, void *p) 2172 { 2173 assert(p != NULL); 2174 2175 #ifdef WITH_VALGRIND 2176 if (UNLIKELY(running_on_valgrind > 0)) { 2177 return 0; 2178 } 2179 #endif 2180 2181 poolp pool = POOL_ADDR(p); 2182 if (UNLIKELY(!address_in_range(p, pool))) { 2183 return 0; 2184 } 2185 /* We allocated this address. */ 2186 2187 /* Link p to the start of the pool's freeblock list. Since 2188 * the pool had at least the p block outstanding, the pool 2189 * wasn't empty (so it's already in a usedpools[] list, or 2190 * was full and is in no list -- it's not in the freeblocks 2191 * list in any case). 2192 */ 2193 assert(pool->ref.count > 0); /* else it was empty */ 2194 block *lastfree = pool->freeblock; 2195 *(block **)p = lastfree; 2196 pool->freeblock = (block *)p; 2197 pool->ref.count--; 2198 2199 if (UNLIKELY(lastfree == NULL)) { 2200 /* Pool was full, so doesn't currently live in any list: 2201 * link it to the front of the appropriate usedpools[] list. 2202 * This mimics LRU pool usage for new allocations and 2203 * targets optimal filling when several pools contain 2204 * blocks of the same size class. 2205 */ 2206 insert_to_usedpool(pool); 2207 return 1; 2208 } 2209 2210 /* freeblock wasn't NULL, so the pool wasn't full, 2211 * and the pool is in a usedpools[] list. 2212 */ 2213 if (LIKELY(pool->ref.count != 0)) { 2214 /* pool isn't empty: leave it in usedpools */ 2215 return 1; 2216 } 2217 2218 /* Pool is now empty: unlink from usedpools, and 2219 * link to the front of freepools. This ensures that 2220 * previously freed pools will be allocated later 2221 * (being not referenced, they are perhaps paged out). 2222 */ 2223 insert_to_freepool(pool); 2224 return 1; 2225 } 2226 2227 2228 static void _PyObject_Free(void * ctx,void * p)2229 _PyObject_Free(void *ctx, void *p) 2230 { 2231 /* PyObject_Free(NULL) has no effect */ 2232 if (p == NULL) { 2233 return; 2234 } 2235 2236 if (UNLIKELY(!pymalloc_free(ctx, p))) { 2237 /* pymalloc didn't allocate this address */ 2238 PyMem_RawFree(p); 2239 raw_allocated_blocks--; 2240 } 2241 } 2242 2243 2244 /* pymalloc realloc. 2245 2246 If nbytes==0, then as the Python docs promise, we do not treat this like 2247 free(p), and return a non-NULL result. 2248 2249 Return 1 if pymalloc reallocated memory and wrote the new pointer into 2250 newptr_p. 2251 2252 Return 0 if pymalloc didn't allocated p. */ 2253 static int pymalloc_realloc(void * ctx,void ** newptr_p,void * p,size_t nbytes)2254 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes) 2255 { 2256 void *bp; 2257 poolp pool; 2258 size_t size; 2259 2260 assert(p != NULL); 2261 2262 #ifdef WITH_VALGRIND 2263 /* Treat running_on_valgrind == -1 the same as 0 */ 2264 if (UNLIKELY(running_on_valgrind > 0)) { 2265 return 0; 2266 } 2267 #endif 2268 2269 pool = POOL_ADDR(p); 2270 if (!address_in_range(p, pool)) { 2271 /* pymalloc is not managing this block. 2272 2273 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take 2274 over this block. However, if we do, we need to copy the valid data 2275 from the C-managed block to one of our blocks, and there's no 2276 portable way to know how much of the memory space starting at p is 2277 valid. 2278 2279 As bug 1185883 pointed out the hard way, it's possible that the 2280 C-managed block is "at the end" of allocated VM space, so that a 2281 memory fault can occur if we try to copy nbytes bytes starting at p. 2282 Instead we punt: let C continue to manage this block. */ 2283 return 0; 2284 } 2285 2286 /* pymalloc is in charge of this block */ 2287 size = INDEX2SIZE(pool->szidx); 2288 if (nbytes <= size) { 2289 /* The block is staying the same or shrinking. 2290 2291 If it's shrinking, there's a tradeoff: it costs cycles to copy the 2292 block to a smaller size class, but it wastes memory not to copy it. 2293 2294 The compromise here is to copy on shrink only if at least 25% of 2295 size can be shaved off. */ 2296 if (4 * nbytes > 3 * size) { 2297 /* It's the same, or shrinking and new/old > 3/4. */ 2298 *newptr_p = p; 2299 return 1; 2300 } 2301 size = nbytes; 2302 } 2303 2304 bp = _PyObject_Malloc(ctx, nbytes); 2305 if (bp != NULL) { 2306 memcpy(bp, p, size); 2307 _PyObject_Free(ctx, p); 2308 } 2309 *newptr_p = bp; 2310 return 1; 2311 } 2312 2313 2314 static void * _PyObject_Realloc(void * ctx,void * ptr,size_t nbytes)2315 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) 2316 { 2317 void *ptr2; 2318 2319 if (ptr == NULL) { 2320 return _PyObject_Malloc(ctx, nbytes); 2321 } 2322 2323 if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) { 2324 return ptr2; 2325 } 2326 2327 return PyMem_RawRealloc(ptr, nbytes); 2328 } 2329 2330 #else /* ! WITH_PYMALLOC */ 2331 2332 /*==========================================================================*/ 2333 /* pymalloc not enabled: Redirect the entry points to malloc. These will 2334 * only be used by extensions that are compiled with pymalloc enabled. */ 2335 2336 Py_ssize_t _Py_GetAllocatedBlocks(void)2337 _Py_GetAllocatedBlocks(void) 2338 { 2339 return 0; 2340 } 2341 2342 #endif /* WITH_PYMALLOC */ 2343 2344 2345 /*==========================================================================*/ 2346 /* A x-platform debugging allocator. This doesn't manage memory directly, 2347 * it wraps a real allocator, adding extra debugging info to the memory blocks. 2348 */ 2349 2350 /* Uncomment this define to add the "serialno" field */ 2351 /* #define PYMEM_DEBUG_SERIALNO */ 2352 2353 #ifdef PYMEM_DEBUG_SERIALNO 2354 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ 2355 2356 /* serialno is always incremented via calling this routine. The point is 2357 * to supply a single place to set a breakpoint. 2358 */ 2359 static void bumpserialno(void)2360 bumpserialno(void) 2361 { 2362 ++serialno; 2363 } 2364 #endif 2365 2366 #define SST SIZEOF_SIZE_T 2367 2368 #ifdef PYMEM_DEBUG_SERIALNO 2369 # define PYMEM_DEBUG_EXTRA_BYTES 4 * SST 2370 #else 2371 # define PYMEM_DEBUG_EXTRA_BYTES 3 * SST 2372 #endif 2373 2374 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */ 2375 static size_t read_size_t(const void * p)2376 read_size_t(const void *p) 2377 { 2378 const uint8_t *q = (const uint8_t *)p; 2379 size_t result = *q++; 2380 int i; 2381 2382 for (i = SST; --i > 0; ++q) 2383 result = (result << 8) | *q; 2384 return result; 2385 } 2386 2387 /* Write n as a big-endian size_t, MSB at address p, LSB at 2388 * p + sizeof(size_t) - 1. 2389 */ 2390 static void write_size_t(void * p,size_t n)2391 write_size_t(void *p, size_t n) 2392 { 2393 uint8_t *q = (uint8_t *)p + SST - 1; 2394 int i; 2395 2396 for (i = SST; --i >= 0; --q) { 2397 *q = (uint8_t)(n & 0xff); 2398 n >>= 8; 2399 } 2400 } 2401 2402 /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and 2403 fills them with useful stuff, here calling the underlying malloc's result p: 2404 2405 p[0: S] 2406 Number of bytes originally asked for. This is a size_t, big-endian (easier 2407 to read in a memory dump). 2408 p[S] 2409 API ID. See PEP 445. This is a character, but seems undocumented. 2410 p[S+1: 2*S] 2411 Copies of PYMEM_FORBIDDENBYTE. Used to catch under- writes and reads. 2412 p[2*S: 2*S+n] 2413 The requested memory, filled with copies of PYMEM_CLEANBYTE. 2414 Used to catch reference to uninitialized memory. 2415 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc 2416 handled the request itself. 2417 p[2*S+n: 2*S+n+S] 2418 Copies of PYMEM_FORBIDDENBYTE. Used to catch over- writes and reads. 2419 p[2*S+n+S: 2*S+n+2*S] 2420 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc 2421 and _PyMem_DebugRealloc. 2422 This is a big-endian size_t. 2423 If "bad memory" is detected later, the serial number gives an 2424 excellent way to set a breakpoint on the next run, to capture the 2425 instant at which this block was passed out. 2426 2427 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks 2428 for 3 * S extra bytes, and omits the last serialno field. 2429 */ 2430 2431 static void * _PyMem_DebugRawAlloc(int use_calloc,void * ctx,size_t nbytes)2432 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) 2433 { 2434 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2435 uint8_t *p; /* base address of malloc'ed pad block */ 2436 uint8_t *data; /* p + 2*SST == pointer to data bytes */ 2437 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */ 2438 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */ 2439 2440 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) { 2441 /* integer overflow: can't represent total as a Py_ssize_t */ 2442 return NULL; 2443 } 2444 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES; 2445 2446 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN] 2447 ^--- p ^--- data ^--- tail 2448 S: nbytes stored as size_t 2449 I: API identifier (1 byte) 2450 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after) 2451 C: Clean bytes used later to store actual data 2452 N: Serial number stored as size_t 2453 2454 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field 2455 is omitted. */ 2456 2457 if (use_calloc) { 2458 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total); 2459 } 2460 else { 2461 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total); 2462 } 2463 if (p == NULL) { 2464 return NULL; 2465 } 2466 data = p + 2*SST; 2467 2468 #ifdef PYMEM_DEBUG_SERIALNO 2469 bumpserialno(); 2470 #endif 2471 2472 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ 2473 write_size_t(p, nbytes); 2474 p[SST] = (uint8_t)api->api_id; 2475 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1); 2476 2477 if (nbytes > 0 && !use_calloc) { 2478 memset(data, PYMEM_CLEANBYTE, nbytes); 2479 } 2480 2481 /* at tail, write pad (SST bytes) and serialno (SST bytes) */ 2482 tail = data + nbytes; 2483 memset(tail, PYMEM_FORBIDDENBYTE, SST); 2484 #ifdef PYMEM_DEBUG_SERIALNO 2485 write_size_t(tail + SST, serialno); 2486 #endif 2487 2488 return data; 2489 } 2490 2491 static void * _PyMem_DebugRawMalloc(void * ctx,size_t nbytes)2492 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes) 2493 { 2494 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 2495 } 2496 2497 static void * _PyMem_DebugRawCalloc(void * ctx,size_t nelem,size_t elsize)2498 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) 2499 { 2500 size_t nbytes; 2501 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize); 2502 nbytes = nelem * elsize; 2503 return _PyMem_DebugRawAlloc(1, ctx, nbytes); 2504 } 2505 2506 2507 /* The debug free first checks the 2*SST bytes on each end for sanity (in 2508 particular, that the FORBIDDENBYTEs with the api ID are still intact). 2509 Then fills the original bytes with PYMEM_DEADBYTE. 2510 Then calls the underlying free. 2511 */ 2512 static void _PyMem_DebugRawFree(void * ctx,void * p)2513 _PyMem_DebugRawFree(void *ctx, void *p) 2514 { 2515 /* PyMem_Free(NULL) has no effect */ 2516 if (p == NULL) { 2517 return; 2518 } 2519 2520 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2521 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */ 2522 size_t nbytes; 2523 2524 _PyMem_DebugCheckAddress(__func__, api->api_id, p); 2525 nbytes = read_size_t(q); 2526 nbytes += PYMEM_DEBUG_EXTRA_BYTES; 2527 memset(q, PYMEM_DEADBYTE, nbytes); 2528 api->alloc.free(api->alloc.ctx, q); 2529 } 2530 2531 2532 static void * _PyMem_DebugRawRealloc(void * ctx,void * p,size_t nbytes)2533 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) 2534 { 2535 if (p == NULL) { 2536 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 2537 } 2538 2539 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2540 uint8_t *head; /* base address of malloc'ed pad block */ 2541 uint8_t *data; /* pointer to data bytes */ 2542 uint8_t *r; 2543 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */ 2544 size_t total; /* 2 * SST + nbytes + 2 * SST */ 2545 size_t original_nbytes; 2546 #define ERASED_SIZE 64 2547 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */ 2548 2549 _PyMem_DebugCheckAddress(__func__, api->api_id, p); 2550 2551 data = (uint8_t *)p; 2552 head = data - 2*SST; 2553 original_nbytes = read_size_t(head); 2554 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) { 2555 /* integer overflow: can't represent total as a Py_ssize_t */ 2556 return NULL; 2557 } 2558 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES; 2559 2560 tail = data + original_nbytes; 2561 #ifdef PYMEM_DEBUG_SERIALNO 2562 size_t block_serialno = read_size_t(tail + SST); 2563 #endif 2564 /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and 2565 ERASED_SIZE bytes at the end as dead and save the copy of erased bytes. 2566 */ 2567 if (original_nbytes <= sizeof(save)) { 2568 memcpy(save, data, original_nbytes); 2569 memset(data - 2 * SST, PYMEM_DEADBYTE, 2570 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES); 2571 } 2572 else { 2573 memcpy(save, data, ERASED_SIZE); 2574 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST); 2575 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE); 2576 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE, 2577 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST); 2578 } 2579 2580 /* Resize and add decorations. */ 2581 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total); 2582 if (r == NULL) { 2583 /* if realloc() failed: rewrite header and footer which have 2584 just been erased */ 2585 nbytes = original_nbytes; 2586 } 2587 else { 2588 head = r; 2589 #ifdef PYMEM_DEBUG_SERIALNO 2590 bumpserialno(); 2591 block_serialno = serialno; 2592 #endif 2593 } 2594 data = head + 2*SST; 2595 2596 write_size_t(head, nbytes); 2597 head[SST] = (uint8_t)api->api_id; 2598 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1); 2599 2600 tail = data + nbytes; 2601 memset(tail, PYMEM_FORBIDDENBYTE, SST); 2602 #ifdef PYMEM_DEBUG_SERIALNO 2603 write_size_t(tail + SST, block_serialno); 2604 #endif 2605 2606 /* Restore saved bytes. */ 2607 if (original_nbytes <= sizeof(save)) { 2608 memcpy(data, save, Py_MIN(nbytes, original_nbytes)); 2609 } 2610 else { 2611 size_t i = original_nbytes - ERASED_SIZE; 2612 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE)); 2613 if (nbytes > i) { 2614 memcpy(data + i, &save[ERASED_SIZE], 2615 Py_MIN(nbytes - i, ERASED_SIZE)); 2616 } 2617 } 2618 2619 if (r == NULL) { 2620 return NULL; 2621 } 2622 2623 if (nbytes > original_nbytes) { 2624 /* growing: mark new extra memory clean */ 2625 memset(data + original_nbytes, PYMEM_CLEANBYTE, 2626 nbytes - original_nbytes); 2627 } 2628 2629 return data; 2630 } 2631 2632 static inline void _PyMem_DebugCheckGIL(const char * func)2633 _PyMem_DebugCheckGIL(const char *func) 2634 { 2635 if (!PyGILState_Check()) { 2636 _Py_FatalErrorFunc(func, 2637 "Python memory allocator called " 2638 "without holding the GIL"); 2639 } 2640 } 2641 2642 static void * _PyMem_DebugMalloc(void * ctx,size_t nbytes)2643 _PyMem_DebugMalloc(void *ctx, size_t nbytes) 2644 { 2645 _PyMem_DebugCheckGIL(__func__); 2646 return _PyMem_DebugRawMalloc(ctx, nbytes); 2647 } 2648 2649 static void * _PyMem_DebugCalloc(void * ctx,size_t nelem,size_t elsize)2650 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize) 2651 { 2652 _PyMem_DebugCheckGIL(__func__); 2653 return _PyMem_DebugRawCalloc(ctx, nelem, elsize); 2654 } 2655 2656 2657 static void _PyMem_DebugFree(void * ctx,void * ptr)2658 _PyMem_DebugFree(void *ctx, void *ptr) 2659 { 2660 _PyMem_DebugCheckGIL(__func__); 2661 _PyMem_DebugRawFree(ctx, ptr); 2662 } 2663 2664 2665 static void * _PyMem_DebugRealloc(void * ctx,void * ptr,size_t nbytes)2666 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes) 2667 { 2668 _PyMem_DebugCheckGIL(__func__); 2669 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes); 2670 } 2671 2672 /* Check the forbidden bytes on both ends of the memory allocated for p. 2673 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress, 2674 * and call Py_FatalError to kill the program. 2675 * The API id, is also checked. 2676 */ 2677 static void _PyMem_DebugCheckAddress(const char * func,char api,const void * p)2678 _PyMem_DebugCheckAddress(const char *func, char api, const void *p) 2679 { 2680 assert(p != NULL); 2681 2682 const uint8_t *q = (const uint8_t *)p; 2683 size_t nbytes; 2684 const uint8_t *tail; 2685 int i; 2686 char id; 2687 2688 /* Check the API id */ 2689 id = (char)q[-SST]; 2690 if (id != api) { 2691 _PyObject_DebugDumpAddress(p); 2692 _Py_FatalErrorFormat(func, 2693 "bad ID: Allocated using API '%c', " 2694 "verified using API '%c'", 2695 id, api); 2696 } 2697 2698 /* Check the stuff at the start of p first: if there's underwrite 2699 * corruption, the number-of-bytes field may be nuts, and checking 2700 * the tail could lead to a segfault then. 2701 */ 2702 for (i = SST-1; i >= 1; --i) { 2703 if (*(q-i) != PYMEM_FORBIDDENBYTE) { 2704 _PyObject_DebugDumpAddress(p); 2705 _Py_FatalErrorFunc(func, "bad leading pad byte"); 2706 } 2707 } 2708 2709 nbytes = read_size_t(q - 2*SST); 2710 tail = q + nbytes; 2711 for (i = 0; i < SST; ++i) { 2712 if (tail[i] != PYMEM_FORBIDDENBYTE) { 2713 _PyObject_DebugDumpAddress(p); 2714 _Py_FatalErrorFunc(func, "bad trailing pad byte"); 2715 } 2716 } 2717 } 2718 2719 /* Display info to stderr about the memory block at p. */ 2720 static void _PyObject_DebugDumpAddress(const void * p)2721 _PyObject_DebugDumpAddress(const void *p) 2722 { 2723 const uint8_t *q = (const uint8_t *)p; 2724 const uint8_t *tail; 2725 size_t nbytes; 2726 int i; 2727 int ok; 2728 char id; 2729 2730 fprintf(stderr, "Debug memory block at address p=%p:", p); 2731 if (p == NULL) { 2732 fprintf(stderr, "\n"); 2733 return; 2734 } 2735 id = (char)q[-SST]; 2736 fprintf(stderr, " API '%c'\n", id); 2737 2738 nbytes = read_size_t(q - 2*SST); 2739 fprintf(stderr, " %zu bytes originally requested\n", nbytes); 2740 2741 /* In case this is nuts, check the leading pad bytes first. */ 2742 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1); 2743 ok = 1; 2744 for (i = 1; i <= SST-1; ++i) { 2745 if (*(q-i) != PYMEM_FORBIDDENBYTE) { 2746 ok = 0; 2747 break; 2748 } 2749 } 2750 if (ok) 2751 fputs("FORBIDDENBYTE, as expected.\n", stderr); 2752 else { 2753 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", 2754 PYMEM_FORBIDDENBYTE); 2755 for (i = SST-1; i >= 1; --i) { 2756 const uint8_t byte = *(q-i); 2757 fprintf(stderr, " at p-%d: 0x%02x", i, byte); 2758 if (byte != PYMEM_FORBIDDENBYTE) 2759 fputs(" *** OUCH", stderr); 2760 fputc('\n', stderr); 2761 } 2762 2763 fputs(" Because memory is corrupted at the start, the " 2764 "count of bytes requested\n" 2765 " may be bogus, and checking the trailing pad " 2766 "bytes may segfault.\n", stderr); 2767 } 2768 2769 tail = q + nbytes; 2770 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail); 2771 ok = 1; 2772 for (i = 0; i < SST; ++i) { 2773 if (tail[i] != PYMEM_FORBIDDENBYTE) { 2774 ok = 0; 2775 break; 2776 } 2777 } 2778 if (ok) 2779 fputs("FORBIDDENBYTE, as expected.\n", stderr); 2780 else { 2781 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", 2782 PYMEM_FORBIDDENBYTE); 2783 for (i = 0; i < SST; ++i) { 2784 const uint8_t byte = tail[i]; 2785 fprintf(stderr, " at tail+%d: 0x%02x", 2786 i, byte); 2787 if (byte != PYMEM_FORBIDDENBYTE) 2788 fputs(" *** OUCH", stderr); 2789 fputc('\n', stderr); 2790 } 2791 } 2792 2793 #ifdef PYMEM_DEBUG_SERIALNO 2794 size_t serial = read_size_t(tail + SST); 2795 fprintf(stderr, 2796 " The block was made by call #%zu to debug malloc/realloc.\n", 2797 serial); 2798 #endif 2799 2800 if (nbytes > 0) { 2801 i = 0; 2802 fputs(" Data at p:", stderr); 2803 /* print up to 8 bytes at the start */ 2804 while (q < tail && i < 8) { 2805 fprintf(stderr, " %02x", *q); 2806 ++i; 2807 ++q; 2808 } 2809 /* and up to 8 at the end */ 2810 if (q < tail) { 2811 if (tail - q > 8) { 2812 fputs(" ...", stderr); 2813 q = tail - 8; 2814 } 2815 while (q < tail) { 2816 fprintf(stderr, " %02x", *q); 2817 ++q; 2818 } 2819 } 2820 fputc('\n', stderr); 2821 } 2822 fputc('\n', stderr); 2823 2824 fflush(stderr); 2825 _PyMem_DumpTraceback(fileno(stderr), p); 2826 } 2827 2828 2829 static size_t printone(FILE * out,const char * msg,size_t value)2830 printone(FILE *out, const char* msg, size_t value) 2831 { 2832 int i, k; 2833 char buf[100]; 2834 size_t origvalue = value; 2835 2836 fputs(msg, out); 2837 for (i = (int)strlen(msg); i < 35; ++i) 2838 fputc(' ', out); 2839 fputc('=', out); 2840 2841 /* Write the value with commas. */ 2842 i = 22; 2843 buf[i--] = '\0'; 2844 buf[i--] = '\n'; 2845 k = 3; 2846 do { 2847 size_t nextvalue = value / 10; 2848 unsigned int digit = (unsigned int)(value - nextvalue * 10); 2849 value = nextvalue; 2850 buf[i--] = (char)(digit + '0'); 2851 --k; 2852 if (k == 0 && value && i >= 0) { 2853 k = 3; 2854 buf[i--] = ','; 2855 } 2856 } while (value && i >= 0); 2857 2858 while (i >= 0) 2859 buf[i--] = ' '; 2860 fputs(buf, out); 2861 2862 return origvalue; 2863 } 2864 2865 void _PyDebugAllocatorStats(FILE * out,const char * block_name,int num_blocks,size_t sizeof_block)2866 _PyDebugAllocatorStats(FILE *out, 2867 const char *block_name, int num_blocks, size_t sizeof_block) 2868 { 2869 char buf1[128]; 2870 char buf2[128]; 2871 PyOS_snprintf(buf1, sizeof(buf1), 2872 "%d %ss * %zd bytes each", 2873 num_blocks, block_name, sizeof_block); 2874 PyOS_snprintf(buf2, sizeof(buf2), 2875 "%48s ", buf1); 2876 (void)printone(out, buf2, num_blocks * sizeof_block); 2877 } 2878 2879 2880 #ifdef WITH_PYMALLOC 2881 2882 #ifdef Py_DEBUG 2883 /* Is target in the list? The list is traversed via the nextpool pointers. 2884 * The list may be NULL-terminated, or circular. Return 1 if target is in 2885 * list, else 0. 2886 */ 2887 static int pool_is_in_list(const poolp target,poolp list)2888 pool_is_in_list(const poolp target, poolp list) 2889 { 2890 poolp origlist = list; 2891 assert(target != NULL); 2892 if (list == NULL) 2893 return 0; 2894 do { 2895 if (target == list) 2896 return 1; 2897 list = list->nextpool; 2898 } while (list != NULL && list != origlist); 2899 return 0; 2900 } 2901 #endif 2902 2903 /* Print summary info to "out" about the state of pymalloc's structures. 2904 * In Py_DEBUG mode, also perform some expensive internal consistency 2905 * checks. 2906 * 2907 * Return 0 if the memory debug hooks are not installed or no statistics was 2908 * written into out, return 1 otherwise. 2909 */ 2910 int _PyObject_DebugMallocStats(FILE * out)2911 _PyObject_DebugMallocStats(FILE *out) 2912 { 2913 if (!_PyMem_PymallocEnabled()) { 2914 return 0; 2915 } 2916 2917 uint i; 2918 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT; 2919 /* # of pools, allocated blocks, and free blocks per class index */ 2920 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2921 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2922 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2923 /* total # of allocated bytes in used and full pools */ 2924 size_t allocated_bytes = 0; 2925 /* total # of available bytes in used pools */ 2926 size_t available_bytes = 0; 2927 /* # of free pools + pools not yet carved out of current arena */ 2928 uint numfreepools = 0; 2929 /* # of bytes for arena alignment padding */ 2930 size_t arena_alignment = 0; 2931 /* # of bytes in used and full pools used for pool_headers */ 2932 size_t pool_header_bytes = 0; 2933 /* # of bytes in used and full pools wasted due to quantization, 2934 * i.e. the necessarily leftover space at the ends of used and 2935 * full pools. 2936 */ 2937 size_t quantization = 0; 2938 /* # of arenas actually allocated. */ 2939 size_t narenas = 0; 2940 /* running total -- should equal narenas * ARENA_SIZE */ 2941 size_t total; 2942 char buf[128]; 2943 2944 fprintf(out, "Small block threshold = %d, in %u size classes.\n", 2945 SMALL_REQUEST_THRESHOLD, numclasses); 2946 2947 for (i = 0; i < numclasses; ++i) 2948 numpools[i] = numblocks[i] = numfreeblocks[i] = 0; 2949 2950 /* Because full pools aren't linked to from anything, it's easiest 2951 * to march over all the arenas. If we're lucky, most of the memory 2952 * will be living in full pools -- would be a shame to miss them. 2953 */ 2954 for (i = 0; i < maxarenas; ++i) { 2955 uint j; 2956 uintptr_t base = arenas[i].address; 2957 2958 /* Skip arenas which are not allocated. */ 2959 if (arenas[i].address == (uintptr_t)NULL) 2960 continue; 2961 narenas += 1; 2962 2963 numfreepools += arenas[i].nfreepools; 2964 2965 /* round up to pool alignment */ 2966 if (base & (uintptr_t)POOL_SIZE_MASK) { 2967 arena_alignment += POOL_SIZE; 2968 base &= ~(uintptr_t)POOL_SIZE_MASK; 2969 base += POOL_SIZE; 2970 } 2971 2972 /* visit every pool in the arena */ 2973 assert(base <= (uintptr_t) arenas[i].pool_address); 2974 for (j = 0; base < (uintptr_t) arenas[i].pool_address; 2975 ++j, base += POOL_SIZE) { 2976 poolp p = (poolp)base; 2977 const uint sz = p->szidx; 2978 uint freeblocks; 2979 2980 if (p->ref.count == 0) { 2981 /* currently unused */ 2982 #ifdef Py_DEBUG 2983 assert(pool_is_in_list(p, arenas[i].freepools)); 2984 #endif 2985 continue; 2986 } 2987 ++numpools[sz]; 2988 numblocks[sz] += p->ref.count; 2989 freeblocks = NUMBLOCKS(sz) - p->ref.count; 2990 numfreeblocks[sz] += freeblocks; 2991 #ifdef Py_DEBUG 2992 if (freeblocks > 0) 2993 assert(pool_is_in_list(p, usedpools[sz + sz])); 2994 #endif 2995 } 2996 } 2997 assert(narenas == narenas_currently_allocated); 2998 2999 fputc('\n', out); 3000 fputs("class size num pools blocks in use avail blocks\n" 3001 "----- ---- --------- ------------- ------------\n", 3002 out); 3003 3004 for (i = 0; i < numclasses; ++i) { 3005 size_t p = numpools[i]; 3006 size_t b = numblocks[i]; 3007 size_t f = numfreeblocks[i]; 3008 uint size = INDEX2SIZE(i); 3009 if (p == 0) { 3010 assert(b == 0 && f == 0); 3011 continue; 3012 } 3013 fprintf(out, "%5u %6u %11zu %15zu %13zu\n", 3014 i, size, p, b, f); 3015 allocated_bytes += b * size; 3016 available_bytes += f * size; 3017 pool_header_bytes += p * POOL_OVERHEAD; 3018 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size); 3019 } 3020 fputc('\n', out); 3021 #ifdef PYMEM_DEBUG_SERIALNO 3022 if (_PyMem_DebugEnabled()) { 3023 (void)printone(out, "# times object malloc called", serialno); 3024 } 3025 #endif 3026 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated); 3027 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); 3028 (void)printone(out, "# arenas highwater mark", narenas_highwater); 3029 (void)printone(out, "# arenas allocated current", narenas); 3030 3031 PyOS_snprintf(buf, sizeof(buf), 3032 "%zu arenas * %d bytes/arena", 3033 narenas, ARENA_SIZE); 3034 (void)printone(out, buf, narenas * ARENA_SIZE); 3035 3036 fputc('\n', out); 3037 3038 /* Account for what all of those arena bytes are being used for. */ 3039 total = printone(out, "# bytes in allocated blocks", allocated_bytes); 3040 total += printone(out, "# bytes in available blocks", available_bytes); 3041 3042 PyOS_snprintf(buf, sizeof(buf), 3043 "%u unused pools * %d bytes", numfreepools, POOL_SIZE); 3044 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE); 3045 3046 total += printone(out, "# bytes lost to pool headers", pool_header_bytes); 3047 total += printone(out, "# bytes lost to quantization", quantization); 3048 total += printone(out, "# bytes lost to arena alignment", arena_alignment); 3049 (void)printone(out, "Total", total); 3050 assert(narenas * ARENA_SIZE == total); 3051 3052 #if WITH_PYMALLOC_RADIX_TREE 3053 fputs("\narena map counts\n", out); 3054 #ifdef USE_INTERIOR_NODES 3055 (void)printone(out, "# arena map mid nodes", arena_map_mid_count); 3056 (void)printone(out, "# arena map bot nodes", arena_map_bot_count); 3057 fputc('\n', out); 3058 #endif 3059 total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root)); 3060 #ifdef USE_INTERIOR_NODES 3061 total += printone(out, "# bytes lost to arena map mid", 3062 sizeof(arena_map_mid_t) * arena_map_mid_count); 3063 total += printone(out, "# bytes lost to arena map bot", 3064 sizeof(arena_map_bot_t) * arena_map_bot_count); 3065 (void)printone(out, "Total", total); 3066 #endif 3067 #endif 3068 3069 return 1; 3070 } 3071 3072 #endif /* #ifdef WITH_PYMALLOC */ 3073