1 #include "Python.h"
2 #include "pycore_pymem.h"
3 
4 #include <stdbool.h>
5 
6 
7 /* Defined in tracemalloc.c */
8 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
9 
10 
11 /* Python's malloc wrappers (see pymem.h) */
12 
13 #undef  uint
14 #define uint    unsigned int    /* assuming >= 16 bits */
15 
16 /* Forward declaration */
17 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
18 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
19 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
20 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
21 
22 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
23 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
24 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
25 static void _PyMem_DebugFree(void *ctx, void *p);
26 
27 static void _PyObject_DebugDumpAddress(const void *p);
28 static void _PyMem_DebugCheckAddress(char api_id, const void *p);
29 
30 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
31 
32 #if defined(__has_feature)  /* Clang */
33 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
34 #    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
35         __attribute__((no_address_safety_analysis))
36 #  endif
37 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
38 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
39 #  endif
40 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
41 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
42 #  endif
43 #elif defined(__GNUC__)
44 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
45 #    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
46         __attribute__((no_address_safety_analysis))
47 #  endif
48    // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
49    // is provided only since GCC 7.
50 #  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
51 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
52 #  endif
53 #endif
54 
55 #ifndef _Py_NO_ADDRESS_SAFETY_ANALYSIS
56 #  define _Py_NO_ADDRESS_SAFETY_ANALYSIS
57 #endif
58 #ifndef _Py_NO_SANITIZE_THREAD
59 #  define _Py_NO_SANITIZE_THREAD
60 #endif
61 #ifndef _Py_NO_SANITIZE_MEMORY
62 #  define _Py_NO_SANITIZE_MEMORY
63 #endif
64 
65 #ifdef WITH_PYMALLOC
66 
67 #ifdef MS_WINDOWS
68 #  include <windows.h>
69 #elif defined(HAVE_MMAP)
70 #  include <sys/mman.h>
71 #  ifdef MAP_ANONYMOUS
72 #    define ARENAS_USE_MMAP
73 #  endif
74 #endif
75 
76 /* Forward declaration */
77 static void* _PyObject_Malloc(void *ctx, size_t size);
78 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
79 static void _PyObject_Free(void *ctx, void *p);
80 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
81 #endif
82 
83 
84 /* bpo-35053: Declare tracemalloc configuration here rather than
85    Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic
86    library, whereas _Py_NewReference() requires it. */
87 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
88 
89 
90 static void *
91 _PyMem_RawMalloc(void *ctx, size_t size)
92 {
93     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
94        for malloc(0), which would be treated as an error. Some platforms would
95        return a pointer with no memory behind it, which would break pymalloc.
96        To solve these problems, allocate an extra byte. */
97     if (size == 0)
98         size = 1;
99     return malloc(size);
100 }
101 
102 static void *
103 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
104 {
105     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
106        for calloc(0, 0), which would be treated as an error. Some platforms
107        would return a pointer with no memory behind it, which would break
108        pymalloc.  To solve these problems, allocate an extra byte. */
109     if (nelem == 0 || elsize == 0) {
110         nelem = 1;
111         elsize = 1;
112     }
113     return calloc(nelem, elsize);
114 }
115 
116 static void *
117 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
118 {
119     if (size == 0)
120         size = 1;
121     return realloc(ptr, size);
122 }
123 
124 static void
125 _PyMem_RawFree(void *ctx, void *ptr)
126 {
127     free(ptr);
128 }
129 
130 
131 #ifdef MS_WINDOWS
132 static void *
133 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
134 {
135     return VirtualAlloc(NULL, size,
136                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
137 }
138 
139 static void
140 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
141 {
142     VirtualFree(ptr, 0, MEM_RELEASE);
143 }
144 
145 #elif defined(ARENAS_USE_MMAP)
146 static void *
147 _PyObject_ArenaMmap(void *ctx, size_t size)
148 {
149     void *ptr;
150     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
151                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
152     if (ptr == MAP_FAILED)
153         return NULL;
154     assert(ptr != NULL);
155     return ptr;
156 }
157 
158 static void
159 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
160 {
161     munmap(ptr, size);
162 }
163 
164 #else
165 static void *
166 _PyObject_ArenaMalloc(void *ctx, size_t size)
167 {
168     return malloc(size);
169 }
170 
171 static void
172 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
173 {
174     free(ptr);
175 }
176 #endif
177 
178 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
179 #ifdef WITH_PYMALLOC
180 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
181 #endif
182 
183 #define PYRAW_ALLOC MALLOC_ALLOC
184 #ifdef WITH_PYMALLOC
185 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
186 #else
187 #  define PYOBJ_ALLOC MALLOC_ALLOC
188 #endif
189 #define PYMEM_ALLOC PYOBJ_ALLOC
190 
191 typedef struct {
192     /* We tag each block with an API ID in order to detect API violations */
193     char api_id;
194     PyMemAllocatorEx alloc;
195 } debug_alloc_api_t;
196 static struct {
197     debug_alloc_api_t raw;
198     debug_alloc_api_t mem;
199     debug_alloc_api_t obj;
200 } _PyMem_Debug = {
201     {'r', PYRAW_ALLOC},
202     {'m', PYMEM_ALLOC},
203     {'o', PYOBJ_ALLOC}
204     };
205 
206 #define PYDBGRAW_ALLOC \
207     {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
208 #define PYDBGMEM_ALLOC \
209     {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
210 #define PYDBGOBJ_ALLOC \
211     {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212 
213 #ifdef Py_DEBUG
214 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
215 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
216 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
217 #else
218 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
219 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
220 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
221 #endif
222 
223 
224 static int
225 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
226                             PyMemAllocatorEx *old_alloc)
227 {
228     if (old_alloc != NULL) {
229         PyMem_GetAllocator(domain, old_alloc);
230     }
231 
232 
233     PyMemAllocatorEx new_alloc;
234     switch(domain)
235     {
236     case PYMEM_DOMAIN_RAW:
237         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
238         break;
239     case PYMEM_DOMAIN_MEM:
240         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
241         break;
242     case PYMEM_DOMAIN_OBJ:
243         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
244         break;
245     default:
246         /* unknown domain */
247         return -1;
248     }
249     PyMem_SetAllocator(domain, &new_alloc);
250     if (debug) {
251         _PyMem_SetupDebugHooksDomain(domain);
252     }
253     return 0;
254 }
255 
256 
257 int
258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259                            PyMemAllocatorEx *old_alloc)
260 {
261 #ifdef Py_DEBUG
262     const int debug = 1;
263 #else
264     const int debug = 0;
265 #endif
266     return pymem_set_default_allocator(domain, debug, old_alloc);
267 }
268 
269 
270 int
271 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
272 {
273     if (name == NULL || *name == '\0') {
274         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
275            options): use default memory allocators */
276         *allocator = PYMEM_ALLOCATOR_DEFAULT;
277     }
278     else if (strcmp(name, "default") == 0) {
279         *allocator = PYMEM_ALLOCATOR_DEFAULT;
280     }
281     else if (strcmp(name, "debug") == 0) {
282         *allocator = PYMEM_ALLOCATOR_DEBUG;
283     }
284 #ifdef WITH_PYMALLOC
285     else if (strcmp(name, "pymalloc") == 0) {
286         *allocator = PYMEM_ALLOCATOR_PYMALLOC;
287     }
288     else if (strcmp(name, "pymalloc_debug") == 0) {
289         *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
290     }
291 #endif
292     else if (strcmp(name, "malloc") == 0) {
293         *allocator = PYMEM_ALLOCATOR_MALLOC;
294     }
295     else if (strcmp(name, "malloc_debug") == 0) {
296         *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
297     }
298     else {
299         /* unknown allocator */
300         return -1;
301     }
302     return 0;
303 }
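/* For illustration: these names come from the PYTHONMALLOC environment
 * variable and are applied by _PyMem_SetupAllocators() below.  For example,
 *
 *     PYTHONMALLOC=malloc_debug python3
 *
 * selects the C malloc() family for all three domains and then installs the
 * debug hooks on top, while "pymalloc" keeps malloc() for the raw domain and
 * uses pymalloc for the mem and obj domains (when WITH_PYMALLOC is defined). */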
304 
305 
306 int
307 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
308 {
309     switch (allocator) {
310     case PYMEM_ALLOCATOR_NOT_SET:
311         /* do nothing */
312         break;
313 
314     case PYMEM_ALLOCATOR_DEFAULT:
315         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
316         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
317         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
318         break;
319 
320     case PYMEM_ALLOCATOR_DEBUG:
321         (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
322         (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
323         (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
324         break;
325 
326 #ifdef WITH_PYMALLOC
327     case PYMEM_ALLOCATOR_PYMALLOC:
328     case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
329     {
330         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
331         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
332 
333         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
334         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
335         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
336 
337         if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
338             PyMem_SetupDebugHooks();
339         }
340         break;
341     }
342 #endif
343 
344     case PYMEM_ALLOCATOR_MALLOC:
345     case PYMEM_ALLOCATOR_MALLOC_DEBUG:
346     {
347         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
348         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
349         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
350         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
351 
352         if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
353             PyMem_SetupDebugHooks();
354         }
355         break;
356     }
357 
358     default:
359         /* unknown allocator */
360         return -1;
361     }
362     return 0;
363 }
364 
365 
366 static int
367 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
368 {
369     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
370 }
371 
372 
373 const char*
374 _PyMem_GetCurrentAllocatorName(void)
375 {
376     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
377 #ifdef WITH_PYMALLOC
378     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
379 #endif
380 
381     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
382         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
383         pymemallocator_eq(&_PyObject, &malloc_alloc))
384     {
385         return "malloc";
386     }
387 #ifdef WITH_PYMALLOC
388     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
389         pymemallocator_eq(&_PyMem, &pymalloc) &&
390         pymemallocator_eq(&_PyObject, &pymalloc))
391     {
392         return "pymalloc";
393     }
394 #endif
395 
396     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
397     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
398     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
399 
400     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
401         pymemallocator_eq(&_PyMem, &dbg_mem) &&
402         pymemallocator_eq(&_PyObject, &dbg_obj))
403     {
404         /* Debug hooks installed */
405         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
406             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
407             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
408         {
409             return "malloc_debug";
410         }
411 #ifdef WITH_PYMALLOC
412         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
413             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
414             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
415         {
416             return "pymalloc_debug";
417         }
418 #endif
419     }
420     return NULL;
421 }
422 
423 
424 #undef MALLOC_ALLOC
425 #undef PYMALLOC_ALLOC
426 #undef PYRAW_ALLOC
427 #undef PYMEM_ALLOC
428 #undef PYOBJ_ALLOC
429 #undef PYDBGRAW_ALLOC
430 #undef PYDBGMEM_ALLOC
431 #undef PYDBGOBJ_ALLOC
432 
433 
434 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
435 #ifdef MS_WINDOWS
436     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
437 #elif defined(ARENAS_USE_MMAP)
438     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
439 #else
440     _PyObject_ArenaMalloc, _PyObject_ArenaFree
441 #endif
442     };
443 
444 #ifdef WITH_PYMALLOC
445 static int
446 _PyMem_DebugEnabled(void)
447 {
448     return (_PyObject.malloc == _PyMem_DebugMalloc);
449 }
450 
451 static int
452 _PyMem_PymallocEnabled(void)
453 {
454     if (_PyMem_DebugEnabled()) {
455         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
456     }
457     else {
458         return (_PyObject.malloc == _PyObject_Malloc);
459     }
460 }
461 #endif
462 
463 
464 static void
465 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
466 {
467     PyMemAllocatorEx alloc;
468 
469     if (domain == PYMEM_DOMAIN_RAW) {
470         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
471             return;
472         }
473 
474         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
475         alloc.ctx = &_PyMem_Debug.raw;
476         alloc.malloc = _PyMem_DebugRawMalloc;
477         alloc.calloc = _PyMem_DebugRawCalloc;
478         alloc.realloc = _PyMem_DebugRawRealloc;
479         alloc.free = _PyMem_DebugRawFree;
480         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
481     }
482     else if (domain == PYMEM_DOMAIN_MEM) {
483         if (_PyMem.malloc == _PyMem_DebugMalloc) {
484             return;
485         }
486 
487         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
488         alloc.ctx = &_PyMem_Debug.mem;
489         alloc.malloc = _PyMem_DebugMalloc;
490         alloc.calloc = _PyMem_DebugCalloc;
491         alloc.realloc = _PyMem_DebugRealloc;
492         alloc.free = _PyMem_DebugFree;
493         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
494     }
495     else if (domain == PYMEM_DOMAIN_OBJ)  {
496         if (_PyObject.malloc == _PyMem_DebugMalloc) {
497             return;
498         }
499 
500         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
501         alloc.ctx = &_PyMem_Debug.obj;
502         alloc.malloc = _PyMem_DebugMalloc;
503         alloc.calloc = _PyMem_DebugCalloc;
504         alloc.realloc = _PyMem_DebugRealloc;
505         alloc.free = _PyMem_DebugFree;
506         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
507     }
508 }
509 
510 
511 void
512 PyMem_SetupDebugHooks(void)
513 {
514     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
515     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
516     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
517 }
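/* A minimal sketch of how an embedder can swap in a custom allocator with the
 * functions defined below (my_malloc/my_calloc/my_realloc/my_free are
 * hypothetical and must match the PyMemAllocatorEx signatures):
 *
 *     PyMemAllocatorEx alloc = {
 *         NULL,                                 // ctx passed back on each call
 *         my_malloc, my_calloc, my_realloc, my_free
 *     };
 *     PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
 *     PyMem_SetupDebugHooks();    // optionally re-install the debug wrappers
 *
 * PyMem_SetupDebugHooks() is safe to call again: _PyMem_SetupDebugHooksDomain()
 * returns early for a domain whose hooks are already installed. */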
518 
519 void
520 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
521 {
522     switch(domain)
523     {
524     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
525     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
526     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
527     default:
528         /* unknown domain: set all attributes to NULL */
529         allocator->ctx = NULL;
530         allocator->malloc = NULL;
531         allocator->calloc = NULL;
532         allocator->realloc = NULL;
533         allocator->free = NULL;
534     }
535 }
536 
537 void
538 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
539 {
540     switch(domain)
541     {
542     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
543     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
544     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
545     /* ignore unknown domain */
546     }
547 }
548 
549 void
550 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
551 {
552     *allocator = _PyObject_Arena;
553 }
554 
555 void
556 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
557 {
558     _PyObject_Arena = *allocator;
559 }
560 
561 void *
562 PyMem_RawMalloc(size_t size)
563 {
564     /*
565      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
566      * Most python internals blindly use a signed Py_ssize_t to track
567      * things without checking for overflows or negatives.
568      * As size_t is unsigned, checking for size < 0 is not required.
569      */
570     if (size > (size_t)PY_SSIZE_T_MAX)
571         return NULL;
572     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
573 }
574 
575 void *
576 PyMem_RawCalloc(size_t nelem, size_t elsize)
577 {
578     /* see PyMem_RawMalloc() */
579     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
580         return NULL;
581     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
582 }
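/* Worked example of the overflow guard above, assuming a 64-bit build where
 * PY_SSIZE_T_MAX is 2**63 - 1:
 *
 *     PyMem_RawCalloc((size_t)1 << 61, 8);
 *
 * The product 2**61 * 8 == 2**64 would wrap a 64-bit size_t to 0, but the
 * test "nelem > PY_SSIZE_T_MAX / elsize" (2**61 > ~2**60) catches it and the
 * call returns NULL, keeping every accepted request within PY_SSIZE_T_MAX. */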
583 
584 void*
585 PyMem_RawRealloc(void *ptr, size_t new_size)
586 {
587     /* see PyMem_RawMalloc() */
588     if (new_size > (size_t)PY_SSIZE_T_MAX)
589         return NULL;
590     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
591 }
592 
593 void PyMem_RawFree(void *ptr)
594 {
595     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
596 }
597 
598 
599 void *
600 PyMem_Malloc(size_t size)
601 {
602     /* see PyMem_RawMalloc() */
603     if (size > (size_t)PY_SSIZE_T_MAX)
604         return NULL;
605     return _PyMem.malloc(_PyMem.ctx, size);
606 }
607 
608 void *
609 PyMem_Calloc(size_t nelem, size_t elsize)
610 {
611     /* see PyMem_RawMalloc() */
612     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
613         return NULL;
614     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
615 }
616 
617 void *
618 PyMem_Realloc(void *ptr, size_t new_size)
619 {
620     /* see PyMem_RawMalloc() */
621     if (new_size > (size_t)PY_SSIZE_T_MAX)
622         return NULL;
623     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
624 }
625 
626 void
627 PyMem_Free(void *ptr)
628 {
629     _PyMem.free(_PyMem.ctx, ptr);
630 }
631 
632 
633 wchar_t*
634 _PyMem_RawWcsdup(const wchar_t *str)
635 {
636     assert(str != NULL);
637 
638     size_t len = wcslen(str);
639     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
640         return NULL;
641     }
642 
643     size_t size = (len + 1) * sizeof(wchar_t);
644     wchar_t *str2 = PyMem_RawMalloc(size);
645     if (str2 == NULL) {
646         return NULL;
647     }
648 
649     memcpy(str2, str, size);
650     return str2;
651 }
652 
653 char *
654 _PyMem_RawStrdup(const char *str)
655 {
656     assert(str != NULL);
657     size_t size = strlen(str) + 1;
658     char *copy = PyMem_RawMalloc(size);
659     if (copy == NULL) {
660         return NULL;
661     }
662     memcpy(copy, str, size);
663     return copy;
664 }
665 
666 char *
667 _PyMem_Strdup(const char *str)
668 {
669     assert(str != NULL);
670     size_t size = strlen(str) + 1;
671     char *copy = PyMem_Malloc(size);
672     if (copy == NULL) {
673         return NULL;
674     }
675     memcpy(copy, str, size);
676     return copy;
677 }
678 
679 void *
680 PyObject_Malloc(size_t size)
681 {
682     /* see PyMem_RawMalloc() */
683     if (size > (size_t)PY_SSIZE_T_MAX)
684         return NULL;
685     return _PyObject.malloc(_PyObject.ctx, size);
686 }
687 
688 void *
689 PyObject_Calloc(size_t nelem, size_t elsize)
690 {
691     /* see PyMem_RawMalloc() */
692     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
693         return NULL;
694     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
695 }
696 
697 void *
698 PyObject_Realloc(void *ptr, size_t new_size)
699 {
700     /* see PyMem_RawMalloc() */
701     if (new_size > (size_t)PY_SSIZE_T_MAX)
702         return NULL;
703     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
704 }
705 
706 void
707 PyObject_Free(void *ptr)
708 {
709     _PyObject.free(_PyObject.ctx, ptr);
710 }
711 
712 
713 #ifdef WITH_PYMALLOC
714 
715 #ifdef WITH_VALGRIND
716 #include <valgrind/valgrind.h>
717 
718 /* If we're using GCC, use __builtin_expect() to reduce overhead of
719    the valgrind checks */
720 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
721 #  define UNLIKELY(value) __builtin_expect((value), 0)
722 #else
723 #  define UNLIKELY(value) (value)
724 #endif
725 
726 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
727 static int running_on_valgrind = -1;
728 #endif
729 
730 
731 /* An object allocator for Python.
732 
733    Here is an introduction to the layers of the Python memory architecture,
734    showing where the object allocator is actually used (layer +2). It is
735    called for every object allocation and deallocation (PyObject_New/Del),
736    unless the object-specific allocators implement a proprietary allocation
737    scheme (ex.: ints use a simple free list). This is also the place where
738    the cyclic garbage collector operates selectively on container objects.
739 
740 
741     Object-specific allocators
742     _____   ______   ______       ________
743    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
744 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
745     _______________________________       |                           |
746    [   Python's object allocator   ]      |                           |
747 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
748     ______________________________________________________________    |
749    [          Python's raw memory allocator (PyMem_ API)          ]   |
750 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
751     __________________________________________________________________
752    [    Underlying general-purpose allocator (ex: C library malloc)   ]
753  0 | <------ Virtual memory allocated for the python process -------> |
754 
755    =========================================================================
756     _______________________________________________________________________
757    [                OS-specific Virtual Memory Manager (VMM)               ]
758 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
759     __________________________________   __________________________________
760    [                                  ] [                                  ]
761 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
762 
763 */
764 /*==========================================================================*/
765 
766 /* A fast, special-purpose memory allocator for small blocks, to be used
767    on top of a general-purpose malloc -- heavily based on previous art. */
768 
769 /* Vladimir Marangozov -- August 2000 */
770 
771 /*
772  * "Memory management is where the rubber meets the road -- if we do the wrong
773  * thing at any level, the results will not be good. And if we don't make the
774  * levels work well together, we are in serious trouble." (1)
775  *
776  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
777  *    "Dynamic Storage Allocation: A Survey and Critical Review",
778  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
779  */
780 
781 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
782 
783 /*==========================================================================*/
784 
785 /*
786  * Allocation strategy abstract:
787  *
788  * For small requests, the allocator sub-allocates <Big> blocks of memory.
789  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
790  * system's allocator.
791  *
792  * Small requests are grouped in size classes spaced 8 bytes apart, due
793  * to the required valid alignment of the returned address. Requests of
794  * a particular size are serviced from memory pools of 4K (one VMM page).
795  * Pools are fragmented on demand and contain free lists of blocks of one
796  * particular size class. In other words, there is a fixed-size allocator
797  * for each size class. Free pools are shared by the different allocators
798  * thus minimizing the space reserved for a particular size class.
799  *
800  * This allocation strategy is a variant of what is known as "simple
801  * segregated storage based on array of free lists". The main drawback of
802  * simple segregated storage is that we might end up with a lot of reserved
803  * memory for the different free lists, which degenerate over time. To avoid
804  * this, we partition each free list in pools and we share dynamically the
805  * reserved space between all free lists. This technique is quite efficient
806  * for memory intensive programs which allocate mainly small-sized blocks.
807  *
808  * For small requests we have the following table:
809  *
810  * Request in bytes     Size of allocated block      Size class idx
811  * ----------------------------------------------------------------
812  *        1-8                     8                       0
813  *        9-16                   16                       1
814  *       17-24                   24                       2
815  *       25-32                   32                       3
816  *       33-40                   40                       4
817  *       41-48                   48                       5
818  *       49-56                   56                       6
819  *       57-64                   64                       7
820  *       65-72                   72                       8
821  *        ...                   ...                     ...
822  *      497-504                 504                      62
823  *      505-512                 512                      63
824  *
825  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
826  *      allocator.
827  */
828 
829 /*==========================================================================*/
830 
831 /*
832  * -- Main tunable settings section --
833  */
834 
835 /*
836  * Alignment of addresses returned to the user. 8-bytes alignment works
837  * on most current architectures (with 32-bit or 64-bit address busses).
838  * The alignment value is also used for grouping small requests in size
839  * classes spaced ALIGNMENT bytes apart.
840  *
841  * You shouldn't change this unless you know what you are doing.
842  */
843 
844 #if SIZEOF_VOID_P > 4
845 #define ALIGNMENT              16               /* must be 2^N */
846 #define ALIGNMENT_SHIFT         4
847 #else
848 #define ALIGNMENT               8               /* must be 2^N */
849 #define ALIGNMENT_SHIFT         3
850 #endif
851 
852 /* Return the number of bytes in size class I, as a uint. */
853 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
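/* Worked example: with the 8-byte ALIGNMENT assumed by the table above, a
 * 20-byte request maps to size class idx (20 - 1) >> ALIGNMENT_SHIFT == 2,
 * i.e. a 24-byte block, and INDEX2SIZE(2) == (2 + 1) << 3 == 24.  On builds
 * where ALIGNMENT is 16 (SIZEOF_VOID_P > 4, see just above), classes are
 * spaced 16 bytes apart, so the same request maps to idx 1 and a 32-byte
 * block. */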
854 
855 /*
856  * Max size threshold below which malloc requests are considered to be
857  * small enough in order to use preallocated memory pools. You can tune
858  * this value according to your application behaviour and memory needs.
859  *
860  * Note: a size threshold of 512 guarantees that newly created dictionaries
861  * will be allocated from preallocated memory pools on 64-bit.
862  *
863  * The following invariants must hold:
864  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
865  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
866  *
867  * Although not required, for better performance and space efficiency,
868  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
869  */
870 #define SMALL_REQUEST_THRESHOLD 512
871 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
872 
873 /*
874  * The system's VMM page size can be obtained on most unices with a
875  * getpagesize() call or deduced from various header files. To make
876  * things simpler, we assume that it is 4K, which is OK for most systems.
877  * It is probably better if this is the native page size, but it doesn't
878  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
879  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
880  * violation fault.  4K is apparently OK for all the platforms that python
881  * currently targets.
882  */
883 #define SYSTEM_PAGE_SIZE        (4 * 1024)
884 #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
885 
886 /*
887  * Maximum amount of memory managed by the allocator for small requests.
888  */
889 #ifdef WITH_MEMORY_LIMITS
890 #ifndef SMALL_MEMORY_LIMIT
891 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
892 #endif
893 #endif
894 
895 /*
896  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
897  * on a page boundary. This is a reserved virtual address space for the
898  * current process (obtained through a malloc()/mmap() call). In no way does
899  * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
900  * usually an address range reservation for <Big> bytes, unless all pages within
901  * this space are referenced subsequently. So malloc'ing big blocks and not
902  * using them does not mean "wasting memory". It's an addressable range
903  * wastage...
904  *
905  * Arenas are allocated with mmap() on systems supporting anonymous memory
906  * mappings to reduce heap fragmentation.
907  */
908 #define ARENA_SIZE              (256 << 10)     /* 256KB */
909 
910 #ifdef WITH_MEMORY_LIMITS
911 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
912 #endif
913 
914 /*
915  * Size of the pools used for small blocks. Should be a power of 2,
916  * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
917  */
918 #define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
919 #define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
920 
921 #define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
922 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
923 #   error "arena size not an exact multiple of pool size"
924 #endif
925 
926 /*
927  * -- End of tunable settings section --
928  */
929 
930 /*==========================================================================*/
931 
932 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
933 typedef uint8_t block;
934 
935 /* Pool for small blocks. */
936 struct pool_header {
937     union { block *_padding;
938             uint count; } ref;          /* number of allocated blocks    */
939     block *freeblock;                   /* pool's free list head         */
940     struct pool_header *nextpool;       /* next pool of this size class  */
941     struct pool_header *prevpool;       /* previous pool       ""        */
942     uint arenaindex;                    /* index into arenas of base adr */
943     uint szidx;                         /* block size class index        */
944     uint nextoffset;                    /* bytes to virgin block         */
945     uint maxnextoffset;                 /* largest valid nextoffset      */
946 };
947 
948 typedef struct pool_header *poolp;
949 
950 /* Record keeping for arenas. */
951 struct arena_object {
952     /* The address of the arena, as returned by malloc.  Note that 0
953      * will never be returned by a successful malloc, and is used
954      * here to mark an arena_object that doesn't correspond to an
955      * allocated arena.
956      */
957     uintptr_t address;
958 
959     /* Pool-aligned pointer to the next pool to be carved off. */
960     block* pool_address;
961 
962     /* The number of available pools in the arena:  free pools + never-
963      * allocated pools.
964      */
965     uint nfreepools;
966 
967     /* The total number of pools in the arena, whether or not available. */
968     uint ntotalpools;
969 
970     /* Singly-linked list of available pools. */
971     struct pool_header* freepools;
972 
973     /* Whenever this arena_object is not associated with an allocated
974      * arena, the nextarena member is used to link all unassociated
975      * arena_objects in the singly-linked `unused_arena_objects` list.
976      * The prevarena member is unused in this case.
977      *
978      * When this arena_object is associated with an allocated arena
979      * with at least one available pool, both members are used in the
980      * doubly-linked `usable_arenas` list, which is maintained in
981      * increasing order of `nfreepools` values.
982      *
983      * Else this arena_object is associated with an allocated arena
984      * all of whose pools are in use.  `nextarena` and `prevarena`
985      * are both meaningless in this case.
986      */
987     struct arena_object* nextarena;
988     struct arena_object* prevarena;
989 };
990 
991 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
992 
993 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
994 
995 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
996 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
997 
998 /* Return total number of blocks in pool of size index I, as a uint. */
999 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
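/* Worked example, assuming a typical 64-bit build where
 * sizeof(struct pool_header) == 48 and ALIGNMENT == 16: POOL_OVERHEAD rounds
 * 48 up to 48 (already a multiple of 16), leaving 4096 - 48 == 4048 payload
 * bytes per pool, so NUMBLOCKS(0) == 4048 / 16 == 253 blocks of the smallest
 * size class. */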
1000 
1001 /*==========================================================================*/
1002 
1003 /*
1004  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1005 
1006 This is involved.  For an index i, usedpools[i+i] is the header for a list of
1007 all partially used pools holding small blocks with "size class idx" i. So
1008 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1009 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1010 
1011 Pools are carved off an arena's highwater mark (an arena_object's pool_address
1012 member) as needed.  Once carved off, a pool is in one of three states forever
1013 after:
1014 
1015 used == partially used, neither empty nor full
1016     At least one block in the pool is currently allocated, and at least one
1017     block in the pool is not currently allocated (note this implies a pool
1018     has room for at least two blocks).
1019     This is a pool's initial state, as a pool is created only when malloc
1020     needs space.
1021     The pool holds blocks of a fixed size, and is in the circular list headed
1022     at usedpools[i] (see above).  It's linked to the other used pools of the
1023     same size class via the pool_header's nextpool and prevpool members.
1024     If all but one block is currently allocated, a malloc can cause a
1025     transition to the full state.  If all but one block is not currently
1026     allocated, a free can cause a transition to the empty state.
1027 
1028 full == all the pool's blocks are currently allocated
1029     On transition to full, a pool is unlinked from its usedpools[] list.
1030     It's not linked to from anything then anymore, and its nextpool and
1031     prevpool members are meaningless until it transitions back to used.
1032     A free of a block in a full pool puts the pool back in the used state.
1033     Then it's linked in at the front of the appropriate usedpools[] list, so
1034     that the next allocation for its size class will reuse the freed block.
1035 
1036 empty == all the pool's blocks are currently available for allocation
1037     On transition to empty, a pool is unlinked from its usedpools[] list,
1038     and linked to the front of its arena_object's singly-linked freepools list,
1039     via its nextpool member.  The prevpool member has no meaning in this case.
1040     Empty pools have no inherent size class:  the next time a malloc finds
1041     an empty list in usedpools[], it takes the first pool off of freepools.
1042     If the size class needed happens to be the same as the size class the pool
1043     last had, some pool initialization can be skipped.
1044 
1045 
1046 Block Management
1047 
1048 Blocks within pools are again carved out as needed.  pool->freeblock points to
1049 the start of a singly-linked list of free blocks within the pool.  When a
1050 block is freed, it's inserted at the front of its pool's freeblock list.  Note
1051 that the available blocks in a pool are *not* linked all together when a pool
1052 is initialized.  Instead only "the first two" (lowest addresses) blocks are
1053 set up, returning the first such block, and setting pool->freeblock to a
1054 one-block list holding the second such block.  This is consistent with the fact
1055 that pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1056 of memory until it's actually needed.
1057 
1058 So long as a pool is in the used state, we're certain there *is* a block
1059 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1060 points to the end of the free list before we've carved the entire pool into
1061 blocks, that means we simply haven't yet gotten to one of the higher-address
1062 blocks.  The offset from the pool_header to the start of "the next" virgin
1063 block is stored in the pool_header nextoffset member, and the largest value
1064 of nextoffset that makes sense is stored in the maxnextoffset member when a
1065 pool is initialized.  All the blocks in a pool have been passed out at least
1066 once when and only when nextoffset > maxnextoffset.
1067 
1068 
1069 Major obscurity:  While the usedpools vector is declared to have poolp
1070 entries, it doesn't really.  It really contains two pointers per (conceptual)
1071 poolp entry, the nextpool and prevpool members of a pool_header.  The
1072 excruciating initialization code below fools C so that
1073 
1074     usedpools[i+i]
1075 
1076 "acts like" a genuine poolp, but only so long as you only reference its
1077 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1078 compensating for that a pool_header's nextpool and prevpool members
1079 immediately follow a pool_header's first two members:
1080 
1081     union { block *_padding;
1082             uint count; } ref;
1083     block *freeblock;
1084 
1085 each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1086 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1087 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1088 circular list is empty).
1089 
1090 It's unclear why the usedpools setup is so convoluted.  It could be to
1091 minimize the amount of cache required to hold this heavily-referenced table
1092 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1093 referencing code has to remember to "double the index" and doing so isn't
1094 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1095 on the fact that C doesn't insert any padding anywhere in a pool_header at or before
1096 the prevpool member.
1097 **************************************************************************** */
1098 
1099 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1100 #define PT(x)   PTA(x), PTA(x)
1101 
1102 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1103     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1104 #if NB_SMALL_SIZE_CLASSES > 8
1105     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1106 #if NB_SMALL_SIZE_CLASSES > 16
1107     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1108 #if NB_SMALL_SIZE_CLASSES > 24
1109     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1110 #if NB_SMALL_SIZE_CLASSES > 32
1111     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1112 #if NB_SMALL_SIZE_CLASSES > 40
1113     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1114 #if NB_SMALL_SIZE_CLASSES > 48
1115     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1116 #if NB_SMALL_SIZE_CLASSES > 56
1117     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1118 #if NB_SMALL_SIZE_CLASSES > 64
1119 #error "NB_SMALL_SIZE_CLASSES should not be more than 64"
1120 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1121 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1122 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1123 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1124 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1125 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1126 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1127 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
1128 };
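/* For illustration, the payoff of the PTA() trick is the cheap "is this size
 * class empty?" test used by pymalloc_alloc() below.  For size class index i:
 *
 *     poolp pool = usedpools[i + i];
 *     if (pool != pool->nextpool) {
 *         // at least one partially used pool of this size class exists
 *     }
 *
 * because an empty header's nextpool and prevpool both point back at the
 * fudged-up header itself. */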
1129 
1130 /*==========================================================================
1131 Arena management.
1132 
1133 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1134 which may not be currently used (== they're arena_objects that aren't
1135 currently associated with an allocated arena).  Note that arenas proper are
1136 separately malloc'ed.
1137 
1138 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1139 we do try to free() arenas, and use some mild heuristic strategies to increase
1140 the likelihood that arenas eventually can be freed.
1141 
1142 unused_arena_objects
1143 
1144     This is a singly-linked list of the arena_objects that are currently not
1145     being used (no arena is associated with them).  Objects are taken off the
1146     head of the list in new_arena(), and are pushed on the head of the list in
1147     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1148     is on this list if and only if its .address member is 0.
1149 
1150 usable_arenas
1151 
1152     This is a doubly-linked list of the arena_objects associated with arenas
1153     that have pools available.  These pools are either waiting to be reused,
1154     or have not been used before.  The list is sorted to have the most-
1155     allocated arenas first (ascending order based on the nfreepools member).
1156     This means that the next allocation will come from a heavily used arena,
1157     which gives the nearly empty arenas a chance to be returned to the system.
1158     In my unscientific tests this dramatically improved the number of arenas
1159     that could be freed.
1160 
1161 Note that an arena_object associated with an arena all of whose pools are
1162 currently in use isn't on either list.
1163 
1164 Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1165 used to be done by one-at-a-time linear search when an arena's number of
1166 free pools changed.  That could, overall, consume time quadratic in the
1167 number of arenas.  That didn't really matter when there were only a few
1168 hundred arenas (typical!), but could be a timing disaster when there were
1169 hundreds of thousands.  See bpo-37029.
1170 
1171 Now we have a vector of "search fingers" to eliminate the need to search:
1172 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1173 with nfp free pools.  This is NULL if and only if there is no arena with
1174 nfp free pools in usable_arenas.
1175 */
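/* Small example of the nfp2lasta invariant described above: if usable_arenas
 * currently holds arenas with nfreepools values 1, 1, 2, 4 (in that ascending
 * order), then nfp2lasta[1] points at the second arena, nfp2lasta[2] at the
 * third, nfp2lasta[4] at the fourth, and nfp2lasta[3] is NULL because no
 * arena has exactly 3 free pools. */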
1176 
1177 /* Array of objects used to track chunks of memory (arenas). */
1178 static struct arena_object* arenas = NULL;
1179 /* Number of slots currently allocated in the `arenas` vector. */
1180 static uint maxarenas = 0;
1181 
1182 /* The head of the singly-linked, NULL-terminated list of available
1183  * arena_objects.
1184  */
1185 static struct arena_object* unused_arena_objects = NULL;
1186 
1187 /* The head of the doubly-linked, NULL-terminated at each end, list of
1188  * arena_objects associated with arenas that have pools available.
1189  */
1190 static struct arena_object* usable_arenas = NULL;
1191 
1192 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1193 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1194 
1195 /* How many arena_objects do we initially allocate?
1196  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1197  * `arenas` vector.
1198  */
1199 #define INITIAL_ARENA_OBJECTS 16
1200 
1201 /* Number of arenas allocated that haven't been free()'d. */
1202 static size_t narenas_currently_allocated = 0;
1203 
1204 /* Total number of times malloc() called to allocate an arena. */
1205 static size_t ntimes_arena_allocated = 0;
1206 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1207 static size_t narenas_highwater = 0;
1208 
1209 static Py_ssize_t _Py_AllocatedBlocks = 0;
1210 
1211 Py_ssize_t
1212 _Py_GetAllocatedBlocks(void)
1213 {
1214     return _Py_AllocatedBlocks;
1215 }
1216 
1217 
1218 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
1219  * allocate a new arena, and return the address of an arena_object
1220  * describing the new arena.  It's expected that the caller will set
1221  * `usable_arenas` to the return value.
1222  */
1223 static struct arena_object*
1224 new_arena(void)
1225 {
1226     struct arena_object* arenaobj;
1227     uint excess;        /* number of bytes above pool alignment */
1228     void *address;
1229     static int debug_stats = -1;
1230 
1231     if (debug_stats == -1) {
1232         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1233         debug_stats = (opt != NULL && *opt != '\0');
1234     }
1235     if (debug_stats)
1236         _PyObject_DebugMallocStats(stderr);
1237 
1238     if (unused_arena_objects == NULL) {
1239         uint i;
1240         uint numarenas;
1241         size_t nbytes;
1242 
1243         /* Double the number of arena objects on each allocation.
1244          * Note that it's possible for `numarenas` to overflow.
1245          */
1246         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1247         if (numarenas <= maxarenas)
1248             return NULL;                /* overflow */
1249 #if SIZEOF_SIZE_T <= SIZEOF_INT
1250         if (numarenas > SIZE_MAX / sizeof(*arenas))
1251             return NULL;                /* overflow */
1252 #endif
1253         nbytes = numarenas * sizeof(*arenas);
1254         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1255         if (arenaobj == NULL)
1256             return NULL;
1257         arenas = arenaobj;
1258 
1259         /* We might need to fix pointers that were copied.  However,
1260          * new_arena only gets called when all the pages in the
1261          * previous arenas are full.  Thus, there are *no* pointers
1262          * into the old array. Thus, we don't have to worry about
1263          * invalid pointers.  Just to be sure, some asserts:
1264          */
1265         assert(usable_arenas == NULL);
1266         assert(unused_arena_objects == NULL);
1267 
1268         /* Put the new arenas on the unused_arena_objects list. */
1269         for (i = maxarenas; i < numarenas; ++i) {
1270             arenas[i].address = 0;              /* mark as unassociated */
1271             arenas[i].nextarena = i < numarenas - 1 ?
1272                                    &arenas[i+1] : NULL;
1273         }
1274 
1275         /* Update globals. */
1276         unused_arena_objects = &arenas[maxarenas];
1277         maxarenas = numarenas;
1278     }
1279 
1280     /* Take the next available arena object off the head of the list. */
1281     assert(unused_arena_objects != NULL);
1282     arenaobj = unused_arena_objects;
1283     unused_arena_objects = arenaobj->nextarena;
1284     assert(arenaobj->address == 0);
1285     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1286     if (address == NULL) {
1287         /* The allocation failed: return NULL after putting the
1288          * arenaobj back.
1289          */
1290         arenaobj->nextarena = unused_arena_objects;
1291         unused_arena_objects = arenaobj;
1292         return NULL;
1293     }
1294     arenaobj->address = (uintptr_t)address;
1295 
1296     ++narenas_currently_allocated;
1297     ++ntimes_arena_allocated;
1298     if (narenas_currently_allocated > narenas_highwater)
1299         narenas_highwater = narenas_currently_allocated;
1300     arenaobj->freepools = NULL;
1301     /* pool_address <- first pool-aligned address in the arena
1302        nfreepools <- number of whole pools that fit after alignment */
1303     arenaobj->pool_address = (block*)arenaobj->address;
1304     arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1305     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1306     if (excess != 0) {
1307         --arenaobj->nfreepools;
1308         arenaobj->pool_address += POOL_SIZE - excess;
1309     }
1310     arenaobj->ntotalpools = arenaobj->nfreepools;
1311 
1312     return arenaobj;
1313 }
1314 
1315 
1316 /*
1317 address_in_range(P, POOL)
1318 
1319 Return true if and only if P is an address that was allocated by pymalloc.
1320 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1321 (the caller is asked to compute this because the macro expands POOL more than
1322 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1323 variable and pass the latter to the macro; because address_in_range is
1324 called on every alloc/realloc/free, micro-efficiency is important here).
1325 
1326 Tricky:  Let B be the arena base address associated with the pool, B =
1327 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1328 
1329     B <= P < B + ARENA_SIZE
1330 
1331 Subtracting B throughout, this is true iff
1332 
1333     0 <= P-B < ARENA_SIZE
1334 
1335 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1336 
1337 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1338 before the first arena has been allocated.  `arenas` is still NULL in that
1339 case.  We're relying on the fact that maxarenas is also 0 in that case, so that
1340 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1341 into a NULL arenas.
1342 
1343 Details:  given P and POOL, the arena_object corresponding to P is AO =
1344 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1345 stores, etc), POOL is the correct address of P's pool, AO.address is the
1346 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1347 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1348 (NULL)).  Therefore address_in_range correctly reports that obmalloc
1349 controls P.
1350 
1351 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1352 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1353 in this case -- it may even be uninitialized trash.  If the trash arenaindex
1354 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1355 control P.
1356 
1357 Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
1358 allocated arena, obmalloc controls all the memory in slice AO.address :
1359 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1360 so P doesn't lie in that slice, so the macro correctly reports that P is not
1361 controlled by obmalloc.
1362 
1363 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1364 arena_object (one not currently associated with an allocated arena),
1365 AO.address is 0, and the second test in the macro reduces to:
1366 
1367     P < ARENA_SIZE
1368 
1369 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1370 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1371 of the test still passes, and the third clause (AO.address != 0) is necessary
1372 to get the correct result:  AO.address is 0 in this case, so the macro
1373 correctly reports that P is not controlled by obmalloc (despite that P lies in
1374 slice AO.address : AO.address + ARENA_SIZE).
1375 
1376 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1377 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1378 corresponded to a currently-allocated arena, so the "P is not controlled by
1379 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1380 was impossible.
1381 
1382 Note that the logic is excruciating, and reading up possibly uninitialized
1383 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1384 creates problems for some memory debuggers.  The overwhelming advantage is
1385 that this test determines whether an arbitrary address is controlled by
1386 obmalloc in a small constant time, independent of the number of arenas
1387 obmalloc controls.  Since this test is needed at every entry point, it's
1388 extremely desirable that it be this fast.
1389 */
1390 
1391 static bool _Py_NO_ADDRESS_SAFETY_ANALYSIS
1392             _Py_NO_SANITIZE_THREAD
1393             _Py_NO_SANITIZE_MEMORY
1394 address_in_range(void *p, poolp pool)
1395 {
1396     // Since address_in_range may be reading from memory which was not allocated
1397     // by Python, it is important that pool->arenaindex is read only once, as
1398     // another thread may be concurrently modifying the value without holding
1399     // the GIL. The following dance forces the compiler to read pool->arenaindex
1400     // only once.
1401     uint arenaindex = *((volatile uint *)&pool->arenaindex);
1402     return arenaindex < maxarenas &&
1403         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1404         arenas[arenaindex].address != 0;
1405 }
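/* As the comment above notes, the caller computes the pool address itself, so
 * the typical pattern at the allocator entry points is:
 *
 *     poolp pool = POOL_ADDR(p);
 *     if (address_in_range(p, pool)) {
 *         // p was handed out by pymalloc
 *     }
 *     else {
 *         // p belongs to the system allocator; fall back to it
 *     }
 */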
1406 
1407 
1408 /*==========================================================================*/
1409 
1410 /* pymalloc allocator
1411 
1412    The basic blocks are ordered by decreasing execution frequency,
1413    which minimizes the number of jumps in the most common cases and
1414    improves branch prediction and instruction scheduling (small
1415    block allocations typically result in a couple of instructions),
1416    unless the optimizer gets too smart and reorders everything.
1417 
1418    Return a pointer to newly allocated memory if pymalloc allocated memory.
1419 
1420    Return NULL if pymalloc failed to allocate the memory block: on bigger
1421    requests, on error in the code below (as a last chance to serve the request)
1422    or when the max memory limit has been reached. */
1423 static void*
1424 pymalloc_alloc(void *ctx, size_t nbytes)
1425 {
1426     block *bp;
1427     poolp pool;
1428     poolp next;
1429     uint size;
1430 
1431 #ifdef WITH_VALGRIND
1432     if (UNLIKELY(running_on_valgrind == -1)) {
1433         running_on_valgrind = RUNNING_ON_VALGRIND;
1434     }
1435     if (UNLIKELY(running_on_valgrind)) {
1436         return NULL;
1437     }
1438 #endif
1439 
1440     if (nbytes == 0) {
1441         return NULL;
1442     }
1443     if (nbytes > SMALL_REQUEST_THRESHOLD) {
1444         return NULL;
1445     }
1446 
1447     /*
1448      * Most frequent paths first
1449      */
1450     size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1451     pool = usedpools[size + size];
1452     if (pool != pool->nextpool) {
1453         /*
1454          * There is a used pool for this size class.
1455          * Pick up the head block of its free list.
1456          */
1457         ++pool->ref.count;
1458         bp = pool->freeblock;
1459         assert(bp != NULL);
1460         if ((pool->freeblock = *(block **)bp) != NULL) {
1461             goto success;
1462         }
1463 
1464         /*
1465          * Reached the end of the free list, try to extend it.
1466          */
1467         if (pool->nextoffset <= pool->maxnextoffset) {
1468             /* There is room for another block. */
1469             pool->freeblock = (block*)pool +
1470                               pool->nextoffset;
1471             pool->nextoffset += INDEX2SIZE(size);
1472             *(block **)(pool->freeblock) = NULL;
1473             goto success;
1474         }
1475 
1476         /* Pool is full, unlink from used pools. */
1477         next = pool->nextpool;
1478         pool = pool->prevpool;
1479         next->prevpool = pool;
1480         pool->nextpool = next;
1481         goto success;
1482     }
1483 
1484     /* There isn't a pool of the right size class immediately
1485      * available:  use a free pool.
1486      */
1487     if (usable_arenas == NULL) {
1488         /* No arena has a free pool:  allocate a new arena. */
1489 #ifdef WITH_MEMORY_LIMITS
1490         if (narenas_currently_allocated >= MAX_ARENAS) {
1491             goto failed;
1492         }
1493 #endif
1494         usable_arenas = new_arena();
1495         if (usable_arenas == NULL) {
1496             goto failed;
1497         }
1498         usable_arenas->nextarena =
1499             usable_arenas->prevarena = NULL;
1500         assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1501         nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1502     }
1503     assert(usable_arenas->address != 0);
1504 
1505     /* This arena already had the smallest nfreepools value, so decreasing
1506      * nfreepools doesn't change that, and we don't need to rearrange the
1507      * usable_arenas list.  However, if the arena becomes wholly allocated,
1508      * we need to remove its arena_object from usable_arenas.
1509      */
1510     assert(usable_arenas->nfreepools > 0);
1511     if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1512         /* It's the last of this size, so there won't be any. */
1513         nfp2lasta[usable_arenas->nfreepools] = NULL;
1514     }
1515     /* If any free pools will remain, it will be the new smallest. */
1516     if (usable_arenas->nfreepools > 1) {
1517         assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1518         nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1519     }
1520 
1521     /* Try to get a cached free pool. */
1522     pool = usable_arenas->freepools;
1523     if (pool != NULL) {
1524         /* Unlink from cached pools. */
1525         usable_arenas->freepools = pool->nextpool;
1526         --usable_arenas->nfreepools;
1527         if (usable_arenas->nfreepools == 0) {
1528             /* Wholly allocated:  remove. */
1529             assert(usable_arenas->freepools == NULL);
1530             assert(usable_arenas->nextarena == NULL ||
1531                    usable_arenas->nextarena->prevarena ==
1532                    usable_arenas);
1533             usable_arenas = usable_arenas->nextarena;
1534             if (usable_arenas != NULL) {
1535                 usable_arenas->prevarena = NULL;
1536                 assert(usable_arenas->address != 0);
1537             }
1538         }
1539         else {
1540             /* nfreepools > 0:  it must be that freepools
1541              * isn't NULL, or that we haven't yet carved
1542              * off all the arena's pools for the first
1543              * time.
1544              */
1545             assert(usable_arenas->freepools != NULL ||
1546                    usable_arenas->pool_address <=
1547                    (block*)usable_arenas->address +
1548                        ARENA_SIZE - POOL_SIZE);
1549         }
1550 
1551     init_pool:
1552         /* Frontlink to used pools. */
1553         next = usedpools[size + size]; /* == prev */
1554         pool->nextpool = next;
1555         pool->prevpool = next;
1556         next->nextpool = pool;
1557         next->prevpool = pool;
1558         pool->ref.count = 1;
1559         if (pool->szidx == size) {
1560             /* Luckily, this pool last contained blocks
1561              * of the same size class, so its header
1562              * and free list are already initialized.
1563              */
1564             bp = pool->freeblock;
1565             assert(bp != NULL);
1566             pool->freeblock = *(block **)bp;
1567             goto success;
1568         }
1569         /*
1570          * Initialize the pool header, set up the free list to
1571          * contain just the second block, and return the first
1572          * block.
1573          */
1574         pool->szidx = size;
1575         size = INDEX2SIZE(size);
1576         bp = (block *)pool + POOL_OVERHEAD;
1577         pool->nextoffset = POOL_OVERHEAD + (size << 1);
1578         pool->maxnextoffset = POOL_SIZE - size;
1579         pool->freeblock = bp + size;
1580         *(block **)(pool->freeblock) = NULL;
1581         goto success;
1582     }
1583 
1584     /* Carve off a new pool. */
1585     assert(usable_arenas->nfreepools > 0);
1586     assert(usable_arenas->freepools == NULL);
1587     pool = (poolp)usable_arenas->pool_address;
1588     assert((block*)pool <= (block*)usable_arenas->address +
1589                              ARENA_SIZE - POOL_SIZE);
1590     pool->arenaindex = (uint)(usable_arenas - arenas);
1591     assert(&arenas[pool->arenaindex] == usable_arenas);
1592     pool->szidx = DUMMY_SIZE_IDX;
1593     usable_arenas->pool_address += POOL_SIZE;
1594     --usable_arenas->nfreepools;
1595 
1596     if (usable_arenas->nfreepools == 0) {
1597         assert(usable_arenas->nextarena == NULL ||
1598                usable_arenas->nextarena->prevarena ==
1599                usable_arenas);
1600         /* Unlink the arena:  it is completely allocated. */
1601         usable_arenas = usable_arenas->nextarena;
1602         if (usable_arenas != NULL) {
1603             usable_arenas->prevarena = NULL;
1604             assert(usable_arenas->address != 0);
1605         }
1606     }
1607 
1608     goto init_pool;
1609 
1610 success:
1611     assert(bp != NULL);
1612     return (void *)bp;
1613 
1614 failed:
1615     return NULL;
1616 }
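
/* Worked example of the size-class mapping above (a sketch, assuming the
 * 64-bit configuration where ALIGNMENT == 16 and ALIGNMENT_SHIFT == 4):
 * size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT gives
 *
 *     nbytes  1..16  ->  size index 0   (16-byte blocks)
 *     nbytes 17..32  ->  size index 1   (32-byte blocks)
 *     nbytes 33..48  ->  size index 2   (48-byte blocks)
 *
 * and usedpools[size + size] is the head of the doubly-linked list of
 * partially used pools for that class (each class owns two adjacent pointer
 * slots in usedpools, which together masquerade as a dummy pool_header).
 */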
1617 
1618 
1619 static void *
1620 _PyObject_Malloc(void *ctx, size_t nbytes)
1621 {
1622     void* ptr = pymalloc_alloc(ctx, nbytes);
1623     if (ptr != NULL) {
1624         _Py_AllocatedBlocks++;
1625         return ptr;
1626     }
1627 
1628     ptr = PyMem_RawMalloc(nbytes);
1629     if (ptr != NULL) {
1630         _Py_AllocatedBlocks++;
1631     }
1632     return ptr;
1633 }
1634 
1635 
1636 static void *
1637 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1638 {
1639     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1640     size_t nbytes = nelem * elsize;
1641 
1642     void *ptr = pymalloc_alloc(ctx, nbytes);
1643     if (ptr != NULL) {
1644         memset(ptr, 0, nbytes);
1645         _Py_AllocatedBlocks++;
1646         return ptr;
1647     }
1648 
1649     ptr = PyMem_RawCalloc(nelem, elsize);
1650     if (ptr != NULL) {
1651         _Py_AllocatedBlocks++;
1652     }
1653     return ptr;
1654 }
1655 
1656 
1657 /* Free a memory block allocated by pymalloc_alloc().
1658    Return 1 if it was freed.
1659    Return 0 if the block was not allocated by pymalloc_alloc(). */
1660 static int
1661 pymalloc_free(void *ctx, void *p)
1662 {
1663     poolp pool;
1664     block *lastfree;
1665     poolp next, prev;
1666     uint size;
1667 
1668     assert(p != NULL);
1669 
1670 #ifdef WITH_VALGRIND
1671     if (UNLIKELY(running_on_valgrind > 0)) {
1672         return 0;
1673     }
1674 #endif
1675 
1676     pool = POOL_ADDR(p);
1677     if (!address_in_range(p, pool)) {
1678         return 0;
1679     }
1680     /* We allocated this address. */
1681 
1682     /* Link p to the start of the pool's freeblock list.  Since
1683      * the pool had at least the p block outstanding, the pool
1684      * wasn't empty (so it's already in a usedpools[] list, or
1685      * was full and is in no list -- it's not in the freeblocks
1686      * list in any case).
1687      */
1688     assert(pool->ref.count > 0);            /* else it was empty */
1689     *(block **)p = lastfree = pool->freeblock;
1690     pool->freeblock = (block *)p;
1691     if (!lastfree) {
1692         /* Pool was full, so doesn't currently live in any list:
1693          * link it to the front of the appropriate usedpools[] list.
1694          * This mimics LRU pool usage for new allocations and
1695          * targets optimal filling when several pools contain
1696          * blocks of the same size class.
1697          */
1698         --pool->ref.count;
1699         assert(pool->ref.count > 0);            /* else the pool is empty */
1700         size = pool->szidx;
1701         next = usedpools[size + size];
1702         prev = next->prevpool;
1703 
1704         /* insert pool before next:   prev <-> pool <-> next */
1705         pool->nextpool = next;
1706         pool->prevpool = prev;
1707         next->prevpool = pool;
1708         prev->nextpool = pool;
1709         goto success;
1710     }
1711 
1712     struct arena_object* ao;
1713     uint nf;  /* ao->nfreepools */
1714 
1715     /* freeblock wasn't NULL, so the pool wasn't full,
1716      * and the pool is in a usedpools[] list.
1717      */
1718     if (--pool->ref.count != 0) {
1719         /* pool isn't empty:  leave it in usedpools */
1720         goto success;
1721     }
1722     /* Pool is now empty:  unlink from usedpools, and
1723      * link to the front of freepools.  This ensures that
1724      * previously freed pools will be allocated later
1725      * (since they are not referenced, they may have been paged out).
1726      */
1727     next = pool->nextpool;
1728     prev = pool->prevpool;
1729     next->prevpool = prev;
1730     prev->nextpool = next;
1731 
1732     /* Link the pool to freepools.  This is a singly-linked
1733      * list, and pool->prevpool isn't used there.
1734      */
1735     ao = &arenas[pool->arenaindex];
1736     pool->nextpool = ao->freepools;
1737     ao->freepools = pool;
1738     nf = ao->nfreepools;
1739     /* If this is the rightmost arena with this number of free pools,
1740      * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
1741      * are no arenas in usable_arenas with that value.
1742      */
1743     struct arena_object* lastnf = nfp2lasta[nf];
1744     assert((nf == 0 && lastnf == NULL) ||
1745            (nf > 0 &&
1746             lastnf != NULL &&
1747             lastnf->nfreepools == nf &&
1748             (lastnf->nextarena == NULL ||
1749              nf < lastnf->nextarena->nfreepools)));
1750     if (lastnf == ao) {  /* it is the rightmost */
1751         struct arena_object* p = ao->prevarena;
1752         nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
1753     }
1754     ao->nfreepools = ++nf;
1755 
1756     /* All the rest is arena management.  We just freed
1757      * a pool, and there are 4 cases for arena mgmt:
1758      * 1. If all the pools are free, return the arena to
1759      *    the system free().
1760      * 2. If this is the only free pool in the arena,
1761      *    add the arena back to the `usable_arenas` list.
1762      * 3. If the "next" arena has a smaller count of free
1763      *    pools, we have to "slide this arena right" to
1764      *    restore that usable_arenas is sorted in order of
1765      *    nfreepools.
1766      * 4. Else there's nothing more to do.
1767      */
1768     if (nf == ao->ntotalpools) {
1769         /* Case 1.  First unlink ao from usable_arenas.
1770          */
1771         assert(ao->prevarena == NULL ||
1772                ao->prevarena->address != 0);
1773         assert(ao->nextarena == NULL ||
1774                ao->nextarena->address != 0);
1775 
1776         /* Fix the pointer in the prevarena, or the
1777          * usable_arenas pointer.
1778          */
1779         if (ao->prevarena == NULL) {
1780             usable_arenas = ao->nextarena;
1781             assert(usable_arenas == NULL ||
1782                    usable_arenas->address != 0);
1783         }
1784         else {
1785             assert(ao->prevarena->nextarena == ao);
1786             ao->prevarena->nextarena =
1787                 ao->nextarena;
1788         }
1789         /* Fix the pointer in the nextarena. */
1790         if (ao->nextarena != NULL) {
1791             assert(ao->nextarena->prevarena == ao);
1792             ao->nextarena->prevarena =
1793                 ao->prevarena;
1794         }
1795         /* Record that this arena_object slot is
1796          * available to be reused.
1797          */
1798         ao->nextarena = unused_arena_objects;
1799         unused_arena_objects = ao;
1800 
1801         /* Free the entire arena. */
1802         _PyObject_Arena.free(_PyObject_Arena.ctx,
1803                              (void *)ao->address, ARENA_SIZE);
1804         ao->address = 0;                        /* mark unassociated */
1805         --narenas_currently_allocated;
1806 
1807         goto success;
1808     }
1809 
1810     if (nf == 1) {
1811         /* Case 2.  Put ao at the head of
1812          * usable_arenas.  Note that because
1813          * ao->nfreepools was 0 before, ao isn't
1814          * currently on the usable_arenas list.
1815          */
1816         ao->nextarena = usable_arenas;
1817         ao->prevarena = NULL;
1818         if (usable_arenas)
1819             usable_arenas->prevarena = ao;
1820         usable_arenas = ao;
1821         assert(usable_arenas->address != 0);
1822         if (nfp2lasta[1] == NULL) {
1823             nfp2lasta[1] = ao;
1824         }
1825 
1826         goto success;
1827     }
1828 
1829     /* If this arena is now out of order, we need to keep
1830      * the list sorted.  The list is kept sorted so that
1831      * the "most full" arenas are used first, which allows
1832      * the nearly empty arenas to be completely freed.  In
1833      * a few un-scientific tests, it seems like this
1834      * approach allowed a lot more memory to be freed.
1835      */
1836     /* If this is the only arena with nf, record that. */
1837     if (nfp2lasta[nf] == NULL) {
1838         nfp2lasta[nf] = ao;
1839     } /* else the rightmost with nf doesn't change */
1840     /* If this was the rightmost of the old size, it remains in place. */
1841     if (ao == lastnf) {
1842         /* Case 4.  Nothing to do. */
1843         goto success;
1844     }
1845     /* If ao were the only arena in the list, the last block would have
1846      * gotten us out.
1847      */
1848     assert(ao->nextarena != NULL);
1849 
1850     /* Case 3:  We have to move the arena towards the end of the list,
1851      * because it has more free pools than the arena to its right.  It needs
1852      * to move to follow lastnf.
1853      * First unlink ao from usable_arenas.
1854      */
1855     if (ao->prevarena != NULL) {
1856         /* ao isn't at the head of the list */
1857         assert(ao->prevarena->nextarena == ao);
1858         ao->prevarena->nextarena = ao->nextarena;
1859     }
1860     else {
1861         /* ao is at the head of the list */
1862         assert(usable_arenas == ao);
1863         usable_arenas = ao->nextarena;
1864     }
1865     ao->nextarena->prevarena = ao->prevarena;
1866     /* And insert after lastnf. */
1867     ao->prevarena = lastnf;
1868     ao->nextarena = lastnf->nextarena;
1869     if (ao->nextarena != NULL) {
1870         ao->nextarena->prevarena = ao;
1871     }
1872     lastnf->nextarena = ao;
1873     /* Verify that the swaps worked. */
1874     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
1875     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
1876     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
1877     assert((usable_arenas == ao && ao->prevarena == NULL)
1878            || ao->prevarena->nextarena == ao);
1879 
1880     goto success;
1881 
1882 success:
1883     return 1;
1884 }
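
/* Illustrative walk-through of case 3 above, with hypothetical arenas and
 * free-pool counts.  Suppose usable_arenas, sorted by ascending nfreepools,
 * is
 *
 *     A(1) -> B(1) -> C(2)        nfp2lasta[1] == B, nfp2lasta[2] == C
 *
 * and a pool in A becomes empty, raising A's count to 2.  A != nfp2lasta[1],
 * so A now has more free pools than the arena to its right and must slide to
 * just after lastnf == B, giving
 *
 *     B(1) -> A(2) -> C(2)
 *
 * which is sorted again; nfp2lasta[2] remains C, still the rightmost arena
 * with two free pools.
 */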
1885 
1886 
1887 static void
1888 _PyObject_Free(void *ctx, void *p)
1889 {
1890     /* PyObject_Free(NULL) has no effect */
1891     if (p == NULL) {
1892         return;
1893     }
1894 
1895     _Py_AllocatedBlocks--;
1896     if (!pymalloc_free(ctx, p)) {
1897         /* pymalloc didn't allocate this address */
1898         PyMem_RawFree(p);
1899     }
1900 }
1901 
1902 
1903 /* pymalloc realloc.
1904 
1905    If nbytes==0, then as the Python docs promise, we do not treat this like
1906    free(p), and return a non-NULL result.
1907 
1908    Return 1 if pymalloc reallocated memory and wrote the new pointer into
1909    newptr_p.
1910 
1911    Return 0 if pymalloc didn't allocate p. */
1912 static int
1913 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
1914 {
1915     void *bp;
1916     poolp pool;
1917     size_t size;
1918 
1919     assert(p != NULL);
1920 
1921 #ifdef WITH_VALGRIND
1922     /* Treat running_on_valgrind == -1 the same as 0 */
1923     if (UNLIKELY(running_on_valgrind > 0)) {
1924         return 0;
1925     }
1926 #endif
1927 
1928     pool = POOL_ADDR(p);
1929     if (!address_in_range(p, pool)) {
1930         /* pymalloc is not managing this block.
1931 
1932            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
1933            over this block.  However, if we do, we need to copy the valid data
1934            from the C-managed block to one of our blocks, and there's no
1935            portable way to know how much of the memory space starting at p is
1936            valid.
1937 
1938            As bug 1185883 pointed out the hard way, it's possible that the
1939            C-managed block is "at the end" of allocated VM space, so that a
1940            memory fault can occur if we try to copy nbytes bytes starting at p.
1941            Instead we punt: let C continue to manage this block. */
1942         return 0;
1943     }
1944 
1945     /* pymalloc is in charge of this block */
1946     size = INDEX2SIZE(pool->szidx);
1947     if (nbytes <= size) {
1948         /* The block is staying the same or shrinking.
1949 
1950            If it's shrinking, there's a tradeoff: it costs cycles to copy the
1951            block to a smaller size class, but it wastes memory not to copy it.
1952 
1953            The compromise here is to copy on shrink only if at least 25% of
1954            size can be shaved off. */
1955         if (4 * nbytes > 3 * size) {
1956             /* It's the same, or shrinking and new/old > 3/4. */
1957             *newptr_p = p;
1958             return 1;
1959         }
1960         size = nbytes;
1961     }
1962 
1963     bp = _PyObject_Malloc(ctx, nbytes);
1964     if (bp != NULL) {
1965         memcpy(bp, p, size);
1966         _PyObject_Free(ctx, p);
1967     }
1968     *newptr_p = bp;
1969     return 1;
1970 }
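
/* Worked example of the shrink rule above (a sketch, using the 512-byte size
 * class from the 64-bit configuration):  for a block whose pool has
 * size == 512,
 *
 *     realloc to nbytes == 400:  4*400 == 1600 >  3*512 == 1536  -> keep p
 *     realloc to nbytes == 300:  4*300 == 1200 <= 3*512 == 1536  -> copy to a
 *                                smaller class and free the old block
 *
 * i.e. the block is only moved when at least 25% of its current size class
 * would be saved.
 */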
1971 
1972 
1973 static void *
1974 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
1975 {
1976     void *ptr2;
1977 
1978     if (ptr == NULL) {
1979         return _PyObject_Malloc(ctx, nbytes);
1980     }
1981 
1982     if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
1983         return ptr2;
1984     }
1985 
1986     return PyMem_RawRealloc(ptr, nbytes);
1987 }
1988 
1989 #else   /* ! WITH_PYMALLOC */
1990 
1991 /*==========================================================================*/
1992 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
1993  * only be used by extensions that are compiled with pymalloc enabled. */
1994 
1995 Py_ssize_t
1996 _Py_GetAllocatedBlocks(void)
1997 {
1998     return 0;
1999 }
2000 
2001 #endif /* WITH_PYMALLOC */
2002 
2003 
2004 /*==========================================================================*/
2005 /* A cross-platform debugging allocator.  This doesn't manage memory directly;
2006  * it wraps a real allocator, adding extra debugging info to the memory blocks.
2007  */
2008 
2009 /* Uncomment this define to add the "serialno" field */
2010 /* #define PYMEM_DEBUG_SERIALNO */
2011 
2012 #ifdef PYMEM_DEBUG_SERIALNO
2013 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2014 
2015 /* serialno is always incremented via calling this routine.  The point is
2016  * to supply a single place to set a breakpoint.
2017  */
2018 static void
2019 bumpserialno(void)
2020 {
2021     ++serialno;
2022 }
2023 #endif
2024 
2025 #define SST SIZEOF_SIZE_T
2026 
2027 #ifdef PYMEM_DEBUG_SERIALNO
2028 #  define PYMEM_DEBUG_EXTRA_BYTES (4 * SST)
2029 #else
2030 #  define PYMEM_DEBUG_EXTRA_BYTES (3 * SST)
2031 #endif
2032 
2033 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2034 static size_t
2035 read_size_t(const void *p)
2036 {
2037     const uint8_t *q = (const uint8_t *)p;
2038     size_t result = *q++;
2039     int i;
2040 
2041     for (i = SST; --i > 0; ++q)
2042         result = (result << 8) | *q;
2043     return result;
2044 }
2045 
2046 /* Write n as a big-endian size_t, MSB at address p, LSB at
2047  * p + sizeof(size_t) - 1.
2048  */
2049 static void
2050 write_size_t(void *p, size_t n)
2051 {
2052     uint8_t *q = (uint8_t *)p + SST - 1;
2053     int i;
2054 
2055     for (i = SST; --i >= 0; --q) {
2056         *q = (uint8_t)(n & 0xff);
2057         n >>= 8;
2058     }
2059 }
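
/* Example (assuming SIZEOF_SIZE_T == 8):  write_size_t(p, 0x1234) stores
 *
 *     p[0..7] == 00 00 00 00 00 00 12 34
 *
 * i.e. the most significant byte first, so a raw memory dump shows the length
 * in natural reading order; read_size_t() reverses the transformation.
 */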
2060 
2061 /* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2062    fills them with useful stuff, here calling the underlying malloc's result p:
2063 
2064 p[0: S]
2065     Number of bytes originally asked for.  This is a size_t, big-endian (easier
2066     to read in a memory dump).
2067 p[S]
2068     API ID.  See PEP 445.  This is a character, but seems undocumented.
2069 p[S+1: 2*S]
2070     Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
2071 p[2*S: 2*S+n]
2072     The requested memory, filled with copies of PYMEM_CLEANBYTE.
2073     Used to catch reference to uninitialized memory.
2074     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2075     handled the request itself.
2076 p[2*S+n: 2*S+n+S]
2077     Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
2078 p[2*S+n+S: 2*S+n+2*S]
2079     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2080     and _PyMem_DebugRealloc.
2081     This is a big-endian size_t.
2082     If "bad memory" is detected later, the serial number gives an
2083     excellent way to set a breakpoint on the next run, to capture the
2084     instant at which this block was passed out.
2085 
2086 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2087 for 3 * S extra bytes, and omits the last serialno field.
2088 */
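
/* Concrete layout example (a sketch, assuming S == 8, PYMEM_DEBUG_SERIALNO
 * defined, and a 3-byte request made through the object-domain hooks, whose
 * api id is 'o'):
 *
 *     p[0:8]     00 00 00 00 00 00 00 03   requested size, big-endian
 *     p[8]       'o'                       API id
 *     p[9:16]    fb fb fb fb fb fb fb      leading FORBIDDENBYTE pad
 *     p[16:19]   cb cb cb                  the caller's 3 bytes (CLEANBYTE)
 *     p[19:27]   fb fb fb fb fb fb fb fb   trailing FORBIDDENBYTE pad
 *     p[27:35]   serial number, big-endian
 *
 * and &p[16] is the pointer handed back to the caller.
 */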
2089 
2090 static void *
2091 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2092 {
2093     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2094     uint8_t *p;           /* base address of malloc'ed pad block */
2095     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2096     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2097     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2098 
2099     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2100         /* integer overflow: can't represent total as a Py_ssize_t */
2101         return NULL;
2102     }
2103     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2104 
2105     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2106                 ^--- p    ^--- data   ^--- tail
2107        S: nbytes stored as size_t
2108        I: API identifier (1 byte)
2109        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2110        C: Clean bytes used later to store actual data
2111        N: Serial number stored as size_t
2112 
2113        If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2114        is omitted. */
2115 
2116     if (use_calloc) {
2117         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2118     }
2119     else {
2120         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2121     }
2122     if (p == NULL) {
2123         return NULL;
2124     }
2125     data = p + 2*SST;
2126 
2127 #ifdef PYMEM_DEBUG_SERIALNO
2128     bumpserialno();
2129 #endif
2130 
2131     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2132     write_size_t(p, nbytes);
2133     p[SST] = (uint8_t)api->api_id;
2134     memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2135 
2136     if (nbytes > 0 && !use_calloc) {
2137         memset(data, PYMEM_CLEANBYTE, nbytes);
2138     }
2139 
2140     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2141     tail = data + nbytes;
2142     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2143 #ifdef PYMEM_DEBUG_SERIALNO
2144     write_size_t(tail + SST, serialno);
2145 #endif
2146 
2147     return data;
2148 }
2149 
2150 static void *
2151 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2152 {
2153     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2154 }
2155 
2156 static void *
2157 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2158 {
2159     size_t nbytes;
2160     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2161     nbytes = nelem * elsize;
2162     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2163 }
2164 
2165 
2166 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2167    particular, that the FORBIDDENBYTEs with the api ID are still intact).
2168    Then fills the original bytes with PYMEM_DEADBYTE.
2169    Then calls the underlying free.
2170 */
2171 static void
2172 _PyMem_DebugRawFree(void *ctx, void *p)
2173 {
2174     /* PyMem_Free(NULL) has no effect */
2175     if (p == NULL) {
2176         return;
2177     }
2178 
2179     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2180     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2181     size_t nbytes;
2182 
2183     _PyMem_DebugCheckAddress(api->api_id, p);
2184     nbytes = read_size_t(q);
2185     nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2186     memset(q, PYMEM_DEADBYTE, nbytes);
2187     api->alloc.free(api->alloc.ctx, q);
2188 }
2189 
2190 
2191 static void *
2192 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2193 {
2194     if (p == NULL) {
2195         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2196     }
2197 
2198     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2199     uint8_t *head;        /* base address of malloc'ed pad block */
2200     uint8_t *data;        /* pointer to data bytes */
2201     uint8_t *r;
2202     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2203     size_t total;         /* 2 * SST + nbytes + 2 * SST */
2204     size_t original_nbytes;
2205 #define ERASED_SIZE 64
2206     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2207 
2208     _PyMem_DebugCheckAddress(api->api_id, p);
2209 
2210     data = (uint8_t *)p;
2211     head = data - 2*SST;
2212     original_nbytes = read_size_t(head);
2213     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2214         /* integer overflow: can't represent total as a Py_ssize_t */
2215         return NULL;
2216     }
2217     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2218 
2219     tail = data + original_nbytes;
2220 #ifdef PYMEM_DEBUG_SERIALNO
2221     size_t block_serialno = read_size_t(tail + SST);
2222 #endif
2223     /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2224        ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2225      */
2226     if (original_nbytes <= sizeof(save)) {
2227         memcpy(save, data, original_nbytes);
2228         memset(data - 2 * SST, PYMEM_DEADBYTE,
2229                original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2230     }
2231     else {
2232         memcpy(save, data, ERASED_SIZE);
2233         memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2234         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2235         memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2236                ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2237     }
2238 
2239     /* Resize and add decorations. */
2240     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2241     if (r == NULL) {
2242         /* if realloc() failed: rewrite header and footer which have
2243            just been erased */
2244         nbytes = original_nbytes;
2245     }
2246     else {
2247         head = r;
2248 #ifdef PYMEM_DEBUG_SERIALNO
2249         bumpserialno();
2250         block_serialno = serialno;
2251 #endif
2252     }
2253     data = head + 2*SST;
2254 
2255     write_size_t(head, nbytes);
2256     head[SST] = (uint8_t)api->api_id;
2257     memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2258 
2259     tail = data + nbytes;
2260     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2261 #ifdef PYMEM_DEBUG_SERIALNO
2262     write_size_t(tail + SST, block_serialno);
2263 #endif
2264 
2265     /* Restore saved bytes. */
2266     if (original_nbytes <= sizeof(save)) {
2267         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2268     }
2269     else {
2270         size_t i = original_nbytes - ERASED_SIZE;
2271         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2272         if (nbytes > i) {
2273             memcpy(data + i, &save[ERASED_SIZE],
2274                    Py_MIN(nbytes - i, ERASED_SIZE));
2275         }
2276     }
2277 
2278     if (r == NULL) {
2279         return NULL;
2280     }
2281 
2282     if (nbytes > original_nbytes) {
2283         /* growing: mark new extra memory clean */
2284         memset(data + original_nbytes, PYMEM_CLEANBYTE,
2285                nbytes - original_nbytes);
2286     }
2287 
2288     return data;
2289 }
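
/* Sketch of the ERASED_SIZE dance above, with hypothetical sizes:  for a
 * block of original_nbytes == 1000 (larger than sizeof(save) == 128), only
 * the first and last ERASED_SIZE == 64 data bytes, plus the header and
 * trailer, are overwritten with PYMEM_DEADBYTE before the underlying realloc
 * runs; those 128 data bytes are stashed in save[] and copied back afterwards.
 * This keeps the cost of the erase/restore constant regardless of the block
 * size, while still poisoning the debug bookkeeping and the edges of the old
 * block.
 */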
2290 
2291 static void
2292 _PyMem_DebugCheckGIL(void)
2293 {
2294     if (!PyGILState_Check())
2295         Py_FatalError("Python memory allocator called "
2296                       "without holding the GIL");
2297 }
2298 
2299 static void *
2300 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2301 {
2302     _PyMem_DebugCheckGIL();
2303     return _PyMem_DebugRawMalloc(ctx, nbytes);
2304 }
2305 
2306 static void *
2307 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2308 {
2309     _PyMem_DebugCheckGIL();
2310     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2311 }
2312 
2313 
2314 static void
2315 _PyMem_DebugFree(void *ctx, void *ptr)
2316 {
2317     _PyMem_DebugCheckGIL();
2318     _PyMem_DebugRawFree(ctx, ptr);
2319 }
2320 
2321 
2322 static void *
2323 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2324 {
2325     _PyMem_DebugCheckGIL();
2326     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2327 }
2328 
2329 /* Check the forbidden bytes on both ends of the memory allocated for p.
2330  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2331  * and call Py_FatalError to kill the program.
2332  * The API id is also checked.
2333  */
2334 static void
2335 _PyMem_DebugCheckAddress(char api, const void *p)
2336 {
2337     const uint8_t *q = (const uint8_t *)p;
2338     char msgbuf[64];
2339     const char *msg;
2340     size_t nbytes;
2341     const uint8_t *tail;
2342     int i;
2343     char id;
2344 
2345     if (p == NULL) {
2346         msg = "didn't expect a NULL pointer";
2347         goto error;
2348     }
2349 
2350     /* Check the API id */
2351     id = (char)q[-SST];
2352     if (id != api) {
2353         msg = msgbuf;
2354         snprintf(msgbuf, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
2355         msgbuf[sizeof(msgbuf)-1] = 0;
2356         goto error;
2357     }
2358 
2359     /* Check the stuff at the start of p first:  if there's underwrite
2360      * corruption, the number-of-bytes field may be nuts, and checking
2361      * the tail could lead to a segfault then.
2362      */
2363     for (i = SST-1; i >= 1; --i) {
2364         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2365             msg = "bad leading pad byte";
2366             goto error;
2367         }
2368     }
2369 
2370     nbytes = read_size_t(q - 2*SST);
2371     tail = q + nbytes;
2372     for (i = 0; i < SST; ++i) {
2373         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2374             msg = "bad trailing pad byte";
2375             goto error;
2376         }
2377     }
2378 
2379     return;
2380 
2381 error:
2382     _PyObject_DebugDumpAddress(p);
2383     Py_FatalError(msg);
2384 }
2385 
2386 /* Display info to stderr about the memory block at p. */
2387 static void
2388 _PyObject_DebugDumpAddress(const void *p)
2389 {
2390     const uint8_t *q = (const uint8_t *)p;
2391     const uint8_t *tail;
2392     size_t nbytes;
2393     int i;
2394     int ok;
2395     char id;
2396 
2397     fprintf(stderr, "Debug memory block at address p=%p:", p);
2398     if (p == NULL) {
2399         fprintf(stderr, "\n");
2400         return;
2401     }
2402     id = (char)q[-SST];
2403     fprintf(stderr, " API '%c'\n", id);
2404 
2405     nbytes = read_size_t(q - 2*SST);
2406     fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
2407                     "requested\n", nbytes);
2408 
2409     /* In case this is nuts, check the leading pad bytes first. */
2410     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2411     ok = 1;
2412     for (i = 1; i <= SST-1; ++i) {
2413         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2414             ok = 0;
2415             break;
2416         }
2417     }
2418     if (ok)
2419         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2420     else {
2421         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2422             PYMEM_FORBIDDENBYTE);
2423         for (i = SST-1; i >= 1; --i) {
2424             const uint8_t byte = *(q-i);
2425             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2426             if (byte != PYMEM_FORBIDDENBYTE)
2427                 fputs(" *** OUCH", stderr);
2428             fputc('\n', stderr);
2429         }
2430 
2431         fputs("    Because memory is corrupted at the start, the "
2432               "count of bytes requested\n"
2433               "       may be bogus, and checking the trailing pad "
2434               "bytes may segfault.\n", stderr);
2435     }
2436 
2437     tail = q + nbytes;
2438     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2439     ok = 1;
2440     for (i = 0; i < SST; ++i) {
2441         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2442             ok = 0;
2443             break;
2444         }
2445     }
2446     if (ok)
2447         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2448     else {
2449         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2450                 PYMEM_FORBIDDENBYTE);
2451         for (i = 0; i < SST; ++i) {
2452             const uint8_t byte = tail[i];
2453             fprintf(stderr, "        at tail+%d: 0x%02x",
2454                     i, byte);
2455             if (byte != PYMEM_FORBIDDENBYTE)
2456                 fputs(" *** OUCH", stderr);
2457             fputc('\n', stderr);
2458         }
2459     }
2460 
2461 #ifdef PYMEM_DEBUG_SERIALNO
2462     size_t serial = read_size_t(tail + SST);
2463     fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
2464                     "u to debug malloc/realloc.\n", serial);
2465 #endif
2466 
2467     if (nbytes > 0) {
2468         i = 0;
2469         fputs("    Data at p:", stderr);
2470         /* print up to 8 bytes at the start */
2471         while (q < tail && i < 8) {
2472             fprintf(stderr, " %02x", *q);
2473             ++i;
2474             ++q;
2475         }
2476         /* and up to 8 at the end */
2477         if (q < tail) {
2478             if (tail - q > 8) {
2479                 fputs(" ...", stderr);
2480                 q = tail - 8;
2481             }
2482             while (q < tail) {
2483                 fprintf(stderr, " %02x", *q);
2484                 ++q;
2485             }
2486         }
2487         fputc('\n', stderr);
2488     }
2489     fputc('\n', stderr);
2490 
2491     fflush(stderr);
2492     _PyMem_DumpTraceback(fileno(stderr), p);
2493 }
2494 
2495 
2496 static size_t
2497 printone(FILE *out, const char* msg, size_t value)
2498 {
2499     int i, k;
2500     char buf[100];
2501     size_t origvalue = value;
2502 
2503     fputs(msg, out);
2504     for (i = (int)strlen(msg); i < 35; ++i)
2505         fputc(' ', out);
2506     fputc('=', out);
2507 
2508     /* Write the value with commas. */
2509     i = 22;
2510     buf[i--] = '\0';
2511     buf[i--] = '\n';
2512     k = 3;
2513     do {
2514         size_t nextvalue = value / 10;
2515         unsigned int digit = (unsigned int)(value - nextvalue * 10);
2516         value = nextvalue;
2517         buf[i--] = (char)(digit + '0');
2518         --k;
2519         if (k == 0 && value && i >= 0) {
2520             k = 3;
2521             buf[i--] = ',';
2522         }
2523     } while (value && i >= 0);
2524 
2525     while (i >= 0)
2526         buf[i--] = ' ';
2527     fputs(buf, out);
2528 
2529     return origvalue;
2530 }
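
/* Example of the resulting format (value chosen arbitrarily):
 *
 *     printone(out, "# bytes in allocated blocks", 18825216);
 *
 * pads the label with spaces out to column 35, writes '=', and then prints
 * the value right-justified in a 21-character field with thousands
 * separators, roughly:
 *
 *     # bytes in allocated blocks        =           18,825,216
 */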
2531 
2532 void
2533 _PyDebugAllocatorStats(FILE *out,
2534                        const char *block_name, int num_blocks, size_t sizeof_block)
2535 {
2536     char buf1[128];
2537     char buf2[128];
2538     PyOS_snprintf(buf1, sizeof(buf1),
2539                   "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2540                   num_blocks, block_name, sizeof_block);
2541     PyOS_snprintf(buf2, sizeof(buf2),
2542                   "%48s ", buf1);
2543     (void)printone(out, buf2, num_blocks * sizeof_block);
2544 }
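
/* Typical call site (illustrative; the per-type free-list statistics helpers,
 * e.g. in Objects/dictobject.c, use it like this):
 *
 *     _PyDebugAllocatorStats(out, "free PyDictObject",
 *                            numfree, sizeof(PyDictObject));
 *
 * which prints a single "<N> free PyDictObjects * <size> bytes each" line
 * together with the total byte count, via printone() above.
 */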
2545 
2546 
2547 #ifdef WITH_PYMALLOC
2548 
2549 #ifdef Py_DEBUG
2550 /* Is target in the list?  The list is traversed via the nextpool pointers.
2551  * The list may be NULL-terminated, or circular.  Return 1 if target is in
2552  * list, else 0.
2553  */
2554 static int
2555 pool_is_in_list(const poolp target, poolp list)
2556 {
2557     poolp origlist = list;
2558     assert(target != NULL);
2559     if (list == NULL)
2560         return 0;
2561     do {
2562         if (target == list)
2563             return 1;
2564         list = list->nextpool;
2565     } while (list != NULL && list != origlist);
2566     return 0;
2567 }
2568 #endif
2569 
2570 /* Print summary info to "out" about the state of pymalloc's structures.
2571  * In Py_DEBUG mode, also perform some expensive internal consistency
2572  * checks.
2573  *
2574  * Return 0 if the memory debug hooks are not installed or no statistics were
2575  * written into out; return 1 otherwise.
2576  */
2577 int
2578 _PyObject_DebugMallocStats(FILE *out)
2579 {
2580     if (!_PyMem_PymallocEnabled()) {
2581         return 0;
2582     }
2583 
2584     uint i;
2585     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2586     /* # of pools, allocated blocks, and free blocks per class index */
2587     size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2588     size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2589     size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2590     /* total # of allocated bytes in used and full pools */
2591     size_t allocated_bytes = 0;
2592     /* total # of available bytes in used pools */
2593     size_t available_bytes = 0;
2594     /* # of free pools + pools not yet carved out of current arena */
2595     uint numfreepools = 0;
2596     /* # of bytes for arena alignment padding */
2597     size_t arena_alignment = 0;
2598     /* # of bytes in used and full pools used for pool_headers */
2599     size_t pool_header_bytes = 0;
2600     /* # of bytes in used and full pools wasted due to quantization,
2601      * i.e. the necessarily leftover space at the ends of used and
2602      * full pools.
2603      */
2604     size_t quantization = 0;
2605     /* # of arenas actually allocated. */
2606     size_t narenas = 0;
2607     /* running total -- should equal narenas * ARENA_SIZE */
2608     size_t total;
2609     char buf[128];
2610 
2611     fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2612             SMALL_REQUEST_THRESHOLD, numclasses);
2613 
2614     for (i = 0; i < numclasses; ++i)
2615         numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2616 
2617     /* Because full pools aren't linked to from anything, it's easiest
2618      * to march over all the arenas.  If we're lucky, most of the memory
2619      * will be living in full pools -- would be a shame to miss them.
2620      */
2621     for (i = 0; i < maxarenas; ++i) {
2622         uint j;
2623         uintptr_t base = arenas[i].address;
2624 
2625         /* Skip arenas which are not allocated. */
2626         if (arenas[i].address == (uintptr_t)NULL)
2627             continue;
2628         narenas += 1;
2629 
2630         numfreepools += arenas[i].nfreepools;
2631 
2632         /* round up to pool alignment */
2633         if (base & (uintptr_t)POOL_SIZE_MASK) {
2634             arena_alignment += POOL_SIZE;
2635             base &= ~(uintptr_t)POOL_SIZE_MASK;
2636             base += POOL_SIZE;
2637         }
2638 
2639         /* visit every pool in the arena */
2640         assert(base <= (uintptr_t) arenas[i].pool_address);
2641         for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2642              ++j, base += POOL_SIZE) {
2643             poolp p = (poolp)base;
2644             const uint sz = p->szidx;
2645             uint freeblocks;
2646 
2647             if (p->ref.count == 0) {
2648                 /* currently unused */
2649 #ifdef Py_DEBUG
2650                 assert(pool_is_in_list(p, arenas[i].freepools));
2651 #endif
2652                 continue;
2653             }
2654             ++numpools[sz];
2655             numblocks[sz] += p->ref.count;
2656             freeblocks = NUMBLOCKS(sz) - p->ref.count;
2657             numfreeblocks[sz] += freeblocks;
2658 #ifdef Py_DEBUG
2659             if (freeblocks > 0)
2660                 assert(pool_is_in_list(p, usedpools[sz + sz]));
2661 #endif
2662         }
2663     }
2664     assert(narenas == narenas_currently_allocated);
2665 
2666     fputc('\n', out);
2667     fputs("class   size   num pools   blocks in use  avail blocks\n"
2668           "-----   ----   ---------   -------------  ------------\n",
2669           out);
2670 
2671     for (i = 0; i < numclasses; ++i) {
2672         size_t p = numpools[i];
2673         size_t b = numblocks[i];
2674         size_t f = numfreeblocks[i];
2675         uint size = INDEX2SIZE(i);
2676         if (p == 0) {
2677             assert(b == 0 && f == 0);
2678             continue;
2679         }
2680         fprintf(out, "%5u %6u "
2681                         "%11" PY_FORMAT_SIZE_T "u "
2682                         "%15" PY_FORMAT_SIZE_T "u "
2683                         "%13" PY_FORMAT_SIZE_T "u\n",
2684                 i, size, p, b, f);
2685         allocated_bytes += b * size;
2686         available_bytes += f * size;
2687         pool_header_bytes += p * POOL_OVERHEAD;
2688         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2689     }
2690     fputc('\n', out);
2691 #ifdef PYMEM_DEBUG_SERIALNO
2692     if (_PyMem_DebugEnabled()) {
2693         (void)printone(out, "# times object malloc called", serialno);
2694     }
2695 #endif
2696     (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2697     (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2698     (void)printone(out, "# arenas highwater mark", narenas_highwater);
2699     (void)printone(out, "# arenas allocated current", narenas);
2700 
2701     PyOS_snprintf(buf, sizeof(buf),
2702         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2703         narenas, ARENA_SIZE);
2704     (void)printone(out, buf, narenas * ARENA_SIZE);
2705 
2706     fputc('\n', out);
2707 
2708     total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2709     total += printone(out, "# bytes in available blocks", available_bytes);
2710 
2711     PyOS_snprintf(buf, sizeof(buf),
2712         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2713     total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2714 
2715     total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2716     total += printone(out, "# bytes lost to quantization", quantization);
2717     total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2718     (void)printone(out, "Total", total);
2719     return 1;
2720 }
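
/* Worked example for the accounting above (a sketch, assuming 4 KiB pools
 * and a 48-byte pool header, as on 64-bit builds):  for the 32-byte size
 * class each pool holds
 *
 *     (4096 - 48) / 32 == 126 blocks
 *
 * so per pool 48 bytes go into pool_header_bytes and
 * (4096 - 48) % 32 == 16 bytes into quantization, summed over every used or
 * full pool of that class.
 */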
2721 
2722 #endif /* #ifdef WITH_PYMALLOC */
2723