1  #include "Python.h"
2  #include "pycore_pymem.h"         // _PyTraceMalloc_Config
3  
4  #include <stdbool.h>
5  
6  
7  /* Defined in tracemalloc.c */
8  extern void _PyMem_DumpTraceback(int fd, const void *ptr);
9  
10  
11  /* Python's malloc wrappers (see pymem.h) */
12  
13  #undef  uint
14  #define uint    unsigned int    /* assuming >= 16 bits */
15  
16  /* Forward declaration */
17  static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
18  static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
19  static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
20  static void _PyMem_DebugRawFree(void *ctx, void *ptr);
21  
22  static void* _PyMem_DebugMalloc(void *ctx, size_t size);
23  static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
24  static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
25  static void _PyMem_DebugFree(void *ctx, void *p);
26  
27  static void _PyObject_DebugDumpAddress(const void *p);
28  static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
29  
30  static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
31  
32  #if defined(__has_feature)  /* Clang */
33  #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
34  #    define _Py_NO_SANITIZE_ADDRESS \
35          __attribute__((no_sanitize("address")))
36  #  endif
37  #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
38  #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
39  #  endif
40  #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
41  #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
42  #  endif
43  #elif defined(__GNUC__)
44  #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
45  #    define _Py_NO_SANITIZE_ADDRESS \
46          __attribute__((no_sanitize_address))
47  #  endif
48     // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
49     // is provided only since GCC 7.
50  #  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
51  #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
52  #  endif
53  #endif
54  
55  #ifndef _Py_NO_SANITIZE_ADDRESS
56  #  define _Py_NO_SANITIZE_ADDRESS
57  #endif
58  #ifndef _Py_NO_SANITIZE_THREAD
59  #  define _Py_NO_SANITIZE_THREAD
60  #endif
61  #ifndef _Py_NO_SANITIZE_MEMORY
62  #  define _Py_NO_SANITIZE_MEMORY
63  #endif
64  
65  #ifdef WITH_PYMALLOC
66  
67  #ifdef MS_WINDOWS
68  #  include <windows.h>
69  #elif defined(HAVE_MMAP)
70  #  include <sys/mman.h>
71  #  ifdef MAP_ANONYMOUS
72  #    define ARENAS_USE_MMAP
73  #  endif
74  #endif
75  
76  /* Forward declaration */
77  static void* _PyObject_Malloc(void *ctx, size_t size);
78  static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
79  static void _PyObject_Free(void *ctx, void *p);
80  static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
81  #endif
82  
83  
84  /* bpo-35053: Declare tracemalloc configuration here rather than
85     Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
86     library, whereas _Py_NewReference() requires it. */
87  struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
88  
89  
90  static void *
91  _PyMem_RawMalloc(void *ctx, size_t size)
92  {
93      /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
94         for malloc(0), which would be treated as an error. Some platforms would
95         return a pointer with no memory behind it, which would break pymalloc.
96         To solve these problems, allocate an extra byte. */
97      if (size == 0)
98          size = 1;
99      return malloc(size);
100  }
101  
102  static void *
103  _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
104  {
105      /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
106         for calloc(0, 0), which would be treated as an error. Some platforms
107         would return a pointer with no memory behind it, which would break
108         pymalloc.  To solve these problems, allocate an extra byte. */
109      if (nelem == 0 || elsize == 0) {
110          nelem = 1;
111          elsize = 1;
112      }
113      return calloc(nelem, elsize);
114  }
115  
116  static void *
117  _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
118  {
119      if (size == 0)
120          size = 1;
121      return realloc(ptr, size);
122  }
123  
124  static void
125  _PyMem_RawFree(void *ctx, void *ptr)
126  {
127      free(ptr);
128  }
129  
130  
131  #ifdef MS_WINDOWS
132  static void *
133  _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
134  {
135      return VirtualAlloc(NULL, size,
136                          MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
137  }
138  
139  static void
140  _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
141  {
142      VirtualFree(ptr, 0, MEM_RELEASE);
143  }
144  
145  #elif defined(ARENAS_USE_MMAP)
146  static void *
147  _PyObject_ArenaMmap(void *ctx, size_t size)
148  {
149      void *ptr;
150      ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
151                 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
152      if (ptr == MAP_FAILED)
153          return NULL;
154      assert(ptr != NULL);
155      return ptr;
156  }
157  
158  static void
159  _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
160  {
161      munmap(ptr, size);
162  }
163  
164  #else
165  static void *
166  _PyObject_ArenaMalloc(void *ctx, size_t size)
167  {
168      return malloc(size);
169  }
170  
171  static void
172  _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
173  {
174      free(ptr);
175  }
176  #endif
177  
178  #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
179  #ifdef WITH_PYMALLOC
180  #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
181  #endif
182  
183  #define PYRAW_ALLOC MALLOC_ALLOC
184  #ifdef WITH_PYMALLOC
185  #  define PYOBJ_ALLOC PYMALLOC_ALLOC
186  #else
187  #  define PYOBJ_ALLOC MALLOC_ALLOC
188  #endif
189  #define PYMEM_ALLOC PYOBJ_ALLOC
190  
191  typedef struct {
192      /* We tag each block with an API ID in order to tag API violations */
193      char api_id;
194      PyMemAllocatorEx alloc;
195  } debug_alloc_api_t;
196  static struct {
197      debug_alloc_api_t raw;
198      debug_alloc_api_t mem;
199      debug_alloc_api_t obj;
200  } _PyMem_Debug = {
201      {'r', PYRAW_ALLOC},
202      {'m', PYMEM_ALLOC},
203      {'o', PYOBJ_ALLOC}
204      };
205  
206  #define PYDBGRAW_ALLOC \
207      {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
208  #define PYDBGMEM_ALLOC \
209      {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
210  #define PYDBGOBJ_ALLOC \
211      {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212  
213  #ifdef Py_DEBUG
214  static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
215  static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
216  static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
217  #else
218  static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
219  static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
220  static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
221  #endif
222  
223  
224  static int
225  pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
226                              PyMemAllocatorEx *old_alloc)
227  {
228      if (old_alloc != NULL) {
229          PyMem_GetAllocator(domain, old_alloc);
230      }
231  
232  
233      PyMemAllocatorEx new_alloc;
234      switch(domain)
235      {
236      case PYMEM_DOMAIN_RAW:
237          new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
238          break;
239      case PYMEM_DOMAIN_MEM:
240          new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
241          break;
242      case PYMEM_DOMAIN_OBJ:
243          new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
244          break;
245      default:
246          /* unknown domain */
247          return -1;
248      }
249      PyMem_SetAllocator(domain, &new_alloc);
250      if (debug) {
251          _PyMem_SetupDebugHooksDomain(domain);
252      }
253      return 0;
254  }
255  
256  
257  int
258  _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259                             PyMemAllocatorEx *old_alloc)
260  {
261  #ifdef Py_DEBUG
262      const int debug = 1;
263  #else
264      const int debug = 0;
265  #endif
266      return pymem_set_default_allocator(domain, debug, old_alloc);
267  }
268  
269  
270  int
271  _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
272  {
273      if (name == NULL || *name == '\0') {
274          /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
275            options): use default memory allocators */
276          *allocator = PYMEM_ALLOCATOR_DEFAULT;
277      }
278      else if (strcmp(name, "default") == 0) {
279          *allocator = PYMEM_ALLOCATOR_DEFAULT;
280      }
281      else if (strcmp(name, "debug") == 0) {
282          *allocator = PYMEM_ALLOCATOR_DEBUG;
283      }
284  #ifdef WITH_PYMALLOC
285      else if (strcmp(name, "pymalloc") == 0) {
286          *allocator = PYMEM_ALLOCATOR_PYMALLOC;
287      }
288      else if (strcmp(name, "pymalloc_debug") == 0) {
289          *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
290      }
291  #endif
292      else if (strcmp(name, "malloc") == 0) {
293          *allocator = PYMEM_ALLOCATOR_MALLOC;
294      }
295      else if (strcmp(name, "malloc_debug") == 0) {
296          *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
297      }
298      else {
299          /* unknown allocator */
300          return -1;
301      }
302      return 0;
303  }
304  
305  
306  int
307  _PyMem_SetupAllocators(PyMemAllocatorName allocator)
308  {
309      switch (allocator) {
310      case PYMEM_ALLOCATOR_NOT_SET:
311          /* do nothing */
312          break;
313  
314      case PYMEM_ALLOCATOR_DEFAULT:
315          (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
316          (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
317          (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
318          break;
319  
320      case PYMEM_ALLOCATOR_DEBUG:
321          (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
322          (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
323          (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
324          break;
325  
326  #ifdef WITH_PYMALLOC
327      case PYMEM_ALLOCATOR_PYMALLOC:
328      case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
329      {
330          PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
331          PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
332  
333          PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
334          PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
335          PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
336  
337          if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
338              PyMem_SetupDebugHooks();
339          }
340          break;
341      }
342  #endif
343  
344      case PYMEM_ALLOCATOR_MALLOC:
345      case PYMEM_ALLOCATOR_MALLOC_DEBUG:
346      {
347          PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
348          PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
349          PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
350          PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
351  
352          if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
353              PyMem_SetupDebugHooks();
354          }
355          break;
356      }
357  
358      default:
359          /* unknown allocator */
360          return -1;
361      }
362      return 0;
363  }
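
#if 0
/* Illustrative sketch only (not part of this file): how the two helpers
 * above could be chained to honor a PYTHONMALLOC-style allocator name.
 * The wrapper name is hypothetical; the real call sites live in the
 * interpreter (pre)initialization code. */
static int
example_apply_allocator_name(const char *name)
{
    PyMemAllocatorName allocator;
    if (_PyMem_GetAllocatorName(name, &allocator) < 0) {
        return -1;   /* unknown allocator name */
    }
    return _PyMem_SetupAllocators(allocator);
}
#endif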
364  
365  
366  static int
367  pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
368  {
369      return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
370  }
371  
372  
373  const char*
374  _PyMem_GetCurrentAllocatorName(void)
375  {
376      PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
377  #ifdef WITH_PYMALLOC
378      PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
379  #endif
380  
381      if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
382          pymemallocator_eq(&_PyMem, &malloc_alloc) &&
383          pymemallocator_eq(&_PyObject, &malloc_alloc))
384      {
385          return "malloc";
386      }
387  #ifdef WITH_PYMALLOC
388      if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
389          pymemallocator_eq(&_PyMem, &pymalloc) &&
390          pymemallocator_eq(&_PyObject, &pymalloc))
391      {
392          return "pymalloc";
393      }
394  #endif
395  
396      PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
397      PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
398      PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
399  
400      if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
401          pymemallocator_eq(&_PyMem, &dbg_mem) &&
402          pymemallocator_eq(&_PyObject, &dbg_obj))
403      {
404          /* Debug hooks installed */
405          if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
406              pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
407              pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
408          {
409              return "malloc_debug";
410          }
411  #ifdef WITH_PYMALLOC
412          if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
413              pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
414              pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
415          {
416              return "pymalloc_debug";
417          }
418  #endif
419      }
420      return NULL;
421  }
422  
423  
424  #undef MALLOC_ALLOC
425  #undef PYMALLOC_ALLOC
426  #undef PYRAW_ALLOC
427  #undef PYMEM_ALLOC
428  #undef PYOBJ_ALLOC
429  #undef PYDBGRAW_ALLOC
430  #undef PYDBGMEM_ALLOC
431  #undef PYDBGOBJ_ALLOC
432  
433  
434  static PyObjectArenaAllocator _PyObject_Arena = {NULL,
435  #ifdef MS_WINDOWS
436      _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
437  #elif defined(ARENAS_USE_MMAP)
438      _PyObject_ArenaMmap, _PyObject_ArenaMunmap
439  #else
440      _PyObject_ArenaMalloc, _PyObject_ArenaFree
441  #endif
442      };
443  
444  #ifdef WITH_PYMALLOC
445  static int
446  _PyMem_DebugEnabled(void)
447  {
448      return (_PyObject.malloc == _PyMem_DebugMalloc);
449  }
450  
451  static int
452  _PyMem_PymallocEnabled(void)
453  {
454      if (_PyMem_DebugEnabled()) {
455          return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
456      }
457      else {
458          return (_PyObject.malloc == _PyObject_Malloc);
459      }
460  }
461  #endif
462  
463  
464  static void
465  _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
466  {
467      PyMemAllocatorEx alloc;
468  
469      if (domain == PYMEM_DOMAIN_RAW) {
470          if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
471              return;
472          }
473  
474          PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
475          alloc.ctx = &_PyMem_Debug.raw;
476          alloc.malloc = _PyMem_DebugRawMalloc;
477          alloc.calloc = _PyMem_DebugRawCalloc;
478          alloc.realloc = _PyMem_DebugRawRealloc;
479          alloc.free = _PyMem_DebugRawFree;
480          PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
481      }
482      else if (domain == PYMEM_DOMAIN_MEM) {
483          if (_PyMem.malloc == _PyMem_DebugMalloc) {
484              return;
485          }
486  
487          PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
488          alloc.ctx = &_PyMem_Debug.mem;
489          alloc.malloc = _PyMem_DebugMalloc;
490          alloc.calloc = _PyMem_DebugCalloc;
491          alloc.realloc = _PyMem_DebugRealloc;
492          alloc.free = _PyMem_DebugFree;
493          PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
494      }
495      else if (domain == PYMEM_DOMAIN_OBJ)  {
496          if (_PyObject.malloc == _PyMem_DebugMalloc) {
497              return;
498          }
499  
500          PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
501          alloc.ctx = &_PyMem_Debug.obj;
502          alloc.malloc = _PyMem_DebugMalloc;
503          alloc.calloc = _PyMem_DebugCalloc;
504          alloc.realloc = _PyMem_DebugRealloc;
505          alloc.free = _PyMem_DebugFree;
506          PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
507      }
508  }
509  
510  
511  void
512  PyMem_SetupDebugHooks(void)
513  {
514      _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
515      _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
516      _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
517  }
518  
519  void
520  PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
521  {
522      switch(domain)
523      {
524      case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
525      case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
526      case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
527      default:
528          /* unknown domain: set all attributes to NULL */
529          allocator->ctx = NULL;
530          allocator->malloc = NULL;
531          allocator->calloc = NULL;
532          allocator->realloc = NULL;
533          allocator->free = NULL;
534      }
535  }
536  
537  void
538  PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
539  {
540      switch(domain)
541      {
542      case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
543      case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
544      case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
545      /* ignore unknown domain */
546      }
547  }
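
#if 0
/* Illustrative sketch only (not used by this file): wrapping the current
 * "mem" domain allocator with hooks that count malloc() calls and delegate
 * everything to the previously installed allocator via PyMem_GetAllocator()
 * and PyMem_SetAllocator() above.  All example_* names are hypothetical. */
typedef struct {
    PyMemAllocatorEx orig;   /* allocator being wrapped */
    size_t nmalloc;          /* number of malloc calls observed */
} example_hooks_t;

static example_hooks_t example_hooks;

static void *
example_malloc(void *ctx, size_t size)
{
    example_hooks_t *h = (example_hooks_t *)ctx;
    h->nmalloc++;
    return h->orig.malloc(h->orig.ctx, size);
}

static void *
example_calloc(void *ctx, size_t nelem, size_t elsize)
{
    example_hooks_t *h = (example_hooks_t *)ctx;
    return h->orig.calloc(h->orig.ctx, nelem, elsize);
}

static void *
example_realloc(void *ctx, void *ptr, size_t size)
{
    example_hooks_t *h = (example_hooks_t *)ctx;
    return h->orig.realloc(h->orig.ctx, ptr, size);
}

static void
example_free(void *ctx, void *ptr)
{
    example_hooks_t *h = (example_hooks_t *)ctx;
    h->orig.free(h->orig.ctx, ptr);
}

static void
example_install_hooks(void)
{
    PyMemAllocatorEx alloc = {&example_hooks, example_malloc, example_calloc,
                              example_realloc, example_free};
    PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &example_hooks.orig);
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
}
#endif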
548  
549  void
550  PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
551  {
552      *allocator = _PyObject_Arena;
553  }
554  
555  void
556  PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
557  {
558      _PyObject_Arena = *allocator;
559  }
560  
561  void *
562  PyMem_RawMalloc(size_t size)
563  {
564      /*
565       * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
566       * Most python internals blindly use a signed Py_ssize_t to track
567       * things without checking for overflows or negatives.
568       * As size_t is unsigned, checking for size < 0 is not required.
569       */
570      if (size > (size_t)PY_SSIZE_T_MAX)
571          return NULL;
572      return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
573  }
574  
575  void *
576  PyMem_RawCalloc(size_t nelem, size_t elsize)
577  {
578      /* see PyMem_RawMalloc() */
579      if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
580          return NULL;
581      return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
582  }
583  
584  void*
585  PyMem_RawRealloc(void *ptr, size_t new_size)
586  {
587      /* see PyMem_RawMalloc() */
588      if (new_size > (size_t)PY_SSIZE_T_MAX)
589          return NULL;
590      return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
591  }
592  
593  void PyMem_RawFree(void *ptr)
594  {
595      _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
596  }
597  
598  
599  void *
600  PyMem_Malloc(size_t size)
601  {
602      /* see PyMem_RawMalloc() */
603      if (size > (size_t)PY_SSIZE_T_MAX)
604          return NULL;
605      return _PyMem.malloc(_PyMem.ctx, size);
606  }
607  
608  void *
609  PyMem_Calloc(size_t nelem, size_t elsize)
610  {
611      /* see PyMem_RawMalloc() */
612      if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
613          return NULL;
614      return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
615  }
616  
617  void *
618  PyMem_Realloc(void *ptr, size_t new_size)
619  {
620      /* see PyMem_RawMalloc() */
621      if (new_size > (size_t)PY_SSIZE_T_MAX)
622          return NULL;
623      return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
624  }
625  
626  void
627  PyMem_Free(void *ptr)
628  {
629      _PyMem.free(_PyMem.ctx, ptr);
630  }
631  
632  
633  wchar_t*
634  _PyMem_RawWcsdup(const wchar_t *str)
635  {
636      assert(str != NULL);
637  
638      size_t len = wcslen(str);
639      if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
640          return NULL;
641      }
642  
643      size_t size = (len + 1) * sizeof(wchar_t);
644      wchar_t *str2 = PyMem_RawMalloc(size);
645      if (str2 == NULL) {
646          return NULL;
647      }
648  
649      memcpy(str2, str, size);
650      return str2;
651  }
652  
653  char *
654  _PyMem_RawStrdup(const char *str)
655  {
656      assert(str != NULL);
657      size_t size = strlen(str) + 1;
658      char *copy = PyMem_RawMalloc(size);
659      if (copy == NULL) {
660          return NULL;
661      }
662      memcpy(copy, str, size);
663      return copy;
664  }
665  
666  char *
667  _PyMem_Strdup(const char *str)
668  {
669      assert(str != NULL);
670      size_t size = strlen(str) + 1;
671      char *copy = PyMem_Malloc(size);
672      if (copy == NULL) {
673          return NULL;
674      }
675      memcpy(copy, str, size);
676      return copy;
677  }
678  
679  void *
680  PyObject_Malloc(size_t size)
681  {
682      /* see PyMem_RawMalloc() */
683      if (size > (size_t)PY_SSIZE_T_MAX)
684          return NULL;
685      return _PyObject.malloc(_PyObject.ctx, size);
686  }
687  
688  void *
689  PyObject_Calloc(size_t nelem, size_t elsize)
690  {
691      /* see PyMem_RawMalloc() */
692      if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
693          return NULL;
694      return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
695  }
696  
697  void *
698  PyObject_Realloc(void *ptr, size_t new_size)
699  {
700      /* see PyMem_RawMalloc() */
701      if (new_size > (size_t)PY_SSIZE_T_MAX)
702          return NULL;
703      return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
704  }
705  
706  void
707  PyObject_Free(void *ptr)
708  {
709      _PyObject.free(_PyObject.ctx, ptr);
710  }
711  
712  
713  /* If we're using GCC, use __builtin_expect() to reduce overhead of
714     the valgrind checks */
715  #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
716  #  define UNLIKELY(value) __builtin_expect((value), 0)
717  #  define LIKELY(value) __builtin_expect((value), 1)
718  #else
719  #  define UNLIKELY(value) (value)
720  #  define LIKELY(value) (value)
721  #endif
722  
723  #ifdef WITH_PYMALLOC
724  
725  #ifdef WITH_VALGRIND
726  #include <valgrind/valgrind.h>
727  
728  /* -1 indicates that we haven't checked that we're running on valgrind yet. */
729  static int running_on_valgrind = -1;
730  #endif
731  
732  
733  /* An object allocator for Python.
734  
735     Here is an introduction to the layers of the Python memory architecture,
736     showing where the object allocator is actually used (layer +2). It is
737     called for every object allocation and deallocation (PyObject_New/Del),
738     unless the object-specific allocators implement a proprietary allocation
739     scheme (ex.: ints use a simple free list). This is also the place where
740     the cyclic garbage collector operates selectively on container objects.
741  
742  
743      Object-specific allocators
744      _____   ______   ______       ________
745     [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
746  +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
747      _______________________________       |                           |
748     [   Python's object allocator   ]      |                           |
749  +2 | ####### Object memory ####### | <------ Internal buffers ------> |
750      ______________________________________________________________    |
751     [          Python's raw memory allocator (PyMem_ API)          ]   |
752  +1 | <----- Python memory (under PyMem manager's control) ------> |   |
753      __________________________________________________________________
754     [    Underlying general-purpose allocator (ex: C library malloc)   ]
755   0 | <------ Virtual memory allocated for the python process -------> |
756  
757     =========================================================================
758      _______________________________________________________________________
759     [                OS-specific Virtual Memory Manager (VMM)               ]
760  -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
761      __________________________________   __________________________________
762     [                                  ] [                                  ]
763  -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
764  
765  */
766  /*==========================================================================*/
767  
768  /* A fast, special-purpose memory allocator for small blocks, to be used
769     on top of a general-purpose malloc -- heavily based on previous art. */
770  
771  /* Vladimir Marangozov -- August 2000 */
772  
773  /*
774   * "Memory management is where the rubber meets the road -- if we do the wrong
775   * thing at any level, the results will not be good. And if we don't make the
776   * levels work well together, we are in serious trouble." (1)
777   *
778   * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
779   *    "Dynamic Storage Allocation: A Survey and Critical Review",
780   *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
781   */
782  
783  /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
784  
785  /*==========================================================================*/
786  
787  /*
788   * Allocation strategy abstract:
789   *
790   * For small requests, the allocator sub-allocates <Big> blocks of memory.
791   * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
792   * system's allocator.
793   *
794   * Small requests are grouped in size classes spaced 8 bytes apart, due
795   * to the required valid alignment of the returned address. Requests of
796   * a particular size are serviced from memory pools of 4K (one VMM page).
797   * Pools are fragmented on demand and contain free lists of blocks of one
798   * particular size class. In other words, there is a fixed-size allocator
799   * for each size class. Free pools are shared by the different allocators
800   * thus minimizing the space reserved for a particular size class.
801   *
802   * This allocation strategy is a variant of what is known as "simple
803   * segregated storage based on array of free lists". The main drawback of
804   * simple segregated storage is that we might end up with a lot of reserved
805   * memory for the different free lists, which degenerate over time. To avoid
806   * this, we partition each free list in pools and we share dynamically the
807   * reserved space between all free lists. This technique is quite efficient
808   * for memory intensive programs which allocate mainly small-sized blocks.
809   *
810   * For small requests we have the following table:
811   *
812   * Request in bytes     Size of allocated block      Size class idx
813   * ----------------------------------------------------------------
814   *        1-8                     8                       0
815   *        9-16                   16                       1
816   *       17-24                   24                       2
817   *       25-32                   32                       3
818   *       33-40                   40                       4
819   *       41-48                   48                       5
820   *       49-56                   56                       6
821   *       57-64                   64                       7
822   *       65-72                   72                       8
823   *        ...                   ...                     ...
824   *      497-504                 504                      62
825   *      505-512                 512                      63
826   *
827   *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
828   *      allocator.
829   */
830  
831  /*==========================================================================*/
832  
833  /*
834   * -- Main tunable settings section --
835   */
836  
837  /*
838   * Alignment of addresses returned to the user. 8-bytes alignment works
839   * on most current architectures (with 32-bit or 64-bit address buses).
840   * The alignment value is also used for grouping small requests in size
841   * classes spaced ALIGNMENT bytes apart.
842   *
843   * You shouldn't change this unless you know what you are doing.
844   */
845  
846  #if SIZEOF_VOID_P > 4
847  #define ALIGNMENT              16               /* must be 2^N */
848  #define ALIGNMENT_SHIFT         4
849  #else
850  #define ALIGNMENT               8               /* must be 2^N */
851  #define ALIGNMENT_SHIFT         3
852  #endif
853  
854  /* Return the number of bytes in size class I, as a uint. */
855  #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
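
#if 0
/* Illustrative sketch only: how a request size maps to a size class index,
 * matching the table in the comment above (the table assumes ALIGNMENT == 8,
 * i.e. ALIGNMENT_SHIFT == 3; 64-bit builds use 16-byte alignment, so the
 * concrete numbers differ there).  The helper name is hypothetical. */
static uint
example_size_class_index(size_t nbytes)
{
    /* e.g. nbytes == 20: (20 - 1) >> 3 == 2, and INDEX2SIZE(2) == 24 */
    return (uint)((nbytes - 1) >> ALIGNMENT_SHIFT);
}
#endif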
856  
857  /*
858   * Max size threshold below which malloc requests are considered to be
859   * small enough in order to use preallocated memory pools. You can tune
860   * this value according to your application behaviour and memory needs.
861   *
862   * Note: a size threshold of 512 guarantees that newly created dictionaries
863   * will be allocated from preallocated memory pools on 64-bit.
864   *
865   * The following invariants must hold:
866   *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
867   *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
868   *
869   * Although not required, for better performance and space efficiency,
870   * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
871   */
872  #define SMALL_REQUEST_THRESHOLD 512
873  #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
874  
875  /*
876   * The system's VMM page size can be obtained on most unices with a
877   * getpagesize() call or deduced from various header files. To make
878   * things simpler, we assume that it is 4K, which is OK for most systems.
879   * It is probably better if this is the native page size, but it doesn't
880   * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
881   * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
882   * violation fault.  4K is apparently OK for all the platforms that python
883   * currently targets.
884   */
885  #define SYSTEM_PAGE_SIZE        (4 * 1024)
886  #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
887  
888  /*
889   * Maximum amount of memory managed by the allocator for small requests.
890   */
891  #ifdef WITH_MEMORY_LIMITS
892  #ifndef SMALL_MEMORY_LIMIT
893  #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
894  #endif
895  #endif
896  
897  #if !defined(WITH_PYMALLOC_RADIX_TREE)
898  /* Use radix-tree to track arena memory regions, for address_in_range().
899   * Enable by default since it allows larger pool sizes.  Can be disabled
900   * using -DWITH_PYMALLOC_RADIX_TREE=0 */
901  #define WITH_PYMALLOC_RADIX_TREE 1
902  #endif
903  
904  #if SIZEOF_VOID_P > 4
905  /* on 64-bit platforms use larger pools and arenas if we can */
906  #define USE_LARGE_ARENAS
907  #if WITH_PYMALLOC_RADIX_TREE
908  /* large pools only supported if radix-tree is enabled */
909  #define USE_LARGE_POOLS
910  #endif
911  #endif
912  
913  /*
914   * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
915   * on a page boundary. This is a reserved virtual address space for the
916   * current process (obtained through a malloc()/mmap() call). In no way does
917   * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
918   * usually an address range reservation for <Big> bytes, unless all pages within
919   * this space are referenced subsequently. So malloc'ing big blocks and not
920   * using them does not mean "wasting memory". It's an addressable range
921   * wastage...
922   *
923   * Arenas are allocated with mmap() on systems supporting anonymous memory
924   * mappings to reduce heap fragmentation.
925   */
926  #ifdef USE_LARGE_ARENAS
927  #define ARENA_BITS              20                    /* 1 MiB */
928  #else
929  #define ARENA_BITS              18                    /* 256 KiB */
930  #endif
931  #define ARENA_SIZE              (1 << ARENA_BITS)
932  #define ARENA_SIZE_MASK         (ARENA_SIZE - 1)
933  
934  #ifdef WITH_MEMORY_LIMITS
935  #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
936  #endif
937  
938  /*
939   * Size of the pools used for small blocks.  Must be a power of 2.
940   */
941  #ifdef USE_LARGE_POOLS
942  #define POOL_BITS               14                  /* 16 KiB */
943  #else
944  #define POOL_BITS               12                  /* 4 KiB */
945  #endif
946  #define POOL_SIZE               (1 << POOL_BITS)
947  #define POOL_SIZE_MASK          (POOL_SIZE - 1)
948  
949  #if !WITH_PYMALLOC_RADIX_TREE
950  #if POOL_SIZE != SYSTEM_PAGE_SIZE
951  #   error "pool size must be equal to system page size"
952  #endif
953  #endif
954  
955  #define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
956  #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
957  #   error "arena size not an exact multiple of pool size"
958  #endif
959  
960  /*
961   * -- End of tunable settings section --
962   */
963  
964  /*==========================================================================*/
965  
966  /* When you say memory, my mind reasons in terms of (pointers to) blocks */
967  typedef uint8_t block;
968  
969  /* Pool for small blocks. */
970  struct pool_header {
971      union { block *_padding;
972              uint count; } ref;          /* number of allocated blocks    */
973      block *freeblock;                   /* pool's free list head         */
974      struct pool_header *nextpool;       /* next pool of this size class  */
975      struct pool_header *prevpool;       /* previous pool       ""        */
976      uint arenaindex;                    /* index into arenas of base adr */
977      uint szidx;                         /* block size class index        */
978      uint nextoffset;                    /* bytes to virgin block         */
979      uint maxnextoffset;                 /* largest valid nextoffset      */
980  };
981  
982  typedef struct pool_header *poolp;
983  
984  /* Record keeping for arenas. */
985  struct arena_object {
986      /* The address of the arena, as returned by malloc.  Note that 0
987       * will never be returned by a successful malloc, and is used
988       * here to mark an arena_object that doesn't correspond to an
989       * allocated arena.
990       */
991      uintptr_t address;
992  
993      /* Pool-aligned pointer to the next pool to be carved off. */
994      block* pool_address;
995  
996      /* The number of available pools in the arena:  free pools + never-
997       * allocated pools.
998       */
999      uint nfreepools;
1000  
1001      /* The total number of pools in the arena, whether or not available. */
1002      uint ntotalpools;
1003  
1004      /* Singly-linked list of available pools. */
1005      struct pool_header* freepools;
1006  
1007      /* Whenever this arena_object is not associated with an allocated
1008       * arena, the nextarena member is used to link all unassociated
1009       * arena_objects in the singly-linked `unused_arena_objects` list.
1010       * The prevarena member is unused in this case.
1011       *
1012       * When this arena_object is associated with an allocated arena
1013       * with at least one available pool, both members are used in the
1014       * doubly-linked `usable_arenas` list, which is maintained in
1015       * increasing order of `nfreepools` values.
1016       *
1017       * Else this arena_object is associated with an allocated arena
1018       * all of whose pools are in use.  `nextarena` and `prevarena`
1019       * are both meaningless in this case.
1020       */
1021      struct arena_object* nextarena;
1022      struct arena_object* prevarena;
1023  };
1024  
1025  #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1026  
1027  #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
1028  
1029  /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1030  #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1031  
1032  /* Return total number of blocks in pool of size index I, as a uint. */
1033  #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
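
#if 0
/* Worked example (illustrative only): on a typical 64-bit build with
 * POOL_SIZE == 16 KiB, 16-byte ALIGNMENT and a 48-byte pool_header,
 * NUMBLOCKS(0) == (16384 - 48) / 16 == 1021 blocks of the smallest size
 * class fit in one pool.  The helper name is hypothetical. */
static uint
example_blocks_in_smallest_class(void)
{
    return NUMBLOCKS(0);
}
#endif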
1034  
1035  /*==========================================================================*/
1036  
1037  /*
1038   * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1039  
1040  This is involved.  For an index i, usedpools[i+i] is the header for a list of
1041  all partially used pools holding small blocks with "size class idx" i. So
1042  usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1043  16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1044  
1045  Pools are carved off an arena's highwater mark (an arena_object's pool_address
1046  member) as needed.  Once carved off, a pool is in one of three states forever
1047  after:
1048  
1049  used == partially used, neither empty nor full
1050      At least one block in the pool is currently allocated, and at least one
1051      block in the pool is not currently allocated (note this implies a pool
1052      has room for at least two blocks).
1053      This is a pool's initial state, as a pool is created only when malloc
1054      needs space.
1055      The pool holds blocks of a fixed size, and is in the circular list headed
1056      at usedpools[i] (see above).  It's linked to the other used pools of the
1057      same size class via the pool_header's nextpool and prevpool members.
1058      If all but one block is currently allocated, a malloc can cause a
1059      transition to the full state.  If all but one block is not currently
1060      allocated, a free can cause a transition to the empty state.
1061  
1062  full == all the pool's blocks are currently allocated
1063      On transition to full, a pool is unlinked from its usedpools[] list.
1064      It's not linked to from anything then anymore, and its nextpool and
1065      prevpool members are meaningless until it transitions back to used.
1066      A free of a block in a full pool puts the pool back in the used state.
1067      Then it's linked in at the front of the appropriate usedpools[] list, so
1068      that the next allocation for its size class will reuse the freed block.
1069  
1070  empty == all the pool's blocks are currently available for allocation
1071      On transition to empty, a pool is unlinked from its usedpools[] list,
1072      and linked to the front of its arena_object's singly-linked freepools list,
1073      via its nextpool member.  The prevpool member has no meaning in this case.
1074      Empty pools have no inherent size class:  the next time a malloc finds
1075      an empty list in usedpools[], it takes the first pool off of freepools.
1076      If the size class needed happens to be the same as the size class the pool
1077      last had, some pool initialization can be skipped.
1078  
1079  
1080  Block Management
1081  
1082  Blocks within pools are again carved out as needed.  pool->freeblock points to
1083  the start of a singly-linked list of free blocks within the pool.  When a
1084  block is freed, it's inserted at the front of its pool's freeblock list.  Note
1085  that the available blocks in a pool are *not* linked all together when a pool
1086  is initialized.  Instead only "the first two" (lowest addresses) blocks are
1087  set up, returning the first such block, and setting pool->freeblock to a
1088  one-block list holding the second such block.  This is consistent with the
1089  fact that pymalloc strives at all levels (arena, pool, and block) never to
1090  touch a piece of memory until it's actually needed.
1091  
1092  So long as a pool is in the used state, we're certain there *is* a block
1093  available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1094  points to the end of the free list before we've carved the entire pool into
1095  blocks, that means we simply haven't yet gotten to one of the higher-address
1096  blocks.  The offset from the pool_header to the start of "the next" virgin
1097  block is stored in the pool_header nextoffset member, and the largest value
1098  of nextoffset that makes sense is stored in the maxnextoffset member when a
1099  pool is initialized.  All the blocks in a pool have been passed out at least
1100  once when and only when nextoffset > maxnextoffset.
1101  
1102  
1103  Major obscurity:  While the usedpools vector is declared to have poolp
1104  entries, it doesn't really.  It really contains two pointers per (conceptual)
1105  poolp entry, the nextpool and prevpool members of a pool_header.  The
1106  excruciating initialization code below fools C so that
1107  
1108      usedpools[i+i]
1109  
1110  "acts like" a genuine poolp, but only so long as you only reference its
1111  nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1112  compensating for the fact that a pool_header's nextpool and prevpool members
1113  immediately follow a pool_header's first two members:
1114  
1115      union { block *_padding;
1116              uint count; } ref;
1117      block *freeblock;
1118  
1119  each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1120  contains is a fudged-up pointer p such that *if* C believes it's a poolp
1121  pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1122  circular list is empty).
1123  
1124  It's unclear why the usedpools setup is so convoluted.  It could be to
1125  minimize the amount of cache required to hold this heavily-referenced table
1126  (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1127  referencing code has to remember to "double the index" and doing so isn't
1128  free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1129  on the fact that C doesn't insert any padding anywhere in a pool_header at
1130  or before the prevpool member.
1131  **************************************************************************** */
1132  
1133  #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1134  #define PT(x)   PTA(x), PTA(x)
1135  
1136  static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1137      PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1138  #if NB_SMALL_SIZE_CLASSES > 8
1139      , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1140  #if NB_SMALL_SIZE_CLASSES > 16
1141      , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1142  #if NB_SMALL_SIZE_CLASSES > 24
1143      , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1144  #if NB_SMALL_SIZE_CLASSES > 32
1145      , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1146  #if NB_SMALL_SIZE_CLASSES > 40
1147      , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1148  #if NB_SMALL_SIZE_CLASSES > 48
1149      , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1150  #if NB_SMALL_SIZE_CLASSES > 56
1151      , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1152  #if NB_SMALL_SIZE_CLASSES > 64
1153  #error "NB_SMALL_SIZE_CLASSES should be less than 64"
1154  #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1155  #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1156  #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1157  #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1158  #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1159  #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1160  #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1161  #endif /* NB_SMALL_SIZE_CLASSES >  8 */
1162  };
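
#if 0
/* Illustrative sketch only: how the fudged-up headers above are consulted.
 * For size class index i, usedpools[i + i] acts as the list header, and the
 * header points at itself exactly when the list is empty, which is what the
 * PT() initializers arrange.  The helper name is hypothetical; the real
 * lookup happens in the allocator fast path below. */
static int
example_has_partially_used_pool(uint size_class_idx)
{
    poolp pool = usedpools[size_class_idx + size_class_idx];
    return pool != pool->nextpool;   /* non-empty circular list? */
}
#endif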
1163  
1164  /*==========================================================================
1165  Arena management.
1166  
1167  `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1168  which may not be currently used (== they're arena_objects that aren't
1169  currently associated with an allocated arena).  Note that arenas proper are
1170  separately malloc'ed.
1171  
1172  Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1173  we do try to free() arenas, and use some mild heuristic strategies to increase
1174  the likelihood that arenas eventually can be freed.
1175  
1176  unused_arena_objects
1177  
1178      This is a singly-linked list of the arena_objects that are currently not
1179      being used (no arena is associated with them).  Objects are taken off the
1180      head of the list in new_arena(), and are pushed on the head of the list in
1181      PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1182      is on this list if and only if its .address member is 0.
1183  
1184  usable_arenas
1185  
1186      This is a doubly-linked list of the arena_objects associated with arenas
1187      that have pools available.  These pools are either waiting to be reused,
1188      or have not been used before.  The list is sorted to have the most-
1189      allocated arenas first (ascending order based on the nfreepools member).
1190      This means that the next allocation will come from a heavily used arena,
1191      which gives the nearly empty arenas a chance to be returned to the system.
1192      In my unscientific tests this dramatically improved the number of arenas
1193      that could be freed.
1194  
1195  Note that an arena_object associated with an arena all of whose pools are
1196  currently in use isn't on either list.
1197  
1198  Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1199  used to be done by one-at-a-time linear search when an arena's number of
1200  free pools changed.  That could, overall, consume time quadratic in the
1201  number of arenas.  That didn't really matter when there were only a few
1202  hundred arenas (typical!), but could be a timing disaster when there were
1203  hundreds of thousands.  See bpo-37029.
1204  
1205  Now we have a vector of "search fingers" to eliminate the need to search:
1206  nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1207  with nfp free pools.  This is NULL if and only if there is no arena with
1208  nfp free pools in usable_arenas.
1209  */
1210  
1211  /* Array of objects used to track chunks of memory (arenas). */
1212  static struct arena_object* arenas = NULL;
1213  /* Number of slots currently allocated in the `arenas` vector. */
1214  static uint maxarenas = 0;
1215  
1216  /* The head of the singly-linked, NULL-terminated list of available
1217   * arena_objects.
1218   */
1219  static struct arena_object* unused_arena_objects = NULL;
1220  
1221  /* The head of the doubly-linked, NULL-terminated at each end, list of
1222   * arena_objects associated with arenas that have pools available.
1223   */
1224  static struct arena_object* usable_arenas = NULL;
1225  
1226  /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1227  static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1228  
1229  /* How many arena_objects do we initially allocate?
1230   * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1231   * `arenas` vector.
1232   */
1233  #define INITIAL_ARENA_OBJECTS 16
1234  
1235  /* Number of arenas allocated that haven't been free()'d. */
1236  static size_t narenas_currently_allocated = 0;
1237  
1238  /* Total number of times malloc() called to allocate an arena. */
1239  static size_t ntimes_arena_allocated = 0;
1240  /* High water mark (max value ever seen) for narenas_currently_allocated. */
1241  static size_t narenas_highwater = 0;
1242  
1243  static Py_ssize_t raw_allocated_blocks;
1244  
1245  Py_ssize_t
1246  _Py_GetAllocatedBlocks(void)
1247  {
1248      Py_ssize_t n = raw_allocated_blocks;
1249      /* add up allocated blocks for used pools */
1250      for (uint i = 0; i < maxarenas; ++i) {
1251          /* Skip arenas which are not allocated. */
1252          if (arenas[i].address == 0) {
1253              continue;
1254          }
1255  
1256          uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1257  
1258          /* visit every pool in the arena */
1259          assert(base <= (uintptr_t) arenas[i].pool_address);
1260          for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1261              poolp p = (poolp)base;
1262              n += p->ref.count;
1263          }
1264      }
1265      return n;
1266  }
1267  
1268  #if WITH_PYMALLOC_RADIX_TREE
1269  /*==========================================================================*/
1270  /* radix tree for tracking arena usage
1271  
1272     bit allocation for keys
1273  
1274     64-bit pointers and 2^20 arena size:
1275       16 -> ignored (POINTER_BITS - ADDRESS_BITS)
1276       10 -> MAP_TOP
1277       10 -> MAP_MID
1278        8 -> MAP_BOT
1279       20 -> ideal aligned arena
1280     ----
1281       64
1282  
1283     32-bit pointers and 2^18 arena size:
1284       14 -> MAP_BOT
1285       18 -> ideal aligned arena
1286     ----
1287       32
1288  
1289  */
1290  
1291  #if SIZEOF_VOID_P == 8
1292  
1293  /* number of bits in a pointer */
1294  #define POINTER_BITS 64
1295  
1296  /* Current 64-bit processors are limited to 48-bit physical addresses.  For
1297   * now, the top 17 bits of addresses will all be equal to bit 2**47.  If that
1298   * changes in the future, this must be adjusted upwards.
1299   */
1300  #define ADDRESS_BITS 48
1301  
1302  /* use the top and mid layers of the radix tree */
1303  #define USE_INTERIOR_NODES
1304  
1305  #elif SIZEOF_VOID_P == 4
1306  
1307  #define POINTER_BITS 32
1308  #define ADDRESS_BITS 32
1309  
1310  #else
1311  
1312   /* Currently this code works for 64-bit or 32-bit pointers only.  */
1313  #error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1314  
1315  #endif /* SIZEOF_VOID_P */
1316  
1317  /* arena_coverage_t members require this to be true  */
1318  #if ARENA_BITS >= 32
1319  #   error "arena size must be < 2^32"
1320  #endif
1321  
1322  #ifdef USE_INTERIOR_NODES
1323  /* number of bits used for MAP_TOP and MAP_MID nodes */
1324  #define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1325  #else
1326  #define INTERIOR_BITS 0
1327  #endif
1328  
1329  #define MAP_TOP_BITS INTERIOR_BITS
1330  #define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1331  #define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1332  
1333  #define MAP_MID_BITS INTERIOR_BITS
1334  #define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1335  #define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1336  
1337  #define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1338  #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1339  #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1340  
1341  #define MAP_BOT_SHIFT ARENA_BITS
1342  #define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1343  #define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1344  
1345  #define AS_UINT(p) ((uintptr_t)(p))
1346  #define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1347  #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1348  #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
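
#if 0
/* Illustrative sketch only: how the macros above decompose a pointer.  With
 * 64-bit pointers, ADDRESS_BITS == 48 and ARENA_BITS == 20, a pointer is
 * split (high to low) into a MAP_TOP index, a MAP_MID index, a MAP_BOT index
 * and a 20-bit offset within the (ideal) arena.  The helper name is
 * hypothetical. */
static void
example_decompose_pointer(block *p)
{
    int i_top = MAP_TOP_INDEX(p);     /* selects an arena_map_mid_t */
    int i_mid = MAP_MID_INDEX(p);     /* selects an arena_map_bot_t */
    int i_bot = MAP_BOT_INDEX(p);     /* selects an arena_coverage_t */
    uintptr_t tail = AS_UINT(p) & ARENA_SIZE_MASK;  /* offset inside arena */
    (void)i_top; (void)i_mid; (void)i_bot; (void)tail;
}
#endif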
1349  
1350  #if ADDRESS_BITS > POINTER_BITS
1351  /* Return non-physical address bits of a pointer.  Those bits should be the
1352   * same for all valid pointers if ADDRESS_BITS is set correctly.  Linux has
1353   * support for a 57-bit address space (Intel 5-level paging) but will not
1354   * currently give those addresses to user space.
1355   */
1356  #define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1357  #else
1358  #define HIGH_BITS(p) 0
1359  #endif
1360  
1361  
1362  /* This is the leaf of the radix tree.  See arena_map_mark_used() for the
1363   * meaning of these members. */
1364  typedef struct {
1365      int32_t tail_hi;
1366      int32_t tail_lo;
1367  } arena_coverage_t;
1368  
1369  typedef struct arena_map_bot {
1370      /* The members tail_hi and tail_lo are accessed together.  So, it is
1371       * better to have them as an array of structs, rather than two
1372       * arrays.
1373       */
1374      arena_coverage_t arenas[MAP_BOT_LENGTH];
1375  } arena_map_bot_t;
1376  
1377  #ifdef USE_INTERIOR_NODES
1378  typedef struct arena_map_mid {
1379      struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1380  } arena_map_mid_t;
1381  
1382  typedef struct arena_map_top {
1383      struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1384  } arena_map_top_t;
1385  #endif
1386  
1387  /* The root of radix tree.  Note that by initializing like this, the memory
1388   * should be in the BSS.  The OS will only memory map pages as the MAP_MID
1389   * nodes get used (OS pages are demand loaded as needed).
1390   */
1391  #ifdef USE_INTERIOR_NODES
1392  static arena_map_top_t arena_map_root;
1393  /* accounting for number of used interior nodes */
1394  static int arena_map_mid_count;
1395  static int arena_map_bot_count;
1396  #else
1397  static arena_map_bot_t arena_map_root;
1398  #endif
1399  
1400  /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1401   * it cannot be created */
1402  static arena_map_bot_t *
1403  arena_map_get(block *p, int create)
1404  {
1405  #ifdef USE_INTERIOR_NODES
1406      /* sanity check that ADDRESS_BITS is correct */
1407      assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1408      int i1 = MAP_TOP_INDEX(p);
1409      if (arena_map_root.ptrs[i1] == NULL) {
1410          if (!create) {
1411              return NULL;
1412          }
1413          arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1414          if (n == NULL) {
1415              return NULL;
1416          }
1417          arena_map_root.ptrs[i1] = n;
1418          arena_map_mid_count++;
1419      }
1420      int i2 = MAP_MID_INDEX(p);
1421      if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1422          if (!create) {
1423              return NULL;
1424          }
1425          arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1426          if (n == NULL) {
1427              return NULL;
1428          }
1429          arena_map_root.ptrs[i1]->ptrs[i2] = n;
1430          arena_map_bot_count++;
1431      }
1432      return arena_map_root.ptrs[i1]->ptrs[i2];
1433  #else
1434      return &arena_map_root;
1435  #endif
1436  }
1437  
1438  
1439  /* The radix tree only tracks arenas.  So, for 1 MiB arenas (ARENA_BITS ==
1440   * 20), we throw away the low 20 bits of the address.  That reduces the space
1441   * requirement of the tree compared to similar radix tree page-map schemes.  In
1442   * exchange for slashing the space requirement, it needs more
1443   * computation to check an address.
1444   *
1445   * Tracking coverage is done by "ideal" arena address.  It is easier to
1446   * explain in decimal so let's say that the arena size is 100 bytes.
1447   * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1448   * pointer address is inside an actual arena, we have to check two ideal
1449   * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1450   * 300.  In the rare case that an arena is aligned in the ideal way
1451   * (e.g. base address of arena is 200) then we only have to check one
1452   * ideal address.
1453   *
1454   * The tree nodes for 200 and 300 both store the address of the arena.
1455   * There are two cases: the arena starts at a lower ideal arena and
1456   * extends to this one, or the arena starts in this arena and extends to
1457   * the next ideal arena.  The tail_lo and tail_hi members correspond to
1458   * these two cases.
1459   */
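/* Worked example (illustrative only), continuing the decimal analogy above
 * with an arena size of 100: suppose an arena is allocated at base address
 * 140, so it covers addresses 140..239.  The node for ideal address 100 gets
 * tail_hi = 40 (an arena starts at offset 40 of that block and runs to its
 * end), and the node for ideal address 200 gets tail_lo = 40 (an arena from
 * the previous block extends into this one up to offset 40).  Then, using
 * the check in arena_map_is_used() below, (tail < lo) || (tail >= hi && hi != 0):
 *   - pointer 157: tail 57 >= hi 40, so it is inside the arena;
 *   - pointer 230: tail 30 <  lo 40, so it is inside the arena;
 *   - pointer 120 (tail 20) and pointer 253 (tail 53) fail both halves of
 *     the test, so they are correctly reported as outside.
 */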
1460  
1461  
1462  /* mark or unmark addresses covered by arena */
1463  static int
1464  arena_map_mark_used(uintptr_t arena_base, int is_used)
1465  {
1466      /* sanity check that ADDRESS_BITS is correct */
1467      assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1468      arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1469      if (n_hi == NULL) {
1470          assert(is_used); /* otherwise node should already exist */
1471          return 0; /* failed to allocate space for node */
1472      }
1473      int i3 = MAP_BOT_INDEX((block *)arena_base);
1474      int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1475      if (tail == 0) {
1476          /* is ideal arena address */
1477          n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1478      }
1479      else {
1480          /* arena_base address is not ideal (aligned to arena size) and
1481           * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1482           * for the next arena.  Note that it might be in different MAP_TOP
1483           * and MAP_MID nodes as well so we need to call arena_map_get()
1484           * again (do the full tree traversal).
1485           */
1486          n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1487          uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1488          /* If arena_base is a legit arena address, so is arena_base_next - 1
1489           * (last address in arena).  If arena_base_next overflows then it
1490           * must overflow to 0.  However, that would mean arena_base was
1491           * "ideal" and we should not be in this case. */
1492          assert(arena_base < arena_base_next);
1493          arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1494          if (n_lo == NULL) {
1495              assert(is_used); /* otherwise should already exist */
1496              n_hi->arenas[i3].tail_hi = 0;
1497              return 0; /* failed to allocate space for node */
1498          }
1499          int i3_next = MAP_BOT_INDEX(arena_base_next);
1500          n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1501      }
1502      return 1;
1503  }
1504  
1505  /* Return true if 'p' is a pointer inside an obmalloc arena.
1506   * _PyObject_Free() calls this so it needs to be very fast. */
1507  static int
1508  arena_map_is_used(block *p)
1509  {
1510      arena_map_bot_t *n = arena_map_get(p, 0);
1511      if (n == NULL) {
1512          return 0;
1513      }
1514      int i3 = MAP_BOT_INDEX(p);
1515      /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1516      int32_t hi = n->arenas[i3].tail_hi;
1517      int32_t lo = n->arenas[i3].tail_lo;
1518      int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1519      return (tail < lo) || (tail >= hi && hi != 0);
1520  }
1521  
1522  /* end of radix tree logic */
1523  /*==========================================================================*/
1524  #endif /* WITH_PYMALLOC_RADIX_TREE */
1525  
1526  
1527  /* Allocate a new arena.  If we run out of memory, return NULL.  Else
1528   * allocate a new arena, and return the address of an arena_object
1529   * describing the new arena.  It's expected that the caller will set
1530   * `usable_arenas` to the return value.
1531   */
1532  static struct arena_object*
1533  new_arena(void)
1534  {
1535      struct arena_object* arenaobj;
1536      uint excess;        /* number of bytes above pool alignment */
1537      void *address;
1538      static int debug_stats = -1;
1539  
1540      if (debug_stats == -1) {
1541          const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1542          debug_stats = (opt != NULL && *opt != '\0');
1543      }
1544      if (debug_stats)
1545          _PyObject_DebugMallocStats(stderr);
1546  
1547      if (unused_arena_objects == NULL) {
1548          uint i;
1549          uint numarenas;
1550          size_t nbytes;
1551  
1552          /* Double the number of arena objects on each allocation.
1553           * Note that it's possible for `numarenas` to overflow.
1554           */
1555          numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1556          if (numarenas <= maxarenas)
1557              return NULL;                /* overflow */
1558  #if SIZEOF_SIZE_T <= SIZEOF_INT
1559          if (numarenas > SIZE_MAX / sizeof(*arenas))
1560              return NULL;                /* overflow */
1561  #endif
1562          nbytes = numarenas * sizeof(*arenas);
1563          arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1564          if (arenaobj == NULL)
1565              return NULL;
1566          arenas = arenaobj;
1567  
1568          /* We might need to fix pointers that were copied.  However,
1569           * new_arena only gets called when all the pages in the
1570           * previous arenas are full.  Thus, there are *no* pointers
1571           * into the old array, so we don't have to worry about
1572           * invalid pointers.  Just to be sure, some asserts:
1573           */
1574          assert(usable_arenas == NULL);
1575          assert(unused_arena_objects == NULL);
1576  
1577          /* Put the new arenas on the unused_arena_objects list. */
1578          for (i = maxarenas; i < numarenas; ++i) {
1579              arenas[i].address = 0;              /* mark as unassociated */
1580              arenas[i].nextarena = i < numarenas - 1 ?
1581                                     &arenas[i+1] : NULL;
1582          }
1583  
1584          /* Update globals. */
1585          unused_arena_objects = &arenas[maxarenas];
1586          maxarenas = numarenas;
1587      }
1588  
1589      /* Take the next available arena object off the head of the list. */
1590      assert(unused_arena_objects != NULL);
1591      arenaobj = unused_arena_objects;
1592      unused_arena_objects = arenaobj->nextarena;
1593      assert(arenaobj->address == 0);
1594      address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1595  #if WITH_PYMALLOC_RADIX_TREE
1596      if (address != NULL) {
1597          if (!arena_map_mark_used((uintptr_t)address, 1)) {
1598              /* marking arena in radix tree failed, abort */
1599              _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1600              address = NULL;
1601          }
1602      }
1603  #endif
1604      if (address == NULL) {
1605          /* The allocation failed: return NULL after putting the
1606           * arenaobj back.
1607           */
1608          arenaobj->nextarena = unused_arena_objects;
1609          unused_arena_objects = arenaobj;
1610          return NULL;
1611      }
1612      arenaobj->address = (uintptr_t)address;
1613  
1614      ++narenas_currently_allocated;
1615      ++ntimes_arena_allocated;
1616      if (narenas_currently_allocated > narenas_highwater)
1617          narenas_highwater = narenas_currently_allocated;
1618      arenaobj->freepools = NULL;
1619      /* pool_address <- first pool-aligned address in the arena
1620         nfreepools <- number of whole pools that fit after alignment */
1621      arenaobj->pool_address = (block*)arenaobj->address;
1622      arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1623      excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1624      if (excess != 0) {
1625          --arenaobj->nfreepools;
1626          arenaobj->pool_address += POOL_SIZE - excess;
1627      }
1628      arenaobj->ntotalpools = arenaobj->nfreepools;
1629  
1630      return arenaobj;
1631  }
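/* The arena-array growth in new_arena() uses a common overflow-checked
 * doubling idiom.  A minimal standalone sketch of the same pattern
 * (illustrative only; the initial capacity of 16 is arbitrary):
 *
 *     #include <stdint.h>
 *     #include <stdlib.h>
 *
 *     static int
 *     grow_capacity(unsigned int *cap, size_t elem_size, void **buf)
 *     {
 *         unsigned int newcap = *cap ? *cap << 1 : 16;
 *         if (newcap <= *cap) {
 *             return -1;              // unsigned doubling wrapped around
 *         }
 *         if (elem_size != 0 && newcap > SIZE_MAX / elem_size) {
 *             return -1;              // byte count would overflow size_t
 *         }
 *         void *p = realloc(*buf, (size_t)newcap * elem_size);
 *         if (p == NULL) {
 *             return -1;              // the old buffer is still valid
 *         }
 *         *buf = p;
 *         *cap = newcap;
 *         return 0;
 *     }
 */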
1632  
1633  
1634  
1635  #if WITH_PYMALLOC_RADIX_TREE
1636  /* Return true if and only if P is an address that was allocated by
1637     pymalloc.  When the radix tree is used, the 'pool' argument is unused.
1638   */
1639  static bool
1640  address_in_range(void *p, poolp pool)
1641  {
1642      return arena_map_is_used(p);
1643  }
1644  #else
1645  /*
1646  address_in_range(P, POOL)
1647  
1648  Return true if and only if P is an address that was allocated by pymalloc.
1649  POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1650  (the caller is asked to compute this because the macro expands POOL more than
1651  once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1652  variable and pass the latter to the macro; because address_in_range is
1653  called on every alloc/realloc/free, micro-efficiency is important here).
1654  
1655  Tricky:  Let B be the arena base address associated with the pool, B =
1656  arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1657  
1658      B <= P < B + ARENA_SIZE
1659  
1660  Subtracting B throughout, this is true iff
1661  
1662      0 <= P-B < ARENA_SIZE
1663  
1664  By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1665  
1666  Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1667  before the first arena has been allocated.  `arenas` is still NULL in that
1668  case.  We're relying on the fact that maxarenas is also 0 in that case, so that
1669  (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1670  into a NULL arenas.
1671  
1672  Details:  given P and POOL, the arena_object corresponding to P is AO =
1673  arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1674  stores, etc), POOL is the correct address of P's pool, AO.address is the
1675  correct base address of the pool's arena, and P must be within ARENA_SIZE of
1676  AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1677  (NULL)).  Therefore address_in_range correctly reports that obmalloc
1678  controls P.
1679  
1680  Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1681  call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1682  in this case -- it may even be uninitialized trash.  If the trash arenaindex
1683  is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1684  control P.
1685  
1686  Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
1687  allocated arena, obmalloc controls all the memory in slice AO.address :
1688  AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1689  so P doesn't lie in that slice, so the macro correctly reports that P is not
1690  controlled by obmalloc.
1691  
1692  Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1693  arena_object (one not currently associated with an allocated arena),
1694  AO.address is 0, and the second test in the macro reduces to:
1695  
1696      P < ARENA_SIZE
1697  
1698  If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1699  that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1700  of the test still passes, and the third clause (AO.address != 0) is necessary
1701  to get the correct result:  AO.address is 0 in this case, so the macro
1702  correctly reports that P is not controlled by obmalloc (despite that P lies in
1703  slice AO.address : AO.address + ARENA_SIZE).
1704  
1705  Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1706  2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1707  corresponded to a currently-allocated arena, so the "P is not controlled by
1708  obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1709  was impossible.
1710  
1711  Note that the logic is excruciating, and reading up possibly uninitialized
1712  memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1713  creates problems for some memory debuggers.  The overwhelming advantage is
1714  that this test determines whether an arbitrary address is controlled by
1715  obmalloc in a small constant time, independent of the number of arenas
1716  obmalloc controls.  Since this test is needed at every entry point, it's
1717  extremely desirable that it be this fast.
1718  */
1719  
1720  static bool _Py_NO_SANITIZE_ADDRESS
1721              _Py_NO_SANITIZE_THREAD
1722              _Py_NO_SANITIZE_MEMORY
1723  address_in_range(void *p, poolp pool)
1724  {
1725      // Since address_in_range may be reading from memory which was not allocated
1726      // by Python, it is important that pool->arenaindex is read only once, as
1727      // another thread may be concurrently modifying the value without holding
1728      // the GIL. The following dance forces the compiler to read pool->arenaindex
1729      // only once.
1730      uint arenaindex = *((volatile uint *)&pool->arenaindex);
1731      return arenaindex < maxarenas &&
1732          (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1733          arenas[arenaindex].address != 0;
1734  }
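/* The single comparison above relies on unsigned arithmetic: with B and P
 * converted to uintptr_t, "B <= P && P < B + ARENA_SIZE" collapses into one
 * test because P - B wraps around to a huge value whenever P < B.  A minimal
 * standalone sketch of the same idiom (illustrative only):
 *
 *     #include <stdbool.h>
 *     #include <stdint.h>
 *
 *     static bool
 *     in_region(const void *p, const void *base, size_t size)
 *     {
 *         // If p < base the subtraction wraps, the result exceeds size,
 *         // and the test fails, exactly as intended.
 *         return (uintptr_t)p - (uintptr_t)base < size;
 *     }
 */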
1735  
1736  #endif /* !WITH_PYMALLOC_RADIX_TREE */
1737  
1738  /*==========================================================================*/
1739  
1740  // Called when the freelist is exhausted.  Extend the freelist if there is
1741  // space for a block.  Otherwise, remove this pool from usedpools.
1742  static void
1743  pymalloc_pool_extend(poolp pool, uint size)
1744  {
1745      if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1746          /* There is room for another block. */
1747          pool->freeblock = (block*)pool + pool->nextoffset;
1748          pool->nextoffset += INDEX2SIZE(size);
1749          *(block **)(pool->freeblock) = NULL;
1750          return;
1751      }
1752  
1753      /* Pool is full, unlink from used pools. */
1754      poolp next;
1755      next = pool->nextpool;
1756      pool = pool->prevpool;
1757      next->prevpool = pool;
1758      pool->nextpool = next;
1759  }
1760  
1761  /* Called when pymalloc_alloc cannot allocate a block from a used pool.
1762   * This function takes a new pool and allocates a block from it.
1763   */
1764  static void*
1765  allocate_from_new_pool(uint size)
1766  {
1767      /* There isn't a pool of the right size class immediately
1768       * available:  use a free pool.
1769       */
1770      if (UNLIKELY(usable_arenas == NULL)) {
1771          /* No arena has a free pool:  allocate a new arena. */
1772  #ifdef WITH_MEMORY_LIMITS
1773          if (narenas_currently_allocated >= MAX_ARENAS) {
1774              return NULL;
1775          }
1776  #endif
1777          usable_arenas = new_arena();
1778          if (usable_arenas == NULL) {
1779              return NULL;
1780          }
1781          usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1782          assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1783          nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1784      }
1785      assert(usable_arenas->address != 0);
1786  
1787      /* This arena already had the smallest nfreepools value, so decreasing
1788       * nfreepools doesn't change that, and we don't need to rearrange the
1789       * usable_arenas list.  However, if the arena becomes wholly allocated,
1790       * we need to remove its arena_object from usable_arenas.
1791       */
1792      assert(usable_arenas->nfreepools > 0);
1793      if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1794          /* It's the last arena with this nfreepools count, so there won't be any left. */
1795          nfp2lasta[usable_arenas->nfreepools] = NULL;
1796      }
1797      /* If any free pools will remain, it will be the new smallest. */
1798      if (usable_arenas->nfreepools > 1) {
1799          assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1800          nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1801      }
1802  
1803      /* Try to get a cached free pool. */
1804      poolp pool = usable_arenas->freepools;
1805      if (LIKELY(pool != NULL)) {
1806          /* Unlink from cached pools. */
1807          usable_arenas->freepools = pool->nextpool;
1808          usable_arenas->nfreepools--;
1809          if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1810              /* Wholly allocated:  remove. */
1811              assert(usable_arenas->freepools == NULL);
1812              assert(usable_arenas->nextarena == NULL ||
1813                     usable_arenas->nextarena->prevarena ==
1814                     usable_arenas);
1815              usable_arenas = usable_arenas->nextarena;
1816              if (usable_arenas != NULL) {
1817                  usable_arenas->prevarena = NULL;
1818                  assert(usable_arenas->address != 0);
1819              }
1820          }
1821          else {
1822              /* nfreepools > 0:  it must be that freepools
1823               * isn't NULL, or that we haven't yet carved
1824               * off all the arena's pools for the first
1825               * time.
1826               */
1827              assert(usable_arenas->freepools != NULL ||
1828                     usable_arenas->pool_address <=
1829                     (block*)usable_arenas->address +
1830                         ARENA_SIZE - POOL_SIZE);
1831          }
1832      }
1833      else {
1834          /* Carve off a new pool. */
1835          assert(usable_arenas->nfreepools > 0);
1836          assert(usable_arenas->freepools == NULL);
1837          pool = (poolp)usable_arenas->pool_address;
1838          assert((block*)pool <= (block*)usable_arenas->address +
1839                                   ARENA_SIZE - POOL_SIZE);
1840          pool->arenaindex = (uint)(usable_arenas - arenas);
1841          assert(&arenas[pool->arenaindex] == usable_arenas);
1842          pool->szidx = DUMMY_SIZE_IDX;
1843          usable_arenas->pool_address += POOL_SIZE;
1844          --usable_arenas->nfreepools;
1845  
1846          if (usable_arenas->nfreepools == 0) {
1847              assert(usable_arenas->nextarena == NULL ||
1848                     usable_arenas->nextarena->prevarena ==
1849                     usable_arenas);
1850              /* Unlink the arena:  it is completely allocated. */
1851              usable_arenas = usable_arenas->nextarena;
1852              if (usable_arenas != NULL) {
1853                  usable_arenas->prevarena = NULL;
1854                  assert(usable_arenas->address != 0);
1855              }
1856          }
1857      }
1858  
1859      /* Frontlink to used pools. */
1860      block *bp;
1861      poolp next = usedpools[size + size]; /* == prev */
1862      pool->nextpool = next;
1863      pool->prevpool = next;
1864      next->nextpool = pool;
1865      next->prevpool = pool;
1866      pool->ref.count = 1;
1867      if (pool->szidx == size) {
1868          /* Luckily, this pool last contained blocks
1869           * of the same size class, so its header
1870           * and free list are already initialized.
1871           */
1872          bp = pool->freeblock;
1873          assert(bp != NULL);
1874          pool->freeblock = *(block **)bp;
1875          return bp;
1876      }
1877      /*
1878       * Initialize the pool header, set up the free list to
1879       * contain just the second block, and return the first
1880       * block.
1881       */
1882      pool->szidx = size;
1883      size = INDEX2SIZE(size);
1884      bp = (block *)pool + POOL_OVERHEAD;
1885      pool->nextoffset = POOL_OVERHEAD + (size << 1);
1886      pool->maxnextoffset = POOL_SIZE - size;
1887      pool->freeblock = bp + size;
1888      *(block **)(pool->freeblock) = NULL;
1889      return bp;
1890  }
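/* Worked example for the pool initialization above (illustrative; assumes
 * POOL_SIZE == 4096 and POOL_OVERHEAD == 48 for concreteness).  Carving a
 * fresh pool for the 32-byte size class returns the block at offset 48,
 * stores a NULL "next" pointer in the block at offset 80 and makes it the
 * head of freeblock, then sets nextoffset = 48 + 2*32 = 112 and
 * maxnextoffset = 4096 - 32 = 4064.  Freed blocks are pushed back onto
 * freeblock, and pymalloc_pool_extend() keeps carving new blocks at offsets
 * 112, 144, ... until nextoffset exceeds maxnextoffset.
 */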
1891  
1892  /* pymalloc allocator
1893  
1894     Return a pointer to newly allocated memory if pymalloc allocated memory.
1895  
1896     Return NULL if pymalloc failed to allocate the memory block: for requests
1897     larger than SMALL_REQUEST_THRESHOLD, on an error in the code below (the caller
1898     then gets a last chance to serve the request), or when the max memory limit has been reached.
1899  */
1900  static inline void*
1901  pymalloc_alloc(void *ctx, size_t nbytes)
1902  {
1903  #ifdef WITH_VALGRIND
1904      if (UNLIKELY(running_on_valgrind == -1)) {
1905          running_on_valgrind = RUNNING_ON_VALGRIND;
1906      }
1907      if (UNLIKELY(running_on_valgrind)) {
1908          return NULL;
1909      }
1910  #endif
1911  
1912      if (UNLIKELY(nbytes == 0)) {
1913          return NULL;
1914      }
1915      if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1916          return NULL;
1917      }
1918  
1919      uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1920      poolp pool = usedpools[size + size];
1921      block *bp;
1922  
1923      if (LIKELY(pool != pool->nextpool)) {
1924          /*
1925           * There is a used pool for this size class.
1926           * Pick up the head block of its free list.
1927           */
1928          ++pool->ref.count;
1929          bp = pool->freeblock;
1930          assert(bp != NULL);
1931  
1932          if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1933              // Reached the end of the free list, try to extend it.
1934              pymalloc_pool_extend(pool, size);
1935          }
1936      }
1937      else {
1938          /* There isn't a pool of the right size class immediately
1939           * available:  use a free pool.
1940           */
1941          bp = allocate_from_new_pool(size);
1942      }
1943  
1944      return (void *)bp;
1945  }
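/* Size-class mapping sketch (illustrative; assumes ALIGNMENT == 16, i.e.
 * ALIGNMENT_SHIFT == 4, as on typical 64-bit builds):
 *
 *     nbytes  1..16  ->  size index 0  (16-byte blocks)
 *     nbytes 17..32  ->  size index 1  (32-byte blocks)
 *     nbytes 33..48  ->  size index 2  (48-byte blocks)
 *     ...
 *
 * i.e. size = (nbytes - 1) >> ALIGNMENT_SHIFT, and usedpools[size + size]
 * anchors the circular list of partially used pools for that size class.
 */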
1946  
1947  
1948  static void *
1949  _PyObject_Malloc(void *ctx, size_t nbytes)
1950  {
1951      void* ptr = pymalloc_alloc(ctx, nbytes);
1952      if (LIKELY(ptr != NULL)) {
1953          return ptr;
1954      }
1955  
1956      ptr = PyMem_RawMalloc(nbytes);
1957      if (ptr != NULL) {
1958          raw_allocated_blocks++;
1959      }
1960      return ptr;
1961  }
1962  
1963  
1964  static void *
1965  _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1966  {
1967      assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1968      size_t nbytes = nelem * elsize;
1969  
1970      void* ptr = pymalloc_alloc(ctx, nbytes);
1971      if (LIKELY(ptr != NULL)) {
1972          memset(ptr, 0, nbytes);
1973          return ptr;
1974      }
1975  
1976      ptr = PyMem_RawCalloc(nelem, elsize);
1977      if (ptr != NULL) {
1978          raw_allocated_blocks++;
1979      }
1980      return ptr;
1981  }
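/* The assert above documents the caller's contract: nelem * elsize must not
 * overflow.  A minimal sketch of the division-based check a caller would use
 * (illustrative only; mul_fits is a hypothetical helper, not a CPython API):
 *
 *     #include <stdbool.h>
 *     #include <stddef.h>
 *
 *     static bool
 *     mul_fits(size_t nelem, size_t elsize, size_t limit)
 *     {
 *         // Dividing first avoids the very overflow being detected.
 *         return elsize == 0 || nelem <= limit / elsize;
 *     }
 *
 * e.g. mul_fits(nelem, elsize, (size_t)PY_SSIZE_T_MAX) mirrors the assert.
 */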
1982  
1983  
1984  static void
1985  insert_to_usedpool(poolp pool)
1986  {
1987      assert(pool->ref.count > 0);            /* else the pool is empty */
1988  
1989      uint size = pool->szidx;
1990      poolp next = usedpools[size + size];
1991      poolp prev = next->prevpool;
1992  
1993      /* insert pool before next:   prev <-> pool <-> next */
1994      pool->nextpool = next;
1995      pool->prevpool = prev;
1996      next->prevpool = pool;
1997      prev->nextpool = pool;
1998  }
1999  
2000  static void
2001  insert_to_freepool(poolp pool)
2002  {
2003      poolp next = pool->nextpool;
2004      poolp prev = pool->prevpool;
2005      next->prevpool = prev;
2006      prev->nextpool = next;
2007  
2008      /* Link the pool to freepools.  This is a singly-linked
2009       * list, and pool->prevpool isn't used there.
2010       */
2011      struct arena_object *ao = &arenas[pool->arenaindex];
2012      pool->nextpool = ao->freepools;
2013      ao->freepools = pool;
2014      uint nf = ao->nfreepools;
2015      /* If this is the rightmost arena with this number of free pools,
2016       * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2017       * are no arenas in usable_arenas with that value.
2018       */
2019      struct arena_object* lastnf = nfp2lasta[nf];
2020      assert((nf == 0 && lastnf == NULL) ||
2021             (nf > 0 &&
2022              lastnf != NULL &&
2023              lastnf->nfreepools == nf &&
2024              (lastnf->nextarena == NULL ||
2025               nf < lastnf->nextarena->nfreepools)));
2026      if (lastnf == ao) {  /* it is the rightmost */
2027          struct arena_object* p = ao->prevarena;
2028          nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2029      }
2030      ao->nfreepools = ++nf;
2031  
2032      /* All the rest is arena management.  We just freed
2033       * a pool, and there are 4 cases for arena mgmt:
2034       * 1. If all the pools are free, return the arena to
2035       *    the system free().  Except if this is the last
2036       *    arena in the list, keep it to avoid thrashing:
2037       *    keeping one wholly free arena in the list avoids
2038       *    pathological cases where a simple loop would
2039       *    otherwise provoke needing to allocate and free an
2040       *    arena on every iteration.  See bpo-37257.
2041       * 2. If this is the only free pool in the arena,
2042       *    add the arena back to the `usable_arenas` list.
2043       * 3. If the "next" arena has a smaller count of free
2044       *    pools, we have to "slide this arena right" to
2045       *    restore that usable_arenas is sorted in order of
2046       *    nfreepools.
2047       * 4. Else there's nothing more to do.
2048       */
2049      if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2050          /* Case 1.  First unlink ao from usable_arenas.
2051           */
2052          assert(ao->prevarena == NULL ||
2053                 ao->prevarena->address != 0);
2054          assert(ao ->nextarena == NULL ||
2055                 ao->nextarena->address != 0);
2056  
2057          /* Fix the pointer in the prevarena, or the
2058           * usable_arenas pointer.
2059           */
2060          if (ao->prevarena == NULL) {
2061              usable_arenas = ao->nextarena;
2062              assert(usable_arenas == NULL ||
2063                     usable_arenas->address != 0);
2064          }
2065          else {
2066              assert(ao->prevarena->nextarena == ao);
2067              ao->prevarena->nextarena =
2068                  ao->nextarena;
2069          }
2070          /* Fix the pointer in the nextarena. */
2071          if (ao->nextarena != NULL) {
2072              assert(ao->nextarena->prevarena == ao);
2073              ao->nextarena->prevarena =
2074                  ao->prevarena;
2075          }
2076          /* Record that this arena_object slot is
2077           * available to be reused.
2078           */
2079          ao->nextarena = unused_arena_objects;
2080          unused_arena_objects = ao;
2081  
2082  #if WITH_PYMALLOC_RADIX_TREE
2083          /* mark arena region as not under control of obmalloc */
2084          arena_map_mark_used(ao->address, 0);
2085  #endif
2086  
2087          /* Free the entire arena. */
2088          _PyObject_Arena.free(_PyObject_Arena.ctx,
2089                               (void *)ao->address, ARENA_SIZE);
2090          ao->address = 0;                        /* mark unassociated */
2091          --narenas_currently_allocated;
2092  
2093          return;
2094      }
2095  
2096      if (nf == 1) {
2097          /* Case 2.  Put ao at the head of
2098           * usable_arenas.  Note that because
2099           * ao->nfreepools was 0 before, ao isn't
2100           * currently on the usable_arenas list.
2101           */
2102          ao->nextarena = usable_arenas;
2103          ao->prevarena = NULL;
2104          if (usable_arenas)
2105              usable_arenas->prevarena = ao;
2106          usable_arenas = ao;
2107          assert(usable_arenas->address != 0);
2108          if (nfp2lasta[1] == NULL) {
2109              nfp2lasta[1] = ao;
2110          }
2111  
2112          return;
2113      }
2114  
2115      /* If this arena is now out of order, we need to keep
2116       * the list sorted.  The list is kept sorted so that
2117       * the "most full" arenas are used first, which allows
2118       * the nearly empty arenas to be completely freed.  In
2119       * a few un-scientific tests, it seems like this
2120       * approach allowed a lot more memory to be freed.
2121       */
2122      /* If this is the only arena with nf, record that. */
2123      if (nfp2lasta[nf] == NULL) {
2124          nfp2lasta[nf] = ao;
2125      } /* else the rightmost with nf doesn't change */
2126      /* If this was the rightmost of the old size, it remains in place. */
2127      if (ao == lastnf) {
2128          /* Case 4.  Nothing to do. */
2129          return;
2130      }
2131      /* If ao were the only arena in the list, the last block would have
2132       * gotten us out.
2133       */
2134      assert(ao->nextarena != NULL);
2135  
2136      /* Case 3:  We have to move the arena towards the end of the list,
2137       * because it has more free pools than the arena to its right.  It needs
2138       * to move to follow lastnf.
2139       * First unlink ao from usable_arenas.
2140       */
2141      if (ao->prevarena != NULL) {
2142          /* ao isn't at the head of the list */
2143          assert(ao->prevarena->nextarena == ao);
2144          ao->prevarena->nextarena = ao->nextarena;
2145      }
2146      else {
2147          /* ao is at the head of the list */
2148          assert(usable_arenas == ao);
2149          usable_arenas = ao->nextarena;
2150      }
2151      ao->nextarena->prevarena = ao->prevarena;
2152      /* And insert after lastnf. */
2153      ao->prevarena = lastnf;
2154      ao->nextarena = lastnf->nextarena;
2155      if (ao->nextarena != NULL) {
2156          ao->nextarena->prevarena = ao;
2157      }
2158      lastnf->nextarena = ao;
2159      /* Verify that the swaps worked. */
2160      assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2161      assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2162      assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2163      assert((usable_arenas == ao && ao->prevarena == NULL)
2164             || ao->prevarena->nextarena == ao);
2165  }
2166  
2167  /* Free a memory block allocated by pymalloc_alloc().
2168     Return 1 if it was freed.
2169     Return 0 if the block was not allocated by pymalloc_alloc(). */
2170  static inline int
2171  pymalloc_free(void *ctx, void *p)
2172  {
2173      assert(p != NULL);
2174  
2175  #ifdef WITH_VALGRIND
2176      if (UNLIKELY(running_on_valgrind > 0)) {
2177          return 0;
2178      }
2179  #endif
2180  
2181      poolp pool = POOL_ADDR(p);
2182      if (UNLIKELY(!address_in_range(p, pool))) {
2183          return 0;
2184      }
2185      /* We allocated this address. */
2186  
2187      /* Link p to the start of the pool's freeblock list.  Since
2188       * the pool had at least the p block outstanding, the pool
2189       * wasn't empty (so it's already in a usedpools[] list, or
2190       * was full and is in no list -- it's not in the freeblocks
2191       * list in any case).
2192       */
2193      assert(pool->ref.count > 0);            /* else it was empty */
2194      block *lastfree = pool->freeblock;
2195      *(block **)p = lastfree;
2196      pool->freeblock = (block *)p;
2197      pool->ref.count--;
2198  
2199      if (UNLIKELY(lastfree == NULL)) {
2200          /* Pool was full, so doesn't currently live in any list:
2201           * link it to the front of the appropriate usedpools[] list.
2202           * This mimics LRU pool usage for new allocations and
2203           * targets optimal filling when several pools contain
2204           * blocks of the same size class.
2205           */
2206          insert_to_usedpool(pool);
2207          return 1;
2208      }
2209  
2210      /* freeblock wasn't NULL, so the pool wasn't full,
2211       * and the pool is in a usedpools[] list.
2212       */
2213      if (LIKELY(pool->ref.count != 0)) {
2214          /* pool isn't empty:  leave it in usedpools */
2215          return 1;
2216      }
2217  
2218      /* Pool is now empty:  unlink from usedpools, and
2219       * link to the front of freepools.  This ensures that
2220       * previously freed pools will be allocated later
2221       * (being not referenced, they are perhaps paged out).
2222       */
2223      insert_to_freepool(pool);
2224      return 1;
2225  }
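/* The free path above pushes the block onto an intrusive, NULL-terminated
 * singly linked list threaded through the freed blocks themselves.  A
 * minimal standalone sketch of that idiom (illustrative only):
 *
 *     static void *free_head = NULL;
 *
 *     static void
 *     freelist_push(void *p)
 *     {
 *         // The first pointer-sized bytes of the freed block hold the
 *         // previous head of the list.
 *         *(void **)p = free_head;
 *         free_head = p;
 *     }
 *
 *     static void *
 *     freelist_pop(void)
 *     {
 *         void *p = free_head;
 *         if (p != NULL) {
 *             free_head = *(void **)p;
 *         }
 *         return p;
 *     }
 */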
2226  
2227  
2228  static void
2229  _PyObject_Free(void *ctx, void *p)
2230  {
2231      /* PyObject_Free(NULL) has no effect */
2232      if (p == NULL) {
2233          return;
2234      }
2235  
2236      if (UNLIKELY(!pymalloc_free(ctx, p))) {
2237          /* pymalloc didn't allocate this address */
2238          PyMem_RawFree(p);
2239          raw_allocated_blocks--;
2240      }
2241  }
2242  
2243  
2244  /* pymalloc realloc.
2245  
2246     If nbytes==0, then as the Python docs promise, we do not treat this like
2247     free(p), and return a non-NULL result.
2248  
2249     Return 1 if pymalloc reallocated memory and wrote the new pointer into
2250     newptr_p.
2251  
2252     Return 0 if pymalloc didn't allocate p. */
2253  static int
2254  pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2255  {
2256      void *bp;
2257      poolp pool;
2258      size_t size;
2259  
2260      assert(p != NULL);
2261  
2262  #ifdef WITH_VALGRIND
2263      /* Treat running_on_valgrind == -1 the same as 0 */
2264      if (UNLIKELY(running_on_valgrind > 0)) {
2265          return 0;
2266      }
2267  #endif
2268  
2269      pool = POOL_ADDR(p);
2270      if (!address_in_range(p, pool)) {
2271          /* pymalloc is not managing this block.
2272  
2273             If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2274             over this block.  However, if we do, we need to copy the valid data
2275             from the C-managed block to one of our blocks, and there's no
2276             portable way to know how much of the memory space starting at p is
2277             valid.
2278  
2279             As bug 1185883 pointed out the hard way, it's possible that the
2280             C-managed block is "at the end" of allocated VM space, so that a
2281             memory fault can occur if we try to copy nbytes bytes starting at p.
2282             Instead we punt: let C continue to manage this block. */
2283          return 0;
2284      }
2285  
2286      /* pymalloc is in charge of this block */
2287      size = INDEX2SIZE(pool->szidx);
2288      if (nbytes <= size) {
2289          /* The block is staying the same or shrinking.
2290  
2291             If it's shrinking, there's a tradeoff: it costs cycles to copy the
2292             block to a smaller size class, but it wastes memory not to copy it.
2293  
2294             The compromise here is to copy on shrink only if at least 25% of
2295             size can be shaved off. */
2296          if (4 * nbytes > 3 * size) {
2297              /* It's the same, or shrinking and new/old > 3/4. */
2298              *newptr_p = p;
2299              return 1;
2300          }
2301          size = nbytes;
2302      }
2303  
2304      bp = _PyObject_Malloc(ctx, nbytes);
2305      if (bp != NULL) {
2306          memcpy(bp, p, size);
2307          _PyObject_Free(ctx, p);
2308      }
2309      *newptr_p = bp;
2310      return 1;
2311  }
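/* Worked example of the shrink rule above (illustrative): suppose p lives in
 * the 512-byte size class, so size == 512.  A realloc to nbytes == 400 keeps
 * the block in place, because 4*400 = 1600 > 3*512 = 1536 and less than 25%
 * would be saved by moving.  A realloc to nbytes == 300 copies, because
 * 4*300 = 1200 <= 1536, so at least 25% of the block can be returned to a
 * smaller size class.
 */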
2312  
2313  
2314  static void *
2315  _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2316  {
2317      void *ptr2;
2318  
2319      if (ptr == NULL) {
2320          return _PyObject_Malloc(ctx, nbytes);
2321      }
2322  
2323      if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2324          return ptr2;
2325      }
2326  
2327      return PyMem_RawRealloc(ptr, nbytes);
2328  }
2329  
2330  #else   /* ! WITH_PYMALLOC */
2331  
2332  /*==========================================================================*/
2333  /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2334   * only be used by extensions that are compiled with pymalloc enabled. */
2335  
2336  Py_ssize_t
2337  _Py_GetAllocatedBlocks(void)
2338  {
2339      return 0;
2340  }
2341  
2342  #endif /* WITH_PYMALLOC */
2343  
2344  
2345  /*==========================================================================*/
2346  /* A cross-platform debugging allocator.  This doesn't manage memory directly;
2347   * it wraps a real allocator, adding extra debugging info to the memory blocks.
2348   */
2349  
2350  /* Uncomment this define to add the "serialno" field */
2351  /* #define PYMEM_DEBUG_SERIALNO */
2352  
2353  #ifdef PYMEM_DEBUG_SERIALNO
2354  static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2355  
2356  /* serialno is always incremented via calling this routine.  The point is
2357   * to supply a single place to set a breakpoint.
2358   */
2359  static void
2360  bumpserialno(void)
2361  {
2362      ++serialno;
2363  }
2364  #endif
2365  
2366  #define SST SIZEOF_SIZE_T
2367  
2368  #ifdef PYMEM_DEBUG_SERIALNO
2369  #  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2370  #else
2371  #  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2372  #endif
2373  
2374  /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2375  static size_t
2376  read_size_t(const void *p)
2377  {
2378      const uint8_t *q = (const uint8_t *)p;
2379      size_t result = *q++;
2380      int i;
2381  
2382      for (i = SST; --i > 0; ++q)
2383          result = (result << 8) | *q;
2384      return result;
2385  }
2386  
2387  /* Write n as a big-endian size_t, MSB at address p, LSB at
2388   * p + sizeof(size_t) - 1.
2389   */
2390  static void
2391  write_size_t(void *p, size_t n)
2392  {
2393      uint8_t *q = (uint8_t *)p + SST - 1;
2394      int i;
2395  
2396      for (i = SST; --i >= 0; --q) {
2397          *q = (uint8_t)(n & 0xff);
2398          n >>= 8;
2399      }
2400  }
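/* Round-trip sketch for the helpers above (illustrative; assumes an 8-byte
 * size_t):
 *
 *     uint8_t buf[sizeof(size_t)];
 *     write_size_t(buf, (size_t)0x0102030405060708);
 *     // buf now holds {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}
 *     assert(read_size_t(buf) == (size_t)0x0102030405060708);
 *
 * The fixed big-endian layout keeps the stored size easy to read in a raw
 * memory dump regardless of the host byte order.
 */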
2401  
2402  /* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2403     fills them with useful stuff, here calling the underlying malloc's result p:
2404  
2405  p[0: S]
2406      Number of bytes originally asked for.  This is a size_t, big-endian (easier
2407      to read in a memory dump).
2408  p[S]
2409      API ID.  See PEP 445.  This is a character, but seems undocumented.
2410  p[S+1: 2*S]
2411      Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and reads.
2412  p[2*S: 2*S+n]
2413      The requested memory, filled with copies of PYMEM_CLEANBYTE.
2414      Used to catch reference to uninitialized memory.
2415      &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2416      handled the request itself.
2417  p[2*S+n: 2*S+n+S]
2418      Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and reads.
2419  p[2*S+n+S: 2*S+n+2*S]
2420      A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2421      and _PyMem_DebugRealloc.
2422      This is a big-endian size_t.
2423      If "bad memory" is detected later, the serial number gives an
2424      excellent way to set a breakpoint on the next run, to capture the
2425      instant at which this block was passed out.
2426  
2427  If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2428  for 3 * S extra bytes, and omits the last serialno field.
2429  */
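/* Concrete layout example (illustrative; assumes an 8-byte size_t, so S == 8,
 * and PYMEM_DEBUG_SERIALNO left undefined).  For a 10-byte request the debug
 * allocator asks the underlying allocator for 10 + 3*8 = 34 bytes:
 *
 *     p[0:8]     requested size (10), stored big-endian
 *     p[8]       API id character
 *     p[9:16]    7 forbidden bytes
 *     p[16:26]   the caller's 10 bytes (the returned pointer is p + 16)
 *     p[26:34]   8 forbidden bytes
 *
 * _PyMem_DebugCheckAddress() later verifies both forbidden-byte runs.
 */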
2430  
2431  static void *
2432  _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2433  {
2434      debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2435      uint8_t *p;           /* base address of malloc'ed pad block */
2436      uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2437      uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2438      size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2439  
2440      if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2441          /* integer overflow: can't represent total as a Py_ssize_t */
2442          return NULL;
2443      }
2444      total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2445  
2446      /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2447                  ^--- p    ^--- data   ^--- tail
2448         S: nbytes stored as size_t
2449         I: API identifier (1 byte)
2450         F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2451         C: Clean bytes used later to store actual data
2452         N: Serial number stored as size_t
2453  
2454         If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2455         is omitted. */
2456  
2457      if (use_calloc) {
2458          p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2459      }
2460      else {
2461          p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2462      }
2463      if (p == NULL) {
2464          return NULL;
2465      }
2466      data = p + 2*SST;
2467  
2468  #ifdef PYMEM_DEBUG_SERIALNO
2469      bumpserialno();
2470  #endif
2471  
2472      /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2473      write_size_t(p, nbytes);
2474      p[SST] = (uint8_t)api->api_id;
2475      memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2476  
2477      if (nbytes > 0 && !use_calloc) {
2478          memset(data, PYMEM_CLEANBYTE, nbytes);
2479      }
2480  
2481      /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2482      tail = data + nbytes;
2483      memset(tail, PYMEM_FORBIDDENBYTE, SST);
2484  #ifdef PYMEM_DEBUG_SERIALNO
2485      write_size_t(tail + SST, serialno);
2486  #endif
2487  
2488      return data;
2489  }
2490  
2491  static void *
2492  _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2493  {
2494      return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2495  }
2496  
2497  static void *
2498  _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2499  {
2500      size_t nbytes;
2501      assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2502      nbytes = nelem * elsize;
2503      return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2504  }
2505  
2506  
2507  /* The debug free first checks the 2*SST bytes on each end for sanity (in
2508     particular, that the FORBIDDENBYTEs with the api ID are still intact).
2509     Then fills the original bytes with PYMEM_DEADBYTE.
2510     Then calls the underlying free.
2511  */
2512  static void
2513  _PyMem_DebugRawFree(void *ctx, void *p)
2514  {
2515      /* PyMem_Free(NULL) has no effect */
2516      if (p == NULL) {
2517          return;
2518      }
2519  
2520      debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2521      uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2522      size_t nbytes;
2523  
2524      _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2525      nbytes = read_size_t(q);
2526      nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2527      memset(q, PYMEM_DEADBYTE, nbytes);
2528      api->alloc.free(api->alloc.ctx, q);
2529  }
2530  
2531  
2532  static void *
2533  _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2534  {
2535      if (p == NULL) {
2536          return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2537      }
2538  
2539      debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2540      uint8_t *head;        /* base address of malloc'ed pad block */
2541      uint8_t *data;        /* pointer to data bytes */
2542      uint8_t *r;
2543      uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2544      size_t total;         /* 2 * SST + nbytes + 2 * SST */
2545      size_t original_nbytes;
2546  #define ERASED_SIZE 64
2547      uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2548  
2549      _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2550  
2551      data = (uint8_t *)p;
2552      head = data - 2*SST;
2553      original_nbytes = read_size_t(head);
2554      if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2555          /* integer overflow: can't represent total as a Py_ssize_t */
2556          return NULL;
2557      }
2558      total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2559  
2560      tail = data + original_nbytes;
2561  #ifdef PYMEM_DEBUG_SERIALNO
2562      size_t block_serialno = read_size_t(tail + SST);
2563  #endif
2564      /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2565         ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2566       */
2567      if (original_nbytes <= sizeof(save)) {
2568          memcpy(save, data, original_nbytes);
2569          memset(data - 2 * SST, PYMEM_DEADBYTE,
2570                 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2571      }
2572      else {
2573          memcpy(save, data, ERASED_SIZE);
2574          memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2575          memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2576          memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2577                 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2578      }
2579  
2580      /* Resize and add decorations. */
2581      r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2582      if (r == NULL) {
2583          /* if realloc() failed: rewrite header and footer which have
2584             just been erased */
2585          nbytes = original_nbytes;
2586      }
2587      else {
2588          head = r;
2589  #ifdef PYMEM_DEBUG_SERIALNO
2590          bumpserialno();
2591          block_serialno = serialno;
2592  #endif
2593      }
2594      data = head + 2*SST;
2595  
2596      write_size_t(head, nbytes);
2597      head[SST] = (uint8_t)api->api_id;
2598      memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2599  
2600      tail = data + nbytes;
2601      memset(tail, PYMEM_FORBIDDENBYTE, SST);
2602  #ifdef PYMEM_DEBUG_SERIALNO
2603      write_size_t(tail + SST, block_serialno);
2604  #endif
2605  
2606      /* Restore saved bytes. */
2607      if (original_nbytes <= sizeof(save)) {
2608          memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2609      }
2610      else {
2611          size_t i = original_nbytes - ERASED_SIZE;
2612          memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2613          if (nbytes > i) {
2614              memcpy(data + i, &save[ERASED_SIZE],
2615                     Py_MIN(nbytes - i, ERASED_SIZE));
2616          }
2617      }
2618  
2619      if (r == NULL) {
2620          return NULL;
2621      }
2622  
2623      if (nbytes > original_nbytes) {
2624          /* growing: mark new extra memory clean */
2625          memset(data + original_nbytes, PYMEM_CLEANBYTE,
2626                 nbytes - original_nbytes);
2627      }
2628  
2629      return data;
2630  }
2631  
2632  static inline void
2633  _PyMem_DebugCheckGIL(const char *func)
2634  {
2635      if (!PyGILState_Check()) {
2636          _Py_FatalErrorFunc(func,
2637                             "Python memory allocator called "
2638                             "without holding the GIL");
2639      }
2640  }
2641  
2642  static void *
2643  _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2644  {
2645      _PyMem_DebugCheckGIL(__func__);
2646      return _PyMem_DebugRawMalloc(ctx, nbytes);
2647  }
2648  
2649  static void *
2650  _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2651  {
2652      _PyMem_DebugCheckGIL(__func__);
2653      return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2654  }
2655  
2656  
2657  static void
2658  _PyMem_DebugFree(void *ctx, void *ptr)
2659  {
2660      _PyMem_DebugCheckGIL(__func__);
2661      _PyMem_DebugRawFree(ctx, ptr);
2662  }
2663  
2664  
2665  static void *
2666  _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2667  {
2668      _PyMem_DebugCheckGIL(__func__);
2669      return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2670  }
2671  
2672  /* Check the forbidden bytes on both ends of the memory allocated for p.
2673   * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2674   * and call Py_FatalError to kill the program.
2675   * The API id is also checked.
2676   */
2677  static void
2678  _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2679  {
2680      assert(p != NULL);
2681  
2682      const uint8_t *q = (const uint8_t *)p;
2683      size_t nbytes;
2684      const uint8_t *tail;
2685      int i;
2686      char id;
2687  
2688      /* Check the API id */
2689      id = (char)q[-SST];
2690      if (id != api) {
2691          _PyObject_DebugDumpAddress(p);
2692          _Py_FatalErrorFormat(func,
2693                               "bad ID: Allocated using API '%c', "
2694                               "verified using API '%c'",
2695                               id, api);
2696      }
2697  
2698      /* Check the stuff at the start of p first:  if there's underwrite
2699       * corruption, the number-of-bytes field may be nuts, and checking
2700       * the tail could lead to a segfault then.
2701       */
2702      for (i = SST-1; i >= 1; --i) {
2703          if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2704              _PyObject_DebugDumpAddress(p);
2705              _Py_FatalErrorFunc(func, "bad leading pad byte");
2706          }
2707      }
2708  
2709      nbytes = read_size_t(q - 2*SST);
2710      tail = q + nbytes;
2711      for (i = 0; i < SST; ++i) {
2712          if (tail[i] != PYMEM_FORBIDDENBYTE) {
2713              _PyObject_DebugDumpAddress(p);
2714              _Py_FatalErrorFunc(func, "bad trailing pad byte");
2715          }
2716      }
2717  }
2718  
2719  /* Display info to stderr about the memory block at p. */
2720  static void
2721  _PyObject_DebugDumpAddress(const void *p)
2722  {
2723      const uint8_t *q = (const uint8_t *)p;
2724      const uint8_t *tail;
2725      size_t nbytes;
2726      int i;
2727      int ok;
2728      char id;
2729  
2730      fprintf(stderr, "Debug memory block at address p=%p:", p);
2731      if (p == NULL) {
2732          fprintf(stderr, "\n");
2733          return;
2734      }
2735      id = (char)q[-SST];
2736      fprintf(stderr, " API '%c'\n", id);
2737  
2738      nbytes = read_size_t(q - 2*SST);
2739      fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
2740  
2741      /* In case this is nuts, check the leading pad bytes first. */
2742      fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2743      ok = 1;
2744      for (i = 1; i <= SST-1; ++i) {
2745          if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2746              ok = 0;
2747              break;
2748          }
2749      }
2750      if (ok)
2751          fputs("FORBIDDENBYTE, as expected.\n", stderr);
2752      else {
2753          fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2754              PYMEM_FORBIDDENBYTE);
2755          for (i = SST-1; i >= 1; --i) {
2756              const uint8_t byte = *(q-i);
2757              fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2758              if (byte != PYMEM_FORBIDDENBYTE)
2759                  fputs(" *** OUCH", stderr);
2760              fputc('\n', stderr);
2761          }
2762  
2763          fputs("    Because memory is corrupted at the start, the "
2764                "count of bytes requested\n"
2765                "       may be bogus, and checking the trailing pad "
2766                "bytes may segfault.\n", stderr);
2767      }
2768  
2769      tail = q + nbytes;
2770      fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2771      ok = 1;
2772      for (i = 0; i < SST; ++i) {
2773          if (tail[i] != PYMEM_FORBIDDENBYTE) {
2774              ok = 0;
2775              break;
2776          }
2777      }
2778      if (ok)
2779          fputs("FORBIDDENBYTE, as expected.\n", stderr);
2780      else {
2781          fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2782                  PYMEM_FORBIDDENBYTE);
2783          for (i = 0; i < SST; ++i) {
2784              const uint8_t byte = tail[i];
2785              fprintf(stderr, "        at tail+%d: 0x%02x",
2786                      i, byte);
2787              if (byte != PYMEM_FORBIDDENBYTE)
2788                  fputs(" *** OUCH", stderr);
2789              fputc('\n', stderr);
2790          }
2791      }
2792  
2793  #ifdef PYMEM_DEBUG_SERIALNO
2794      size_t serial = read_size_t(tail + SST);
2795      fprintf(stderr,
2796              "    The block was made by call #%zu to debug malloc/realloc.\n",
2797              serial);
2798  #endif
2799  
2800      if (nbytes > 0) {
2801          i = 0;
2802          fputs("    Data at p:", stderr);
2803          /* print up to 8 bytes at the start */
2804          while (q < tail && i < 8) {
2805              fprintf(stderr, " %02x", *q);
2806              ++i;
2807              ++q;
2808          }
2809          /* and up to 8 at the end */
2810          if (q < tail) {
2811              if (tail - q > 8) {
2812                  fputs(" ...", stderr);
2813                  q = tail - 8;
2814              }
2815              while (q < tail) {
2816                  fprintf(stderr, " %02x", *q);
2817                  ++q;
2818              }
2819          }
2820          fputc('\n', stderr);
2821      }
2822      fputc('\n', stderr);
2823  
2824      fflush(stderr);
2825      _PyMem_DumpTraceback(fileno(stderr), p);
2826  }
2827  
2828  
2829  static size_t
2830  printone(FILE *out, const char* msg, size_t value)
2831  {
2832      int i, k;
2833      char buf[100];
2834      size_t origvalue = value;
2835  
2836      fputs(msg, out);
2837      for (i = (int)strlen(msg); i < 35; ++i)
2838          fputc(' ', out);
2839      fputc('=', out);
2840  
2841      /* Write the value with commas. */
2842      i = 22;
2843      buf[i--] = '\0';
2844      buf[i--] = '\n';
2845      k = 3;
2846      do {
2847          size_t nextvalue = value / 10;
2848          unsigned int digit = (unsigned int)(value - nextvalue * 10);
2849          value = nextvalue;
2850          buf[i--] = (char)(digit + '0');
2851          --k;
2852          if (k == 0 && value && i >= 0) {
2853              k = 3;
2854              buf[i--] = ',';
2855          }
2856      } while (value && i >= 0);
2857  
2858      while (i >= 0)
2859          buf[i--] = ' ';
2860      fputs(buf, out);
2861  
2862      return origvalue;
2863  }
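/* Usage sketch for printone() (illustrative): a call such as
 *
 *     printone(out, "# arenas allocated total", 1234567);
 *
 * pads the label with spaces out to column 35, prints '=', and then the
 * value right-aligned with thousands separators, i.e. "1,234,567".
 */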
2864  
2865  void
2866  _PyDebugAllocatorStats(FILE *out,
2867                         const char *block_name, int num_blocks, size_t sizeof_block)
2868  {
2869      char buf1[128];
2870      char buf2[128];
2871      PyOS_snprintf(buf1, sizeof(buf1),
2872                    "%d %ss * %zd bytes each",
2873                    num_blocks, block_name, sizeof_block);
2874      PyOS_snprintf(buf2, sizeof(buf2),
2875                    "%48s ", buf1);
2876      (void)printone(out, buf2, num_blocks * sizeof_block);
2877  }
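/* Usage sketch (illustrative): object implementations report their free
 * lists through this helper, along the lines of
 *
 *     _PyDebugAllocatorStats(out, "free PyDictObject", numfree,
 *                            sizeof(PyDictObject));
 *
 * which prints the count, the pluralized block name and the per-block size,
 * with the total byte count formatted by printone().
 */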
2878  
2879  
2880  #ifdef WITH_PYMALLOC
2881  
2882  #ifdef Py_DEBUG
2883  /* Is target in the list?  The list is traversed via the nextpool pointers.
2884   * The list may be NULL-terminated, or circular.  Return 1 if target is in
2885   * list, else 0.
2886   */
2887  static int
2888  pool_is_in_list(const poolp target, poolp list)
2889  {
2890      poolp origlist = list;
2891      assert(target != NULL);
2892      if (list == NULL)
2893          return 0;
2894      do {
2895          if (target == list)
2896              return 1;
2897          list = list->nextpool;
2898      } while (list != NULL && list != origlist);
2899      return 0;
2900  }
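
/* Worked example: for a circular list A -> B -> C -> A, pool_is_in_list(B, A)
 * visits A, then B, and returns 1.  For a target that is not linked in, the
 * walk stops once it wraps back around to the head (origlist), or hits NULL
 * for a NULL-terminated list, and 0 is returned. */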
2901  #endif
2902  
2903  /* Print summary info to "out" about the state of pymalloc's structures.
2904   * In Py_DEBUG mode, also perform some expensive internal consistency
2905   * checks.
2906   *
2907   * Return 0 if the pymalloc allocator is not in use (in which case no
2908   * statistics are written to out); return 1 otherwise.
2909   */
2910  int
2911  _PyObject_DebugMallocStats(FILE *out)
2912  {
2913      if (!_PyMem_PymallocEnabled()) {
2914          return 0;
2915      }
2916  
2917      uint i;
2918      const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
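    /* For example, with SMALL_REQUEST_THRESHOLD == 512 and a 16-byte
     * ALIGNMENT (ALIGNMENT_SHIFT == 4) this gives 32 size classes; the
     * exact values are build-dependent. */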
2919      /* # of pools, allocated blocks, and free blocks per class index */
2920      size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2921      size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2922      size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2923      /* total # of allocated bytes in used and full pools */
2924      size_t allocated_bytes = 0;
2925      /* total # of available bytes in used pools */
2926      size_t available_bytes = 0;
2927      /* # of free pools + pools not yet carved out of current arena */
2928      uint numfreepools = 0;
2929      /* # of bytes for arena alignment padding */
2930      size_t arena_alignment = 0;
2931      /* # of bytes in used and full pools used for pool_headers */
2932      size_t pool_header_bytes = 0;
2933      /* # of bytes in used and full pools wasted due to quantization,
2934       * i.e. the necessarily leftover space at the ends of used and
2935       * full pools.
2936       */
2937      size_t quantization = 0;
2938      /* # of arenas actually allocated. */
2939      size_t narenas = 0;
2940      /* running total -- should equal narenas * ARENA_SIZE */
2941      size_t total;
2942      char buf[128];
2943  
2944      fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2945              SMALL_REQUEST_THRESHOLD, numclasses);
2946  
2947      for (i = 0; i < numclasses; ++i)
2948          numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2949  
2950      /* Because full pools aren't linked to from anything, it's easiest
2951       * to march over all the arenas.  If we're lucky, most of the memory
2952       * will be living in full pools -- would be a shame to miss them.
2953       */
2954      for (i = 0; i < maxarenas; ++i) {
2955          uint j;
2956          uintptr_t base = arenas[i].address;
2957  
2958          /* Skip arenas which are not allocated. */
2959          if (arenas[i].address == (uintptr_t)NULL)
2960              continue;
2961          narenas += 1;
2962  
2963          numfreepools += arenas[i].nfreepools;
2964  
2965          /* round up to pool alignment */
2966          if (base & (uintptr_t)POOL_SIZE_MASK) {
2967              arena_alignment += POOL_SIZE;
2968              base &= ~(uintptr_t)POOL_SIZE_MASK;
2969              base += POOL_SIZE;
2970          }
2971  
2972          /* visit every pool in the arena */
2973          assert(base <= (uintptr_t) arenas[i].pool_address);
2974          for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2975               ++j, base += POOL_SIZE) {
2976              poolp p = (poolp)base;
2977              const uint sz = p->szidx;
2978              uint freeblocks;
2979  
2980              if (p->ref.count == 0) {
2981                  /* currently unused */
2982  #ifdef Py_DEBUG
2983                  assert(pool_is_in_list(p, arenas[i].freepools));
2984  #endif
2985                  continue;
2986              }
2987              ++numpools[sz];
2988              numblocks[sz] += p->ref.count;
2989              freeblocks = NUMBLOCKS(sz) - p->ref.count;
2990              numfreeblocks[sz] += freeblocks;
2991  #ifdef Py_DEBUG
2992              if (freeblocks > 0)
2993                  assert(pool_is_in_list(p, usedpools[sz + sz]));
2994  #endif
2995          }
2996      }
2997      assert(narenas == narenas_currently_allocated);
2998  
2999      fputc('\n', out);
3000      fputs("class   size   num pools   blocks in use  avail blocks\n"
3001            "-----   ----   ---------   -------------  ------------\n",
3002            out);
3003  
3004      for (i = 0; i < numclasses; ++i) {
3005          size_t p = numpools[i];
3006          size_t b = numblocks[i];
3007          size_t f = numfreeblocks[i];
3008          uint size = INDEX2SIZE(i);
3009          if (p == 0) {
3010              assert(b == 0 && f == 0);
3011              continue;
3012          }
3013          fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3014                  i, size, p, b, f);
3015          allocated_bytes += b * size;
3016          available_bytes += f * size;
3017          pool_header_bytes += p * POOL_OVERHEAD;
3018          quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3019      }
3020      fputc('\n', out);
3021  #ifdef PYMEM_DEBUG_SERIALNO
3022      if (_PyMem_DebugEnabled()) {
3023          (void)printone(out, "# times object malloc called", serialno);
3024      }
3025  #endif
3026      (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3027      (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3028      (void)printone(out, "# arenas highwater mark", narenas_highwater);
3029      (void)printone(out, "# arenas allocated current", narenas);
3030  
3031      PyOS_snprintf(buf, sizeof(buf),
3032                    "%zu arenas * %d bytes/arena",
3033                    narenas, ARENA_SIZE);
3034      (void)printone(out, buf, narenas * ARENA_SIZE);
3035  
3036      fputc('\n', out);
3037  
3038      /* Account for what all of those arena bytes are being used for. */
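    /* The six categories printed below partition the arena bytes, so their
     * sum should equal narenas * ARENA_SIZE (verified by the assert after
     * the "Total" line): blocks in use, available blocks, unused pools,
     * pool headers, quantization waste, and arena-alignment padding. */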
3039      total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3040      total += printone(out, "# bytes in available blocks", available_bytes);
3041  
3042      PyOS_snprintf(buf, sizeof(buf),
3043          "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3044      total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3045  
3046      total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3047      total += printone(out, "# bytes lost to quantization", quantization);
3048      total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3049      (void)printone(out, "Total", total);
3050      assert(narenas * ARENA_SIZE == total);
3051  
3052  #if WITH_PYMALLOC_RADIX_TREE
3053      fputs("\narena map counts\n", out);
3054  #ifdef USE_INTERIOR_NODES
3055      (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3056      (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3057      fputc('\n', out);
3058  #endif
3059      total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3060  #ifdef USE_INTERIOR_NODES
3061      total += printone(out, "# bytes lost to arena map mid",
3062                        sizeof(arena_map_mid_t) * arena_map_mid_count);
3063      total += printone(out, "# bytes lost to arena map bot",
3064                        sizeof(arena_map_bot_t) * arena_map_bot_count);
3065      (void)printone(out, "Total", total);
3066  #endif
3067  #endif
3068  
3069      return 1;
3070  }
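
/* Illustrative ways to reach this report (a sketch, not a claim about every
 * entry point): from C, call _PyObject_DebugMallocStats(stderr) once the
 * interpreter is initialized; from Python, sys._debugmallocstats() should
 * write a similar report to stderr. */
#if 0
/* Minimal standalone sketch, assuming the declaration of
 * _PyObject_DebugMallocStats() is visible through Python.h in this build
 * (availability depends on WITH_PYMALLOC). */
#include <Python.h>
#include <stdio.h>

int
main(void)
{
    Py_Initialize();
    /* Returns 0 when pymalloc is not in use, 1 after writing the report. */
    (void)_PyObject_DebugMallocStats(stderr);
    (void)Py_FinalizeEx();
    return 0;
}
#endif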
3071  
3072  #endif /* #ifdef WITH_PYMALLOC */
3073